diff options
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch')
211 files changed, 3901 insertions, 1383 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 34542155aa..9f387248aa 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -1,12 +1,54 @@ ifeq ($(subdir),math) libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \ - s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c + s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c \ + s_trunc-c s_truncf-c + +libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \ + s_floorf-sse4_1 s_nearbyint-sse4_1 \ + s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 \ + s_trunc-sse4_1 s_truncf-sse4_1 + +libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \ + e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \ + mpa-fma \ + sincos32-fma doasin-fma dosincos-fma \ + mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma + +CFLAGS-doasin-fma.c = -mfma -mavx2 +CFLAGS-dosincos-fma.c = -mfma -mavx2 +CFLAGS-e_asin-fma.c = -mfma -mavx2 +CFLAGS-e_atan2-fma.c = -mfma -mavx2 +CFLAGS-e_exp-fma.c = -mfma -mavx2 +CFLAGS-e_log-fma.c = -mfma -mavx2 +CFLAGS-e_pow-fma.c = -mfma -mavx2 $(config-cflags-nofma) +CFLAGS-mpa-fma.c = -mfma -mavx2 +CFLAGS-mpatan-fma.c = -mfma -mavx2 +CFLAGS-mpatan2-fma.c = -mfma -mavx2 +CFLAGS-mpsqrt-fma.c = -mfma -mavx2 +CFLAGS-mptan-fma.c = -mfma -mavx2 +CFLAGS-s_atan-fma.c = -mfma -mavx2 +CFLAGS-sincos32-fma.c = -mfma -mavx2 +CFLAGS-s_sin-fma.c = -mfma -mavx2 +CFLAGS-s_tan-fma.c = -mfma -mavx2 + +libm-sysdep_routines += s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2 + +libm-sysdep_routines += e_exp2f-fma e_expf-fma e_log2f-fma e_logf-fma \ + e_powf-fma s_sinf-fma s_cosf-fma s_sincosf-fma + +CFLAGS-e_exp2f-fma.c = -mfma -mavx2 +CFLAGS-e_expf-fma.c = -mfma -mavx2 +CFLAGS-e_log2f-fma.c = -mfma -mavx2 +CFLAGS-e_logf-fma.c = -mfma -mavx2 +CFLAGS-e_powf-fma.c = -mfma -mavx2 +CFLAGS-s_sinf-fma.c = -mfma -mavx2 +CFLAGS-s_cosf-fma.c = -mfma -mavx2 +CFLAGS-s_sincosf-fma.c = -mfma -mavx2 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 
s_atan-fma4 \ e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \ - mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \ + mpa-fma4 \ sincos32-fma4 doasin-fma4 dosincos-fma4 \ - halfulp-fma4 mpexp-fma4 \ mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4 CFLAGS-doasin-fma4.c = -mfma4 @@ -16,35 +58,26 @@ CFLAGS-e_atan2-fma4.c = -mfma4 CFLAGS-e_exp-fma4.c = -mfma4 CFLAGS-e_log-fma4.c = -mfma4 CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma) -CFLAGS-halfulp-fma4.c = -mfma4 CFLAGS-mpa-fma4.c = -mfma4 CFLAGS-mpatan-fma4.c = -mfma4 CFLAGS-mpatan2-fma4.c = -mfma4 -CFLAGS-mpexp-fma4.c = -mfma4 -CFLAGS-mplog-fma4.c = -mfma4 CFLAGS-mpsqrt-fma4.c = -mfma4 CFLAGS-mptan-fma4.c = -mfma4 CFLAGS-s_atan-fma4.c = -mfma4 CFLAGS-sincos32-fma4.c = -mfma4 -CFLAGS-slowexp-fma4.c = -mfma4 -CFLAGS-slowpow-fma4.c = -mfma4 CFLAGS-s_sin-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4 libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \ e_atan2-avx s_sin-avx s_tan-avx \ - mplog-avx mpa-avx slowexp-avx \ - mpexp-avx + mpa-avx CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX -CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX -CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX -CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX endif @@ -66,5 +99,35 @@ libmvec-sysdep_routines += svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 \ svml_d_pow4_core_avx2 svml_d_pow8_core_avx512 \ svml_s_powf4_core_sse4 svml_s_powf8_core_avx2 \ svml_s_powf16_core_avx512 svml_s_sincosf4_core_sse4 \ - svml_s_sincosf8_core_avx2 svml_s_sincosf16_core_avx512 + svml_s_sincosf8_core_avx2 \ + svml_s_sincosf16_core_avx512 \ + svml_d_cos2_core-sse2 svml_d_cos4_core-sse \ + svml_d_cos8_core-avx2 svml_d_exp2_core-sse2 \ + svml_d_exp4_core-sse svml_d_exp8_core-avx2 \ + svml_d_log2_core-sse2 svml_d_log4_core-sse \ + 
svml_d_log8_core-avx2 svml_d_pow2_core-sse2 \ + svml_d_pow4_core-sse svml_d_pow8_core-avx2 \ + svml_d_sin2_core-sse2 svml_d_sin4_core-sse \ + svml_d_sin8_core-avx2 \ + svml_d_sincos2_core-sse2 \ + svml_d_sincos4_core-sse \ + svml_d_sincos8_core-avx2 \ + svml_s_cosf16_core-avx2 \ + svml_s_cosf4_core-sse2 \ + svml_s_cosf8_core-sse \ + svml_s_expf16_core-avx2 \ + svml_s_expf4_core-sse2 \ + svml_s_expf8_core-sse \ + svml_s_logf16_core-avx2 \ + svml_s_logf4_core-sse2 \ + svml_s_logf8_core-sse \ + svml_s_powf16_core-avx2 \ + svml_s_powf4_core-sse2 \ + svml_s_powf8_core-sse \ + svml_s_sincosf16_core-avx2 \ + svml_s_sincosf4_core-sse2 \ + svml_s_sincosf8_core-sse \ + svml_s_sinf16_core-avx2 \ + svml_s_sinf4_core-sse2 \ + svml_s_sinf8_core-sse endif diff --git a/sysdeps/x86_64/fpu/multiarch/doasin-fma.c b/sysdeps/x86_64/fpu/multiarch/doasin-fma.c new file mode 100644 index 0000000000..7a09865fca --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/doasin-fma.c @@ -0,0 +1,4 @@ +#define __doasin __doasin_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/doasin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c b/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c new file mode 100644 index 0000000000..5744586bdb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c @@ -0,0 +1,6 @@ +#define __docos __docos_fma +#define __dubcos __dubcos_fma +#define __dubsin __dubsin_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/dosincos.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c b/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c new file mode 100644 index 0000000000..50e9c64247 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c @@ -0,0 +1,11 @@ +#define __ieee754_acos __ieee754_acos_fma +#define __ieee754_asin __ieee754_asin_fma +#define __cos32 __cos32_fma +#define __doasin __doasin_fma +#define __docos __docos_fma +#define __dubcos __dubcos_fma +#define __dubsin 
__dubsin_fma +#define __sin32 __sin32_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_asin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c index 111a5b99bd..8d47004e4f 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_asin.c +++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c @@ -1,26 +1,40 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> - -extern double __ieee754_acos_sse2 (double); -extern double __ieee754_asin_sse2 (double); -extern double __ieee754_acos_fma4 (double); -extern double __ieee754_asin_fma4 (double); - -libm_ifunc (__ieee754_acos, - HAS_ARCH_FEATURE (FMA4_Usable) - ? __ieee754_acos_fma4 - : __ieee754_acos_sse2); -strong_alias (__ieee754_acos, __acos_finite) +/* Multiple versions of IEEE 754 asin and acos. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +extern double __redirect_ieee754_asin (double); +extern double __redirect_ieee754_acos (double); + +#define SYMBOL_NAME ieee754_asin +#include "ifunc-fma4.h" -libm_ifunc (__ieee754_asin, - HAS_ARCH_FEATURE (FMA4_Usable) - ? 
__ieee754_asin_fma4 - : __ieee754_asin_sse2); +libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin, + IFUNC_SELECTOR ()); strong_alias (__ieee754_asin, __asin_finite) -#define __ieee754_acos __ieee754_acos_sse2 -#define __ieee754_asin __ieee754_asin_sse2 +#undef SYMBOL_NAME +#define SYMBOL_NAME ieee754_acos +#include "ifunc-fma4.h" + +libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos, + IFUNC_SELECTOR ()); +strong_alias (__ieee754_acos, __acos_finite) +#define __ieee754_acos __ieee754_acos_sse2 +#define __ieee754_asin __ieee754_asin_sse2 #include <sysdeps/ieee754/dbl-64/e_asin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c new file mode 100644 index 0000000000..caba686496 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c @@ -0,0 +1,10 @@ +#define __ieee754_atan2 __ieee754_atan2_fma +#define __add __add_fma +#define __dbl_mp __dbl_mp_fma +#define __dvd __dvd_fma +#define __mpatan2 __mpatan2_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_atan2.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c index 9ca3c02a44..6c2dd5af37 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c @@ -1,18 +1,29 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> +/* Multiple versions of IEEE 754 atan2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
-extern double __ieee754_atan2_sse2 (double, double); -extern double __ieee754_atan2_avx (double, double); -extern double __ieee754_atan2_fma4 (double, double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (__ieee754_atan2, - HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_atan2_fma4 - : (HAS_ARCH_FEATURE (AVX_Usable) - ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); -strong_alias (__ieee754_atan2, __atan2_finite) + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define __ieee754_atan2 __ieee754_atan2_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern double __redirect_ieee754_atan2 (double, double); +#define SYMBOL_NAME ieee754_atan2 +#include "ifunc-avx-fma4.h" +libc_ifunc_redirected (__redirect_ieee754_atan2, + __ieee754_atan2, IFUNC_SELECTOR ()); +strong_alias (__ieee754_atan2, __atan2_finite) + +#define __ieee754_atan2 __ieee754_atan2_sse2 #include <sysdeps/ieee754/dbl-64/e_atan2.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c index ee5dd6d2dc..afd917442a 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c @@ -1,6 +1,5 @@ #define __ieee754_exp __ieee754_exp_avx #define __exp1 __exp1_avx -#define __slowexp __slowexp_avx #define SECTION __attribute__ ((section (".text.avx"))) #include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c new file mode 100644 index 0000000000..765b1b9dd3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c @@ -0,0 +1,5 @@ +#define __ieee754_exp __ieee754_exp_fma +#define __exp1 __exp1_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c index ae6eb67603..9ac7acad28 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c @@ -1,6 +1,5 @@ #define __ieee754_exp __ieee754_exp_fma4 #define __exp1 __exp1_fma4 -#define __slowexp __slowexp_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c index b7d7b5ff27..7cd7d1729c 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c @@ -1,18 +1,29 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> +/* Multiple versions of IEEE 754 exp. 
+ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -extern double __ieee754_exp_sse2 (double); -extern double __ieee754_exp_avx (double); -extern double __ieee754_exp_fma4 (double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (__ieee754_exp, - HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_exp_fma4 - : (HAS_ARCH_FEATURE (AVX_Usable) - ? __ieee754_exp_avx : __ieee754_exp_sse2)); -strong_alias (__ieee754_exp, __exp_finite) + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define __ieee754_exp __ieee754_exp_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern double __redirect_ieee754_exp (double); +#define SYMBOL_NAME ieee754_exp +#include "ifunc-avx-fma4.h" +libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp, + IFUNC_SELECTOR ()); +strong_alias (__ieee754_exp, __exp_finite) + +#define __ieee754_exp __ieee754_exp_sse2 #include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c new file mode 100644 index 0000000000..c915a50794 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c @@ -0,0 +1,3 @@ +#define __exp2f __exp2f_fma + +#include <sysdeps/ieee754/flt-32/e_exp2f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c new file mode 100644 index 0000000000..e3a0706839 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c @@ -0,0 +1,40 @@ +/* Multiple versions of exp2f. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_exp2f (float); + +#define SYMBOL_NAME exp2f +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ()); + +#ifdef SHARED +# include <shlib-compat.h> +versioned_symbol (libm, __exp2f, exp2f, GLIBC_2_27); +libm_alias_float_other (__exp2, exp2) +#else +libm_alias_float (__exp2, exp2) +#endif + +strong_alias (__exp2f, __ieee754_exp2f) +strong_alias (__exp2f, __exp2f_finite) + +#define __exp2f __exp2f_sse2 +#include <sysdeps/ieee754/flt-32/e_exp2f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c new file mode 100644 index 0000000000..4e01cd6a82 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c @@ -0,0 +1,3 @@ +#define __expf __expf_fma + +#include <sysdeps/ieee754/flt-32/e_expf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c new file mode 100644 index 0000000000..2b7c7ccbd0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c @@ -0,0 +1,43 @@ +/* Multiple versions of expf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_expf (float); + +#define SYMBOL_NAME expf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__expf, __GI___expf, __redirect_expf) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __expf, expf, GLIBC_2_27); +libm_alias_float_other (__exp, exp) +#else +libm_alias_float (__exp, exp) +#endif + +strong_alias (__expf, __ieee754_expf) +strong_alias (__expf, __expf_finite) + +#define __expf __expf_sse2 +#include <sysdeps/ieee754/flt-32/e_expf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-avx.c b/sysdeps/x86_64/fpu/multiarch/e_log-avx.c index c669019bc2..b22a5767be 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log-avx.c @@ -1,8 +1,4 @@ #define __ieee754_log __ieee754_log_avx -#define __mplog __mplog_avx -#define __add __add_avx -#define __dbl_mp __dbl_mp_avx -#define __sub __sub_avx #define SECTION __attribute__ ((section (".text.avx"))) #include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma.c new file mode 100644 index 0000000000..bce0ee03c2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma.c @@ -0,0 +1,4 @@ +#define __ieee754_log __ieee754_log_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c index a2346cc618..f458f9c23c 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c @@ -1,8 +1,4 @@ #define __ieee754_log __ieee754_log_fma4 -#define __mplog __mplog_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __sub __sub_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include <sysdeps/ieee754/dbl-64/e_log.c> diff 
--git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c index cf9533d6c0..e0a1b02fae 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c @@ -1,18 +1,29 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> +/* Multiple versions of IEEE 754 log. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -extern double __ieee754_log_sse2 (double); -extern double __ieee754_log_avx (double); -extern double __ieee754_log_fma4 (double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (__ieee754_log, - HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_log_fma4 - : (HAS_ARCH_FEATURE (AVX_Usable) - ? __ieee754_log_avx : __ieee754_log_sse2)); -strong_alias (__ieee754_log, __log_finite) + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define __ieee754_log __ieee754_log_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern double __redirect_ieee754_log (double); +#define SYMBOL_NAME ieee754_log +#include "ifunc-avx-fma4.h" +libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log, + IFUNC_SELECTOR ()); +strong_alias (__ieee754_log, __log_finite) + +#define __ieee754_log __ieee754_log_sse2 #include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c b/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c new file mode 100644 index 0000000000..8a76b836fb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c @@ -0,0 +1,3 @@ +#define __log2f __log2f_fma + +#include <sysdeps/ieee754/flt-32/e_log2f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c new file mode 100644 index 0000000000..12d0c30dd3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c @@ -0,0 +1,43 @@ +/* Multiple versions of log2f. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_log2f (float); + +#define SYMBOL_NAME log2f +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __log2f, log2f, GLIBC_2_27); +libm_alias_float_other (__log2, log2) +#else +libm_alias_float (__log2, log2) +#endif + +strong_alias (__log2f, __ieee754_log2f) +strong_alias (__log2f, __log2f_finite) + +#define __log2f __log2f_sse2 +#include <sysdeps/ieee754/flt-32/e_log2f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c new file mode 100644 index 0000000000..a47fd8195f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c @@ -0,0 +1,3 @@ +#define __logf __logf_fma + +#include <sysdeps/ieee754/flt-32/e_logf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c new file mode 100644 index 0000000000..224d40a1e4 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c @@ -0,0 +1,43 @@ +/* Multiple versions of logf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_logf (float); + +#define SYMBOL_NAME logf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__logf, __GI___logf, __redirect_logf) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __logf, logf, GLIBC_2_27); +libm_alias_float_other (__log, log) +#else +libm_alias_float (__log, log) +#endif + +strong_alias (__logf, __ieee754_logf) +strong_alias (__logf, __logf_finite) + +#define __logf __logf_sse2 +#include <sysdeps/ieee754/flt-32/e_logf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c new file mode 100644 index 0000000000..73c1e7fb89 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c @@ -0,0 +1,5 @@ +#define __ieee754_pow __ieee754_pow_fma +#define __exp1 __exp1_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c index 5b3ea8e103..8971b655ca 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c @@ -1,6 +1,5 @@ #define __ieee754_pow __ieee754_pow_fma4 #define __exp1 __exp1_fma4 -#define __slowpow __slowpow_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c index a5c5d89c3e..084073c936 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c @@ -1,17 +1,29 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> +/* Multiple versions of IEEE 754 pow. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
-extern double __ieee754_pow_sse2 (double, double); -extern double __ieee754_pow_fma4 (double, double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (__ieee754_pow, - HAS_ARCH_FEATURE (FMA4_Usable) - ? __ieee754_pow_fma4 - : __ieee754_pow_sse2); -strong_alias (__ieee754_pow, __pow_finite) + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define __ieee754_pow __ieee754_pow_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +extern double __redirect_ieee754_pow (double, double); +#define SYMBOL_NAME ieee754_pow +#include "ifunc-fma4.h" +libc_ifunc_redirected (__redirect_ieee754_pow, + __ieee754_pow, IFUNC_SELECTOR ()); +strong_alias (__ieee754_pow, __pow_finite) + +#define __ieee754_pow __ieee754_pow_sse2 #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c new file mode 100644 index 0000000000..fdf5dcc56a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c @@ -0,0 +1,3 @@ +#define __powf __powf_fma + +#include <sysdeps/ieee754/flt-32/e_powf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c new file mode 100644 index 0000000000..a185006f40 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c @@ -0,0 +1,46 @@ +/* Multiple versions of powf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +#define powf __redirect_powf +#define __DECL_SIMD___redirect_powf +#include <math.h> +#undef powf + +#define SYMBOL_NAME powf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__powf, __GI___powf, __redirect_powf) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __powf, powf, GLIBC_2_27); +libm_alias_float_other (__pow, pow) +#else +libm_alias_float (__pow, pow) +#endif + +strong_alias (__powf, __ieee754_powf) +strong_alias (__powf, __powf_finite) + +#define __powf __powf_sse2 +#include <sysdeps/ieee754/flt-32/e_powf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c deleted file mode 100644 index a00c17c016..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c +++ /dev/null @@ -1,4 +0,0 @@ -#define __halfulp __halfulp_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/halfulp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h new file mode 100644 index 0000000000..a5f9375afc --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h @@ -0,0 +1,43 @@ +/* Common 
definition for ifunc selections optimized with AVX, AVX2/FMA + and FMA4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + return OPTIMIZE (fma); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable)) + return OPTIMIZE (fma4); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable)) + return OPTIMIZE (avx); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h new file mode 100644 index 0000000000..63a8cd221f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h @@ -0,0 +1,34 @@ +/* Common definition for ifunc selections optimized with AVX2/FMA. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + return OPTIMIZE (fma); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h new file mode 100644 index 0000000000..a2526a2ee0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h @@ -0,0 +1,39 @@ +/* Common definition for ifunc selections optimized with AVX2/FMA and + FMA4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + return OPTIMIZE (fma); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable)) + return OPTIMIZE (fma4); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h new file mode 100644 index 0000000000..bd2d32e418 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h @@ -0,0 +1,39 @@ +/* Common definition for libmathvec ifunc selections optimized with + AVX2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +#undef PASTER2 +#define PASTER2(x,y) x##_##y + +extern void REDIRECT_NAME (void); +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse_wrapper) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + return OPTIMIZE (avx2); + + return OPTIMIZE (sse_wrapper); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h new file mode 100644 index 0000000000..174e462cfb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h @@ -0,0 +1,45 @@ +/* Common definition for libmathvec ifunc selections optimized with + AVX512. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +#undef PASTER2 +#define PASTER2(x,y) x##_##y + +extern void REDIRECT_NAME (void); +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_wrapper) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (knl) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (skx) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (!CPU_FEATURES_ARCH_P (cpu_features, MathVec_Prefer_No_AVX512)) + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512DQ_Usable)) + return OPTIMIZE (skx); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)) + return OPTIMIZE (knl); + } + + return OPTIMIZE (avx2_wrapper); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h new file mode 100644 index 0000000000..c1e70ebfc1 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h @@ -0,0 +1,38 @@ +/* Common definition for libmathvec ifunc selections optimized with + SSE4.1. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +#undef PASTER2 +#define PASTER2(x,y) x##_##y + +extern void REDIRECT_NAME (void); +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse4); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h new file mode 100644 index 0000000000..a8710ba802 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h @@ -0,0 +1,33 @@ +/* Common definition for ifunc selections optimized with SSE4.1. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (c) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse41); + + return OPTIMIZE (c); +} diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma.c new file mode 100644 index 0000000000..177cc2517f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpa-fma.c @@ -0,0 +1,14 @@ +#define __add __add_fma +#define __mul __mul_fma +#define __sqr __sqr_fma +#define __sub __sub_fma +#define __dbl_mp __dbl_mp_fma +#define __dvd __dvd_fma + +#define NO___CPY 1 +#define NO___MP_DBL 1 +#define NO___ACR 1 +#define NO__CONST 1 +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mpa.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c b/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c new file mode 100644 index 0000000000..d216f9142d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c @@ -0,0 +1,10 @@ +#define __mpatan __mpatan_fma +#define __add __add_fma +#define __dvd __dvd_fma +#define __mpsqrt __mpsqrt_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define AVOID_MPATAN_H 1 +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mpatan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c new file mode 100644 index 0000000000..98df336f79 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c @@ -0,0 +1,9 @@ +#define __mpatan2 __mpatan2_fma +#define __add __add_fma +#define __dvd __dvd_fma +#define __mpatan __mpatan_fma +#define __mpsqrt __mpsqrt_fma +#define __mul __mul_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mpatan2.c> diff --git 
a/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c b/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c deleted file mode 100644 index 87f29c96c9..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __mpexp __mpexp_avx -#define __add __add_avx -#define __dbl_mp __dbl_mp_avx -#define __dvd __dvd_avx -#define __mul __mul_avx -#define AVOID_MPEXP_H 1 -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/mpexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c deleted file mode 100644 index 07ca6e9ad0..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __mpexp __mpexp_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __dvd __dvd_fma4 -#define __mul __mul_fma4 -#define AVOID_MPEXP_H 1 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/mpexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-avx.c b/sysdeps/x86_64/fpu/multiarch/mplog-avx.c deleted file mode 100644 index fd783d9a67..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/mplog-avx.c +++ /dev/null @@ -1,8 +0,0 @@ -#define __mplog __mplog_avx -#define __add __add_avx -#define __mpexp __mpexp_avx -#define __mul __mul_avx -#define __sub __sub_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/mplog.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c deleted file mode 100644 index b4733118d7..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c +++ /dev/null @@ -1,8 +0,0 @@ -#define __mplog __mplog_fma4 -#define __add __add_fma4 -#define __mpexp __mpexp_fma4 -#define __mul __mul_fma4 -#define __sub __sub_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/mplog.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c 
b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c new file mode 100644 index 0000000000..44d7a23ae3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c @@ -0,0 +1,8 @@ +#define __mpsqrt __mpsqrt_fma +#define __dbl_mp __dbl_mp_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define AVOID_MPSQRT_H 1 +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mpsqrt.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mptan-fma.c b/sysdeps/x86_64/fpu/multiarch/mptan-fma.c new file mode 100644 index 0000000000..d1a691413c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mptan-fma.c @@ -0,0 +1,7 @@ +#define __mptan __mptan_fma +#define __c32 __c32_fma +#define __dvd __dvd_fma +#define __mpranred __mpranred_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mptan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c b/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c index b5cb9c3a75..41816bfe6c 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c @@ -1,4 +1,4 @@ -#define atan __atan_avx +#define __atan __atan_avx #define __add __add_avx #define __dbl_mp __dbl_mp_avx #define __mul __mul_avx diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c new file mode 100644 index 0000000000..363e32bcbd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c @@ -0,0 +1,9 @@ +#define __atan __atan_fma +#define __add __add_fma +#define __dbl_mp __dbl_mp_fma +#define __mpatan __mpatan_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/s_atan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c index 9e83e6cdab..ad8d3af579 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c @@ -1,4 +1,4 @@ -#define atan 
__atan_fma4 +#define __atan __atan_fma4 #define __add __add_fma4 #define __dbl_mp __dbl_mp_fma4 #define __mpatan __mpatan_fma4 diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c index 742e95cb96..f9ce8549ab 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c @@ -1,15 +1,30 @@ -#include <init-arch.h> -#include <math.h> +/* Multiple versions of atan. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -extern double __atan_sse2 (double); -extern double __atan_avx (double); -extern double __atan_fma4 (double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (atan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __atan_fma4 : - HAS_ARCH_FEATURE (AVX_Usable) - ? __atan_avx : __atan_sse2)); + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define atan __atan_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ +#include <libm-alias-double.h> +extern double __redirect_atan (double); + +#define SYMBOL_NAME atan +#include "ifunc-avx-fma4.h" + +libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ()); +libm_alias_double (__atan, atan) + +#define __atan __atan_sse2 #include <sysdeps/ieee754/dbl-64/s_atan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S index 40fa729955..e90f05b42f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceil.S +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,23 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__ceil) - .type __ceil, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __ceil_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __ceil_c(%rip), %rax -2: ret -END(__ceil) -weak_alias (__ceil, ceil) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__ceil_sse41) - roundsd $2, %xmm0, %xmm0 + roundsd $10, %xmm0, %xmm0 ret END(__ceil_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c new file mode 100644 index 0000000000..070fcdddea --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c @@ -0,0 +1,31 @@ +/* Multiple versions of __ceil. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-double.h> + +#define ceil __redirect_ceil +#define __ceil __redirect___ceil +#include <math.h> +#undef ceil +#undef __ceil + +#define SYMBOL_NAME ceil +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ()); +libm_alias_double (__ceil, ceil) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S index 9a06a5c174..c3bd24c5ae 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,23 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__ceilf) - .type __ceilf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __ceilf_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __ceilf_c(%rip), %rax -2: ret -END(__ceilf) -weak_alias (__ceilf, ceilf) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__ceilf_sse41) - roundss $2, %xmm0, %xmm0 + roundss $10, %xmm0, %xmm0 ret END(__ceilf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c new file mode 100644 index 0000000000..db0c6c4bc3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c @@ -0,0 +1,31 @@ +/* Multiple versions of __ceilf. 
+ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +#define ceilf __redirect_ceilf +#define __ceilf __redirect___ceilf +#include <math.h> +#undef ceilf +#undef __ceilf + +#define SYMBOL_NAME ceilf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ()); +libm_alias_float (__ceil, ceil) diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c new file mode 100644 index 0000000000..5f9191aef9 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c @@ -0,0 +1,2 @@ +#define COSF __cosf_fma +#include <sysdeps/ieee754/flt-32/s_cosf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c b/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c new file mode 100644 index 0000000000..87cf42a82a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c @@ -0,0 +1,2 @@ +#define COSF __cosf_sse2 +#include <sysdeps/ieee754/flt-32/s_cosf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c new file mode 100644 index 0000000000..33959d3d01 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c @@ -0,0 +1,28 @@ +/* Multiple versions of cosf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +extern float __redirect_cosf (float); + +#define SYMBOL_NAME cosf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ()); + +libm_alias_float (__cos, cos) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S index 57a0eee5ba..b3c7aa29ff 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floor.S +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,23 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__floor) - .type __floor, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __floor_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __floor_c(%rip), %rax -2: ret -END(__floor) -weak_alias (__floor, floor) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__floor_sse41) - roundsd $1, %xmm0, %xmm0 + roundsd $9, %xmm0, %xmm0 ret END(__floor_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c new file mode 100644 index 0000000000..58f8ed8eaf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c @@ -0,0 +1,31 @@ +/* Multiple versions of __floor. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-double.h> + +#define floor __redirect_floor +#define __floor __redirect___floor +#include <math.h> +#undef floor +#undef __floor + +#define SYMBOL_NAME floor +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ()); +libm_alias_double (__floor, floor) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S index 74a149a950..43461d3e6b 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floorf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,23 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__floorf) - .type __floorf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __floorf_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __floorf_c(%rip), %rax -2: ret -END(__floorf) -weak_alias (__floorf, floorf) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__floorf_sse41) - roundss $1, %xmm0, %xmm0 + roundss $9, %xmm0, %xmm0 ret END(__floorf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c new file mode 100644 index 0000000000..5ef2fec2e3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c @@ -0,0 +1,31 @@ +/* Multiple versions of __floorf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +#define floorf __redirect_floorf +#define __floorf __redirect___floorf +#include <math.h> +#undef floorf +#undef __floorf + +#define SYMBOL_NAME floorf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ()); +libm_alias_float (__floor, floor) diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c index 1de1a84cbe..875c76d372 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fma.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c @@ -1,5 +1,5 @@ /* FMA version of fma. - Copyright (C) 2009-2016 Free Software Foundation, Inc. + Copyright (C) 2009-2018 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -20,6 +20,7 @@ #include <config.h> #include <math.h> #include <init-arch.h> +#include <libm-alias-double.h> extern double __fma_sse2 (double x, double y, double z) attribute_hidden; @@ -43,7 +44,7 @@ __fma_fma4 (double x, double y, double z) libm_ifunc (__fma, HAS_ARCH_FEATURE (FMA_Usable) ? __fma_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable) ? __fma_fma4 : __fma_sse2)); -weak_alias (__fma, fma) +libm_alias_double (__fma, fma) #define __fma __fma_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c index 8905e4b54f..5f4c2ec0be 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c @@ -1,5 +1,5 @@ /* FMA version of fmaf. - Copyright (C) 2009-2016 Free Software Foundation, Inc. + Copyright (C) 2009-2018 Free Software Foundation, Inc. 
This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -19,6 +19,7 @@ #include <config.h> #include <math.h> #include <init-arch.h> +#include <libm-alias-float.h> extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden; @@ -42,7 +43,7 @@ __fmaf_fma4 (float x, float y, float z) libm_ifunc (__fmaf, HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable) ? __fmaf_fma4 : __fmaf_sse2)); -weak_alias (__fmaf, fmaf) +libm_alias_float (__fma, fma) #define __fmaf __fmaf_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S index 5091cf5813..f9ac36e4f0 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,22 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__nearbyint) - .type __nearbyint, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __nearbyint_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __nearbyint_c(%rip), %rax -2: ret -END(__nearbyint) -weak_alias (__nearbyint, nearbyint) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__nearbyint_sse41) roundsd $0xc, %xmm0, %xmm0 ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c new file mode 100644 index 0000000000..d92945fd14 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c @@ -0,0 +1,32 @@ +/* Multiple versions of __nearbyint. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-double.h> + +#define nearbyint __redirect_nearbyint +#define __nearbyint __redirect___nearbyint +#include <math.h> +#undef nearbyint +#undef __nearbyint + +#define SYMBOL_NAME nearbyint +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_nearbyint, __nearbyint, + IFUNC_SELECTOR ()); +libm_alias_double (__nearbyint, nearbyint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S index 4a13700001..2f427da778 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,22 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__nearbyintf) - .type __nearbyintf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __nearbyintf_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __nearbyintf_c(%rip), %rax -2: ret -END(__nearbyintf) -weak_alias (__nearbyintf, nearbyintf) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__nearbyintf_sse41) roundss $0xc, %xmm0, %xmm0 ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c new file mode 100644 index 0000000000..ba7be27956 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c @@ -0,0 +1,32 @@ +/* Multiple versions of __nearbyintf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +#define nearbyintf __redirect_nearbyintf +#define __nearbyintf __redirect___nearbyintf +#include <math.h> +#undef nearbyintf +#undef __nearbyintf + +#define SYMBOL_NAME nearbyintf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf, + IFUNC_SELECTOR ()); +libm_alias_float (__nearbyint, nearbyint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S index 1c0d1e14b7..7d7568a1a0 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rint.S +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,22 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__rint) - .type __rint, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __rint_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __rint_c(%rip), %rax -2: ret -END(__rint) -weak_alias (__rint, rint) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__rint_sse41) roundsd $4, %xmm0, %xmm0 ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c new file mode 100644 index 0000000000..f1cb2fed0c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c @@ -0,0 +1,31 @@ +/* Multiple versions of __rint. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-double.h> + +#define rint __redirect_rint +#define __rint __redirect___rint +#include <math.h> +#undef rint +#undef __rint + +#define SYMBOL_NAME rint +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ()); +libm_alias_double (__rint, rint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S index 8e42fa561f..ef5d896f55 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rintf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,22 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__rintf) - .type __rintf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __rintf_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __rintf_c(%rip), %rax -2: ret -END(__rintf) -weak_alias (__rintf, rintf) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__rintf_sse41) roundss $4, %xmm0, %xmm0 ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c new file mode 100644 index 0000000000..41323b3b5b --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c @@ -0,0 +1,31 @@ +/* Multiple versions of __rintf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +#define rintf __redirect_rintf +#define __rintf __redirect___rintf +#include <math.h> +#undef rintf +#undef __rintf + +#define SYMBOL_NAME rintf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ()); +libm_alias_float (__rint, rint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c new file mode 100644 index 0000000000..15f3c394d5 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c @@ -0,0 +1,11 @@ +#define __cos __cos_fma +#define __sin __sin_fma +#define __docos __docos_fma +#define __dubsin __dubsin_fma +#define __mpcos __mpcos_fma +#define __mpcos1 __mpcos1_fma +#define __mpsin __mpsin_fma +#define __mpsin1 __mpsin1_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/s_sin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c index 8ffd3e7125..b289269240 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c @@ -1,26 +1,39 @@ -#include <init-arch.h> -#include <math.h> -#undef NAN - -extern double __cos_sse2 (double); -extern double __sin_sse2 (double); -extern double __cos_avx (double); -extern double 
__sin_avx (double); -extern double __cos_fma4 (double); -extern double __sin_fma4 (double); - -libm_ifunc (__cos, (HAS_ARCH_FEATURE (FMA4_Usable) ? __cos_fma4 : - HAS_ARCH_FEATURE (AVX_Usable) - ? __cos_avx : __cos_sse2)); -weak_alias (__cos, cos) - -libm_ifunc (__sin, (HAS_ARCH_FEATURE (FMA4_Usable) ? __sin_fma4 : - HAS_ARCH_FEATURE (AVX_Usable) - ? __sin_avx : __sin_sse2)); -weak_alias (__sin, sin) +/* Multiple versions of sin and cos. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -#define __cos __cos_sse2 -#define __sin __sin_sse2 + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-double.h> + +extern double __redirect_sin (double); +extern double __redirect_cos (double); +#define SYMBOL_NAME sin +#include "ifunc-avx-fma4.h" +libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ()); +libm_alias_double (__sin, sin) + +#undef SYMBOL_NAME +#define SYMBOL_NAME cos +#include "ifunc-avx-fma4.h" + +libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ()); +libm_alias_double (__cos, cos) + +#define __cos __cos_sse2 +#define __sin __sin_sse2 #include <sysdeps/ieee754/dbl-64/s_sin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c new file mode 100644 index 0000000000..64abe7abca --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c @@ -0,0 +1,240 @@ +/* Compute sine and cosine of argument optimized with vector. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <math.h> +#include <math_private.h> +#include <x86intrin.h> +#include <libm-alias-float.h> +#include "s_sincosf.h" + +#define SINCOSF __sincosf_fma + +#ifndef SINCOSF +# define SINCOSF_FUNC __sincosf +#else +# define SINCOSF_FUNC SINCOSF +#endif + +/* Chebyshev constants for sin and cos, range -PI/4 - PI/4. 
*/ +static const __v2df V0 = { -0x1.5555555551cd9p-3, -0x1.ffffffffe98aep-2}; +static const __v2df V1 = { 0x1.1111110c2688bp-7, 0x1.55555545c50c7p-5 }; +static const __v2df V2 = { -0x1.a019f8b4bd1f9p-13, -0x1.6c16b348b6874p-10 }; +static const __v2df V3 = { 0x1.71d7264e6b5b4p-19, 0x1.a00eb9ac43ccp-16 }; +static const __v2df V4 = { -0x1.a947e1674b58ap-26, -0x1.23c97dd8844d7p-22 }; + +/* Chebyshev constants for sin and cos, range 2^-27 - 2^-5. Element 0 is the sin coefficient and element 1 the cos coefficient. */ +static const __v2df VC0 = { -0x1.555555543d49dp-3, -0x1.fffffff5cc6fdp-2 }; +static const __v2df VC1 = { 0x1.110f475cec8c5p-7, 0x1.55514b178dac5p-5 }; + +static const __v2df v2ones = { 1.0, 1.0 }; + +/* Compute the sine and cosine values using Chebyshev polynomials where + THETA is the range reduced absolute value of the input + and it is less than Pi/4, + N is calculated as trunc(|x|/(Pi/4)) + 1 and it is used to decide + whether a sine or cosine approximation is more accurate and + SIGNBIT is used to add the correct sign after the Chebyshev + polynomial is computed. The results are stored in *SINX and *COSX. */ +static void +reduced_sincos (const double theta, const unsigned int n, + const unsigned int signbit, float *sinx, float *cosx) +{ + __v2df v2x, v2sx, v2cx; + const __v2df v2theta = { theta, theta }; + const __v2df v2theta2 = v2theta * v2theta; + /* Both lanes run the same Horner scheme; element 0 uses the sin coefficients and ends up as: + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))), element 1 uses the cos coefficients and ends up as 1.0+x^2*(...). */ + v2x = V3 + v2theta2 * V4; /* S3+x^2*S4. */ + v2x = V2 + v2theta2 * v2x; /* S2+x^2*(S3+x^2*S4). */ + v2x = V1 + v2theta2 * v2x; /* S1+x^2*(S2+x^2*(S3+x^2*S4)). */ + v2x = V0 + v2theta2 * v2x; /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))). */ + v2x = v2theta2 * v2x; /* x^2*(...). */ + v2cx = v2ones + v2x; /* 1.0+x^2*(...); only element 1 (cos) is kept. */ + v2sx = v2theta + v2theta * v2x; /* x+x^3*(...); only element 0 (sin) is kept. */ + /* We are operating on |x|, so we need to add back the original + signbit for sinf. */ + /* Determine positive or negative primary interval. */ + /* Are we in the primary interval of sin or cos?
*/ + if ((n & 2) == 0) + { + const __v2df v2sign = + { + ones[((n >> 2) & 1) ^ signbit], + ones[((n + 2) >> 2) & 1] + }; + v2cx[0] = v2sx[0]; /* v2cx now holds { sin polynomial, cos polynomial }. */ + v2cx *= v2sign; /* Per-element sign; ones[] is not defined in this file (presumably 1.0 and -1.0, confirm in s_sincosf.h). */ + __v4sf v4sx = _mm_cvtpd_ps (v2cx); + *sinx = v4sx[0]; + *cosx = v4sx[1]; + } + else + { + const __v2df v2sign = + { + ones[((n + 2) >> 2) & 1], + ones[((n >> 2) & 1) ^ signbit] + }; + v2cx[0] = v2sx[0]; /* Same packing as above; the sign selectors are exchanged instead. */ + v2cx *= v2sign; + __v4sf v4sx = _mm_cvtpd_ps (v2cx); + *sinx = v4sx[1]; /* Swapped: the cos polynomial supplies sinx here. */ + *cosx = v4sx[0]; /* Swapped: the sin polynomial supplies cosx here. */ + } +} + +void +SINCOSF_FUNC (float x, float *sinx, float *cosx) +{ /* Store the sine of X in *SINX and the cosine in *COSX; the branch taken depends on the bit pattern of |X|. */ + double theta = x; + double abstheta = fabs (theta); + uint32_t ix, xi; + GET_FLOAT_WORD (xi, x); + /* Bit pattern of |x|: the sign bit is masked off. */ + ix = xi & 0x7fffffff; + /* If |x|< Pi/4. */ + if (ix < 0x3f490fdb) + { + if (ix >= 0x3d000000) /* |x| >= 2^-5. */ + { + __v2df v2x, v2sx, v2cx; + const __v2df v2theta = { theta, theta }; + const __v2df v2theta2 = v2theta * v2theta; + /* Same two-element polynomial evaluation as in reduced_sincos, on the signed THETA and without any sign fix-up. */ + v2x = V3 + v2theta2 * V4; + v2x = V2 + v2theta2 * v2x; + v2x = V1 + v2theta2 * v2x; + v2x = V0 + v2theta2 * v2x; + v2x = v2theta2 * v2x; + v2cx = v2ones + v2x; + v2sx = v2theta + v2theta * v2x; + v2cx[0] = v2sx[0]; + __v4sf v4sx = _mm_cvtpd_ps (v2cx); + *sinx = v4sx[0]; + *cosx = v4sx[1]; + } + else if (ix >= 0x32000000) /* |x| >= 2^-27. */ + { + /* A simpler Chebyshev approximation is used for this range. Both elements are evaluated as VC0+x^3*VC1, so the code below computes: + for sin: x+x^3*(VC0[0]+x^3*VC1[0]) + for cos: 1.0+x^2*(VC0[1]+x^3*VC1[1]). NOTE(review): the sin element uses x^3, not x^2, in the inner term; confirm this is intended. */ + __v2df v2x, v2sx, v2cx; + const __v2df v2theta = { theta, theta }; + const __v2df v2theta2 = v2theta * v2theta; + v2x = VC0 + v2theta * v2theta2 * VC1; + v2x = v2theta2 * v2x; + v2cx = v2ones + v2x; + v2sx = v2theta + v2theta * v2x; + v2cx[0] = v2sx[0]; + __v4sf v4sx = _mm_cvtpd_ps (v2cx); + *sinx = v4sx[0]; + *cosx = v4sx[1]; + } + else + { + /* |x| < 2^-27, including zero. SMALL is not defined in this file. */ + if (ix) + *sinx = theta - (theta * SMALL); + else + *sinx = theta; + *cosx = 1.0 - abstheta; + } + } + else /* |x| >= Pi/4.
*/ + { + unsigned int signbit = xi >> 31; /* Top bit of the float word of x. */ + if (ix < 0x40e231d6) /* |x| < 9*Pi/4. */ + { + /* There are cases where FE_UPWARD rounding mode can + produce a result of abstheta * inv_PI_4 == 9, + where abstheta < 9pi/4, so the domain for + pio2_table must go to 5 (9 / 2 + 1). */ + unsigned int n = (abstheta * inv_PI_4) + 1; + theta = abstheta - pio2_table[n / 2]; + reduced_sincos (theta, n, signbit, sinx, cosx); + } + else if (ix < 0x7f800000) /* Larger |x| that is neither Inf nor NaN. */ + { + if (ix < 0x4b000000) /* |x| < 2^23. */ + { + unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1; + double x = n / 2; /* Integer division; this X shadows the float parameter inside this block. */ + theta = (abstheta - x * PI_2_hi) - x * PI_2_lo; + /* Argument reduction needed. */ + reduced_sincos (theta, n, signbit, sinx, cosx); + } + else /* |x| >= 2^23. */ + { + x = fabsf (x); /* The sign is carried separately in SIGNBIT. */ + int exponent + = (ix >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS; + exponent += 3; + exponent /= 28; /* Index into invpio4_table, which is not defined in this file (presumably pieces of 4/Pi, confirm). */ + double a = invpio4_table[exponent] * x; + double b = invpio4_table[exponent + 1] * x; + double c = invpio4_table[exponent + 2] * x; + double d = invpio4_table[exponent + 3] * x; + uint64_t l = a; + l &= ~0x7; /* Clear the low three bits. */ + a -= l; + double e = a + b; + l = e; + e = a - l; + if (l & 1) + { + e -= 1.0; + e += b; + e += c; + e += d; + e *= M_PI_4; + reduced_sincos (e, l + 1, signbit, sinx, cosx); + } + else + { + e += b; + e += c; + e += d; + if (e <= 1.0) + { + e *= M_PI_4; + reduced_sincos (e, l + 1, signbit, sinx, cosx); + } + else + { + l++; + e -= 2.0; + e *= M_PI_4; + reduced_sincos (e, l + 1, signbit, sinx, cosx); + } + } + } + } + else + { + if (ix == 0x7f800000) /* Exactly the Inf bit pattern; larger values (NaN) leave errno alone. */ + __set_errno (EDOM); + /* sin/cos(Inf or NaN) is NaN.
*/ + *sinx = *cosx = x - x; + } + } +} + +#ifndef SINCOSF /* Never true in this file: SINCOSF is defined unconditionally near the top. */ +libm_alias_float (__sincos, sincos) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S b/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S new file mode 100644 index 0000000000..51d012bb12 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S @@ -0,0 +1,2 @@ +#define __sincosf __sincosf_sse2 +#include <sysdeps/x86_64/fpu/s_sincosf.S> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c new file mode 100644 index 0000000000..6cb4295558 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c @@ -0,0 +1,28 @@ +/* Multiple versions of sincosf. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include <libm-alias-float.h> + +extern void __redirect_sincosf (float, float *, float *); + +#define SYMBOL_NAME sincosf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ()); + +libm_alias_float (__sincos, sincos) diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c new file mode 100644 index 0000000000..34440ebf4a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c @@ -0,0 +1,2 @@ +#define SINF __sinf_fma +#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c new file mode 100644 index 0000000000..74e32c98db --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c @@ -0,0 +1,2 @@ +#define SINF __sinf_sse2 +#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c new file mode 100644 index 0000000000..4fdfbd8d3e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c @@ -0,0 +1,28 @@ +/* Multiple versions of sinf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_sinf (float); + +#define SYMBOL_NAME sinf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ()); + +libm_alias_float (__sin, sin) diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c b/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c index 53de5d3c98..5ee29a9a06 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c @@ -1,4 +1,4 @@ -#define tan __tan_avx +#define __tan __tan_avx #define __dbl_mp __dbl_mp_avx #define __sub __sub_avx #define SECTION __attribute__ ((section (".text.avx"))) diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c new file mode 100644 index 0000000000..1a1b9d2490 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c @@ -0,0 +1,8 @@ +#define __tan __tan_fma +#define __dbl_mp __dbl_mp_fma +#define __mpranred __mpranred_fma +#define __mptan __mptan_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/s_tan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c index a805440b46..e4e9f6cb85 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c @@ -1,4 +1,4 @@ -#define tan __tan_fma4 +#define __tan __tan_fma4 #define __dbl_mp __dbl_mp_fma4 #define __mpranred __mpranred_fma4 #define __mptan __mptan_fma4 diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c index 25f3bca07e..bb75d8d0bc 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c @@ -1,15 +1,30 @@ -#include <init-arch.h> -#include <math.h> +/* Multiple versions of tan. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
-extern double __tan_sse2 (double); -extern double __tan_avx (double); -extern double __tan_fma4 (double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (tan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __tan_fma4 : - HAS_ARCH_FEATURE (AVX_Usable) - ? __tan_avx : __tan_sse2)); + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define tan __tan_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#include <libm-alias-double.h> +extern double __redirect_tan (double); + +#define SYMBOL_NAME tan +#include "ifunc-avx-fma4.h" + +libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ()); +libm_alias_double (__tan, tan) + +#define __tan __tan_sse2 #include <sysdeps/ieee754/dbl-64/s_tan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c new file mode 100644 index 0000000000..6204ae3c77 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c @@ -0,0 +1,2 @@ +#define __trunc __trunc_c +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S new file mode 100644 index 0000000000..b8046bfa0c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S @@ -0,0 +1,25 @@ +/* trunc for SSE4.1. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .text.sse4.1,"ax",@progbits +ENTRY(__trunc_sse41) + roundsd $11, %xmm0, %xmm0 + ret +END(__trunc_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..a1b0c60630 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c @@ -0,0 +1,31 @@ +/* Multiple versions of __trunc. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-double.h> + +#define trunc __redirect_trunc +#define __trunc __redirect___trunc +#include <math.h> +#undef trunc +#undef __trunc + +#define SYMBOL_NAME trunc +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ()); +libm_alias_double (__trunc, trunc) diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c new file mode 100644 index 0000000000..7a5ac7da1f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c @@ -0,0 +1,2 @@ +#define __truncf __truncf_c +#include <sysdeps/ieee754/flt-32/s_truncf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S new file mode 100644 index 0000000000..2dabc0be57 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S @@ -0,0 +1,25 @@ +/* truncf for SSE4.1. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .section .text.sse4.1,"ax",@progbits +ENTRY(__truncf_sse41) + roundss $11, %xmm0, %xmm0 + ret +END(__truncf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..a7e220bd0c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c @@ -0,0 +1,31 @@ +/* Multiple versions of __truncf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +#define truncf __redirect_truncf +#define __truncf __redirect___truncf +#include <math.h> +#undef truncf +#undef __truncf + +#define SYMBOL_NAME truncf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ()); +libm_alias_float (__trunc, trunc) diff --git a/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c b/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c new file mode 100644 index 0000000000..dcd44bc5e8 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c @@ -0,0 +1,15 @@ +#define __cos32 __cos32_fma +#define __sin32 __sin32_fma +#define __c32 __c32_fma +#define __mpsin __mpsin_fma +#define __mpsin1 __mpsin1_fma +#define __mpcos __mpcos_fma +#define __mpcos1 __mpcos1_fma +#define __mpranred __mpranred_fma +#define __add __add_fma +#define __dbl_mp __dbl_mp_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/sincos32.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c b/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c deleted file mode 100644 index d01c6d71a4..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __slowexp __slowexp_avx -#define __add __add_avx -#define __dbl_mp __dbl_mp_avx -#define __mpexp __mpexp_avx -#define __mul __mul_avx -#define __sub __sub_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/slowexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c deleted file mode 100644 index 3bcde84233..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __slowexp __slowexp_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __mpexp __mpexp_fma4 -#define __mul __mul_fma4 -#define __sub __sub_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include 
<sysdeps/ieee754/dbl-64/slowexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c deleted file mode 100644 index 69d69823bb..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c +++ /dev/null @@ -1,11 +0,0 @@ -#define __slowpow __slowpow_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __mpexp __mpexp_fma4 -#define __mplog __mplog_fma4 -#define __mul __mul_fma4 -#define __sub __sub_fma4 -#define __halfulp __halfulp_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/slowpow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S index 7d720e2fcb..a85729807f 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized cos, vector length is 2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized cos, vector length is 2. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2v_cos) - .type _ZGVbN2v_cos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2v_cos_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2v_cos_sse2(%rip), %rax - ret -END (_ZGVbN2v_cos) -libmvec_hidden_def (_ZGVbN2v_cos) - #define _ZGVbN2v_cos _ZGVbN2v_cos_sse2 #include "../svml_d_cos2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c new file mode 100644 index 0000000000..3ff39eecd7 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized cos, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN2v_cos +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_cos, __GI__ZGVbN2v_cos, __redirect__ZGVbN2v_cos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S index 088fcae067..10be76e207 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S @@ -1,5 +1,5 @@ /* Function cos vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -205,7 +205,7 @@ ENTRY (_ZGVbN2v_cos_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -215,7 +215,7 @@ ENTRY (_ZGVbN2v_cos_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S index 65a3570d2e..9f406ea7c9 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized cos, vector length is 4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized cos, vector length is 4. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4v_cos) - .type _ZGVdN4v_cos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4v_cos_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4v_cos_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4v_cos) -libmvec_hidden_def (_ZGVdN4v_cos) - #define _ZGVdN4v_cos _ZGVdN4v_cos_sse_wrapper #include "../svml_d_cos4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c new file mode 100644 index 0000000000..cb8405201a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized cos, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4v_cos +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_cos, __GI__ZGVdN4v_cos, __redirect__ZGVdN4v_cos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S index 4e653216d9..38cdc6bb03 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -188,7 +188,7 @@ ENTRY (_ZGVdN4v_cos_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -199,7 +199,7 @@ ENTRY (_ZGVdN4v_cos_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S new file mode 100644 index 0000000000..081baeeff5 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized cos, vector length is 8. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper +#include "../svml_d_cos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S deleted file mode 100644 index 3e7f16d44e..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized cos, vector length is 8. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8v_cos) - .type _ZGVeN8v_cos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX -1: leaq _ZGVeN8v_cos_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8v_cos_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8v_cos_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8v_cos) - -#define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper -#include "../svml_d_cos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c new file mode 100644 index 0000000000..4aa12595bc --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized cos, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8v_cos +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_cos, __GI__ZGVeN8v_cos, __redirect__ZGVeN8v_cos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S index 1cac1d827a..24e3b36357 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX-512, KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN8v_cos_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_cos #else /* @@ -221,7 +221,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -229,14 +229,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif END (_ZGVeN8v_cos_knl) ENTRY (_ZGVeN8v_cos_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_cos #else /* @@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -450,7 +450,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S index 136c67a550..3591eb1f19 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized exp. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized exp. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2v_exp) - .type _ZGVbN2v_exp, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2v_exp_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2v_exp_sse2(%rip), %rax - ret -END (_ZGVbN2v_exp) -libmvec_hidden_def (_ZGVbN2v_exp) - #define _ZGVbN2v_exp _ZGVbN2v_exp_sse2 #include "../svml_d_exp2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c new file mode 100644 index 0000000000..2cfe8937c9 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized exp, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_exp +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_exp, __GI__ZGVbN2v_exp, __redirect__ZGVbN2v_exp) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S index 445b230152..e98d11b311 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S @@ -1,5 +1,5 @@ /* Function exp vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -207,7 +207,7 @@ ENTRY (_ZGVbN2v_exp_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -217,7 +217,7 @@ ENTRY (_ZGVbN2v_exp_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S index 9d6a47be0a..f8e0b5517a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized exp. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized exp. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4v_exp) - .type _ZGVdN4v_exp, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4v_exp_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4v_exp_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4v_exp) -libmvec_hidden_def (_ZGVdN4v_exp) - #define _ZGVdN4v_exp _ZGVdN4v_exp_sse_wrapper #include "../svml_d_exp4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c new file mode 100644 index 0000000000..59bb36984a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized exp, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4v_exp +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_exp, __GI__ZGVdN4v_exp, __redirect__ZGVdN4v_exp) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S index 25f9e28941..87990f8ad7 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -193,7 +193,7 @@ ENTRY (_ZGVdN4v_exp_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -204,7 +204,7 @@ ENTRY (_ZGVdN4v_exp_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S new file mode 100644 index 0000000000..b1d3cad0e1 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized exp. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper +#include "../svml_d_exp8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S deleted file mode 100644 index 317ee36e61..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized exp. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8v_exp) - .type _ZGVeN8v_exp, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8v_exp_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8v_exp_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8v_exp_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8v_exp) - -#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper -#include "../svml_d_exp8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c new file mode 100644 index 0000000000..cfdc96ec86 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized exp, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8v_exp +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_exp, __GI__ZGVeN8v_exp, __redirect__ZGVeN8v_exp) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S index 74f1d2ce7b..8dd8a03e4b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN8v_exp_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_exp #else /* @@ -223,7 +223,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -231,14 +231,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif END (_ZGVeN8v_exp_knl) ENTRY (_ZGVeN8v_exp_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_exp #else /* @@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp vmovsd 1160(%rsp,%r15), %xmm0 vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp vmovsd 1152(%rsp,%r15), %xmm0 vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) 
vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S index 03d86a3e63..761a1a537d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized log. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized log. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2v_log) - .type _ZGVbN2v_log, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2v_log_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2v_log_sse2(%rip), %rax - ret -END (_ZGVbN2v_log) -libmvec_hidden_def (_ZGVbN2v_log) - #define _ZGVbN2v_log _ZGVbN2v_log_sse2 #include "../svml_d_log2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c new file mode 100644 index 0000000000..c24437a3be --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_log +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_log, __GI__ZGVbN2v_log, __redirect__ZGVbN2v_log) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S index 5d254288f6..eb854c68d6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S @@ -1,5 +1,5 @@ /* Function log vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_log_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_log_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S index 9f6ddbef15..2460512f78 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized log. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized log. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4v_log) - .type _ZGVdN4v_log, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4v_log_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4v_log_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4v_log) -libmvec_hidden_def (_ZGVdN4v_log) - #define _ZGVdN4v_log _ZGVdN4v_log_sse_wrapper #include "../svml_d_log4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c new file mode 100644 index 0000000000..5751370d65 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4v_log +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_log, __GI__ZGVdN4v_log, __redirect__ZGVdN4v_log) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S index 5da298747d..81515850e1 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_log_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_log_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S new file mode 100644 index 0000000000..ecfbeafb23 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized log. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper +#include "../svml_d_log8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S deleted file mode 100644 index 2e1a1da1a5..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized log. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8v_log) - .type _ZGVeN8v_log, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8v_log_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8v_log_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8v_log_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8v_log) - -#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper -#include "../svml_d_log8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c new file mode 100644 index 0000000000..1e796dcfdd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8v_log +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_log, __GI__ZGVeN8v_log, __redirect__ZGVeN8v_log) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S index dca8e61f34..ae8af8d861 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN8v_log_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_log #else /* @@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -230,14 +230,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif END (_ZGVeN8v_log_knl) ENTRY (_ZGVeN8v_log_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_log #else /* @@ -443,7 +443,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S index 4a50246889..2d8ad50681 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized pow. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized pow. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2vv_pow) - .type _ZGVbN2vv_pow, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2vv_pow_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2vv_pow_sse2(%rip), %rax - ret -END (_ZGVbN2vv_pow) -libmvec_hidden_def (_ZGVbN2vv_pow) - #define _ZGVbN2vv_pow _ZGVbN2vv_pow_sse2 #include "../svml_d_pow2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c new file mode 100644 index 0000000000..3424c0e326 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized pow, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2vv_pow +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2vv_pow, __GI__ZGVbN2vv_pow, + __redirect__ZGVbN2vv_pow) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S index 064d170878..77828b44d5 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S @@ -1,5 +1,5 @@ /* Function pow vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -413,7 +413,7 @@ ENTRY (_ZGVbN2vv_pow_sse4) movsd 72(%rsp,%r15), %xmm0 movsd 136(%rsp,%r15), %xmm1 - call pow@PLT + call JUMPTARGET(__pow_finite) movsd %xmm0, 200(%rsp,%r15) jmp .LBL_1_8 @@ -424,7 +424,7 @@ ENTRY (_ZGVbN2vv_pow_sse4) movsd 64(%rsp,%r15), %xmm0 movsd 128(%rsp,%r15), %xmm1 - call pow@PLT + call JUMPTARGET(__pow_finite) movsd %xmm0, 192(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S index fb9f989adc..4dcd14ff20 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized pow. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized pow. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4vv_pow) - .type _ZGVdN4vv_pow, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4vv_pow_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4vv_pow_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4vv_pow) -libmvec_hidden_def (_ZGVdN4vv_pow) - #define _ZGVdN4vv_pow _ZGVdN4vv_pow_sse_wrapper #include "../svml_d_pow4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c new file mode 100644 index 0000000000..447be39401 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized pow, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4vv_pow +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4vv_pow, __GI__ZGVdN4vv_pow, + __redirect__ZGVdN4vv_pow) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S index f2a73ffe1e..c43d62f202 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -367,7 +367,7 @@ ENTRY (_ZGVdN4vv_pow_avx2) vmovsd 264(%rsp,%r15), %xmm1 vzeroupper - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 328(%rsp,%r15) jmp .LBL_1_8 @@ -379,7 +379,7 @@ ENTRY (_ZGVdN4vv_pow_avx2) vmovsd 256(%rsp,%r15), %xmm1 vzeroupper - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 320(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S new file mode 100644 index 0000000000..8acf700e76 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized pow. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper +#include "../svml_d_pow8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S deleted file mode 100644 index 30bc53f2f7..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized pow. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8vv_pow) - .type _ZGVeN8vv_pow, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8vv_pow_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8vv_pow_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8vv_pow_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8vv_pow) - -#define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper -#include "../svml_d_pow8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c new file mode 100644 index 0000000000..62f96965bb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized pow, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8vv_pow +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8vv_pow, __GI__ZGVeN8vv_pow, + __redirect__ZGVeN8vv_pow) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S index 4a515233fc..a28c39b73d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -82,7 +82,7 @@ .text ENTRY (_ZGVeN8vv_pow_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow #else pushq %rbp @@ -392,7 +392,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 vmovsd 1224(%rsp,%r15), %xmm1 - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_1_8 @@ -401,7 +401,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 vmovsd 1216(%rsp,%r15), %xmm1 - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_1_7 @@ -409,7 +409,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow END (_ZGVeN8vv_pow_knl) ENTRY (_ZGVeN8vv_pow_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow #else pushq %rbp @@ -720,7 +720,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_2_8 @@ -732,7 +732,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow vzeroupper vmovsd 
1152(%rsp,%r15), %xmm0 - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S index 112bec2224..cb7b31aa1c 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized sin. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized sin. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2v_sin) - .type _ZGVbN2v_sin, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2v_sin_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2v_sin_sse2(%rip), %rax - ret -END (_ZGVbN2v_sin) -libmvec_hidden_def (_ZGVbN2v_sin) - #define _ZGVbN2v_sin _ZGVbN2v_sin_sse2 #include "../svml_d_sin2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c new file mode 100644 index 0000000000..1c5788f205 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized sin, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_sin +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_sin, __GI__ZGVbN2v_sin, __redirect__ZGVbN2v_sin) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S index 5755ce6f74..15980e9eeb 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S @@ -1,5 +1,5 @@ /* Function sin vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_sin_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_sin_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S index 700a1c629d..07fae6f3b4 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized sin, vector length is 4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. 
+/* SSE version of vectorized sin, vector length is 4. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4v_sin) - .type _ZGVdN4v_sin, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4v_sin_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4v_sin_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4v_sin) -libmvec_hidden_def (_ZGVdN4v_sin) - #define _ZGVdN4v_sin _ZGVdN4v_sin_sse_wrapper #include "../svml_d_sin4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c new file mode 100644 index 0000000000..b5933914aa --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized sin, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4v_sin +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_sin, __GI__ZGVdN4v_sin, __redirect__ZGVdN4v_sin) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S index 46b557158a..4f0917c56d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_sin_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_sin_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S index 5afce0ed88..b64c3390d6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized sin. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* AVX2 version of vectorized sin. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -19,19 +19,5 @@ #include <sysdep.h> #include <init-arch.h> - .text -ENTRY (_ZGVeN8v_sin) - .type _ZGVeN8v_sin, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8v_sin_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8v_sin_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8v_sin_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8v_sin) - #define _ZGVeN8v_sin _ZGVeN8v_sin_avx2_wrapper #include "../svml_d_sin8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c new file mode 100644 index 0000000000..57023d8494 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized sin, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8v_sin +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_sin, __GI__ZGVeN8v_sin, __redirect__ZGVeN8v_sin) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S index 6c565f3861..2d4b14fd1b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX-512, KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN8v_sin_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_sin #else /* @@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -230,14 +230,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif END (_ZGVeN8v_sin_knl) ENTRY (_ZGVeN8v_sin_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_sin #else /* @@ -440,7 +440,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -452,7 +452,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S index 883d7d33a4..ab7f9c500d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized sincos. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized sincos. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2vvv_sincos) - .type _ZGVbN2vvv_sincos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2vvv_sincos_sse2(%rip), %rax - ret -END (_ZGVbN2vvv_sincos) -libmvec_hidden_def (_ZGVbN2vvv_sincos) - #define _ZGVbN2vvv_sincos _ZGVbN2vvv_sincos_sse2 #include "../svml_d_sincos2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c new file mode 100644 index 0000000000..f373bb40a3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincos, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2vvv_sincos +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2vvv_sincos, __GI__ZGVbN2vvv_sincos, + __redirect__ZGVbN2vvv_sincos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S index 65ad540122..b4dfa37898 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_d_trig_data.h" .text -ENTRY (_ZGVbN2vvv_sincos_sse4) +ENTRY (_ZGVbN2vl8l8_sincos_sse4) /* ALGORITHM DESCRIPTION: @@ -287,12 +287,12 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) shlq $4, %r15 movsd 136(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 200(%rsp,%r15) movsd 136(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -302,13 +302,67 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) shlq $4, %r15 movsd 128(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 192(%rsp,%r15) movsd 128(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 +END (_ZGVbN2vl8l8_sincos_sse4) +libmvec_hidden_def(_ZGVbN2vl8l8_sincos_sse4) + +/* vvv version implemented with wrapper to vl8l8 variant. */ +ENTRY (_ZGVbN2vvv_sincos_sse4) +#ifndef __ILP32__ + subq $72, %rsp + .cfi_def_cfa_offset 80 + movdqu %xmm1, 32(%rsp) + lea (%rsp), %rdi + movdqu %xmm2, 48(%rdi) + lea 16(%rsp), %rsi + call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4) + movq 32(%rsp), %rdx + movq 48(%rsp), %rsi + movq 40(%rsp), %r8 + movq 56(%rsp), %r10 + movq (%rsp), %rax + movq 16(%rsp), %rcx + movq 8(%rsp), %rdi + movq 24(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq %rdi, (%r8) + movq %r9, (%r10) + addq $72, %rsp + .cfi_def_cfa_offset 8 + ret +#else + subl $72, %esp + .cfi_def_cfa_offset 80 + leal 48(%rsp), %esi + movaps %xmm1, 16(%esp) + leal 32(%rsp), %edi + movaps %xmm2, (%esp) + call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4) + movdqa 16(%esp), %xmm1 + movsd 32(%esp), %xmm0 + movq %xmm1, %rax + movdqa (%esp), %xmm2 + movsd %xmm0, (%eax) + movsd 40(%esp), %xmm0 + pextrd $1, %xmm1, %eax + movsd %xmm0, (%eax) + movsd 48(%esp), %xmm0 + movq %xmm2, %rax + movsd %xmm0, (%eax) + movsd 56(%esp), %xmm0 + pextrd $1, %xmm2, %eax + movsd %xmm0, (%eax) + addl $72, %esp + .cfi_def_cfa_offset 8 + ret +#endif END 
(_ZGVbN2vvv_sincos_sse4) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S index 69a3f74650..10b4a2cf16 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized sincos. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized sincos. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4vvv_sincos) - .type _ZGVdN4vvv_sincos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4vvv_sincos_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4vvv_sincos) -libmvec_hidden_def (_ZGVdN4vvv_sincos) - #define _ZGVdN4vvv_sincos _ZGVdN4vvv_sincos_sse_wrapper #include "../svml_d_sincos4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c new file mode 100644 index 0000000000..1fabd7b471 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincos, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVdN4vvv_sincos +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4vvv_sincos, __GI__ZGVdN4vvv_sincos, + __redirect__ZGVdN4vvv_sincos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S index 60d03e9f8b..d56aa96ac9 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_d_trig_data.h" .text -ENTRY (_ZGVdN4vvv_sincos_avx2) +ENTRY (_ZGVdN4vl8l8_sincos_avx2) /* ALGORITHM DESCRIPTION: @@ -248,12 +248,12 @@ ENTRY (_ZGVdN4vvv_sincos_avx2) vmovsd 264(%rsp,%r15), %xmm0 vzeroupper - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 328(%rsp,%r15) vmovsd 264(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -264,14 +264,110 @@ ENTRY (_ZGVdN4vvv_sincos_avx2) vmovsd 256(%rsp,%r15), %xmm0 vzeroupper - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 320(%rsp,%r15) vmovsd 256(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 +END (_ZGVdN4vl8l8_sincos_avx2) +libmvec_hidden_def(_ZGVdN4vl8l8_sincos_avx2) + +/* vvv version implemented with wrapper to vl8l8 variant. */ +ENTRY (_ZGVdN4vvv_sincos_avx2) +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-32, %rsp + subq $128, %rsp + vmovdqu %ymm1, 64(%rsp) + lea (%rsp), %rdi + vmovdqu %ymm2, 96(%rdi) + lea 32(%rsp), %rsi + call HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2) + movq 64(%rsp), %rdx + movq 96(%rsp), %rsi + movq 72(%rsp), %r8 + movq 104(%rsp), %r10 + movq (%rsp), %rax + movq 32(%rsp), %rcx + movq 8(%rsp), %rdi + movq 40(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq 80(%rsp), %rax + movq 112(%rsp), %rcx + movq %rdi, (%r8) + movq %r9, (%r10) + movq 88(%rsp), %rdi + movq 120(%rsp), %r9 + movq 16(%rsp), %r11 + movq 48(%rsp), %rdx + movq 24(%rsp), %rsi + movq 56(%rsp), %r8 + movq %r11, (%rax) + movq %rdx, (%rcx) + movq %rsi, (%rdi) + movq %r8, (%r9) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-32, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl 
%esp, %ebp + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x78,0x6 + leal -48(%rbp), %esi + leal -80(%rbp), %edi + subl $104, %esp + vmovaps %xmm1, -96(%ebp) + vmovaps %xmm2, -112(%ebp) + call HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2) + movl -96(%ebp), %eax + vmovsd -80(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -92(%ebp), %eax + vmovsd -72(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -88(%ebp), %eax + vmovsd -64(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -84(%ebp), %eax + vmovsd -56(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -112(%ebp), %eax + vmovsd -48(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -108(%ebp), %eax + vmovsd -40(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -104(%ebp), %eax + vmovsd -32(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -100(%ebp), %eax + vmovsd -24(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + addl $104, %esp + popq %r10 + .cfi_def_cfa 10, 0 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif END (_ZGVdN4vvv_sincos_avx2) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S new file mode 100644 index 0000000000..8cf88f6461 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized sincos. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper +#include "../svml_d_sincos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S deleted file mode 100644 index 64cb08c5d1..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized sincos. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8vvv_sincos) - .type _ZGVeN8vvv_sincos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8vvv_sincos_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8vvv_sincos_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8vvv_sincos_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8vvv_sincos) - -#define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper -#include "../svml_d_sincos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c new file mode 100644 index 0000000000..1409872ed2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincos, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8vvv_sincos +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8vvv_sincos, __GI__ZGVeN8vvv_sincos, + __redirect__ZGVeN8vvv_sincos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S index 44700f90b8..2df626c0c1 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -36,9 +36,9 @@ sin(R), sin(R') are approximated by corresponding polynomial. */ .text -ENTRY (_ZGVeN8vvv_sincos_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT -WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos +ENTRY (_ZGVeN8vl8l8_sincos_knl) +#ifndef HAVE_AVX512DQ_ASM_SUPPORT +WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos #else pushq %rbp cfi_adjust_cfa_offset (8) @@ -278,12 +278,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1224(%rsp,%r15) vmovsd 1160(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_1_8 @@ -293,22 +293,23 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1216(%rsp,%r15) vmovsd 1152(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_1_7 #endif -END (_ZGVeN8vvv_sincos_knl) +END (_ZGVeN8vl8l8_sincos_knl) +libmvec_hidden_def(_ZGVeN8vl8l8_sincos_knl) -ENTRY (_ZGVeN8vvv_sincos_skx) -#ifndef 
HAVE_AVX512_ASM_SUPPORT -WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos +ENTRY (_ZGVeN8vl8l8_sincos_skx) +#ifndef HAVE_AVX512DQ_ASM_SUPPORT +WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos #else pushq %rbp cfi_adjust_cfa_offset (8) @@ -557,12 +558,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1224(%rsp,%r15) vmovsd 1160(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_2_8 @@ -574,17 +575,171 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1216(%rsp,%r15) vmovsd 1152(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_2_7 #endif +END (_ZGVeN8vl8l8_sincos_skx) +libmvec_hidden_def(_ZGVeN8vl8l8_sincos_skx) + +/* Wrapper between vvv and vl8l8 vector variants. */ +.macro WRAPPER_AVX512_vvv_vl8l8 callee +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-64, %rsp + subq $256, %rsp + vmovups %zmm1, 128(%rsp) + lea (%rsp), %rdi + vmovups %zmm2, 192(%rdi) + lea 64(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 128(%rsp), %rdx + movq 136(%rsp), %rsi + movq 144(%rsp), %r8 + movq 152(%rsp), %r10 + movq (%rsp), %rax + movq 8(%rsp), %rcx + movq 16(%rsp), %rdi + movq 24(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq 160(%rsp), %rax + movq 168(%rsp), %rcx + movq %rdi, (%r8) + movq %r9, (%r10) + movq 176(%rsp), %rdi + movq 184(%rsp), %r9 + movq 32(%rsp), %r11 + movq 40(%rsp), %rdx + movq 48(%rsp), %rsi + movq 56(%rsp), %r8 + movq %r11, (%rax) + movq %rdx, (%rcx) + movq 192(%rsp), %r11 + movq 200(%rsp), %rdx + movq %rsi, (%rdi) + movq %r8, (%r9) + movq 208(%rsp), %rsi + movq 216(%rsp), %r8 + movq 64(%rsp), %r10 + movq 72(%rsp), %rax + movq 80(%rsp), %rcx + movq 88(%rsp), %rdi + movq %r10, (%r11) + movq %rax, (%rdx) + 
movq 224(%rsp), %r10 + movq 232(%rsp), %rax + movq %rcx, (%rsi) + movq %rdi, (%r8) + movq 240(%rsp), %rcx + movq 248(%rsp), %rdi + movq 96(%rsp), %r9 + movq 104(%rsp), %r11 + movq 112(%rsp), %rdx + movq 120(%rsp), %rsi + movq %r9, (%r10) + movq %r11, (%rax) + movq %rdx, (%rcx) + movq %rsi, (%rdi) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-64, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x78,0x6 + leal -112(%rbp), %esi + leal -176(%rbp), %edi + subl $232, %esp + vmovdqa %ymm1, -208(%ebp) + vmovdqa %ymm2, -240(%ebp) + call HIDDEN_JUMPTARGET(\callee) + vmovdqa -208(%ebp), %xmm0 + vmovq %xmm0, %rax + vmovsd -176(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -168(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -200(%ebp), %rax + vmovsd -160(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -152(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -192(%ebp), %rax + vmovsd -144(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -136(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -184(%ebp), %rax + vmovsd -128(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -120(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + vmovdqa -240(%ebp), %xmm0 + vmovq %xmm0, %rax + vmovsd -112(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -104(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -232(%ebp), %rax + vmovsd -96(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -88(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -224(%ebp), %rax + vmovsd -80(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -72(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -216(%ebp), %rax + vmovsd -64(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -56(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + addl $232, %esp + popq %r10 + .cfi_def_cfa 10, 0 + popq 
%rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + +ENTRY (_ZGVeN8vvv_sincos_knl) +WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_knl +END (_ZGVeN8vvv_sincos_knl) + +ENTRY (_ZGVeN8vvv_sincos_skx) +WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_skx END (_ZGVeN8vvv_sincos_skx) .section .rodata, "a" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S new file mode 100644 index 0000000000..f01f89f294 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized cosf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper +#include "../svml_s_cosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S deleted file mode 100644 index 755254a280..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized cosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16v_cosf) - .type _ZGVeN16v_cosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16v_cosf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16v_cosf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16v_cosf) - -#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper -#include "../svml_s_cosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c new file mode 100644 index 0000000000..5bd0441b16 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized cosf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVeN16v_cosf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_cosf, __GI__ZGVeN16v_cosf, + __redirect__ZGVeN16v_cosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S index 5004cd4758..6ea1137b42 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN16v_cosf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf #else /* @@ -225,21 +225,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 #endif END (_ZGVeN16v_cosf_knl) ENTRY (_ZGVeN16v_cosf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf #else /* @@ -440,7 +440,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf vmovss 1156(%rsp,%r15,8), %xmm0 vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 .LBL_2_12: @@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf vmovss 1152(%rsp,%r15,8), %xmm0 vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_2_7 #endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S index ad7de18851..727189f8e6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized cosf, vector length is 4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized cosf, vector length is 4. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4v_cosf) - .type _ZGVbN4v_cosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4v_cosf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4v_cosf_sse2(%rip), %rax - ret -END (_ZGVbN4v_cosf) -libmvec_hidden_def (_ZGVbN4v_cosf) - #define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2 #include "../svml_s_cosf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c new file mode 100644 index 0000000000..dde470af5d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized cosf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN4v_cosf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_cosf, __GI__ZGVbN4v_cosf, + __redirect__ZGVbN4v_cosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S index d23ff72a30..f4e0553bb3 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -211,7 +211,7 @@ ENTRY (_ZGVbN4v_cosf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -220,7 +220,7 @@ ENTRY (_ZGVbN4v_cosf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S index 602c70e324..1e1a5540c3 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized cosf, vector length is 8. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized cosf, vector length is 8. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8v_cosf) - .type _ZGVdN8v_cosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8v_cosf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8v_cosf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8v_cosf) -libmvec_hidden_def (_ZGVdN8v_cosf) - #define _ZGVdN8v_cosf _ZGVdN8v_cosf_sse_wrapper #include "../svml_s_cosf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c new file mode 100644 index 0000000000..56531b215a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized cosf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_cosf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_cosf, __GI__ZGVdN8v_cosf, + __redirect__ZGVdN8v_cosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S index 513f3c0a29..dbff4a7b7e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -197,7 +197,7 @@ ENTRY (_ZGVdN8v_cosf_avx2) vmovss 324(%rsp,%r15,8), %xmm0 vzeroupper - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -207,7 +207,7 @@ ENTRY (_ZGVdN8v_cosf_avx2) vmovss 320(%rsp,%r15,8), %xmm0 vzeroupper - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S new file mode 100644 index 0000000000..e0b7fd787f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S @@ -0,0 +1,23 @@ +/* AVX2 version of vectorized expf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <init-arch.h> + +#define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper +#include "../svml_s_expf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S deleted file mode 100644 index f990d36483..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized expf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16v_expf) - .type _ZGVeN16v_expf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16v_expf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16v_expf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16v_expf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16v_expf) - -#define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper -#include "../svml_s_expf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c new file mode 100644 index 0000000000..d358d93546 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized expf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16v_expf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_expf, __GI__ZGVeN16v_expf, + __redirect__ZGVeN16v_expf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S index 7eb7a1b775..89ba0df28f 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN16v_expf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_expf #else /* @@ -212,14 +212,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 @@ -227,7 +227,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf END (_ZGVeN16v_expf_knl) ENTRY (_ZGVeN16v_expf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_expf #else /* @@ -422,7 +422,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -433,7 +433,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 
1216(%rsp,%r15,8) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S index 2fbe6d475e..8f57e4bbd9 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized expf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized expf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4v_expf) - .type _ZGVbN4v_expf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4v_expf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4v_expf_sse2(%rip), %rax - ret -END (_ZGVbN4v_expf) -libmvec_hidden_def (_ZGVbN4v_expf) - #define _ZGVbN4v_expf _ZGVbN4v_expf_sse2 #include "../svml_s_expf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c new file mode 100644 index 0000000000..82befe0b5d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized expf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN4v_expf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_expf, __GI__ZGVbN4v_expf, + __redirect__ZGVbN4v_expf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S index c6f91e8dc1..254ec94096 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function expf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -195,7 +195,7 @@ ENTRY (_ZGVbN4v_expf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -204,7 +204,7 @@ ENTRY (_ZGVbN4v_expf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S index 7d19bb423d..459699c80c 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized expf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized expf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8v_expf) - .type _ZGVdN8v_expf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8v_expf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8v_expf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8v_expf) -libmvec_hidden_def (_ZGVdN8v_expf) - #define _ZGVdN8v_expf _ZGVdN8v_expf_sse_wrapper #include "../svml_s_expf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c new file mode 100644 index 0000000000..0b8a47ede0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized expf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_expf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_expf, __GI__ZGVdN8v_expf, + __redirect__ZGVdN8v_expf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S index c6be6954f7..ae1d5317e4 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -184,7 +184,7 @@ ENTRY(_ZGVdN8v_expf_avx2) vmovss 324(%rsp,%r15,8), %xmm0 vzeroupper - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -194,7 +194,7 @@ ENTRY(_ZGVdN8v_expf_avx2) vmovss 320(%rsp,%r15,8), %xmm0 vzeroupper - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S new file mode 100644 index 0000000000..b23bd12fa0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized logf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper +#include "../svml_s_logf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S deleted file mode 100644 index 9efb2fb7df..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized logf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16v_logf) - .type _ZGVeN16v_logf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16v_logf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16v_logf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16v_logf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16v_logf) - -#define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper -#include "../svml_s_logf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c new file mode 100644 index 0000000000..fec61883b4 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized logf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16v_logf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_logf, __GI__ZGVeN16v_logf, + __redirect__ZGVeN16v_logf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S index 6209058381..4cf0a96fe4 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN16v_logf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_logf #else /* @@ -197,21 +197,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 #endif END (_ZGVeN16v_logf_knl) ENTRY (_ZGVeN16v_logf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_logf #else /* @@ -391,7 +391,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -402,7 +402,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S index c85615ac25..2c2331e1d8 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized logf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized logf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4v_logf) - .type _ZGVbN4v_logf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4v_logf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4v_logf_sse2(%rip), %rax - ret -END (_ZGVbN4v_logf) -libmvec_hidden_def (_ZGVbN4v_logf) - #define _ZGVbN4v_logf _ZGVbN4v_logf_sse2 #include "../svml_s_logf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c new file mode 100644 index 0000000000..f249c351bd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized logf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN4v_logf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_logf, __GI__ZGVbN4v_logf, + __redirect__ZGVbN4v_logf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S index 1ce9838513..651eb5eb1a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function logf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -177,7 +177,7 @@ ENTRY (_ZGVbN4v_logf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -186,7 +186,7 @@ ENTRY (_ZGVbN4v_logf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S index 8f6d83dd56..862379277b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized logf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized logf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8v_logf) - .type _ZGVdN8v_logf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8v_logf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8v_logf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8v_logf) -libmvec_hidden_def (_ZGVdN8v_logf) - #define _ZGVdN8v_logf _ZGVdN8v_logf_sse_wrapper #include "../svml_s_logf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c new file mode 100644 index 0000000000..dbd29657ca --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized logf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_logf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_logf, __GI__ZGVdN8v_logf, + __redirect__ZGVdN8v_logf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S index 91fb549ce6..c7f5448fcb 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -166,7 +166,7 @@ ENTRY(_ZGVdN8v_logf_avx2) vmovss 324(%rsp,%r15,8), %xmm0 vzeroupper - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -176,7 +176,7 @@ ENTRY(_ZGVdN8v_logf_avx2) vmovss 320(%rsp,%r15,8), %xmm0 vzeroupper - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S new file mode 100644 index 0000000000..de705c8632 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized powf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper +#include "../svml_s_powf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S deleted file mode 100644 index 80048ce977..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized powf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16vv_powf) - .type _ZGVeN16vv_powf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16vv_powf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16vv_powf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16vv_powf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16vv_powf) - -#define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper -#include "../svml_s_powf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c new file mode 100644 index 0000000000..91ea810441 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized powf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16vv_powf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16vv_powf, __GI__ZGVeN16vv_powf, + __redirect__ZGVeN16vv_powf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S index 45d48723af..bdcd50afe1 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -82,7 +82,7 @@ .text ENTRY (_ZGVeN16vv_powf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf #else pushq %rbp @@ -344,7 +344,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 vmovss 1220(%rsp,%r15,8), %xmm1 - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 1284(%rsp,%r15,8) jmp .LBL_1_8 @@ -352,14 +352,14 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 vmovss 1216(%rsp,%r15,8), %xmm1 - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 1280(%rsp,%r15,8) jmp .LBL_1_7 #endif END (_ZGVeN16vv_powf_knl) ENTRY (_ZGVeN16vv_powf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf #else pushq %rbp @@ -629,7 +629,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf vmovss 1156(%rsp,%r15,8), %xmm1 vzeroupper vmovss 1092(%rsp,%r15,8), %xmm0 - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -638,7 +638,7 @@ WRAPPER_IMPL_AVX512_ff 
_ZGVdN8vv_powf vmovss 1152(%rsp,%r15,8), %xmm1 vzeroupper vmovss 1088(%rsp,%r15,8), %xmm0 - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_2_7 #endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S index b46821189b..b6789a621d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized powf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized powf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4vv_powf) - .type _ZGVbN4vv_powf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4vv_powf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4vv_powf_sse2(%rip), %rax - ret -END (_ZGVbN4vv_powf) -libmvec_hidden_def (_ZGVbN4vv_powf) - #define _ZGVbN4vv_powf _ZGVbN4vv_powf_sse2 #include "../svml_s_powf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c new file mode 100644 index 0000000000..8149d7c991 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized powf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN4vv_powf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4vv_powf, __GI__ZGVbN4vv_powf, + __redirect__ZGVbN4vv_powf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S index 420f98c6a6..bc59545c98 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function powf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -356,7 +356,7 @@ ENTRY (_ZGVbN4vv_powf_sse4) movss 68(%rsp,%r15,8), %xmm0 movss 132(%rsp,%r15,8), %xmm1 - call powf@PLT + call JUMPTARGET(__powf_finite) movss %xmm0, 196(%rsp,%r15,8) jmp .LBL_1_8 @@ -366,7 +366,7 @@ ENTRY (_ZGVbN4vv_powf_sse4) movss 64(%rsp,%r15,8), %xmm0 movss 128(%rsp,%r15,8), %xmm1 - call powf@PLT + call JUMPTARGET(__powf_finite) movss %xmm0, 192(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S index 945908a2ff..48da6d25c7 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized powf. 
- Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized powf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8vv_powf) - .type _ZGVdN8vv_powf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8vv_powf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8vv_powf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8vv_powf) -libmvec_hidden_def (_ZGVdN8vv_powf) - #define _ZGVdN8vv_powf _ZGVdN8vv_powf_sse_wrapper #include "../svml_s_powf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c new file mode 100644 index 0000000000..0da188180e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized powf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#define SYMBOL_NAME _ZGVdN8vv_powf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8vv_powf, __GI__ZGVdN8vv_powf, + __redirect__ZGVdN8vv_powf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S index 4446859130..53a4b4bc2b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -317,7 +317,7 @@ ENTRY(_ZGVdN8vv_powf_avx2) vmovss 132(%rsp,%r15,8), %xmm1 vzeroupper - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 196(%rsp,%r15,8) jmp .LBL_1_8 @@ -328,7 +328,7 @@ ENTRY(_ZGVdN8vv_powf_avx2) vmovss 128(%rsp,%r15,8), %xmm1 vzeroupper - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 192(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S new file mode 100644 index 0000000000..c677e3f1cf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized sincosf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper +#include "../svml_s_sincosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S deleted file mode 100644 index 16cee0c676..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized sincosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16vvv_sincosf) - .type _ZGVeN16vvv_sincosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16vvv_sincosf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16vvv_sincosf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16vvv_sincosf) - -#define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper -#include "../svml_s_sincosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c new file mode 100644 index 0000000000..b753be6bbd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincosf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16vvv_sincosf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16vvv_sincosf, __GI__ZGVeN16vvv_sincosf, + __redirect__ZGVeN16vvv_sincosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S index 758aeeaeed..5fa4bc412a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -41,7 +41,7 @@ b) Calculate 2 polynomials for sin and cos: RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3)))); RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4)))); - c) Swap RS & RC if if first bit of obtained value after + c) Swap RS & RC if first bit of obtained value after Right Shifting is set to 1. Using And, Andnot & Or operations. 3) Destination sign setting a) Set shifted destination sign using XOR operation: @@ -49,9 +49,9 @@ R2 = XOR( RC, SC ). 
*/ .text -ENTRY (_ZGVeN16vvv_sincosf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT -WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf +ENTRY (_ZGVeN16vl4l4_sincosf_knl) +#ifndef HAVE_AVX512DQ_ASM_SUPPORT +WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf #else pushq %rbp cfi_adjust_cfa_offset (8) @@ -243,12 +243,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1220(%rsp,%r15,8) vmovss 1156(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1284(%rsp,%r15,8) jmp .LBL_1_8 @@ -257,20 +257,21 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1216(%rsp,%r15,8) vmovss 1152(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1280(%rsp,%r15,8) jmp .LBL_1_7 #endif -END (_ZGVeN16vvv_sincosf_knl) +END (_ZGVeN16vl4l4_sincosf_knl) +libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_knl) -ENTRY (_ZGVeN16vvv_sincosf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +ENTRY (_ZGVeN16vl4l4_sincosf_skx) +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf #else pushq %rbp @@ -470,12 +471,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1220(%rsp,%r15,8) vmovss 1156(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1284(%rsp,%r15,8) jmp .LBL_2_8 @@ -486,16 +487,266 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1216(%rsp,%r15,8) vmovss 1152(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1280(%rsp,%r15,8) jmp .LBL_2_7 #endif +END (_ZGVeN16vl4l4_sincosf_skx) +libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx) + +/* Wrapper between vvv and vl4l4 vector variants. 
*/ +.macro WRAPPER_AVX512_vvv_vl4l4 callee +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-64, %rsp + subq $384, %rsp + vmovups %zmm1, 128(%rsp) + lea (%rsp), %rdi + vmovups %zmm2, 192(%rdi) + vmovups %zmm3, 256(%rdi) + vmovups %zmm4, 320(%rdi) + lea 64(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 128(%rsp), %rdx + movq 136(%rsp), %rsi + movq 144(%rsp), %r8 + movq 152(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 160(%rsp), %rax + movq 168(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 176(%rsp), %rdi + movq 184(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, (%rax) + movl %edx, (%rcx) + movq 192(%rsp), %r11 + movq 200(%rsp), %rdx + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 208(%rsp), %rsi + movq 216(%rsp), %r8 + movl 32(%rsp), %r10d + movl 36(%rsp), %eax + movl 40(%rsp), %ecx + movl 44(%rsp), %edi + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 224(%rsp), %r10 + movq 232(%rsp), %rax + movl %ecx, (%rsi) + movl %edi, (%r8) + movq 240(%rsp), %rcx + movq 248(%rsp), %rdi + movl 48(%rsp), %r9d + movl 52(%rsp), %r11d + movl 56(%rsp), %edx + movl 60(%rsp), %esi + movl %r9d, (%r10) + movl %r11d, (%rax) + movq 256(%rsp), %r9 + movq 264(%rsp), %r11 + movl %edx, (%rcx) + movl %esi, (%rdi) + movq 272(%rsp), %rdx + movq 280(%rsp), %rsi + movl 64(%rsp), %r8d + movl 68(%rsp), %r10d + movl 72(%rsp), %eax + movl 76(%rsp), %ecx + movl %r8d, (%r9) + movl %r10d, (%r11) + movq 288(%rsp), %r8 + movq 296(%rsp), %r10 + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 304(%rsp), %rax + movq 312(%rsp), %rcx + movl 80(%rsp), %edi + movl 84(%rsp), %r9d + movl 88(%rsp), %r11d + movl 92(%rsp), %edx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 320(%rsp), %rdi + movq 328(%rsp), %r9 + movl %r11d, (%rax) + movl %edx, (%rcx) + 
movq 336(%rsp), %r11 + movq 344(%rsp), %rdx + movl 96(%rsp), %esi + movl 100(%rsp), %r8d + movl 104(%rsp), %r10d + movl 108(%rsp), %eax + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 352(%rsp), %rsi + movq 360(%rsp), %r8 + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 368(%rsp), %r10 + movq 376(%rsp), %rax + movl 112(%rsp), %ecx + movl 116(%rsp), %edi + movl 120(%rsp), %r9d + movl 124(%rsp), %r11d + movl %ecx, (%rsi) + movl %edi, (%r8) + movl %r9d, (%r10) + movl %r11d, (%rax) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-64, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x78,0x6 + leal -112(%rbp), %esi + leal -176(%rbp), %edi + subl $296, %esp + vmovdqa64 %zmm1, -240(%ebp) + vmovdqa64 %zmm2, -304(%ebp) + call HIDDEN_JUMPTARGET(\callee) + movl -240(%ebp), %eax + vmovss -176(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -236(%ebp), %eax + vmovss -172(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -232(%ebp), %eax + vmovss -168(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -228(%ebp), %eax + vmovss -164(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -224(%ebp), %eax + vmovss -160(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -220(%ebp), %eax + vmovss -156(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -216(%ebp), %eax + vmovss -152(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -212(%ebp), %eax + vmovss -148(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -208(%ebp), %eax + vmovss -144(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -204(%ebp), %eax + vmovss -140(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -200(%ebp), %eax + vmovss -136(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -196(%ebp), %eax + vmovss -132(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -192(%ebp), %eax + vmovss -128(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -188(%ebp), %eax + vmovss -124(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl 
-184(%ebp), %eax + vmovss -120(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -180(%ebp), %eax + vmovss -116(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -304(%ebp), %eax + vmovss -112(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -300(%ebp), %eax + vmovss -108(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -296(%ebp), %eax + vmovss -104(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -292(%ebp), %eax + vmovss -100(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -288(%ebp), %eax + vmovss -96(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -284(%ebp), %eax + vmovss -92(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -280(%ebp), %eax + vmovss -88(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -276(%ebp), %eax + vmovss -84(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -272(%ebp), %eax + vmovss -80(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -268(%ebp), %eax + vmovss -76(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -264(%ebp), %eax + vmovss -72(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -260(%ebp), %eax + vmovss -68(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -256(%ebp), %eax + vmovss -64(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -252(%ebp), %eax + vmovss -60(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -248(%ebp), %eax + vmovss -56(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -244(%ebp), %eax + vmovss -52(%ebp), %xmm0 + vmovss %xmm0, (%eax) + addl $296, %esp + popq %r10 + .cfi_def_cfa 10, 0 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + +ENTRY (_ZGVeN16vvv_sincosf_knl) +WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_knl +END (_ZGVeN16vvv_sincosf_knl) + +ENTRY (_ZGVeN16vvv_sincosf_skx) +WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_skx END (_ZGVeN16vvv_sincosf_skx) .section .rodata, "a" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S index d72b4049e2..cc718b3a2e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S @@ -1,5 +1,5 @@ -/* 
Multiple versions of vectorized sincosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized sincosf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4vvv_sincosf) - .type _ZGVbN4vvv_sincosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4vvv_sincosf_sse2(%rip), %rax - ret -END (_ZGVbN4vvv_sincosf) -libmvec_hidden_def (_ZGVbN4vvv_sincosf) - #define _ZGVbN4vvv_sincosf _ZGVbN4vvv_sincosf_sse2 #include "../svml_s_sincosf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c new file mode 100644 index 0000000000..705d96a8fb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincosf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN4vvv_sincosf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4vvv_sincosf, __GI__ZGVbN4vvv_sincosf, + __redirect__ZGVbN4vvv_sincosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S index 643fc0ca3b..d758ceeb30 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_s_trig_data.h" .text -ENTRY (_ZGVbN4vvv_sincosf_sse4) +ENTRY (_ZGVbN4vl4l4_sincosf_sse4) /* ALGORITHM DESCRIPTION: @@ -42,7 +42,7 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4) b) Calculate 2 polynomials for sin and cos: RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3)))); RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4)))); - c) Swap RS & RC if if first bit of obtained value after + c) Swap RS & RC if first bit of obtained value after Right Shifting is set to 1. Using And, Andnot & Or operations. 
3) Destination sign setting a) Set shifted destination sign using XOR operation: @@ -241,12 +241,12 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4) movzbl %r12b, %r15d movss 132(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) movss %xmm0, 196(%rsp,%r15,8) movss 132(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -255,14 +255,92 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4) movzbl %r12b, %r15d movss 128(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) movss %xmm0, 192(%rsp,%r15,8) movss 128(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 +END (_ZGVbN4vl4l4_sincosf_sse4) +libmvec_hidden_def(_ZGVbN4vl4l4_sincosf_sse4) + +/* vvv version implemented with wrapper to vl4l4 variant. */ +ENTRY (_ZGVbN4vvv_sincosf_sse4) +#ifndef __ILP32__ + subq $104, %rsp + .cfi_def_cfa_offset 112 + movdqu %xmm1, 32(%rsp) + lea (%rsp), %rdi + movdqu %xmm2, 48(%rdi) + lea 16(%rsp), %rsi + movdqu %xmm3, 48(%rsi) + movdqu %xmm4, 64(%rsi) + call HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4) + movq 32(%rsp), %rdx + movq 40(%rsp), %rsi + movq 48(%rsp), %r8 + movq 56(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 64(%rsp), %rax + movq 72(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 80(%rsp), %rdi + movq 88(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, (%rax) + movl %edx, (%rcx) + movl %esi, (%rdi) + movl %r8d, (%r9) + addq $104, %rsp + .cfi_def_cfa_offset 8 + ret +#else + subl $72, %esp + .cfi_def_cfa_offset 80 + leal 48(%rsp), %esi + movaps %xmm1, 16(%esp) + leal 32(%rsp), %edi + movaps %xmm2, (%esp) + call HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4) + movl 16(%esp), %eax + movss 32(%esp), %xmm0 + movss %xmm0, (%eax) + movl 20(%esp), %eax + movss 36(%esp), %xmm0 + movss %xmm0, (%eax) + movl 24(%esp), %eax + movss 
40(%esp), %xmm0 + movss %xmm0, (%eax) + movl 28(%esp), %eax + movss 44(%esp), %xmm0 + movss %xmm0, (%eax) + movl (%esp), %eax + movss 48(%esp), %xmm0 + movss %xmm0, (%eax) + movl 4(%esp), %eax + movss 52(%esp), %xmm0 + movss %xmm0, (%eax) + movl 8(%esp), %eax + movss 56(%esp), %xmm0 + movss %xmm0, (%eax) + movl 12(%esp), %eax + movss 60(%esp), %xmm0 + movss %xmm0, (%eax) + addl $72, %esp + .cfi_def_cfa_offset 8 + ret +#endif END (_ZGVbN4vvv_sincosf_sse4) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S index 0123b8024e..348d1e6619 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized sincosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized sincosf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8vvv_sincosf) - .type _ZGVdN8vvv_sincosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8vvv_sincosf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8vvv_sincosf) -libmvec_hidden_def (_ZGVdN8vvv_sincosf) - #define _ZGVdN8vvv_sincosf _ZGVdN8vvv_sincosf_sse_wrapper #include "../svml_s_sincosf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c new file mode 100644 index 0000000000..74f3d3f041 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincosf, vector length is 8. 
+ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVdN8vvv_sincosf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8vvv_sincosf, __GI__ZGVdN8vvv_sincosf, + __redirect__ZGVdN8vvv_sincosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S index f2a0ba7116..8b4b92dd94 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_s_trig_data.h" .text -ENTRY(_ZGVdN8vvv_sincosf_avx2) +ENTRY (_ZGVdN8vl4l4_sincosf_avx2) /* ALGORITHM DESCRIPTION: @@ -42,7 +42,7 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2) b) Calculate 2 polynomials for sin and cos: RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3)))); RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4)))); - c) Swap RS & RC if if first bit of obtained value after + c) Swap RS & RC if first bit of obtained value after Right Shifting is set to 1. Using And, Andnot & Or operations. 3) Destination sign setting a) Set shifted destination sign using XOR operation: @@ -213,12 +213,12 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2) vmovss 260(%rsp,%r15,8), %xmm0 vzeroupper - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 324(%rsp,%r15,8) vmovss 260(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -228,14 +228,162 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2) vmovss 256(%rsp,%r15,8), %xmm0 vzeroupper - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 320(%rsp,%r15,8) vmovss 256(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 -END(_ZGVdN8vvv_sincosf_avx2) +END (_ZGVdN8vl4l4_sincosf_avx2) +libmvec_hidden_def(_ZGVdN8vl4l4_sincosf_avx2) + +/* vvv version implemented with wrapper to vl4l4 variant. 
*/ +ENTRY (_ZGVdN8vvv_sincosf_avx2) +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-32, %rsp + subq $192, %rsp + vmovdqu %ymm1, 64(%rsp) + lea (%rsp), %rdi + vmovdqu %ymm2, 96(%rdi) + vmovdqu %ymm3, 128(%rdi) + vmovdqu %ymm4, 160(%rdi) + lea 32(%rsp), %rsi + call HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2) + movq 64(%rsp), %rdx + movq 72(%rsp), %rsi + movq 80(%rsp), %r8 + movq 88(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 96(%rsp), %rax + movq 104(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 112(%rsp), %rdi + movq 120(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, (%rax) + movl %edx, (%rcx) + movq 128(%rsp), %r11 + movq 136(%rsp), %rdx + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 144(%rsp), %rsi + movq 152(%rsp), %r8 + movl 32(%rsp), %r10d + movl 36(%rsp), %eax + movl 40(%rsp), %ecx + movl 44(%rsp), %edi + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 160(%rsp), %r10 + movq 168(%rsp), %rax + movl %ecx, (%rsi) + movl %edi, (%r8) + movq 176(%rsp), %rcx + movq 184(%rsp), %rdi + movl 48(%rsp), %r9d + movl 52(%rsp), %r11d + movl 56(%rsp), %edx + movl 60(%rsp), %esi + movl %r9d, (%r10) + movl %r11d, (%rax) + movl %edx, (%rcx) + movl %esi, (%rdi) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-32, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x78,0x6 + leal -48(%rbp), %esi + leal -80(%rbp), %edi + subl $136, %esp + vmovdqa %ymm1, -112(%ebp) + vmovdqa %ymm2, -144(%ebp) + call HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2) + vmovdqa -112(%ebp), %xmm0 + vmovq %xmm0, %rax + vmovss -80(%ebp), %xmm0 + 
vmovss %xmm0, (%eax) + vmovss -76(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -104(%ebp), %rax + vmovss -72(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -68(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -96(%ebp), %rax + vmovss -64(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -60(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -88(%ebp), %rax + vmovss -56(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -52(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + vmovdqa -144(%ebp), %xmm0 + vmovq %xmm0, %rax + vmovss -48(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -44(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -136(%ebp), %rax + vmovss -40(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -36(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -128(%ebp), %rax + vmovss -32(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -28(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -120(%ebp), %rax + vmovss -24(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -20(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + addl $136, %esp + popq %r10 + .cfi_def_cfa 10, 0 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +END (_ZGVdN8vvv_sincosf_avx2) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S new file mode 100644 index 0000000000..fa521b9dac --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized sinf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper +#include "../svml_s_sinf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S deleted file mode 100644 index 2212cdd94d..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized sinf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16v_sinf) - .type _ZGVeN16v_sinf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16v_sinf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16v_sinf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16v_sinf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16v_sinf) - -#define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper -#include "../svml_s_sinf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c new file mode 100644 index 0000000000..97e5b58284 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sinf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16v_sinf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_sinf, __GI__ZGVeN16v_sinf, + __redirect__ZGVeN16v_sinf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S index 61d8d3793a..141f747eb5 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY(_ZGVeN16v_sinf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf #else /* @@ -229,21 +229,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 #endif END(_ZGVeN16v_sinf_knl) ENTRY (_ZGVeN16v_sinf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf #else /* @@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -466,7 +466,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S index b31554730d..1d2e65c39d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized sinf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE2 version of vectorized sinf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4v_sinf) - .type _ZGVbN4v_sinf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4v_sinf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4v_sinf_sse2(%rip), %rax - ret -END (_ZGVbN4v_sinf) -libmvec_hidden_def (_ZGVbN4v_sinf) - #define _ZGVbN4v_sinf _ZGVbN4v_sinf_sse2 #include "../svml_s_sinf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c new file mode 100644 index 0000000000..93b8bfebbf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sinf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN4v_sinf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_sinf, __GI__ZGVbN4v_sinf, + __redirect__ZGVbN4v_sinf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S index 5268ab1f09..39a4c92235 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -207,7 +207,7 @@ ENTRY(_ZGVbN4v_sinf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -216,7 +216,7 @@ ENTRY(_ZGVbN4v_sinf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S index 47fe0a4adc..f2af3a0b4b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized sinf, vector length is 8. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized sinf, vector length is 8. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8v_sinf) - .type _ZGVdN8v_sinf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX -1: leaq _ZGVdN8v_sinf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8v_sinf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8v_sinf) -libmvec_hidden_def (_ZGVdN8v_sinf) - #define _ZGVdN8v_sinf _ZGVdN8v_sinf_sse_wrapper #include "../svml_s_sinf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c new file mode 100644 index 0000000000..cf13b6647c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sinf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_sinf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_sinf, __GI__ZGVdN8v_sinf, + __redirect__ZGVdN8v_sinf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S index 9fdaadb2e8..5f7a95e9ad 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -201,7 +201,7 @@ ENTRY(_ZGVdN8v_sinf_avx2) vmovss 324(%rsp,%r15,8), %xmm0 vzeroupper - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -211,7 +211,7 @@ ENTRY(_ZGVdN8v_sinf_avx2) vmovss 320(%rsp,%r15,8), %xmm0 vzeroupper - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 |