summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r--sysdeps/x86_64/Implies1
-rw-r--r--sysdeps/x86_64/Makefile82
-rw-r--r--sysdeps/x86_64/__longjmp.S46
-rw-r--r--sysdeps/x86_64/_mcount.S65
-rw-r--r--sysdeps/x86_64/add_n.S2
-rw-r--r--sysdeps/x86_64/addmul_1.S2
-rw-r--r--sysdeps/x86_64/atomic-machine.h13
-rw-r--r--sysdeps/x86_64/backtrace.c133
-rw-r--r--sysdeps/x86_64/bsd-_setjmp.S2
-rw-r--r--sysdeps/x86_64/bsd-setjmp.S2
-rw-r--r--sysdeps/x86_64/cacheinfo.c665
-rw-r--r--sysdeps/x86_64/configure60
-rw-r--r--sysdeps/x86_64/configure.ac46
-rw-r--r--sysdeps/x86_64/crti.S8
-rw-r--r--sysdeps/x86_64/crtn.S2
-rw-r--r--sysdeps/x86_64/dl-irel.h2
-rw-r--r--sysdeps/x86_64/dl-lookupcfg.h5
-rw-r--r--sysdeps/x86_64/dl-machine.h64
-rw-r--r--sysdeps/x86_64/dl-procinfo.c16
-rw-r--r--sysdeps/x86_64/dl-tls.c53
-rw-r--r--sysdeps/x86_64/dl-tls.h8
-rw-r--r--sysdeps/x86_64/dl-tlsdesc.S24
-rw-r--r--sysdeps/x86_64/dl-tlsdesc.h4
-rw-r--r--sysdeps/x86_64/dl-trampoline.S103
-rw-r--r--sysdeps/x86_64/dl-trampoline.h182
-rw-r--r--sysdeps/x86_64/ffs.c2
-rw-r--r--sysdeps/x86_64/ffsll.c2
-rw-r--r--sysdeps/x86_64/fpu/Makefile190
-rw-r--r--sysdeps/x86_64/fpu/dla.h8
-rw-r--r--sysdeps/x86_64/fpu/e_expf.S339
-rw-r--r--sysdeps/x86_64/fpu/e_expl.S10
-rw-r--r--sysdeps/x86_64/fpu/e_log10l.S1
-rw-r--r--sysdeps/x86_64/fpu/e_log2l.S1
-rw-r--r--sysdeps/x86_64/fpu/e_logl.S1
-rw-r--r--sysdeps/x86_64/fpu/e_powl.S45
-rw-r--r--sysdeps/x86_64/fpu/e_scalbl.S13
-rw-r--r--sysdeps/x86_64/fpu/e_sqrt.c2
-rw-r--r--sysdeps/x86_64/fpu/e_sqrtf.c2
-rw-r--r--sysdeps/x86_64/fpu/fclrexcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/fedisblxcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/feenablxcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/fegetenv.c2
-rw-r--r--sysdeps/x86_64/fpu/fegetexcept.c2
-rw-r--r--sysdeps/x86_64/fpu/fegetmode.c28
-rw-r--r--sysdeps/x86_64/fpu/fegetround.c2
-rw-r--r--sysdeps/x86_64/fpu/feholdexcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/fesetenv.c2
-rw-r--r--sysdeps/x86_64/fpu/fesetexcept.c31
-rw-r--r--sysdeps/x86_64/fpu/fesetmode.c50
-rw-r--r--sysdeps/x86_64/fpu/fesetround.c2
-rw-r--r--sysdeps/x86_64/fpu/feupdateenv.c2
-rw-r--r--sysdeps/x86_64/fpu/fgetexcptflg.c2
-rw-r--r--sysdeps/x86_64/fpu/fraiseexcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/fsetexcptflg.c2
-rw-r--r--sysdeps/x86_64/fpu/ftestexcept.c2
-rw-r--r--sysdeps/x86_64/fpu/k_rem_pio2l.c1
-rw-r--r--sysdeps/x86_64/fpu/libm-test-ulps792
-rw-r--r--sysdeps/x86_64/fpu/libm-test-ulps-name1
-rw-r--r--sysdeps/x86_64/fpu/math-tests-arch.h10
-rw-r--r--sysdeps/x86_64/fpu/math_ldbl.h31
-rw-r--r--sysdeps/x86_64/fpu/math_private.h32
-rw-r--r--sysdeps/x86_64/fpu/multiarch/Makefile91
-rw-r--r--sysdeps/x86_64/fpu/multiarch/doasin-fma.c4
-rw-r--r--sysdeps/x86_64/fpu/multiarch/dosincos-fma.c6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_asin-fma.c11
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_asin.c54
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_atan2.c35
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp-avx.c1
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp-fma.c5
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c1
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp.c35
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp2f.c40
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_expf-fma.c3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_expf.c43
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log-avx.c4
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log-fma.c4
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log-fma4.c4
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log.c35
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log2f.c43
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_logf-fma.c3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_logf.c43
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_pow-fma.c5
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c1
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_pow.c34
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_powf-fma.c3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_powf.c46
-rw-r--r--sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c4
-rw-r--r--sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h43
-rw-r--r--sysdeps/x86_64/fpu/multiarch/ifunc-fma.h (renamed from sysdeps/x86_64/fpu/multiarch/s_ceilf.S)30
-rw-r--r--sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h39
-rw-r--r--sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h39
-rw-r--r--sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h45
-rw-r--r--sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h38
-rw-r--r--sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h (renamed from sysdeps/x86_64/fpu/multiarch/s_ceil.S)29
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpa-fma.c14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpatan-fma.c10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpexp-avx.c9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mplog-avx.c8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mplog-fma4.c8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/mptan-fma.c7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_atan-avx.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_atan-fma.c9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_atan.c33
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S (renamed from sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S)15
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_ceil.c31
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S25
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_ceilf.c31
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_cosf.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S25
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_floor.c31
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S25
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_floorf.c31
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_fma.c5
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_fmaf.c5
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S25
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_nearbyint.S38
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_nearbyint.c32
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S25
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S38
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c (renamed from sysdeps/x86_64/fpu/multiarch/s_rint.S)32
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S25
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rint.c31
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S25
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rintf.S38
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rintf.c31
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sin-fma.c11
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sin.c57
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c240
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sincosf.c (renamed from sysdeps/x86_64/fpu/test-float-vlen4.c)20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sinf.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_tan-avx.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_tan-fma.c8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_tan.c33
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_trunc-c.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S (renamed from sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S)15
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_trunc.c31
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_truncf-c.c2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S (renamed from sysdeps/x86_64/multiarch/memmove-avx-unaligned.S)15
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_truncf.c31
-rw-r--r--sysdeps/x86_64/fpu/multiarch/sincos32-fma.c15
-rw-r--r--sysdeps/x86_64/fpu/multiarch/slowexp-avx.c9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c9
-rw-r--r--sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c11
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S23
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c27
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S66
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S108
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S187
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S23
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S283
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S92
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S164
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S37
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S14
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S36
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c28
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S6
-rw-r--r--sysdeps/x86_64/fpu/printf_fphex.c2
-rw-r--r--sysdeps/x86_64/fpu/s_ceill.S18
-rw-r--r--sysdeps/x86_64/fpu/s_copysign.S5
-rw-r--r--sysdeps/x86_64/fpu/s_copysignf.S5
-rw-r--r--sysdeps/x86_64/fpu/s_copysignl.S3
-rw-r--r--sysdeps/x86_64/fpu/s_cosf.S533
-rw-r--r--sysdeps/x86_64/fpu/s_fabs.c5
-rw-r--r--sysdeps/x86_64/fpu/s_fabsf.c5
-rw-r--r--sysdeps/x86_64/fpu/s_fabsl.S5
-rw-r--r--sysdeps/x86_64/fpu/s_floorl.S18
-rw-r--r--sysdeps/x86_64/fpu/s_fmax.S24
-rw-r--r--sysdeps/x86_64/fpu/s_fmaxf.S24
-rw-r--r--sysdeps/x86_64/fpu/s_fmaxl.S34
-rw-r--r--sysdeps/x86_64/fpu/s_fmin.S24
-rw-r--r--sysdeps/x86_64/fpu/s_fminf.S24
-rw-r--r--sysdeps/x86_64/fpu/s_fminl.S32
-rw-r--r--sysdeps/x86_64/fpu/s_llrint.S7
-rw-r--r--sysdeps/x86_64/fpu/s_llrintf.S7
-rw-r--r--sysdeps/x86_64/fpu/s_llrintl.S7
-rw-r--r--sysdeps/x86_64/fpu/s_log1pl.S1
-rw-r--r--sysdeps/x86_64/fpu/s_nearbyintl.S7
-rw-r--r--sysdeps/x86_64/fpu/s_signbit.S2
-rw-r--r--sysdeps/x86_64/fpu/s_signbitf.S2
-rw-r--r--sysdeps/x86_64/fpu/s_sincosf.S10
-rw-r--r--sysdeps/x86_64/fpu/s_sinf.S559
-rw-r--r--sysdeps/x86_64/fpu/s_truncl.S18
-rw-r--r--sysdeps/x86_64/fpu/svml_d_cos2_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_cos4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_cos8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp2_core.S4
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log2_core.S4
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow2_core.S4
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sin2_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sin4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sin8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos2_core.S85
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos4_core.S127
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S122
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos8_core.S170
-rw-r--r--sysdeps/x86_64/fpu/svml_d_trig_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_trig_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_wrapper_impl.h71
-rw-r--r--sysdeps/x86_64/fpu/svml_finite_alias.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_cosf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_cosf4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_cosf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf4_core.S4
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf4_core.S4
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf4_core.S4
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sincosf16_core.S268
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sincosf4_core.S128
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sincosf8_core.S175
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S181
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sinf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sinf4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sinf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_trig_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_trig_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_wrapper_impl.h83
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c25
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-alias.c29
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c43
-rw-r--r--sysdeps/x86_64/fpu/test-double-libmvec-sincos.c44
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c8
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c12
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-avx2.h (renamed from sysdeps/x86_64/fpu/test-double-vlen4-avx2.c)14
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c12
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4.h (renamed from sysdeps/x86_64/fpu/test-double-vlen4.c)13
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c12
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen8.h (renamed from sysdeps/x86_64/fpu/test-double-vlen8.c)13
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c25
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-alias.c29
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c1
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c42
-rw-r--r--sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c44
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c12
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen16.h (renamed from sysdeps/x86_64/fpu/test-float-vlen16.c)13
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c12
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c15
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen8-avx2.h (renamed from sysdeps/x86_64/fpu/test-float-vlen8-avx2.c)14
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c12
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen8.h (renamed from sysdeps/x86_64/fpu/test-float-vlen8.c)13
-rw-r--r--sysdeps/x86_64/fpu/test-libmvec-alias-mod.c66
-rw-r--r--sysdeps/x86_64/fpu/x86_64-math-asm.h2
-rw-r--r--sysdeps/x86_64/hp-timing.h2
-rw-r--r--sysdeps/x86_64/htonl.S2
-rw-r--r--sysdeps/x86_64/ifuncmain8.c2
-rw-r--r--sysdeps/x86_64/ifuncmod8.c3
-rw-r--r--sysdeps/x86_64/jmpbuf-offsets.h2
-rw-r--r--sysdeps/x86_64/jmpbuf-unwind.h2
-rw-r--r--sysdeps/x86_64/ldsodefs.h56
-rw-r--r--sysdeps/x86_64/localplt.data13
-rw-r--r--sysdeps/x86_64/lshift.S2
-rw-r--r--sysdeps/x86_64/machine-gmon.h2
-rw-r--r--sysdeps/x86_64/memchr.S103
-rw-r--r--sysdeps/x86_64/memcmp.S2
-rw-r--r--sysdeps/x86_64/memcopy.h1
-rw-r--r--sysdeps/x86_64/memcpy.S585
-rw-r--r--sysdeps/x86_64/memcpy_chk.S4
-rw-r--r--sysdeps/x86_64/memmove.S71
-rw-r--r--sysdeps/x86_64/memmove_chk.S (renamed from sysdeps/x86_64/fpu/s_fdiml.S)42
-rw-r--r--sysdeps/x86_64/mempcpy.S9
-rw-r--r--sysdeps/x86_64/mempcpy_chk.S4
-rw-r--r--sysdeps/x86_64/memrchr.S38
-rw-r--r--sysdeps/x86_64/memset.S137
-rw-r--r--sysdeps/x86_64/memset_chk.S2
-rw-r--r--sysdeps/x86_64/memusage.h2
-rw-r--r--sysdeps/x86_64/mul_1.S2
-rw-r--r--sysdeps/x86_64/multiarch/Makefile61
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-avx2.h36
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-defines.sym20
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c340
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-memcmp.h45
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-memmove.h81
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-memset.h69
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-sse4_2.h34
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-strcasecmp.h43
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-unaligned-ssse3.h40
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-wmemset.h42
-rw-r--r--sysdeps/x86_64/multiarch/memchr-avx2.S340
-rw-r--r--sysdeps/x86_64/multiarch/memchr-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/memchr.c (renamed from sysdeps/x86_64/multiarch/wcscpy.S)35
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S429
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-sse2.S31
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-sse4.S4
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/memcmp.S67
-rw-r--r--sysdeps/x86_64/multiarch/memcmp.c (renamed from sysdeps/x86_64/multiarch/strspn.S)45
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S376
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S408
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S175
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3-back.S29
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3.S147
-rw-r--r--sysdeps/x86_64/multiarch/memcpy.S89
-rw-r--r--sysdeps/x86_64/multiarch/memcpy.c39
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk-nonshared.S21
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk.S56
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk.c31
-rw-r--r--sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S12
-rw-r--r--sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S405
-rw-r--r--sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S12
-rw-r--r--sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S (renamed from sysdeps/x86_64/fpu/multiarch/s_floorf.S)39
-rw-r--r--sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S565
-rw-r--r--sysdeps/x86_64/multiarch/memmove.c58
-rw-r--r--sysdeps/x86_64/multiarch/memmove_chk-nonshared.S21
-rw-r--r--sysdeps/x86_64/multiarch/memmove_chk.c37
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S4
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-ssse3.S4
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy.S86
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy.c42
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk-nonshared.S21
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk.S56
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk.c31
-rw-r--r--sysdeps/x86_64/multiarch/memrchr-avx2.S359
-rw-r--r--sysdeps/x86_64/multiarch/memrchr-sse2.S (renamed from sysdeps/x86_64/memmove.c)16
-rw-r--r--sysdeps/x86_64/multiarch/memrchr.c31
-rw-r--r--sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S22
-rw-r--r--sysdeps/x86_64/multiarch/memset-avx2.S168
-rw-r--r--sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S6
-rw-r--r--sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S24
-rw-r--r--sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S (renamed from sysdeps/x86_64/multiarch/wmemcmp.S)41
-rw-r--r--sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S274
-rw-r--r--sysdeps/x86_64/multiarch/memset.S64
-rw-r--r--sysdeps/x86_64/multiarch/memset.c35
-rw-r--r--sysdeps/x86_64/multiarch/memset_chk-nonshared.S21
-rw-r--r--sysdeps/x86_64/multiarch/memset_chk.S49
-rw-r--r--sysdeps/x86_64/multiarch/memset_chk.c31
-rw-r--r--sysdeps/x86_64/multiarch/rawmemchr-avx2.S4
-rw-r--r--sysdeps/x86_64/multiarch/rawmemchr-sse2.S29
-rw-r--r--sysdeps/x86_64/multiarch/rawmemchr.c38
-rw-r--r--sysdeps/x86_64/multiarch/sched_cpucount.c2
-rw-r--r--sysdeps/x86_64/multiarch/stpcpy-sse2.S33
-rw-r--r--sysdeps/x86_64/multiarch/stpcpy.S9
-rw-r--r--sysdeps/x86_64/multiarch/stpcpy.c42
-rw-r--r--sysdeps/x86_64/multiarch/stpncpy-c.c9
-rw-r--r--sysdeps/x86_64/multiarch/stpncpy.S8
-rw-r--r--sysdeps/x86_64/multiarch/stpncpy.c38
-rw-r--r--sysdeps/x86_64/multiarch/strcasecmp.c39
-rw-r--r--sysdeps/x86_64/multiarch/strcasecmp_l-avx.S22
-rw-r--r--sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S23
-rw-r--r--sysdeps/x86_64/multiarch/strcasecmp_l-sse4_2.S21
-rw-r--r--sysdeps/x86_64/multiarch/strcasecmp_l.S8
-rw-r--r--sysdeps/x86_64/multiarch/strcasecmp_l.c40
-rw-r--r--sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcat-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strcat-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcat.S85
-rw-r--r--sysdeps/x86_64/multiarch/strcat.c35
-rw-r--r--sysdeps/x86_64/multiarch/strchr-avx2.S254
-rw-r--r--sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S2
-rw-r--r--sysdeps/x86_64/multiarch/strchr-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strchr.S57
-rw-r--r--sysdeps/x86_64/multiarch/strchr.c55
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul-avx2.S3
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul-sse2.S26
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul.c34
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-avx2.S847
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-sse42.S39
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-sse4_2.S21
-rw-r--r--sysdeps/x86_64/multiarch/strcmp.S209
-rw-r--r--sysdeps/x86_64/multiarch/strcmp.c59
-rw-r--r--sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S6
-rw-r--r--sysdeps/x86_64/multiarch/strcpy-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strcpy-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcpy.S99
-rw-r--r--sysdeps/x86_64/multiarch/strcpy.c35
-rw-r--r--sysdeps/x86_64/multiarch/strcspn-c.c4
-rw-r--r--sysdeps/x86_64/multiarch/strcspn-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strcspn.S69
-rw-r--r--sysdeps/x86_64/multiarch/strcspn.c35
-rw-r--r--sysdeps/x86_64/multiarch/strlen-avx2.S393
-rw-r--r--sysdeps/x86_64/multiarch/strlen-sse2.S23
-rw-r--r--sysdeps/x86_64/multiarch/strlen.c34
-rw-r--r--sysdeps/x86_64/multiarch/strncase.c35
-rw-r--r--sysdeps/x86_64/multiarch/strncase_l-avx.S22
-rw-r--r--sysdeps/x86_64/multiarch/strncase_l-sse2.S23
-rw-r--r--sysdeps/x86_64/multiarch/strncase_l-sse4_2.S21
-rw-r--r--sysdeps/x86_64/multiarch/strncase_l.S8
-rw-r--r--sysdeps/x86_64/multiarch/strncase_l.c40
-rw-r--r--sysdeps/x86_64/multiarch/strncat-c.c8
-rw-r--r--sysdeps/x86_64/multiarch/strncat.S5
-rw-r--r--sysdeps/x86_64/multiarch/strncat.c35
-rw-r--r--sysdeps/x86_64/multiarch/strncmp-avx2.S3
-rw-r--r--sysdeps/x86_64/multiarch/strncmp-sse2.S (renamed from sysdeps/x86_64/fpu/test-double-vlen2.c)22
-rw-r--r--sysdeps/x86_64/multiarch/strncmp-sse4_2.S21
-rw-r--r--sysdeps/x86_64/multiarch/strncmp-ssse3.S34
-rw-r--r--sysdeps/x86_64/multiarch/strncmp.S5
-rw-r--r--sysdeps/x86_64/multiarch/strncmp.c60
-rw-r--r--sysdeps/x86_64/multiarch/strncpy-c.c7
-rw-r--r--sysdeps/x86_64/multiarch/strncpy.S5
-rw-r--r--sysdeps/x86_64/multiarch/strncpy.c35
-rw-r--r--sysdeps/x86_64/multiarch/strnlen-avx2.S4
-rw-r--r--sysdeps/x86_64/multiarch/strnlen-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strnlen.c39
-rw-r--r--sysdeps/x86_64/multiarch/strpbrk-c.c12
-rw-r--r--sysdeps/x86_64/multiarch/strpbrk-sse2.S29
-rw-r--r--sysdeps/x86_64/multiarch/strpbrk.S5
-rw-r--r--sysdeps/x86_64/multiarch/strpbrk.c35
-rw-r--r--sysdeps/x86_64/multiarch/strrchr-avx2.S235
-rw-r--r--sysdeps/x86_64/multiarch/strrchr-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strrchr.c34
-rw-r--r--sysdeps/x86_64/multiarch/strspn-c.c4
-rw-r--r--sysdeps/x86_64/multiarch/strspn-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strspn.c35
-rw-r--r--sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/strstr.c2
-rw-r--r--sysdeps/x86_64/multiarch/test-multiarch.c4
-rw-r--r--sysdeps/x86_64/multiarch/varshift.c2
-rw-r--r--sysdeps/x86_64/multiarch/varshift.h2
-rw-r--r--sysdeps/x86_64/multiarch/wcschr-avx2.S3
-rw-r--r--sysdeps/x86_64/multiarch/wcschr-sse2.S30
-rw-r--r--sysdeps/x86_64/multiarch/wcschr.c39
-rw-r--r--sysdeps/x86_64/multiarch/wcscmp-avx2.S4
-rw-r--r--sysdeps/x86_64/multiarch/wcscmp-sse2.S23
-rw-r--r--sysdeps/x86_64/multiarch/wcscmp.c37
-rw-r--r--sysdeps/x86_64/multiarch/wcscpy-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/wcscpy.c44
-rw-r--r--sysdeps/x86_64/multiarch/wcslen-avx2.S4
-rw-r--r--sysdeps/x86_64/multiarch/wcslen-sse2.S26
-rw-r--r--sysdeps/x86_64/multiarch/wcslen.c (renamed from sysdeps/x86_64/fpu/multiarch/s_floor.S)33
-rw-r--r--sysdeps/x86_64/multiarch/wcsncmp-avx2.S5
-rw-r--r--sysdeps/x86_64/multiarch/wcsncmp-sse2.c20
-rw-r--r--sysdeps/x86_64/multiarch/wcsncmp.c31
-rw-r--r--sysdeps/x86_64/multiarch/wcsnlen-avx2.S5
-rw-r--r--sysdeps/x86_64/multiarch/wcsnlen-c.c9
-rw-r--r--sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S5
-rw-r--r--sysdeps/x86_64/multiarch/wcsnlen.c51
-rw-r--r--sysdeps/x86_64/multiarch/wcsrchr-avx2.S3
-rw-r--r--sysdeps/x86_64/multiarch/wcsrchr-sse2.S23
-rw-r--r--sysdeps/x86_64/multiarch/wcsrchr.c29
-rw-r--r--sysdeps/x86_64/multiarch/wmemchr-avx2.S4
-rw-r--r--sysdeps/x86_64/multiarch/wmemchr-sse2.S4
-rw-r--r--sysdeps/x86_64/multiarch/wmemchr.c39
-rw-r--r--sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe.S4
-rw-r--r--sysdeps/x86_64/multiarch/wmemcmp.c30
-rw-r--r--sysdeps/x86_64/multiarch/wmemset.c40
-rw-r--r--sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S21
-rw-r--r--sysdeps/x86_64/multiarch/wmemset_chk.c31
-rw-r--r--sysdeps/x86_64/nptl/Makefile2
-rw-r--r--sysdeps/x86_64/nptl/pthread-offsets.h5
-rw-r--r--sysdeps/x86_64/nptl/pthread_spin_lock.S2
-rw-r--r--sysdeps/x86_64/nptl/pthread_spin_trylock.S2
-rw-r--r--sysdeps/x86_64/nptl/pthread_spin_unlock.S2
-rw-r--r--sysdeps/x86_64/nptl/pthreaddef.h2
-rw-r--r--sysdeps/x86_64/nptl/tcb-offsets.sym6
-rw-r--r--sysdeps/x86_64/nptl/tls.h59
-rw-r--r--sysdeps/x86_64/rawmemchr.S7
-rw-r--r--sysdeps/x86_64/rshift.S2
-rw-r--r--sysdeps/x86_64/rtld-offsets.sym6
-rw-r--r--sysdeps/x86_64/sched_cpucount.c2
-rw-r--r--sysdeps/x86_64/setjmp.S27
-rw-r--r--sysdeps/x86_64/stackinfo.h2
-rw-r--r--sysdeps/x86_64/start.S19
-rw-r--r--sysdeps/x86_64/strcasecmp_l-nonascii.c2
-rw-r--r--sysdeps/x86_64/strcat.S2
-rw-r--r--sysdeps/x86_64/strchr.S2
-rw-r--r--sysdeps/x86_64/strchrnul.S2
-rw-r--r--sysdeps/x86_64/strcmp.S4
-rw-r--r--sysdeps/x86_64/strcpy.S2
-rw-r--r--sysdeps/x86_64/strcspn.S7
-rw-r--r--sysdeps/x86_64/strlen.S63
-rw-r--r--sysdeps/x86_64/strncase_l-nonascii.c2
-rw-r--r--sysdeps/x86_64/strpbrk.S1
-rw-r--r--sysdeps/x86_64/strrchr.S2
-rw-r--r--sysdeps/x86_64/strspn.S2
-rw-r--r--sysdeps/x86_64/strtok.S208
-rw-r--r--sysdeps/x86_64/strtok_r.S5
-rw-r--r--sysdeps/x86_64/sub_n.S2
-rw-r--r--sysdeps/x86_64/submul_1.S2
-rw-r--r--sysdeps/x86_64/sysdep.h55
-rw-r--r--sysdeps/x86_64/tls_get_addr.S61
-rw-r--r--sysdeps/x86_64/tlsdesc.c3
-rw-r--r--sysdeps/x86_64/tlsdesc.sym3
-rw-r--r--sysdeps/x86_64/tst-audit.h2
-rw-r--r--sysdeps/x86_64/tst-audit10-aux.c (renamed from sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S)45
-rw-r--r--sysdeps/x86_64/tst-audit10.c38
-rw-r--r--sysdeps/x86_64/tst-audit4-aux.c39
-rw-r--r--sysdeps/x86_64/tst-audit4.c45
-rw-r--r--sysdeps/x86_64/tst-auditmod10a.c2
-rw-r--r--sysdeps/x86_64/tst-auditmod10b.c4
-rw-r--r--sysdeps/x86_64/tst-auditmod3b.c2
-rw-r--r--sysdeps/x86_64/tst-auditmod4b.c2
-rw-r--r--sysdeps/x86_64/tst-auditmod5b.c2
-rw-r--r--sysdeps/x86_64/tst-auditmod6b.c2
-rw-r--r--sysdeps/x86_64/tst-auditmod6c.c2
-rw-r--r--sysdeps/x86_64/tst-auditmod7b.c2
-rw-r--r--sysdeps/x86_64/tst-avx-aux.c47
-rw-r--r--sysdeps/x86_64/tst-avx.c49
-rw-r--r--sysdeps/x86_64/tst-avx512-aux.c48
-rw-r--r--sysdeps/x86_64/tst-avx512.c57
-rw-r--r--sysdeps/x86_64/tst-avx512mod.c48
-rw-r--r--sysdeps/x86_64/tst-avxmod.c48
-rw-r--r--sysdeps/x86_64/tst-mallocalign1.c2
-rw-r--r--sysdeps/x86_64/tst-platform-1.c29
-rw-r--r--sysdeps/x86_64/tst-platformmod-1.c23
-rw-r--r--sysdeps/x86_64/tst-platformmod-2.c23
-rw-r--r--sysdeps/x86_64/tst-quad1.c2
-rw-r--r--sysdeps/x86_64/tst-quadmod1.S8
-rw-r--r--sysdeps/x86_64/tst-quadmod2.S8
-rw-r--r--sysdeps/x86_64/tst-sse.c46
-rw-r--r--sysdeps/x86_64/tst-ssemod.c46
-rw-r--r--sysdeps/x86_64/tst-stack-align.h2
-rw-r--r--sysdeps/x86_64/tst-x86_64-1.c26
-rw-r--r--sysdeps/x86_64/tst-x86_64mod-1.c22
-rw-r--r--sysdeps/x86_64/wcschr.S2
-rw-r--r--sysdeps/x86_64/wcscmp.S4
-rw-r--r--sysdeps/x86_64/wcslen.S2
-rw-r--r--sysdeps/x86_64/wcsrchr.S2
-rw-r--r--sysdeps/x86_64/wmemset.S1
-rw-r--r--sysdeps/x86_64/wmemset_chk.S33
-rw-r--r--sysdeps/x86_64/wordcopy.c1
-rw-r--r--sysdeps/x86_64/x32/dl-machine.h2
-rw-r--r--sysdeps/x86_64/x32/fpu/s_lrint.S5
-rw-r--r--sysdeps/x86_64/x32/fpu/s_lrintf.S5
-rw-r--r--sysdeps/x86_64/x32/fpu/s_lrintl.S5
-rw-r--r--sysdeps/x86_64/x32/gmp-mparam.h2
-rw-r--r--sysdeps/x86_64/x32/nptl/tls.h46
-rw-r--r--sysdeps/x86_64/x32/sysdep.h2
724 files changed, 16693 insertions, 8898 deletions
diff --git a/sysdeps/x86_64/Implies b/sysdeps/x86_64/Implies
index 811c19a8f2..3d7ded70d2 100644
--- a/sysdeps/x86_64/Implies
+++ b/sysdeps/x86_64/Implies
@@ -1,4 +1,5 @@
x86
+ieee754/float128
ieee754/ldbl-96
ieee754/dbl-64/wordsize-64
ieee754/dbl-64
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 67ed5ba213..9f1562f1b2 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -7,6 +7,10 @@ endif
ifeq ($(subdir),gmon)
sysdep_routines += _mcount
+# We cannot compile _mcount.S with -pg because that would create
+# recursive calls when ENTRY is used. Just copy the normal static
+# object.
+sysdep_noprof += _mcount
endif
ifeq ($(subdir),malloc)
@@ -23,7 +27,7 @@ ifeq ($(subdir),elf)
CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
-mno-mmx)
-sysdep-dl-routines += tlsdesc dl-tlsdesc
+sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr
tests += ifuncmain8
modules-names += ifuncmod8
@@ -39,28 +43,66 @@ $(objpfx)tst-quad2: $(objpfx)tst-quadmod2.so
quad-pie-test += tst-quad1pie tst-quad2pie
tests += $(quad-pie-test)
tests-pie += $(quad-pie-test)
+test-extras += tst-quadmod1pie tst-quadmod2pie
+extra-test-objs += tst-quadmod1pie.o tst-quadmod2pie.o
$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
-tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 tst-audit10
+CFLAGS-tst-quad1pie.c = $(PIE-ccflag)
+CFLAGS-tst-quad2pie.c = $(PIE-ccflag)
+tests += tst-x86_64-1
+modules-names += x86_64/tst-x86_64mod-1
+LDFLAGS-tst-x86_64mod-1.so = -Wl,-soname,tst-x86_64mod-1.so
+ifneq (no,$(have-tunables))
+# Test the state size for XSAVE when XSAVEC is disabled.
+tst-x86_64-1-ENV = GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable
+endif
+
+$(objpfx)tst-x86_64-1: $(objpfx)x86_64/tst-x86_64mod-1.so
+
+ifneq (no,$(have-tunables))
+tests += tst-platform-1
+modules-names += tst-platformmod-1 x86_64/tst-platformmod-2
+CFLAGS-tst-platform-1.c = -mno-avx
+CFLAGS-tst-platformmod-1.c = -mno-avx
+CFLAGS-tst-platformmod-2.c = -mno-avx
+LDFLAGS-tst-platformmod-2.so = -Wl,-soname,tst-platformmod-2.so
+$(objpfx)tst-platform-1: $(objpfx)tst-platformmod-1.so
+$(objpfx)tst-platform-1.out: $(objpfx)x86_64/tst-platformmod-2.so
+# Turn off AVX512F_Usable and AVX2_Usable so that GLRO(dl_platform) is
+# always set to x86_64.
+tst-platform-1-ENV = LD_PRELOAD=$(objpfx)\$$PLATFORM/tst-platformmod-2.so \
+ GLIBC_TUNABLES=glibc.tune.hwcaps=-AVX512F_Usable,-AVX2_Usable
+endif
+
+tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 \
+ tst-audit10 tst-sse tst-avx tst-avx512
+test-extras += tst-audit4-aux tst-audit10-aux \
+ tst-avx-aux tst-avx512-aux
+extra-test-objs += tst-audit4-aux.o tst-audit10-aux.o \
+ tst-avx-aux.o tst-avx512-aux.o
+
+ifeq ($(have-insert),yes)
tests += tst-split-dynreloc
LDFLAGS-tst-split-dynreloc = -Wl,-T,$(..)sysdeps/x86_64/tst-split-dynreloc.lds
tst-split-dynreloc-ENV = LD_BIND_NOW=1
+endif
modules-names += tst-auditmod3a tst-auditmod3b \
tst-auditmod4a tst-auditmod4b \
tst-auditmod5a tst-auditmod5b \
tst-auditmod6a tst-auditmod6b tst-auditmod6c \
tst-auditmod7a tst-auditmod7b \
- tst-auditmod10a tst-auditmod10b
+ tst-auditmod10a tst-auditmod10b \
+ tst-ssemod tst-avxmod tst-avx512mod
$(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
$(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so
-$(objpfx)tst-audit4: $(objpfx)tst-auditmod4a.so
+$(objpfx)tst-audit4: $(objpfx)tst-audit4-aux.o $(objpfx)tst-auditmod4a.so
$(objpfx)tst-audit4.out: $(objpfx)tst-auditmod4b.so
tst-audit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod4b.so
@@ -77,25 +119,49 @@ $(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so
$(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so
tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so
-$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so
+$(objpfx)tst-audit10: $(objpfx)tst-audit10-aux.o $(objpfx)tst-auditmod10a.so
$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
+$(objpfx)tst-sse: $(objpfx)tst-ssemod.so
+$(objpfx)tst-avx: $(objpfx)tst-avx-aux.o $(objpfx)tst-avxmod.so
+$(objpfx)tst-avx512: $(objpfx)tst-avx512-aux.o $(objpfx)tst-avx512mod.so
+
AVX-CFLAGS=-mavx -mno-vzeroupper
-CFLAGS-tst-audit4.c += $(AVX-CFLAGS)
+CFLAGS-tst-audit4-aux.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod4a.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod4b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS)
+CFLAGS-tst-avx-aux.c += $(AVX-CFLAGS)
+CFLAGS-tst-avxmod.c += $(AVX-CFLAGS)
ifeq (yes,$(config-cflags-avx512))
AVX512-CFLAGS = -mavx512f
-CFLAGS-tst-audit10.c += $(AVX512-CFLAGS)
+CFLAGS-tst-audit10-aux.c += $(AVX512-CFLAGS)
CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS)
+CFLAGS-tst-avx512-aux.c += $(AVX512-CFLAGS)
+CFLAGS-tst-avx512mod.c += $(AVX512-CFLAGS)
endif
endif
ifeq ($(subdir),csu)
-gen-as-const-headers += tlsdesc.sym
+gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
endif
+
+$(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os
+ $(make-target-directory)
+ rm -f $@
+ ln $< $@
+
+do-tests-clean common-mostlyclean: tst-x86_64-1-clean
+
+.PHONY: tst-x86_64-1-clean
+tst-x86_64-1-clean:
+ -rm -rf $(objpfx)x86_64
+
+$(objpfx)x86_64/tst-platformmod-2.os: $(objpfx)tst-platformmod-2.os
+ $(make-target-directory)
+ rm -f $@
+ ln $< $@
diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S
index c164626577..d7d123e4bc 100644
--- a/sysdeps/x86_64/__longjmp.S
+++ b/sysdeps/x86_64/__longjmp.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,9 +17,18 @@
#include <sysdep.h>
#include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
#include <asm-syntax.h>
#include <stap-probe.h>
+/* Don't restore shadow stack register if
+ 1. Shadow stack isn't enabled. Or
+ 2. __longjmp is defined for __longjmp_cancel.
+ */
+#if !SHSTK_ENABLED || defined __longjmp
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
/* Jump to the position specified by ENV, causing the
setjmp call there to return VAL, or 1 if VAL is 0.
void __longjmp (__jmp_buf env, int val). */
@@ -42,6 +51,41 @@ ENTRY(__longjmp)
orq %rax, %r9
# endif
#endif
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled. */
+ testl $X86_FEATURE_1_SHSTK, %fs:FEATURE_1_OFFSET
+ jz L(skip_ssp)
+# else
+ xorl %eax, %eax
+# endif
+ /* Check and adjust the Shadow-Stack-Pointer. */
+ /* Get the current ssp. */
+ rdsspq %rax
+ /* And compare it with the saved ssp value. */
+ subq SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
+ je L(skip_ssp)
+ /* Count the number of frames to adjust and adjust it
+ with incssp instruction. The instruction can adjust
+ the ssp by [0..255] value only thus use a loop if
+ the number of frames is bigger than 255. */
+ negq %rax
+ shrq $3, %rax
+ /* NB: We saved Shadow-Stack-Pointer of setjmp. Since we are
+ restoring Shadow-Stack-Pointer of setjmp's caller, we
+ need to unwind shadow stack by one more frame. */
+ addq $1, %rax
+
+ movl $255, %ebx
+L(loop):
+ cmpq %rbx, %rax
+ cmovb %rax, %rbx
+ incsspq %rbx
+ subq %rbx, %rax
+ ja L(loop)
+
+L(skip_ssp):
+#endif
LIBC_PROBE (longjmp, 3, LP_SIZE@%RDI_LP, -4@%esi, LP_SIZE@%RDX_LP)
/* We add unwind information for the target here. */
cfi_def_cfa(%rdi, 0)
diff --git a/sysdeps/x86_64/_mcount.S b/sysdeps/x86_64/_mcount.S
index 5d7edd2a29..a2f4068b61 100644
--- a/sysdeps/x86_64/_mcount.S
+++ b/sysdeps/x86_64/_mcount.S
@@ -1,5 +1,5 @@
/* Machine-specific calling sequence for `mcount' profiling function. x86-64 version.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
Contributed by Andreas Jaeger <aj@suse.de>.
This file is part of the GNU C Library.
@@ -24,81 +24,102 @@
#include <sysdep.h>
- .globl C_SYMBOL_NAME(_mcount)
- .type C_SYMBOL_NAME(_mcount), @function
- .align ALIGNARG(4)
-C_LABEL(_mcount)
+ENTRY(_mcount)
/* Allocate space for 7 registers. */
subq $56,%rsp
+ cfi_adjust_cfa_offset (56)
movq %rax,(%rsp)
+ cfi_rel_offset (rax, 0)
movq %rcx,8(%rsp)
+ cfi_rel_offset (rcx, 8)
movq %rdx,16(%rsp)
+ cfi_rel_offset (rdx, 16)
movq %rsi,24(%rsp)
+ cfi_rel_offset (rsi, 24)
movq %rdi,32(%rsp)
+ cfi_rel_offset (rdi, 32)
movq %r8,40(%rsp)
+ cfi_rel_offset (r8, 40)
movq %r9,48(%rsp)
+ cfi_rel_offset (r9, 48)
/* Setup parameter for __mcount_internal. */
/* selfpc is the return address on the stack. */
movq 56(%rsp),%rsi
/* Get frompc via the frame pointer. */
movq 8(%rbp),%rdi
-#ifdef PIC
- call C_SYMBOL_NAME(__mcount_internal)@PLT
-#else
call C_SYMBOL_NAME(__mcount_internal)
-#endif
/* Pop the saved registers. Please note that `mcount' has no
return value. */
movq 48(%rsp),%r9
+ cfi_restore (r9)
movq 40(%rsp),%r8
+ cfi_restore (r8)
movq 32(%rsp),%rdi
+ cfi_restore (rdi)
movq 24(%rsp),%rsi
+ cfi_restore (rsi)
movq 16(%rsp),%rdx
+ cfi_restore (rdx)
movq 8(%rsp),%rcx
+ cfi_restore (rcx)
movq (%rsp),%rax
+ cfi_restore (rax)
addq $56,%rsp
+ cfi_adjust_cfa_offset (-56)
ret
-
- ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(_mcount))
+END(_mcount)
#undef mcount
weak_alias (_mcount, mcount)
- .globl C_SYMBOL_NAME(__fentry__)
- .type C_SYMBOL_NAME(__fentry__), @function
- .align ALIGNARG(4)
-C_LABEL(__fentry__)
- /* Allocate space for 7 registers. */
+/* __fentry__ is different from _mcount in that it is called before
+ function prolog. This means (among other things) that it has non-standard
+ stack alignment on entry: (%RSP & 0xF) == 0. */
+
+ENTRY(__fentry__)
+ /* Allocate space for 7 registers
+ (+8 bytes for proper stack alignment). */
subq $64,%rsp
+ cfi_adjust_cfa_offset (64)
movq %rax,(%rsp)
+ cfi_rel_offset (rax, 0)
movq %rcx,8(%rsp)
+ cfi_rel_offset (rcx, 8)
movq %rdx,16(%rsp)
+ cfi_rel_offset (rdx, 16)
movq %rsi,24(%rsp)
+ cfi_rel_offset (rsi, 24)
movq %rdi,32(%rsp)
+ cfi_rel_offset (rdi, 32)
movq %r8,40(%rsp)
+ cfi_rel_offset (r8, 40)
movq %r9,48(%rsp)
+ cfi_rel_offset (r9, 48)
/* Setup parameter for __mcount_internal. */
/* selfpc is the return address on the stack. */
movq 64(%rsp),%rsi
/* caller is the return address above it */
movq 72(%rsp),%rdi
-#ifdef PIC
- call C_SYMBOL_NAME(__mcount_internal)@PLT
-#else
call C_SYMBOL_NAME(__mcount_internal)
-#endif
/* Pop the saved registers. Please note that `__fentry__' has no
return value. */
movq 48(%rsp),%r9
+ cfi_restore (r9)
movq 40(%rsp),%r8
+ cfi_restore (r8)
movq 32(%rsp),%rdi
+ cfi_restore (rdi)
movq 24(%rsp),%rsi
+ cfi_restore (rsi)
movq 16(%rsp),%rdx
+ cfi_restore (rdx)
movq 8(%rsp),%rcx
+ cfi_restore (rcx)
movq (%rsp),%rax
+ cfi_restore (rax)
addq $64,%rsp
+ cfi_adjust_cfa_offset (-64)
ret
-
- ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(__fentry__))
+END(__fentry__)
diff --git a/sysdeps/x86_64/add_n.S b/sysdeps/x86_64/add_n.S
index fc99811476..ba9699a2f0 100644
--- a/sysdeps/x86_64/add_n.S
+++ b/sysdeps/x86_64/add_n.S
@@ -1,6 +1,6 @@
/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
- Copyright (C) 2006-2016 Free Software Foundation, Inc.
+ Copyright (C) 2006-2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/addmul_1.S b/sysdeps/x86_64/addmul_1.S
index ab7c2fa701..eefe8004b0 100644
--- a/sysdeps/x86_64/addmul_1.S
+++ b/sysdeps/x86_64/addmul_1.S
@@ -1,6 +1,6 @@
/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
the result to a second limb vector.
- Copyright (C) 2003-2016 Free Software Foundation, Inc.
+ Copyright (C) 2003-2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/atomic-machine.h b/sysdeps/x86_64/atomic-machine.h
index a5b86eb3ce..9d31c64962 100644
--- a/sysdeps/x86_64/atomic-machine.h
+++ b/sysdeps/x86_64/atomic-machine.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -16,10 +16,12 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <stdint.h>
-#include <tls.h> /* For tcbhead_t. */
-#include <libc-internal.h>
+#ifndef _X86_64_ATOMIC_MACHINE_H
+#define _X86_64_ATOMIC_MACHINE_H 1
+#include <stdint.h>
+#include <tls.h> /* For tcbhead_t. */
+#include <libc-pointer-arith.h> /* For cast_to_integer. */
typedef int8_t atomic8_t;
typedef uint8_t uatomic8_t;
@@ -57,6 +59,7 @@ typedef uintmax_t uatomic_max_t;
#define __HAVE_64B_ATOMICS 1
#define USE_ATOMIC_COMPILER_BUILTINS 1
+#define ATOMIC_EXCHANGE_USES_CAS 0
#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
__sync_val_compare_and_swap (mem, oldval, newval)
@@ -475,3 +478,5 @@ typedef uintmax_t uatomic_max_t;
__asm __volatile (LOCK_PREFIX "orl $0, (%%rsp)" ::: "memory")
#define atomic_read_barrier() __asm ("" ::: "memory")
#define atomic_write_barrier() __asm ("" ::: "memory")
+
+#endif /* atomic-machine.h */
diff --git a/sysdeps/x86_64/backtrace.c b/sysdeps/x86_64/backtrace.c
deleted file mode 100644
index e04407c516..0000000000
--- a/sysdeps/x86_64/backtrace.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/* Return backtrace of current program state.
- Copyright (C) 2003-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <libc-lock.h>
-#include <dlfcn.h>
-#include <execinfo.h>
-#include <stdlib.h>
-#include <unwind.h>
-
-struct trace_arg
-{
- void **array;
- _Unwind_Word cfa;
- int cnt;
- int size;
-};
-
-#ifdef SHARED
-static _Unwind_Reason_Code (*unwind_backtrace) (_Unwind_Trace_Fn, void *);
-static _Unwind_Ptr (*unwind_getip) (struct _Unwind_Context *);
-static _Unwind_Word (*unwind_getcfa) (struct _Unwind_Context *);
-static void *libgcc_handle;
-
-
-/* Dummy version in case libgcc_s does not contain the real code. */
-static _Unwind_Word
-dummy_getcfa (struct _Unwind_Context *ctx __attribute__ ((unused)))
-{
- return 0;
-}
-
-
-static void
-init (void)
-{
- libgcc_handle = __libc_dlopen ("libgcc_s.so.1");
-
- if (libgcc_handle == NULL)
- return;
-
- unwind_backtrace = __libc_dlsym (libgcc_handle, "_Unwind_Backtrace");
- unwind_getip = __libc_dlsym (libgcc_handle, "_Unwind_GetIP");
- if (unwind_getip == NULL)
- unwind_backtrace = NULL;
- unwind_getcfa = (__libc_dlsym (libgcc_handle, "_Unwind_GetCFA")
- ?: dummy_getcfa);
-}
-#else
-# define unwind_backtrace _Unwind_Backtrace
-# define unwind_getip _Unwind_GetIP
-# define unwind_getcfa _Unwind_GetCFA
-#endif
-
-static _Unwind_Reason_Code
-backtrace_helper (struct _Unwind_Context *ctx, void *a)
-{
- struct trace_arg *arg = a;
-
- /* We are first called with address in the __backtrace function.
- Skip it. */
- if (arg->cnt != -1)
- {
- arg->array[arg->cnt] = (void *) unwind_getip (ctx);
-
- /* Check whether we make any progress. */
- _Unwind_Word cfa = unwind_getcfa (ctx);
-
- if (arg->cnt > 0 && arg->array[arg->cnt - 1] == arg->array[arg->cnt]
- && cfa == arg->cfa)
- return _URC_END_OF_STACK;
- arg->cfa = cfa;
- }
- if (++arg->cnt == arg->size)
- return _URC_END_OF_STACK;
- return _URC_NO_REASON;
-}
-
-int
-__backtrace (void **array, int size)
-{
- struct trace_arg arg = { .array = array, .cfa = 0, .size = size, .cnt = -1 };
-
- if (size <= 0)
- return 0;
-
-#ifdef SHARED
- __libc_once_define (static, once);
-
- __libc_once (once, init);
- if (unwind_backtrace == NULL)
- return 0;
-#endif
-
- unwind_backtrace (backtrace_helper, &arg);
-
- /* _Unwind_Backtrace seems to put NULL address above
- _start. Fix it up here. */
- if (arg.cnt > 1 && arg.array[arg.cnt - 1] == NULL)
- --arg.cnt;
- return arg.cnt != -1 ? arg.cnt : 0;
-}
-weak_alias (__backtrace, backtrace)
-libc_hidden_def (__backtrace)
-
-
-#ifdef SHARED
-/* Free all resources if necessary. */
-libc_freeres_fn (free_mem)
-{
- unwind_backtrace = NULL;
- if (libgcc_handle != NULL)
- {
- __libc_dlclose (libgcc_handle);
- libgcc_handle = NULL;
- }
-}
-#endif
diff --git a/sysdeps/x86_64/bsd-_setjmp.S b/sysdeps/x86_64/bsd-_setjmp.S
index 1a2a94f1a6..58c997de59 100644
--- a/sysdeps/x86_64/bsd-_setjmp.S
+++ b/sysdeps/x86_64/bsd-_setjmp.S
@@ -1,5 +1,5 @@
/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. x86-64 version.
- Copyright (C) 1994-2016 Free Software Foundation, Inc.
+ Copyright (C) 1994-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/bsd-setjmp.S b/sysdeps/x86_64/bsd-setjmp.S
index 11d9d8daa0..8e3c430dac 100644
--- a/sysdeps/x86_64/bsd-setjmp.S
+++ b/sysdeps/x86_64/bsd-setjmp.S
@@ -1,5 +1,5 @@
/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. x86-64 version.
- Copyright (C) 1994-2016 Free Software Foundation, Inc.
+ Copyright (C) 1994-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
deleted file mode 100644
index 96463df064..0000000000
--- a/sysdeps/x86_64/cacheinfo.c
+++ /dev/null
@@ -1,665 +0,0 @@
-/* x86_64 cache info.
- Copyright (C) 2003-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <cpuid.h>
-#include <init-arch.h>
-
-#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
-#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
-#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
-
-static const struct intel_02_cache_info
-{
- unsigned char idx;
- unsigned char assoc;
- unsigned char linesize;
- unsigned char rel_name;
- unsigned int size;
-} intel_02_known [] =
- {
-#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
- { 0x06, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 8192 },
- { 0x08, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 16384 },
- { 0x09, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 32768 },
- { 0x0a, 2, 32, M(_SC_LEVEL1_DCACHE_SIZE), 8192 },
- { 0x0c, 4, 32, M(_SC_LEVEL1_DCACHE_SIZE), 16384 },
- { 0x0d, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 },
- { 0x0e, 6, 64, M(_SC_LEVEL1_DCACHE_SIZE), 24576 },
- { 0x21, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
- { 0x22, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 },
- { 0x23, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 },
- { 0x25, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
- { 0x29, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
- { 0x2c, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE), 32768 },
- { 0x30, 8, 64, M(_SC_LEVEL1_ICACHE_SIZE), 32768 },
- { 0x39, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 },
- { 0x3a, 6, 64, M(_SC_LEVEL2_CACHE_SIZE), 196608 },
- { 0x3b, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 },
- { 0x3c, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
- { 0x3d, 6, 64, M(_SC_LEVEL2_CACHE_SIZE), 393216 },
- { 0x3e, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
- { 0x3f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
- { 0x41, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 131072 },
- { 0x42, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
- { 0x43, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
- { 0x44, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
- { 0x45, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 2097152 },
- { 0x46, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
- { 0x47, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
- { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE), 3145728 },
- { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE), 4194304 },
- { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 6291456 },
- { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
- { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
- { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
- { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE), 6291456 },
- { 0x60, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 },
- { 0x66, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 8192 },
- { 0x67, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 },
- { 0x68, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 32768 },
- { 0x78, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
- { 0x79, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 },
- { 0x7a, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
- { 0x7b, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
- { 0x7c, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
- { 0x7d, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 2097152 },
- { 0x7f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
- { 0x80, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
- { 0x82, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
- { 0x83, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
- { 0x84, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
- { 0x85, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 2097152 },
- { 0x86, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
- { 0x87, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
- { 0xd0, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 },
- { 0xd1, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 },
- { 0xd2, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
- { 0xd6, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 },
- { 0xd7, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
- { 0xd8, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
- { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
- { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
- { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
- { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
- { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
- { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
- { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
- { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
- { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
- };
-
-#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
-
-static int
-intel_02_known_compare (const void *p1, const void *p2)
-{
- const struct intel_02_cache_info *i1;
- const struct intel_02_cache_info *i2;
-
- i1 = (const struct intel_02_cache_info *) p1;
- i2 = (const struct intel_02_cache_info *) p2;
-
- if (i1->idx == i2->idx)
- return 0;
-
- return i1->idx < i2->idx ? -1 : 1;
-}
-
-
-static long int
-__attribute__ ((noinline))
-intel_check_word (int name, unsigned int value, bool *has_level_2,
- bool *no_level_2_or_3)
-{
- if ((value & 0x80000000) != 0)
- /* The register value is reserved. */
- return 0;
-
- /* Fold the name. The _SC_ constants are always in the order SIZE,
- ASSOC, LINESIZE. */
- int folded_rel_name = (M(name) / 3) * 3;
-
- while (value != 0)
- {
- unsigned int byte = value & 0xff;
-
- if (byte == 0x40)
- {
- *no_level_2_or_3 = true;
-
- if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
- /* No need to look further. */
- break;
- }
- else if (byte == 0xff)
- {
- /* CPUID leaf 0x4 contains all the information. We need to
- iterate over it. */
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- unsigned int round = 0;
- while (1)
- {
- __cpuid_count (4, round, eax, ebx, ecx, edx);
-
- enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
- if (type == null)
- /* That was the end. */
- break;
-
- unsigned int level = (eax >> 5) & 0x7;
-
- if ((level == 1 && type == data
- && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
- || (level == 1 && type == inst
- && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
- || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
- || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
- || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
- {
- unsigned int offset = M(name) - folded_rel_name;
-
- if (offset == 0)
- /* Cache size. */
- return (((ebx >> 22) + 1)
- * (((ebx >> 12) & 0x3ff) + 1)
- * ((ebx & 0xfff) + 1)
- * (ecx + 1));
- if (offset == 1)
- return (ebx >> 22) + 1;
-
- assert (offset == 2);
- return (ebx & 0xfff) + 1;
- }
-
- ++round;
- }
- /* There is no other cache information anywhere else. */
- break;
- }
- else
- {
- if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
- {
- /* Intel reused this value. For family 15, model 6 it
- specifies the 3rd level cache. Otherwise the 2nd
- level cache. */
- unsigned int family = GLRO(dl_x86_cpu_features).family;
- unsigned int model = GLRO(dl_x86_cpu_features).model;
-
- if (family == 15 && model == 6)
- {
- /* The level 3 cache is encoded for this model like
- the level 2 cache is for other models. Pretend
- the caller asked for the level 2 cache. */
- name = (_SC_LEVEL2_CACHE_SIZE
- + (name - _SC_LEVEL3_CACHE_SIZE));
- folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
- }
- }
-
- struct intel_02_cache_info *found;
- struct intel_02_cache_info search;
-
- search.idx = byte;
- found = bsearch (&search, intel_02_known, nintel_02_known,
- sizeof (intel_02_known[0]), intel_02_known_compare);
- if (found != NULL)
- {
- if (found->rel_name == folded_rel_name)
- {
- unsigned int offset = M(name) - folded_rel_name;
-
- if (offset == 0)
- /* Cache size. */
- return found->size;
- if (offset == 1)
- return found->assoc;
-
- assert (offset == 2);
- return found->linesize;
- }
-
- if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
- *has_level_2 = true;
- }
- }
-
- /* Next byte for the next round. */
- value >>= 8;
- }
-
- /* Nothing found. */
- return 0;
-}
-
-
-static long int __attribute__ ((noinline))
-handle_intel (int name, unsigned int maxidx)
-{
- assert (maxidx >= 2);
-
- /* OK, we can use the CPUID instruction to get all info about the
- caches. */
- unsigned int cnt = 0;
- unsigned int max = 1;
- long int result = 0;
- bool no_level_2_or_3 = false;
- bool has_level_2 = false;
-
- while (cnt++ < max)
- {
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- __cpuid (2, eax, ebx, ecx, edx);
-
- /* The low byte of EAX in the first round contain the number of
- rounds we have to make. At least one, the one we are already
- doing. */
- if (cnt == 1)
- {
- max = eax & 0xff;
- eax &= 0xffffff00;
- }
-
- /* Process the individual registers' value. */
- result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
- if (result != 0)
- return result;
-
- result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
- if (result != 0)
- return result;
-
- result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
- if (result != 0)
- return result;
-
- result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
- if (result != 0)
- return result;
- }
-
- if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
- && no_level_2_or_3)
- return -1;
-
- return 0;
-}
-
-
-static long int __attribute__ ((noinline))
-handle_amd (int name)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- __cpuid (0x80000000, eax, ebx, ecx, edx);
-
- /* No level 4 cache (yet). */
- if (name > _SC_LEVEL3_CACHE_LINESIZE)
- return 0;
-
- unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
- if (eax < fn)
- return 0;
-
- __cpuid (fn, eax, ebx, ecx, edx);
-
- if (name < _SC_LEVEL1_DCACHE_SIZE)
- {
- name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
- ecx = edx;
- }
-
- switch (name)
- {
- case _SC_LEVEL1_DCACHE_SIZE:
- return (ecx >> 14) & 0x3fc00;
-
- case _SC_LEVEL1_DCACHE_ASSOC:
- ecx >>= 16;
- if ((ecx & 0xff) == 0xff)
- /* Fully associative. */
- return (ecx << 2) & 0x3fc00;
- return ecx & 0xff;
-
- case _SC_LEVEL1_DCACHE_LINESIZE:
- return ecx & 0xff;
-
- case _SC_LEVEL2_CACHE_SIZE:
- return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
-
- case _SC_LEVEL2_CACHE_ASSOC:
- switch ((ecx >> 12) & 0xf)
- {
- case 0:
- case 1:
- case 2:
- case 4:
- return (ecx >> 12) & 0xf;
- case 6:
- return 8;
- case 8:
- return 16;
- case 10:
- return 32;
- case 11:
- return 48;
- case 12:
- return 64;
- case 13:
- return 96;
- case 14:
- return 128;
- case 15:
- return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
- default:
- return 0;
- }
- /* NOTREACHED */
-
- case _SC_LEVEL2_CACHE_LINESIZE:
- return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
-
- case _SC_LEVEL3_CACHE_SIZE:
- return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;
-
- case _SC_LEVEL3_CACHE_ASSOC:
- switch ((edx >> 12) & 0xf)
- {
- case 0:
- case 1:
- case 2:
- case 4:
- return (edx >> 12) & 0xf;
- case 6:
- return 8;
- case 8:
- return 16;
- case 10:
- return 32;
- case 11:
- return 48;
- case 12:
- return 64;
- case 13:
- return 96;
- case 14:
- return 128;
- case 15:
- return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
- default:
- return 0;
- }
- /* NOTREACHED */
-
- case _SC_LEVEL3_CACHE_LINESIZE:
- return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
-
- default:
- assert (! "cannot happen");
- }
- return -1;
-}
-
-
-/* Get the value of the system variable NAME. */
-long int
-attribute_hidden
-__cache_sysconf (int name)
-{
- if (is_intel)
- return handle_intel (name, max_cpuid);
-
- if (is_amd)
- return handle_amd (name);
-
- // XXX Fill in more vendors.
-
- /* CPU not known, we have no information. */
- return 0;
-}
-
-
-/* Data cache size for use in memory and string routines, typically
- L1 size, rounded to multiple of 256 bytes. */
-long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
-long int __x86_data_cache_size attribute_hidden = 32 * 1024;
-/* Similar to __x86_data_cache_size_half, but not rounded. */
-long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
-/* Similar to __x86_data_cache_size, but not rounded. */
-long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
-/* Shared cache size for use in memory and string routines, typically
- L2 or L3 size, rounded to multiple of 256 bytes. */
-long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
-long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
-/* Similar to __x86_shared_cache_size_half, but not rounded. */
-long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
-/* Similar to __x86_shared_cache_size, but not rounded. */
-long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
-
-#ifndef DISABLE_PREFETCHW
-/* PREFETCHW support flag for use in memory and string routines. */
-int __x86_prefetchw attribute_hidden;
-#endif
-
-
-static void
-__attribute__((constructor))
-init_cacheinfo (void)
-{
- /* Find out what brand of processor. */
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int max_cpuid_ex;
- long int data = -1;
- long int shared = -1;
- unsigned int level;
- unsigned int threads = 0;
-
- if (is_intel)
- {
- data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
-
- /* Try L3 first. */
- level = 3;
- shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
-
- if (shared <= 0)
- {
- /* Try L2 otherwise. */
- level = 2;
- shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
- }
-
- /* Figure out the number of logical threads that share the
- highest cache level. */
- if (max_cpuid >= 4)
- {
- unsigned int family = GLRO(dl_x86_cpu_features).family;
- unsigned int model = GLRO(dl_x86_cpu_features).model;
-
- int i = 0;
-
- /* Query until desired cache level is enumerated. */
- do
- {
- __cpuid_count (4, i++, eax, ebx, ecx, edx);
-
- /* There seems to be a bug in at least some Pentium Ds
- which sometimes fail to iterate all cache parameters.
- Do not loop indefinitely here, stop in this case and
- assume there is no such information. */
- if ((eax & 0x1f) == 0)
- goto intel_bug_no_cache_info;
- }
- while (((eax >> 5) & 0x7) != level);
-
- threads = (eax >> 14) & 0x3ff;
-
- /* If max_cpuid >= 11, THREADS is the maximum number of
- addressable IDs for logical processors sharing the
- cache, instead of the maximum number of threads
- sharing the cache. */
- if (threads && max_cpuid >= 11)
- {
- /* Find the number of logical processors shipped in
- one core and apply count mask. */
- i = 0;
- while (1)
- {
- __cpuid_count (11, i++, eax, ebx, ecx, edx);
-
- int shipped = ebx & 0xff;
- int type = ecx & 0xff0;
- if (shipped == 0 || type == 0)
- break;
- else if (type == 0x200)
- {
- int count_mask;
-
- /* Compute count mask. */
- asm ("bsr %1, %0"
- : "=r" (count_mask) : "g" (threads));
- count_mask = ~(-1 << (count_mask + 1));
- threads = (shipped - 1) & count_mask;
- break;
- }
- }
- }
- threads += 1;
- if (threads > 2 && level == 2 && family == 6)
- {
- switch (model)
- {
- case 0x57:
- /* Knights Landing has L2 cache shared by 2 cores. */
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Silvermont has L2 cache shared by 2 cores. */
- threads = 2;
- break;
- default:
- break;
- }
- }
- }
- else
- {
- intel_bug_no_cache_info:
- /* Assume that all logical threads share the highest cache level. */
-
- threads
- = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
- >> 16) & 0xff);
- }
-
- /* Cap usage of highest cache level to the number of supported
- threads. */
- if (shared > 0 && threads > 0)
- shared /= threads;
- }
- /* This spells out "AuthenticAMD". */
- else if (is_amd)
- {
- data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
- long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
- shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
-
- /* Get maximum extended function. */
- __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
-
- if (shared <= 0)
- /* No shared L3 cache. All we have is the L2 cache. */
- shared = core;
- else
- {
- /* Figure out the number of logical threads that share L3. */
- if (max_cpuid_ex >= 0x80000008)
- {
- /* Get width of APIC ID. */
- __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
- threads = 1 << ((ecx >> 12) & 0x0f);
- }
-
- if (threads == 0)
- {
- /* If APIC ID width is not available, use logical
- processor count. */
- __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
-
- if ((edx & (1 << 28)) != 0)
- threads = (ebx >> 16) & 0xff;
- }
-
- /* Cap usage of highest cache level to the number of
- supported threads. */
- if (threads > 0)
- shared /= threads;
-
- /* Account for exclusive L2 and L3 caches. */
- shared += core;
- }
-
-#ifndef DISABLE_PREFETCHW
- if (max_cpuid_ex >= 0x80000001)
- {
- __cpuid (0x80000001, eax, ebx, ecx, edx);
- /* PREFETCHW || 3DNow! */
- if ((ecx & 0x100) || (edx & 0x80000000))
- __x86_prefetchw = -1;
- }
-#endif
- }
-
- if (data > 0)
- {
- __x86_raw_data_cache_size_half = data / 2;
- __x86_raw_data_cache_size = data;
- /* Round data cache size to multiple of 256 bytes. */
- data = data & ~255L;
- __x86_data_cache_size_half = data / 2;
- __x86_data_cache_size = data;
- }
-
- if (shared > 0)
- {
- __x86_raw_shared_cache_size_half = shared / 2;
- __x86_raw_shared_cache_size = shared;
- /* Round shared cache size to multiple of 256 bytes. */
- shared = shared & ~255L;
- __x86_shared_cache_size_half = shared / 2;
- __x86_shared_cache_size = shared;
- }
-}
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index c72b9d3184..8674d14569 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -1,13 +1,12 @@
# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/x86_64.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5
-$as_echo_n "checking for AVX512 support in assembler... " >&6; }
-if ${libc_cv_asm_avx512+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512DQ support in assembler" >&5
+$as_echo_n "checking for AVX512DQ support in assembler... " >&6; }
+if ${libc_cv_asm_avx512dq+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.s <<\EOF
- vmovdqu64 %zmm0, (%rsp)
vandpd (%rax), %zmm6, %zmm1
EOF
if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
@@ -16,16 +15,16 @@ if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then
- libc_cv_asm_avx512=yes
+ libc_cv_asm_avx512dq=yes
else
- libc_cv_asm_avx512=no
+ libc_cv_asm_avx512dq=no
fi
rm -f conftest*
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5
-$as_echo "$libc_cv_asm_avx512" >&6; }
-if test $libc_cv_asm_avx512 == yes; then
- $as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512dq" >&5
+$as_echo "$libc_cv_asm_avx512dq" >&6; }
+if test $libc_cv_asm_avx512dq = yes; then
+ $as_echo "#define HAVE_AVX512DQ_ASM_SUPPORT 1" >>confdefs.h
fi
@@ -40,7 +39,7 @@ else
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- libc_cv_cc_avx512=$libc_cv_asm_avx512
+ libc_cv_cc_avx512=$libc_cv_asm_avx512dq
else
libc_cv_cc_avx512=no
fi
@@ -77,7 +76,7 @@ rm -f conftest*
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
$as_echo "$libc_cv_asm_mpx" >&6; }
-if test $libc_cv_asm_mpx == yes; then
+if test $libc_cv_asm_mpx = yes; then
$as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
fi
@@ -86,6 +85,41 @@ if test x"$build_mathvec" = xnotset; then
build_mathvec=yes
fi
+if test "$static_pie" = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5
+$as_echo_n "checking for linker static PIE support... " >&6; }
+if ${libc_cv_ld_static_pie+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ .text
+ .global _start
+ .weak foo
+_start:
+ leaq foo(%rip), %rax
+EOF
+ libc_cv_pie_option="-Wl,-pie"
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_ld_static_pie=yes
+ else
+ libc_cv_ld_static_pie=no
+ fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5
+$as_echo "$libc_cv_ld_static_pie" >&6; }
+ if test "$libc_cv_ld_static_pie" != yes; then
+ as_fn_error $? "linker support for static PIE needed" "$LINENO" 5
+ fi
+fi
+
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
-# work around problem with autoconf and empty lines at the end of files
+
+test -n "$critic_missing" && as_fn_error $? "
+*** $critic_missing" "$LINENO" 5
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index 37b1059af3..b7d2c0124f 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -1,25 +1,24 @@
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/x86_64.
-dnl Check if asm supports AVX512.
-AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl
+dnl Check if asm supports AVX512DQ.
+AC_CACHE_CHECK(for AVX512DQ support in assembler, libc_cv_asm_avx512dq, [dnl
cat > conftest.s <<\EOF
- vmovdqu64 %zmm0, (%rsp)
vandpd (%rax), %zmm6, %zmm1
EOF
if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
- libc_cv_asm_avx512=yes
+ libc_cv_asm_avx512dq=yes
else
- libc_cv_asm_avx512=no
+ libc_cv_asm_avx512dq=no
fi
rm -f conftest*])
-if test $libc_cv_asm_avx512 == yes; then
- AC_DEFINE(HAVE_AVX512_ASM_SUPPORT)
+if test $libc_cv_asm_avx512dq = yes; then
+ AC_DEFINE(HAVE_AVX512DQ_ASM_SUPPORT)
fi
dnl Check if -mavx512f works.
AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl
-LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=$libc_cv_asm_avx512], [libc_cv_cc_avx512=no])
+LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=$libc_cv_asm_avx512dq], [libc_cv_cc_avx512=no])
])
if test $libc_cv_cc_avx512 = yes; then
AC_DEFINE(HAVE_AVX512_SUPPORT)
@@ -37,7 +36,7 @@ else
libc_cv_asm_mpx=no
fi
rm -f conftest*])
-if test $libc_cv_asm_mpx == yes; then
+if test $libc_cv_asm_mpx = yes; then
AC_DEFINE(HAVE_MPX_SUPPORT)
fi
@@ -45,7 +44,34 @@ if test x"$build_mathvec" = xnotset; then
build_mathvec=yes
fi
+dnl Check if linker supports static PIE with the fix for
+dnl
+dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782
+dnl
+if test "$static_pie" = yes; then
+ AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl
+cat > conftest.s <<\EOF
+ .text
+ .global _start
+ .weak foo
+_start:
+ leaq foo(%rip), %rax
+EOF
+ libc_cv_pie_option="-Wl,-pie"
+ if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_ld_static_pie=yes
+ else
+ libc_cv_ld_static_pie=no
+ fi
+rm -f conftest*])
+ if test "$libc_cv_ld_static_pie" != yes; then
+ AC_MSG_ERROR([linker support for static PIE needed])
+ fi
+fi
+
dnl It is always possible to access static and hidden symbols in an
dnl position independent way.
AC_DEFINE(PI_STATIC_AND_HIDDEN)
-# work around problem with autoconf and empty lines at the end of files
+
+test -n "$critic_missing" && AC_MSG_ERROR([
+*** $critic_missing])
diff --git a/sysdeps/x86_64/crti.S b/sysdeps/x86_64/crti.S
index a34525974a..067ac14884 100644
--- a/sysdeps/x86_64/crti.S
+++ b/sysdeps/x86_64/crti.S
@@ -1,5 +1,5 @@
/* Special .init and .fini section support for x86-64.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -58,15 +58,17 @@
.section .init,"ax",@progbits
.p2align 2
.globl _init
+ .hidden _init
.type _init, @function
_init:
+ _CET_ENDBR
/* Maintain 16-byte stack alignment for called functions. */
subq $8, %rsp
#if PREINIT_FUNCTION_WEAK
movq PREINIT_FUNCTION@GOTPCREL(%rip), %rax
testq %rax, %rax
je .Lno_weak_fn
- call PREINIT_FUNCTION@PLT
+ call *%rax
.Lno_weak_fn:
#else
call PREINIT_FUNCTION
@@ -75,6 +77,8 @@ _init:
.section .fini,"ax",@progbits
.p2align 2
.globl _fini
+ .hidden _fini
.type _fini, @function
_fini:
+ _CET_ENDBR
subq $8, %rsp
diff --git a/sysdeps/x86_64/crtn.S b/sysdeps/x86_64/crtn.S
index b2fa0c6765..2463d742fd 100644
--- a/sysdeps/x86_64/crtn.S
+++ b/sysdeps/x86_64/crtn.S
@@ -1,5 +1,5 @@
/* Special .init and .fini section support for x86-64.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/dl-irel.h b/sysdeps/x86_64/dl-irel.h
index 80d7d1dd78..6ecc50fb42 100644
--- a/sysdeps/x86_64/dl-irel.h
+++ b/sysdeps/x86_64/dl-irel.h
@@ -1,6 +1,6 @@
/* Machine-dependent ELF indirect relocation inline functions.
x86-64 version.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/dl-lookupcfg.h b/sysdeps/x86_64/dl-lookupcfg.h
index 033b475889..5399cf25ab 100644
--- a/sysdeps/x86_64/dl-lookupcfg.h
+++ b/sysdeps/x86_64/dl-lookupcfg.h
@@ -1,5 +1,5 @@
/* Configuration of lookup functions.
- Copyright (C) 2005-2016 Free Software Foundation, Inc.
+ Copyright (C) 2005-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -26,7 +26,6 @@
struct link_map;
-extern void _dl_unmap (struct link_map *map)
- internal_function attribute_hidden;
+extern void _dl_unmap (struct link_map *map) attribute_hidden;
#define DL_UNMAP(map) _dl_unmap (map)
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 980ca73cf2..1942ed5061 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -1,5 +1,5 @@
/* Machine-dependent ELF dynamic relocation inline functions. x86-64 version.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>.
@@ -66,9 +66,9 @@ static inline int __attribute__ ((unused, always_inline))
elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
{
Elf64_Addr *got;
- extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
@@ -117,12 +117,14 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
/* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to
the resolved address. */
- if (HAS_ARCH_FEATURE (AVX512F_Usable))
- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_avx512;
- else if (HAS_ARCH_FEATURE (AVX_Usable))
- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_avx;
+ if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+ *(ElfW(Addr) *) (got + 2)
+ = (HAS_ARCH_FEATURE (XSAVEC_Usable)
+ ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
+ : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
else
- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse;
+ *(ElfW(Addr) *) (got + 2)
+ = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
}
}
@@ -220,15 +222,20 @@ _dl_start_user:\n\
static inline void __attribute__ ((unused))
dl_platform_init (void)
{
+#if IS_IN (rtld)
+ /* init_cpu_features has been called early from __libc_start_main in
+ static executable. */
+ init_cpu_features (&GLRO(dl_x86_cpu_features));
+#else
if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
/* Avoid an empty string which would disturb us. */
GLRO(dl_platform) = NULL;
-
- init_cpu_features (&GLRO(dl_x86_cpu_features));
+#endif
}
static inline ElfW(Addr)
elf_machine_fixup_plt (struct link_map *map, lookup_t t,
+ const ElfW(Sym) *refsym, const ElfW(Sym) *sym,
const ElfW(Rela) *reloc,
ElfW(Addr) *reloc_addr, ElfW(Addr) value)
{
@@ -299,15 +306,29 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
const ElfW(Sym) *const refsym = sym;
# endif
struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
- ElfW(Addr) value = (sym == NULL ? 0
- : (ElfW(Addr)) sym_map->l_addr + sym->st_value);
+ ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
- && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC,
- 0)
- && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1)
- && __builtin_expect (!skip_ifunc, 1))
- value = ((ElfW(Addr) (*) (void)) value) ();
+ && __glibc_unlikely (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC)
+ && __glibc_likely (sym->st_shndx != SHN_UNDEF)
+ && __glibc_likely (!skip_ifunc))
+ {
+# ifndef RTLD_BOOTSTRAP
+ if (sym_map != map
+ && sym_map->l_type != lt_executable
+ && !sym_map->l_relocated)
+ {
+ const char *strtab
+ = (const char *) D_PTR (map, l_info[DT_STRTAB]);
+ _dl_error_printf ("\
+%s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
+ RTLD_PROGNAME, map->l_name,
+ sym_map->l_name,
+ strtab + refsym->st_name);
+ }
+# endif
+ value = ((ElfW(Addr) (*) (void)) value) ();
+ }
switch (r_type)
{
@@ -477,8 +498,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
break;
memcpy (reloc_addr_arg, (void *) value,
MIN (sym->st_size, refsym->st_size));
- if (__builtin_expect (sym->st_size > refsym->st_size, 0)
- || (__builtin_expect (sym->st_size < refsym->st_size, 0)
+ if (__glibc_unlikely (sym->st_size > refsym->st_size)
+ || (__glibc_unlikely (sym->st_size < refsym->st_size)
&& GLRO(dl_verbose)))
{
fmt = "\
@@ -531,7 +552,8 @@ elf_machine_lazy_rel (struct link_map *map,
/* Check for unexpected PLT reloc type. */
if (__glibc_likely (r_type == R_X86_64_JUMP_SLOT))
{
- if (__builtin_expect (map->l_mach.plt, 0) == 0)
+ /* Prelink has been deprecated. */
+ if (__glibc_likely (map->l_mach.plt == 0))
*reloc_addr += l_addr;
else
*reloc_addr =
diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
index 4625695dfb..269ce7e87d 100644
--- a/sysdeps/x86_64/dl-procinfo.c
+++ b/sysdeps/x86_64/dl-procinfo.c
@@ -1,5 +1,5 @@
/* Data for x86-64 version of processor capability information.
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -39,19 +39,7 @@
# define PROCINFO_CLASS
#endif
-#if !defined PROCINFO_DECL && defined SHARED
- ._dl_x86_cpu_features
-#else
-PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
-#endif
-#ifndef PROCINFO_DECL
-= { }
-#endif
-#if !defined SHARED || defined PROCINFO_DECL
-;
-#else
-,
-#endif
+#include <sysdeps/x86/dl-procinfo.c>
#undef PROCINFO_DECL
#undef PROCINFO_CLASS
diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
new file mode 100644
index 0000000000..533ee2b3a6
--- /dev/null
+++ b/sysdeps/x86_64/dl-tls.c
@@ -0,0 +1,53 @@
+/* Thread-local storage handling in the ELF dynamic linker. x86-64 version.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef SHARED
+/* Work around GCC PR58066, due to which __tls_get_addr may be called
+ with an unaligned stack. The compat implementation is in
+ tls_get_addr-compat.S. */
+
+# include <dl-tls.h>
+
+/* Define __tls_get_addr within elf/dl-tls.c under a different
+ name. */
+extern __typeof__ (__tls_get_addr) ___tls_get_addr;
+
+# define __tls_get_addr ___tls_get_addr
+# include <elf/dl-tls.c>
+# undef __tls_get_addr
+
+hidden_ver (___tls_get_addr, __tls_get_addr)
+
+/* Only handle slow paths for __tls_get_addr. */
+attribute_hidden
+void *
+__tls_get_addr_slow (GET_ADDR_ARGS)
+{
+ dtv_t *dtv = THREAD_DTV ();
+
+ if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+ return update_get_addr (GET_ADDR_PARAM);
+
+ return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
+}
+#else
+
+/* No compatibility symbol needed. */
+# include <elf/dl-tls.c>
+
+#endif
diff --git a/sysdeps/x86_64/dl-tls.h b/sysdeps/x86_64/dl-tls.h
index 0f101e6ac6..bc18e70b23 100644
--- a/sysdeps/x86_64/dl-tls.h
+++ b/sysdeps/x86_64/dl-tls.h
@@ -1,5 +1,5 @@
/* Thread-local storage handling in the ELF dynamic linker. x86-64 version.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,6 +16,9 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifndef _X86_64_DL_TLS_H
+#define _X86_64_DL_TLS_H
+
#include <stdint.h>
/* Type used for the representation of TLS information in the GOT. */
@@ -28,5 +31,4 @@ typedef struct dl_tls_index
extern void *__tls_get_addr (tls_index *ti);
-/* Value used for dtv entries for which the allocation is delayed. */
-#define TLS_DTV_UNALLOCATED ((void *) -1l)
+#endif /* _X86_64_DL_TLS_H */
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index 3cb7c3d031..80d771cd88 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -1,5 +1,5 @@
/* Thread-local storage handling in the ELF dynamic linker. x86_64 version.
- Copyright (C) 2004-2016 Free Software Foundation, Inc.
+ Copyright (C) 2004-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -37,6 +37,7 @@
cfi_startproc
.align 16
_dl_tlsdesc_return:
+ _CET_ENDBR
movq 8(%rax), %rax
ret
cfi_endproc
@@ -58,6 +59,7 @@ _dl_tlsdesc_return:
cfi_startproc
.align 16
_dl_tlsdesc_undefweak:
+ _CET_ENDBR
movq 8(%rax), %rax
subq %fs:0, %rax
ret
@@ -96,6 +98,7 @@ _dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
cfi_startproc
.align 16
_dl_tlsdesc_dynamic:
+ _CET_ENDBR
/* Preserve call-clobbered registers that we modify.
We need two scratch regs anyway. */
movq %rsi, -16(%rsp)
@@ -128,7 +131,11 @@ _dl_tlsdesc_dynamic:
movq %r10, 40(%rsp)
movq %r11, 48(%rsp)
/* %rdi already points to the tlsinfo data structure. */
+#ifdef NO_RTLD_HIDDEN
+ call JUMPTARGET (__tls_get_addr)
+#else
call HIDDEN_JUMPTARGET (__tls_get_addr)
+#endif
movq 8(%rsp), %rdx
movq 16(%rsp), %rcx
movq 24(%rsp), %r8
@@ -162,15 +169,17 @@ _dl_tlsdesc_dynamic:
.align 16
/* The PLT entry will have pushed the link_map pointer. */
_dl_tlsdesc_resolve_rela:
+ _CET_ENDBR
cfi_adjust_cfa_offset (8)
- /* Save all call-clobbered registers. */
- subq $72, %rsp
- cfi_adjust_cfa_offset (72)
+ /* Save all call-clobbered registers. Add 8 bytes for push in
+ the PLT entry to align the stack. */
+ subq $80, %rsp
+ cfi_adjust_cfa_offset (80)
movq %rax, (%rsp)
movq %rdi, 8(%rsp)
movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
movq %rsi, 16(%rsp)
- movq 72(%rsp), %rsi /* Pass link_map* in %rsi. */
+ movq 80(%rsp), %rsi /* Pass link_map* in %rsi. */
movq %r8, 24(%rsp)
movq %r9, 32(%rsp)
movq %r10, 40(%rsp)
@@ -187,8 +196,8 @@ _dl_tlsdesc_resolve_rela:
movq 48(%rsp), %r11
movq 56(%rsp), %rdx
movq 64(%rsp), %rcx
- addq $80, %rsp
- cfi_adjust_cfa_offset (-80)
+ addq $88, %rsp
+ cfi_adjust_cfa_offset (-88)
jmp *(%rax)
cfi_endproc
.size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
@@ -211,6 +220,7 @@ _dl_tlsdesc_resolve_rela:
.align 16
_dl_tlsdesc_resolve_hold:
0:
+ _CET_ENDBR
/* Save all call-clobbered registers. */
subq $72, %rsp
cfi_adjust_cfa_offset (72)
diff --git a/sysdeps/x86_64/dl-tlsdesc.h b/sysdeps/x86_64/dl-tlsdesc.h
index 11e1a50b8f..66e659bb5c 100644
--- a/sysdeps/x86_64/dl-tlsdesc.h
+++ b/sysdeps/x86_64/dl-tlsdesc.h
@@ -1,6 +1,6 @@
/* Thread-local storage descriptor handling in the ELF dynamic linker.
x86_64 version.
- Copyright (C) 2005-2016 Free Software Foundation, Inc.
+ Copyright (C) 2005-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -62,7 +62,7 @@ extern ptrdiff_t attribute_hidden
# ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *map,
size_t ti_offset)
- internal_function attribute_hidden;
+ attribute_hidden;
extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic(struct tlsdesc *);
# endif
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 9fb6b13983..ef1425cbb9 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -1,5 +1,5 @@
/* PLT trampolines. x86-64 version.
- Copyright (C) 2004-2016 Free Software Foundation, Inc.
+ Copyright (C) 2004-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,7 @@
#include <config.h>
#include <sysdep.h>
+#include <cpu-features.h>
#include <link-defines.h>
#ifndef DL_STACK_ALIGNMENT
@@ -33,37 +34,24 @@
# define DL_STACK_ALIGNMENT 8
#endif
-#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE
-/* The maximum size of unaligned vector load and store. */
-# define DL_RUNIME_UNALIGNED_VEC_SIZE 16
-#endif
-
-/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */
-#define DL_RUNIME_RESOLVE_REALIGN_STACK \
- (VEC_SIZE > DL_STACK_ALIGNMENT \
- && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE)
-
-/* Align vector register save area to 16 bytes. */
-#define REGISTER_SAVE_VEC_OFF 0
+/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
+ stack to 16 bytes before calling _dl_fixup. */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+ || 16 > DL_STACK_ALIGNMENT)
/* Area on stack to save and restore registers used for parameter
passing when calling _dl_fixup. */
#ifdef __ILP32__
-# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
# define PRESERVE_BND_REGS_PREFIX
#else
-/* Align bound register save area to 16 bytes. */
-# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
-# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
-# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
-# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
-# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16)
# ifdef HAVE_MPX_SUPPORT
# define PRESERVE_BND_REGS_PREFIX bnd
# else
# define PRESERVE_BND_REGS_PREFIX .byte 0xf2
# endif
#endif
+#define REGISTER_SAVE_RAX 0
#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8)
#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8)
@@ -73,59 +61,58 @@
#define RESTORE_AVX
-#ifdef HAVE_AVX512_ASM_SUPPORT
-# define VEC_SIZE 64
-# define VMOVA vmovdqa64
-# if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV vmovdqa64
-# else
-# define VMOV vmovdqu64
-# endif
-# define VEC(i) zmm##i
-# define _dl_runtime_resolve _dl_runtime_resolve_avx512
-# define _dl_runtime_profile _dl_runtime_profile_avx512
-# include "dl-trampoline.h"
-# undef _dl_runtime_resolve
-# undef _dl_runtime_profile
-# undef VEC
-# undef VMOV
-# undef VMOVA
-# undef VEC_SIZE
-#else
-strong_alias (_dl_runtime_resolve_avx, _dl_runtime_resolve_avx512)
- .hidden _dl_runtime_resolve_avx512
-strong_alias (_dl_runtime_profile_avx, _dl_runtime_profile_avx512)
- .hidden _dl_runtime_profile_avx512
-#endif
+#define VEC_SIZE 64
+#define VMOVA vmovdqa64
+#define VEC(i) zmm##i
+#define _dl_runtime_profile _dl_runtime_profile_avx512
+#include "dl-trampoline.h"
+#undef _dl_runtime_profile
+#undef VEC
+#undef VMOVA
+#undef VEC_SIZE
#define VEC_SIZE 32
#define VMOVA vmovdqa
-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV vmovdqa
-#else
-# define VMOV vmovdqu
-#endif
#define VEC(i) ymm##i
-#define _dl_runtime_resolve _dl_runtime_resolve_avx
#define _dl_runtime_profile _dl_runtime_profile_avx
#include "dl-trampoline.h"
-#undef _dl_runtime_resolve
#undef _dl_runtime_profile
#undef VEC
-#undef VMOV
#undef VMOVA
#undef VEC_SIZE
/* movaps/movups is 1-byte shorter. */
#define VEC_SIZE 16
#define VMOVA movaps
-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV movaps
-#else
-# define VMOV movups
-#endif
#define VEC(i) xmm##i
-#define _dl_runtime_resolve _dl_runtime_resolve_sse
#define _dl_runtime_profile _dl_runtime_profile_sse
#undef RESTORE_AVX
#include "dl-trampoline.h"
+#undef _dl_runtime_profile
+#undef VEC
+#undef VMOVA
+#undef VEC_SIZE
+
+#define USE_FXSAVE
+#define STATE_SAVE_ALIGNMENT 16
+#define _dl_runtime_resolve _dl_runtime_resolve_fxsave
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_FXSAVE
+#undef STATE_SAVE_ALIGNMENT
+
+#define USE_XSAVE
+#define STATE_SAVE_ALIGNMENT 64
+#define _dl_runtime_resolve _dl_runtime_resolve_xsave
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_XSAVE
+#undef STATE_SAVE_ALIGNMENT
+
+#define USE_XSAVEC
+#define STATE_SAVE_ALIGNMENT 64
+#define _dl_runtime_resolve _dl_runtime_resolve_xsavec
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_XSAVEC
+#undef STATE_SAVE_ALIGNMENT
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index f4191833ab..a28b1e73a4 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -1,5 +1,5 @@
/* PLT trampolines. x86-64 version.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,40 +16,47 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#undef REGISTER_SAVE_AREA_RAW
-#ifdef __ILP32__
-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to
- VEC7. */
-# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8)
-#else
-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as
- BND0, BND1, BND2, BND3 and VEC0 to VEC7. */
-# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8)
-#endif
+ .text
+#ifdef _dl_runtime_resolve
-#undef REGISTER_SAVE_AREA
-#undef LOCAL_STORAGE_AREA
-#undef BASE
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
-# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8)
-/* Local stack area before jumping to function address: RBX. */
-# define LOCAL_STORAGE_AREA 8
-# define BASE rbx
-# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0
-# error REGISTER_SAVE_AREA must be multples of VEC_SIZE
+# undef REGISTER_SAVE_AREA
+# undef LOCAL_STORAGE_AREA
+# undef BASE
+
+# if (STATE_SAVE_ALIGNMENT % 16) != 0
+# error STATE_SAVE_ALIGNMENT must be multples of 16
+# endif
+
+# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
+# error STATE_SAVE_OFFSET must be multples of STATE_SAVE_ALIGNMENT
# endif
-#else
-# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW
+
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+/* Local stack area before jumping to function address: RBX. */
+# define LOCAL_STORAGE_AREA 8
+# define BASE rbx
+# ifdef USE_FXSAVE
+/* Use fxsave to save XMM registers. */
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
+# if (REGISTER_SAVE_AREA % 16) != 0
+# error REGISTER_SAVE_AREA must be multples of 16
+# endif
+# endif
+# else
+# ifndef USE_FXSAVE
+# error USE_FXSAVE must be defined
+# endif
+/* Use fxsave to save XMM registers. */
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address: All saved
registers. */
-# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
-# define BASE rsp
-# if (REGISTER_SAVE_AREA % 16) != 8
-# error REGISTER_SAVE_AREA must be odd multples of 8
+# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
+# define BASE rsp
+# if (REGISTER_SAVE_AREA % 16) != 8
+# error REGISTER_SAVE_AREA must be odd multples of 8
+# endif
# endif
-#endif
- .text
.globl _dl_runtime_resolve
.hidden _dl_runtime_resolve
.type _dl_runtime_resolve, @function
@@ -57,19 +64,31 @@
cfi_startproc
_dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
-# if LOCAL_STORAGE_AREA != 8
-# error LOCAL_STORAGE_AREA must be 8
-# endif
+ _CET_ENDBR
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+# if LOCAL_STORAGE_AREA != 8
+# error LOCAL_STORAGE_AREA must be 8
+# endif
pushq %rbx # push subtracts stack by 8.
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%rbx, 0)
mov %RSP_LP, %RBX_LP
cfi_def_cfa_register(%rbx)
- and $-VEC_SIZE, %RSP_LP
-#endif
+ and $-STATE_SAVE_ALIGNMENT, %RSP_LP
+# endif
+# ifdef REGISTER_SAVE_AREA
sub $REGISTER_SAVE_AREA, %RSP_LP
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+# endif
+# else
+ # Allocate stack space of the required size to save the state.
+# if IS_IN (rtld)
+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+# else
+ sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+# endif
+# endif
# Preserve registers otherwise clobbered.
movq %rax, REGISTER_SAVE_RAX(%rsp)
movq %rcx, REGISTER_SAVE_RCX(%rsp)
@@ -78,59 +97,42 @@ _dl_runtime_resolve:
movq %rdi, REGISTER_SAVE_RDI(%rsp)
movq %r8, REGISTER_SAVE_R8(%rsp)
movq %r9, REGISTER_SAVE_R9(%rsp)
- VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp)
- VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp)
- VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp)
- VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp)
- VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp)
- VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp)
- VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp)
- VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp)
-#ifndef __ILP32__
- # We also have to preserve bound registers. These are nops if
- # Intel MPX isn't available or disabled.
-# ifdef HAVE_MPX_SUPPORT
- bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
- bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
- bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
- bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
+# ifdef USE_FXSAVE
+ fxsave STATE_SAVE_OFFSET(%rsp)
# else
-# if REGISTER_SAVE_BND0 == 0
- .byte 0x66,0x0f,0x1b,0x04,0x24
+ movl $STATE_SAVE_MASK, %eax
+ xorl %edx, %edx
+ # Clear the XSAVE Header.
+# ifdef USE_XSAVE
+ movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
+# endif
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
+# ifdef USE_XSAVE
+ xsave STATE_SAVE_OFFSET(%rsp)
# else
- .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
+ xsavec STATE_SAVE_OFFSET(%rsp)
# endif
- .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
- .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
- .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
# endif
-#endif
# Copy args pushed by PLT in register.
# %rdi: link_map, %rsi: reloc_index
mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
call _dl_fixup # Call resolver.
mov %RAX_LP, %R11_LP # Save return value
-#ifndef __ILP32__
- # Restore bound registers. These are nops if Intel MPX isn't
- # avaiable or disabled.
-# ifdef HAVE_MPX_SUPPORT
- bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
- bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
- bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
- bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
+ # Get register content back.
+# ifdef USE_FXSAVE
+ fxrstor STATE_SAVE_OFFSET(%rsp)
# else
- .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
- .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
- .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
-# if REGISTER_SAVE_BND0 == 0
- .byte 0x66,0x0f,0x1a,0x04,0x24
-# else
- .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
-# endif
+ movl $STATE_SAVE_MASK, %eax
+ xorl %edx, %edx
+ xrstor STATE_SAVE_OFFSET(%rsp)
# endif
-#endif
- # Get register content back.
movq REGISTER_SAVE_R9(%rsp), %r9
movq REGISTER_SAVE_R8(%rsp), %r8
movq REGISTER_SAVE_RDI(%rsp), %rdi
@@ -138,20 +140,12 @@ _dl_runtime_resolve:
movq REGISTER_SAVE_RDX(%rsp), %rdx
movq REGISTER_SAVE_RCX(%rsp), %rcx
movq REGISTER_SAVE_RAX(%rsp), %rax
- VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
mov %RBX_LP, %RSP_LP
cfi_def_cfa_register(%rsp)
movq (%rsp), %rbx
cfi_restore(%rbx)
-#endif
+# endif
# Adjust stack(PLT did 2 pushes)
add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
@@ -160,9 +154,10 @@ _dl_runtime_resolve:
jmp *%r11 # Jump to function address.
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve
+#endif
-#ifndef PROF
+#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
# error LR_VECTOR_OFFSET must be multples of VEC_SIZE
# endif
@@ -174,6 +169,7 @@ _dl_runtime_resolve:
_dl_runtime_profile:
cfi_startproc
cfi_adjust_cfa_offset(16) # Incorporate PLT
+ _CET_ENDBR
/* The La_x86_64_regs data structure pointed to by the
fourth paramater must be VEC_SIZE-byte aligned. This must
be explicitly enforced. We have the set up a dynamically
@@ -446,8 +442,16 @@ _dl_runtime_profile:
# ifdef RESTORE_AVX
/* sizeof(La_x86_64_retval). Need extra space for 2 SSE
registers to detect if xmm0/xmm1 registers are changed
- by audit module. */
- sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP
+ by audit module. Since rsp is aligned to VEC_SIZE, we
+ need to make sure that the address of La_x86_64_retval +
+ LRV_VECTOR0_OFFSET is aligned to VEC_SIZE. */
+# define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
+# define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
+# if LRV_MISALIGNED == 0
+ sub $LRV_SPACE, %RSP_LP
+# else
+ sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
+# endif
# else
sub $LRV_SIZE, %RSP_LP # sizeof(La_x86_64_retval)
# endif
diff --git a/sysdeps/x86_64/ffs.c b/sysdeps/x86_64/ffs.c
index be5b6c8589..fa5b20544d 100644
--- a/sysdeps/x86_64/ffs.c
+++ b/sysdeps/x86_64/ffs.c
@@ -1,7 +1,7 @@
/* ffs -- find first set bit in a word, counted from least significant end.
For AMD x86-64.
This file is part of the GNU C Library.
- Copyright (C) 1991-2016 Free Software Foundation, Inc.
+ Copyright (C) 1991-2018 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@cygnus.com>.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/ffsll.c b/sysdeps/x86_64/ffsll.c
index c0f5abc446..206deb6810 100644
--- a/sysdeps/x86_64/ffsll.c
+++ b/sysdeps/x86_64/ffsll.c
@@ -1,7 +1,7 @@
/* ffsll -- find first set bit in a word, counted from least significant end.
For AMD x86-64.
This file is part of the GNU C Library.
- Copyright (C) 1991-2016 Free Software Foundation, Inc.
+ Copyright (C) 1991-2018 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@cygnus.com>.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index 88742faff1..2b7d69bb50 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -30,11 +30,171 @@ ifeq ($(subdir),math)
ifeq ($(build-mathvec),yes)
libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \
float-vlen4 float-vlen8 float-vlen8-avx2
+tests += test-double-libmvec-alias test-double-libmvec-alias-avx \
+ test-double-libmvec-alias-avx2 test-double-libmvec-alias-main \
+ test-double-libmvec-alias-avx-main test-double-libmvec-alias-avx2-main \
+ test-float-libmvec-alias test-float-libmvec-alias-avx \
+ test-float-libmvec-alias-avx2 test-float-libmvec-alias-main \
+ test-float-libmvec-alias-avx-main test-float-libmvec-alias-avx2-main \
+ test-double-libmvec-sincos test-double-libmvec-sincos-avx \
+ test-double-libmvec-sincos-avx2 test-float-libmvec-sincosf \
+ test-float-libmvec-sincosf-avx test-float-libmvec-sincosf-avx2
+modules-names += test-double-libmvec-alias-mod \
+ test-double-libmvec-alias-avx-mod \
+ test-double-libmvec-alias-avx2-mod \
+ test-float-libmvec-alias-mod \
+ test-float-libmvec-alias-avx-mod \
+ test-float-libmvec-alias-avx2-mod
+modules-names-tests += test-double-libmvec-alias-mod \
+ test-double-libmvec-alias-avx-mod \
+ test-double-libmvec-alias-avx2-mod \
+ test-float-libmvec-alias-mod \
+ test-float-libmvec-alias-avx-mod \
+ test-float-libmvec-alias-avx2-mod
+extra-test-objs += test-double-libmvec-sincos-avx-main.o \
+ test-double-libmvec-sincos-avx2-main.o \
+ test-double-libmvec-sincos-main.o \
+ test-float-libmvec-sincosf-avx-main.o \
+ test-float-libmvec-sincosf-avx2-main.o\
+ test-float-libmvec-sincosf-main.o
+test-double-libmvec-alias-mod.so-no-z-defs = yes
+test-double-libmvec-alias-avx-mod.so-no-z-defs = yes
+test-double-libmvec-alias-avx2-mod.so-no-z-defs = yes
+test-float-libmvec-alias-mod.so-no-z-defs = yes
+test-float-libmvec-alias-avx-mod.so-no-z-defs = yes
+test-float-libmvec-alias-avx2-mod.so-no-z-defs = yes
+
+$(objpfx)test-double-libmvec-alias: \
+ $(objpfx)test-double-libmvec-alias-mod.so
+$(objpfx)test-double-libmvec-alias-mod.so: \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx: \
+ $(objpfx)test-double-libmvec-alias-avx-mod.so
+$(objpfx)test-double-libmvec-alias-avx-mod.so: \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx2: \
+ $(objpfx)test-double-libmvec-alias-avx2-mod.so
+$(objpfx)test-double-libmvec-alias-avx2-mod.so: \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-main: \
+ $(objpfx)test-double-libmvec-alias-mod.os \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx-main: \
+ $(objpfx)test-double-libmvec-alias-avx-mod.os \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx2-main: \
+ $(objpfx)test-double-libmvec-alias-avx2-mod.os \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias: \
+ $(objpfx)test-float-libmvec-alias-mod.so
+$(objpfx)test-float-libmvec-alias-mod.so: \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx: \
+ $(objpfx)test-float-libmvec-alias-avx-mod.so
+$(objpfx)test-float-libmvec-alias-avx-mod.so: \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx2: \
+ $(objpfx)test-float-libmvec-alias-avx2-mod.so
+$(objpfx)test-float-libmvec-alias-avx2-mod.so: \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-main: \
+ $(objpfx)test-float-libmvec-alias-mod.os \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx-main: \
+ $(objpfx)test-float-libmvec-alias-avx-mod.os \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx2-main: \
+ $(objpfx)test-float-libmvec-alias-avx2-mod.os \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-sincos: \
+ $(objpfx)test-double-libmvec-sincos.o \
+ $(objpfx)test-double-libmvec-sincos-main.o $(libmvec)
+
+$(objpfx)test-double-libmvec-sincos-avx: \
+ $(objpfx)test-double-libmvec-sincos-avx.o \
+ $(objpfx)test-double-libmvec-sincos-avx-main.o $(libmvec)
+
+$(objpfx)test-double-libmvec-sincos-avx2: \
+ $(objpfx)test-double-libmvec-sincos-avx2.o \
+ $(objpfx)test-double-libmvec-sincos-avx2-main.o $(libmvec)
+
+$(objpfx)test-float-libmvec-sincosf: \
+ $(objpfx)test-float-libmvec-sincosf.o \
+ $(objpfx)test-float-libmvec-sincosf-main.o $(libmvec)
+
+$(objpfx)test-float-libmvec-sincosf-avx: \
+ $(objpfx)test-float-libmvec-sincosf-avx.o \
+ $(objpfx)test-float-libmvec-sincosf-avx-main.o $(libmvec)
+
+$(objpfx)test-float-libmvec-sincosf-avx2: \
+ $(objpfx)test-float-libmvec-sincosf-avx2.o \
+ $(objpfx)test-float-libmvec-sincosf-avx2-main.o $(libmvec)
ifeq (yes,$(config-cflags-avx512))
libmvec-tests += double-vlen8 float-vlen16
+tests += test-double-libmvec-alias-avx512 \
+ test-float-libmvec-alias-avx512 \
+ test-double-libmvec-alias-avx512-main \
+ test-float-libmvec-alias-avx512-main \
+ test-double-libmvec-sincos-avx512 \
+ test-float-libmvec-sincosf-avx512
+modules-names += test-double-libmvec-alias-avx512-mod \
+ test-float-libmvec-alias-avx512-mod
+modules-names-tests += test-double-libmvec-alias-avx512-mod \
+ test-float-libmvec-alias-avx512-mod
+extra-test-objs += test-double-libmvec-sincos-avx512-main.o \
+ test-float-libmvec-sincosf-avx512-main.o
+test-double-libmvec-alias-avx512-mod.so-no-z-defs = yes
+test-float-libmvec-alias-avx512-mod.so-no-z-defs = yes
+
+$(objpfx)test-double-libmvec-alias-avx512: \
+ $(objpfx)test-double-libmvec-alias-avx512-mod.so
+$(objpfx)test-double-libmvec-alias-avx512-mod.so: \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx512-main: \
+ $(objpfx)test-double-libmvec-alias-avx512-mod.os \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx512: \
+ $(objpfx)test-float-libmvec-alias-avx512-mod.so
+$(objpfx)test-float-libmvec-alias-avx512-mod.so: \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx512-main: \
+ $(objpfx)test-float-libmvec-alias-avx512-mod.os \
+ $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-sincos-avx512: \
+ $(objpfx)test-double-libmvec-sincos-avx512.o \
+ $(objpfx)test-double-libmvec-sincos-avx512-main.o $(libmvec)
+
+$(objpfx)test-float-libmvec-sincosf-avx512: \
+ $(objpfx)test-float-libmvec-sincosf-avx512.o \
+ $(objpfx)test-float-libmvec-sincosf-avx512-main.o $(libmvec)
endif
+double-vlen2-funcs = cos exp log pow sin sincos
+double-vlen4-funcs = cos exp log pow sin sincos
+double-vlen4-avx2-funcs = cos exp log pow sin sincos
+double-vlen8-funcs = cos exp log pow sin sincos
+float-vlen4-funcs = cos exp log pow sin sincos
+float-vlen8-funcs = cos exp log pow sin sincos
+float-vlen8-avx2-funcs = cos exp log pow sin sincos
+float-vlen16-funcs = cos exp log pow sin sincos
+
double-vlen4-arch-ext-cflags = -mavx
double-vlen4-arch-ext2-cflags = -mavx2
double-vlen8-arch-ext-cflags = -mavx512f
@@ -43,11 +203,37 @@ float-vlen8-arch-ext-cflags = -mavx
float-vlen8-arch-ext2-cflags = -mavx2
float-vlen16-arch-ext-cflags = -mavx512f
-CFLAGS-test-double-vlen4-avx2.c = $(libm-test-vec-cflags)
+libmvec-sincos-cflags = $(libm-test-fast-math-cflags) -fno-inline -fopenmp -Wno-unknown-pragmas
+libmvec-alias-cflags = $(libmvec-sincos-cflags) -ffloat-store -ffinite-math-only
+
+CFLAGS-test-double-libmvec-alias-mod.c = $(libmvec-alias-cflags)
+CFLAGS-test-double-libmvec-alias-avx-mod.c = $(double-vlen4-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX
+CFLAGS-test-double-libmvec-alias-avx2-mod.c = $(double-vlen4-arch-ext2-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX2
+CFLAGS-test-double-libmvec-alias-avx512-mod.c = $(double-vlen8-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX512F
+
+CFLAGS-test-float-libmvec-alias-mod.c = $(libmvec-alias-cflags)
+CFLAGS-test-float-libmvec-alias-avx-mod.c = $(double-vlen4-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX
+CFLAGS-test-float-libmvec-alias-avx2-mod.c = $(double-vlen4-arch-ext2-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX2
+CFLAGS-test-float-libmvec-alias-avx512-mod.c = $(double-vlen8-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX512F
+
CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags)
-CFLAGS-test-float-vlen8-avx2.c = $(libm-test-vec-cflags)
CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags)
+CFLAGS-test-double-libmvec-sincos-main.c = $(libmvec-sincos-cflags)
+CFLAGS-test-double-libmvec-sincos-avx.c = -DREQUIRE_AVX
+CFLAGS-test-double-libmvec-sincos-avx-main.c = $(libmvec-sincos-cflags) $(double-vlen4-arch-ext-cflags)
+CFLAGS-test-double-libmvec-sincos-avx2.c = -DREQUIRE_AVX2
+CFLAGS-test-double-libmvec-sincos-avx2-main.c = $(libmvec-sincos-cflags) $(double-vlen4-arch-ext2-cflags)
+CFLAGS-test-double-libmvec-sincos-avx512.c = -DREQUIRE_AVX512F
+CFLAGS-test-double-libmvec-sincos-avx512-main.c = $(libmvec-sincos-cflags) $(double-vlen8-arch-ext-cflags)
+
+CFLAGS-test-float-libmvec-sincosf-main.c = $(libmvec-sincos-cflags)
+CFLAGS-test-float-libmvec-sincosf-avx.c = -DREQUIRE_AVX
+CFLAGS-test-float-libmvec-sincosf-avx-main.c = $(libmvec-sincos-cflags) $(float-vlen8-arch-ext-cflags)
+CFLAGS-test-float-libmvec-sincosf-avx2.c = -DREQUIRE_AVX2
+CFLAGS-test-float-libmvec-sincosf-avx2-main.c = $(libmvec-sincos-cflags) $(float-vlen8-arch-ext2-cflags)
+CFLAGS-test-float-libmvec-sincosf-avx512.c = -DREQUIRE_AVX512F
+CFLAGS-test-float-libmvec-sincosf-avx512-main.c = $(libmvec-sincos-cflags) $(float-vlen16-arch-ext-cflags)
endif
endif
diff --git a/sysdeps/x86_64/fpu/dla.h b/sysdeps/x86_64/fpu/dla.h
deleted file mode 100644
index 688efa0f5b..0000000000
--- a/sysdeps/x86_64/fpu/dla.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <features.h>
-
-#ifdef __FMA4__
-# define DLA_FMS(x,y,z) \
- __builtin_fma (x, y, -(z))
-#endif
-
-#include "sysdeps/ieee754/dbl-64/dla.h"
diff --git a/sysdeps/x86_64/fpu/e_expf.S b/sysdeps/x86_64/fpu/e_expf.S
deleted file mode 100644
index d4b63a8d8e..0000000000
--- a/sysdeps/x86_64/fpu/e_expf.S
+++ /dev/null
@@ -1,339 +0,0 @@
-/* Optimized __ieee754_expf function.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* Short algorithm description:
- *
- * Let K = 64 (table size).
- * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
- * where
- * x = m*log(2)/K + y, y in [0.0..log(2)/K]
- * m = n*K + j, m,n,j - signed integer, j in [0..K-1]
- * values of 2^(j/K) are tabulated as T[j].
- *
- * P(y) is a minimax polynomial approximation of expf(x)-1
- * on small interval [0.0..log(2)/K].
- *
- * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as
- * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y
- *
- * Special cases:
- * expf(NaN) = NaN
- * expf(+INF) = +INF
- * expf(-INF) = 0
- * expf(x) = 1 for subnormals
- * for finite argument, only expf(0)=1 is exact
- * expf(x) overflows if x>88.7228317260742190
- * expf(x) underflows if x<-103.972076416015620
- */
-
- .text
-ENTRY(__ieee754_expf)
- /* Input: single precision x in %xmm0 */
- cvtss2sd %xmm0, %xmm1 /* Convert x to double precision */
- movd %xmm0, %ecx /* Copy x */
- movsd L(DP_KLN2)(%rip), %xmm2 /* DP K/log(2) */
- movsd L(DP_P2)(%rip), %xmm3 /* DP P2 */
- movl %ecx, %eax /* x */
- mulsd %xmm1, %xmm2 /* DP x*K/log(2) */
- andl $0x7fffffff, %ecx /* |x| */
- lea L(DP_T)(%rip), %rsi /* address of table T[j] */
- cmpl $0x42ad496b, %ecx /* |x|<125*log(2) ? */
- movsd L(DP_P3)(%rip), %xmm4 /* DP P3 */
- addsd L(DP_RS)(%rip), %xmm2 /* DP x*K/log(2)+RS */
- jae L(special_paths)
-
- /* Here if |x|<125*log(2) */
- cmpl $0x31800000, %ecx /* |x|<2^(-28) ? */
- jb L(small_arg)
-
- /* Main path: here if 2^(-28)<=|x|<125*log(2) */
- cvtsd2ss %xmm2, %xmm2 /* SP x*K/log(2)+RS */
- movd %xmm2, %eax /* bits of n*K+j with trash */
- subss L(SP_RS)(%rip), %xmm2 /* SP t=round(x*K/log(2)) */
- movl %eax, %edx /* n*K+j with trash */
- cvtss2sd %xmm2, %xmm2 /* DP t */
- andl $0x3f, %eax /* bits of j */
- mulsd L(DP_NLN2K)(%rip), %xmm2/* DP -t*log(2)/K */
- andl $0xffffffc0, %edx /* bits of n */
-#ifdef __AVX__
- vaddsd %xmm1, %xmm2, %xmm0 /* DP y=x-t*log(2)/K */
- vmulsd %xmm0, %xmm0, %xmm2 /* DP z=y*y */
-#else
- addsd %xmm1, %xmm2 /* DP y=x-t*log(2)/K */
- movaps %xmm2, %xmm0 /* DP y */
- mulsd %xmm2, %xmm2 /* DP z=y*y */
-#endif
- mulsd %xmm2, %xmm4 /* DP P3*z */
- addl $0x1fc0, %edx /* bits of n + SP exponent bias */
- mulsd %xmm2, %xmm3 /* DP P2*z */
- shll $17, %edx /* SP 2^n */
- addsd L(DP_P1)(%rip), %xmm4 /* DP P3*z+P1 */
- addsd L(DP_P0)(%rip), %xmm3 /* DP P2*z+P0 */
- movd %edx, %xmm1 /* SP 2^n */
- mulsd %xmm2, %xmm4 /* DP (P3*z+P1)*z */
- mulsd %xmm3, %xmm0 /* DP (P2*z+P0)*y */
- addsd %xmm4, %xmm0 /* DP P(y) */
- mulsd (%rsi,%rax,8), %xmm0 /* DP P(y)*T[j] */
- addsd (%rsi,%rax,8), %xmm0 /* DP T[j]*(P(y)+1) */
- cvtsd2ss %xmm0, %xmm0 /* SP T[j]*(P(y)+1) */
- mulss %xmm1, %xmm0 /* SP result=2^n*(T[j]*(P(y)+1)) */
- ret
-
- .p2align 4
-L(small_arg):
- /* Here if 0<=|x|<2^(-28) */
- addss L(SP_ONE)(%rip), %xmm0 /* 1.0 + x */
- /* Return 1.0 with inexact raised, except for x==0 */
- ret
-
- .p2align 4
-L(special_paths):
- /* Here if 125*log(2)<=|x| */
- shrl $31, %eax /* Get sign bit of x, and depending on it: */
- lea L(SP_RANGE)(%rip), %rdx /* load over/underflow bound */
- cmpl (%rdx,%rax,4), %ecx /* |x|<under/overflow bound ? */
- jbe L(near_under_or_overflow)
-
- /* Here if |x|>under/overflow bound */
- cmpl $0x7f800000, %ecx /* |x| is finite ? */
- jae L(arg_inf_or_nan)
-
- /* Here if |x|>under/overflow bound, and x is finite */
- testq %rax, %rax /* sign of x nonzero ? */
- je L(res_overflow)
-
- /* Here if -inf<x<underflow bound (x<0) */
- movss L(SP_SMALL)(%rip), %xmm0/* load small value 2^(-100) */
- mulss %xmm0, %xmm0 /* Return underflowed result (zero or subnormal) */
- ret
-
- .p2align 4
-L(res_overflow):
- /* Here if overflow bound<x<inf (x>0) */
- movss L(SP_LARGE)(%rip), %xmm0/* load large value 2^100 */
- mulss %xmm0, %xmm0 /* Return overflowed result (Inf or max normal) */
- ret
-
- .p2align 4
-L(arg_inf_or_nan):
- /* Here if |x| is Inf or NAN */
- jne L(arg_nan) /* |x| is Inf ? */
-
- /* Here if |x| is Inf */
- lea L(SP_INF_0)(%rip), %rdx /* depending on sign of x: */
- movss (%rdx,%rax,4), %xmm0 /* return zero or Inf */
- ret
-
- .p2align 4
-L(arg_nan):
- /* Here if |x| is NaN */
- addss %xmm0, %xmm0 /* Return x+x (raise invalid) */
- ret
-
- .p2align 4
-L(near_under_or_overflow):
- /* Here if 125*log(2)<=|x|<under/overflow bound */
- cvtsd2ss %xmm2, %xmm2 /* SP x*K/log(2)+RS */
- movd %xmm2, %eax /* bits of n*K+j with trash */
- subss L(SP_RS)(%rip), %xmm2 /* SP t=round(x*K/log(2)) */
- movl %eax, %edx /* n*K+j with trash */
- cvtss2sd %xmm2, %xmm2 /* DP t */
- andl $0x3f, %eax /* bits of j */
- mulsd L(DP_NLN2K)(%rip), %xmm2/* DP -t*log(2)/K */
- andl $0xffffffc0, %edx /* bits of n */
-#ifdef __AVX__
- vaddsd %xmm1, %xmm2, %xmm0 /* DP y=x-t*log(2)/K */
- vmulsd %xmm0, %xmm0, %xmm2 /* DP z=y*y */
-#else
- addsd %xmm1, %xmm2 /* DP y=x-t*log(2)/K */
- movaps %xmm2, %xmm0 /* DP y */
- mulsd %xmm2, %xmm2 /* DP z=y*y */
-#endif
- mulsd %xmm2, %xmm4 /* DP P3*z */
- addl $0xffc0, %edx /* bits of n + DP exponent bias */
- mulsd %xmm2, %xmm3 /* DP P2*z */
- shlq $46, %rdx /* DP 2^n */
- addsd L(DP_P1)(%rip), %xmm4 /* DP P3*z+P1 */
- addsd L(DP_P0)(%rip), %xmm3 /* DP P2*z+P0 */
- movd %rdx, %xmm1 /* DP 2^n */
- mulsd %xmm2, %xmm4 /* DP (P3*z+P1)*z */
- mulsd %xmm3, %xmm0 /* DP (P2*z+P0)*y */
- addsd %xmm4, %xmm0 /* DP P(y) */
- mulsd (%rsi,%rax,8), %xmm0 /* DP P(y)*T[j] */
- addsd (%rsi,%rax,8), %xmm0 /* DP T[j]*(P(y)+1) */
- mulsd %xmm1, %xmm0 /* DP result=2^n*(T[j]*(P(y)+1)) */
- cvtsd2ss %xmm0, %xmm0 /* convert result to single precision */
- ret
-END(__ieee754_expf)
-
- .section .rodata, "a"
- .p2align 3
-L(DP_T): /* table of double precision values 2^(j/K) for j=[0..K-1] */
- .long 0x00000000, 0x3ff00000
- .long 0x3e778061, 0x3ff02c9a
- .long 0xd3158574, 0x3ff059b0
- .long 0x18759bc8, 0x3ff08745
- .long 0x6cf9890f, 0x3ff0b558
- .long 0x32d3d1a2, 0x3ff0e3ec
- .long 0xd0125b51, 0x3ff11301
- .long 0xaea92de0, 0x3ff1429a
- .long 0x3c7d517b, 0x3ff172b8
- .long 0xeb6fcb75, 0x3ff1a35b
- .long 0x3168b9aa, 0x3ff1d487
- .long 0x88628cd6, 0x3ff2063b
- .long 0x6e756238, 0x3ff2387a
- .long 0x65e27cdd, 0x3ff26b45
- .long 0xf51fdee1, 0x3ff29e9d
- .long 0xa6e4030b, 0x3ff2d285
- .long 0x0a31b715, 0x3ff306fe
- .long 0xb26416ff, 0x3ff33c08
- .long 0x373aa9cb, 0x3ff371a7
- .long 0x34e59ff7, 0x3ff3a7db
- .long 0x4c123422, 0x3ff3dea6
- .long 0x21f72e2a, 0x3ff4160a
- .long 0x6061892d, 0x3ff44e08
- .long 0xb5c13cd0, 0x3ff486a2
- .long 0xd5362a27, 0x3ff4bfda
- .long 0x769d2ca7, 0x3ff4f9b2
- .long 0x569d4f82, 0x3ff5342b
- .long 0x36b527da, 0x3ff56f47
- .long 0xdd485429, 0x3ff5ab07
- .long 0x15ad2148, 0x3ff5e76f
- .long 0xb03a5585, 0x3ff6247e
- .long 0x82552225, 0x3ff66238
- .long 0x667f3bcd, 0x3ff6a09e
- .long 0x3c651a2f, 0x3ff6dfb2
- .long 0xe8ec5f74, 0x3ff71f75
- .long 0x564267c9, 0x3ff75feb
- .long 0x73eb0187, 0x3ff7a114
- .long 0x36cf4e62, 0x3ff7e2f3
- .long 0x994cce13, 0x3ff82589
- .long 0x9b4492ed, 0x3ff868d9
- .long 0x422aa0db, 0x3ff8ace5
- .long 0x99157736, 0x3ff8f1ae
- .long 0xb0cdc5e5, 0x3ff93737
- .long 0x9fde4e50, 0x3ff97d82
- .long 0x82a3f090, 0x3ff9c491
- .long 0x7b5de565, 0x3ffa0c66
- .long 0xb23e255d, 0x3ffa5503
- .long 0x5579fdbf, 0x3ffa9e6b
- .long 0x995ad3ad, 0x3ffae89f
- .long 0xb84f15fb, 0x3ffb33a2
- .long 0xf2fb5e47, 0x3ffb7f76
- .long 0x904bc1d2, 0x3ffbcc1e
- .long 0xdd85529c, 0x3ffc199b
- .long 0x2e57d14b, 0x3ffc67f1
- .long 0xdcef9069, 0x3ffcb720
- .long 0x4a07897c, 0x3ffd072d
- .long 0xdcfba487, 0x3ffd5818
- .long 0x03db3285, 0x3ffda9e6
- .long 0x337b9b5f, 0x3ffdfc97
- .long 0xe78b3ff6, 0x3ffe502e
- .long 0xa2a490da, 0x3ffea4af
- .long 0xee615a27, 0x3ffefa1b
- .long 0x5b6e4540, 0x3fff5076
- .long 0x819e90d8, 0x3fffa7c1
- .type L(DP_T), @object
- ASM_SIZE_DIRECTIVE(L(DP_T))
-
- .section .rodata.cst8,"aM",@progbits,8
- .p2align 3
-L(DP_KLN2): /* double precision K/log(2) */
- .long 0x652b82fe, 0x40571547
- .type L(DP_KLN2), @object
- ASM_SIZE_DIRECTIVE(L(DP_KLN2))
-
- .p2align 3
-L(DP_NLN2K): /* double precision -log(2)/K */
- .long 0xfefa39ef, 0xbf862e42
- .type L(DP_NLN2K), @object
- ASM_SIZE_DIRECTIVE(L(DP_NLN2K))
-
- .p2align 3
-L(DP_RS): /* double precision 2^23+2^22 */
- .long 0x00000000, 0x41680000
- .type L(DP_RS), @object
- ASM_SIZE_DIRECTIVE(L(DP_RS))
-
- .p2align 3
-L(DP_P3): /* double precision polynomial coefficient P3 */
- .long 0xeb78fa85, 0x3fa56420
- .type L(DP_P3), @object
- ASM_SIZE_DIRECTIVE(L(DP_P3))
-
- .p2align 3
-L(DP_P1): /* double precision polynomial coefficient P1 */
- .long 0x008d6118, 0x3fe00000
- .type L(DP_P1), @object
- ASM_SIZE_DIRECTIVE(L(DP_P1))
-
- .p2align 3
-L(DP_P2): /* double precision polynomial coefficient P2 */
- .long 0xda752d4f, 0x3fc55550
- .type L(DP_P2), @object
- ASM_SIZE_DIRECTIVE(L(DP_P2))
-
- .p2align 3
-L(DP_P0): /* double precision polynomial coefficient P0 */
- .long 0xffffe7c6, 0x3fefffff
- .type L(DP_P0), @object
- ASM_SIZE_DIRECTIVE(L(DP_P0))
-
- .p2align 2
-L(SP_RANGE): /* single precision overflow/underflow bounds */
- .long 0x42b17217 /* if x>this bound, then result overflows */
- .long 0x42cff1b4 /* if x<this bound, then result underflows */
- .type L(SP_RANGE), @object
- ASM_SIZE_DIRECTIVE(L(SP_RANGE))
-
- .p2align 2
-L(SP_INF_0):
- .long 0x7f800000 /* single precision Inf */
- .long 0 /* single precision zero */
- .type L(SP_INF_0), @object
- ASM_SIZE_DIRECTIVE(L(SP_INF_0))
-
- .section .rodata.cst4,"aM",@progbits,4
- .p2align 2
-L(SP_RS): /* single precision 2^23+2^22 */
- .long 0x4b400000
- .type L(SP_RS), @object
- ASM_SIZE_DIRECTIVE(L(SP_RS))
-
- .p2align 2
-L(SP_SMALL): /* single precision small value 2^(-100) */
- .long 0x0d800000
- .type L(SP_SMALL), @object
- ASM_SIZE_DIRECTIVE(L(SP_SMALL))
-
- .p2align 2
-L(SP_LARGE): /* single precision large value 2^100 */
- .long 0x71800000
- .type L(SP_LARGE), @object
- ASM_SIZE_DIRECTIVE(L(SP_LARGE))
-
- .p2align 2
-L(SP_ONE): /* single precision 1.0 */
- .long 0x3f800000
- .type L(SP_ONE), @object
- ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-strong_alias (__ieee754_expf, __expf_finite)
diff --git a/sysdeps/x86_64/fpu/e_expl.S b/sysdeps/x86_64/fpu/e_expl.S
index 8b3ddaec59..b75a103803 100644
--- a/sysdeps/x86_64/fpu/e_expl.S
+++ b/sysdeps/x86_64/fpu/e_expl.S
@@ -22,6 +22,7 @@
* -- moshier@na-net.ornl.gov
*/
+#include <libm-alias-ldouble.h>
#include <machine/asm.h>
#include <x86_64-math-asm.h>
@@ -99,7 +100,7 @@ ENTRY(IEEE754_EXPL)
/* Below -64.0 (may be -NaN or -Inf). */
andb %ah, %dh
cmpb $0x01, %dh
- je 2f /* Is +-NaN, jump. */
+ je 6f /* Is +-NaN, jump. */
jmp 1f /* -large, possibly -Inf. */
4: /* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf). */
@@ -141,7 +142,7 @@ ENTRY(IEEE754_EXPL)
cmpb $0x05, %dh
je 1f /* Is +-Inf, jump. */
cmpb $0x01, %dh
- je 2f /* Is +-NaN, jump. */
+ je 6f /* Is +-NaN, jump. */
/* Overflow or underflow; saturate. */
fstp %st
fldt MO(csat)
@@ -207,10 +208,13 @@ ENTRY(IEEE754_EXPL)
fldz /* Set result to 0. */
#endif
2: ret
+6: /* NaN argument. */
+ fadd %st
+ ret
END(IEEE754_EXPL)
#ifdef USE_AS_EXPM1L
libm_hidden_def (__expm1l)
-weak_alias (__expm1l, expm1l)
+libm_alias_ldouble (__expm1, expm1)
#else
strong_alias (IEEE754_EXPL, EXPL_FINITE)
#endif
diff --git a/sysdeps/x86_64/fpu/e_log10l.S b/sysdeps/x86_64/fpu/e_log10l.S
index 8fa61644c1..e0cb88e32e 100644
--- a/sysdeps/x86_64/fpu/e_log10l.S
+++ b/sysdeps/x86_64/fpu/e_log10l.S
@@ -64,6 +64,7 @@ ENTRY(__ieee754_log10l)
jnz 4b // in case x is ±Inf
fstp %st(1)
fstp %st(1)
+ fadd %st(0)
ret
END(__ieee754_log10l)
diff --git a/sysdeps/x86_64/fpu/e_log2l.S b/sysdeps/x86_64/fpu/e_log2l.S
index a063255ddd..023ec29164 100644
--- a/sysdeps/x86_64/fpu/e_log2l.S
+++ b/sysdeps/x86_64/fpu/e_log2l.S
@@ -63,6 +63,7 @@ ENTRY(__ieee754_log2l)
jnz 4b // in case x is ±Inf
fstp %st(1)
fstp %st(1)
+ fadd %st(0)
ret
END (__ieee754_log2l)
diff --git a/sysdeps/x86_64/fpu/e_logl.S b/sysdeps/x86_64/fpu/e_logl.S
index dbe6fd59dc..0d3576f48b 100644
--- a/sysdeps/x86_64/fpu/e_logl.S
+++ b/sysdeps/x86_64/fpu/e_logl.S
@@ -66,6 +66,7 @@ ENTRY(__ieee754_logl)
jnz 4b // in case x is +-Inf
fstp %st(1)
fstp %st(1)
+ fadd %st(0)
ret
END (__ieee754_logl)
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index 1f68cf0102..f32228104e 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996-2016 Free Software Foundation, Inc.
+ Copyright (C) 1996-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -26,9 +26,9 @@
.type one,@object
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
- .type p3,@object
-p3: .byte 0, 0, 0, 0, 0, 0, 0x20, 0x40
- ASM_SIZE_DIRECTIVE(p3)
+ .type p2,@object
+p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40
+ ASM_SIZE_DIRECTIVE(p2)
.type p63,@object
p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
ASM_SIZE_DIRECTIVE(p63)
@@ -136,12 +136,12 @@ ENTRY(__ieee754_powl)
jmp 3f
9: /* OK, we have an integer value for y. Unless very small
- (we use < 8), use the algorithm for real exponent to avoid
+ (we use < 4), use the algorithm for real exponent to avoid
accumulation of errors. */
- fldl MO(p3) // 8 : y : x
- fld %st(1) // y : 8 : y : x
- fabs // |y| : 8 : y : x
- fcomip %st(1), %st // 8 : y : x
+ fldl MO(p2) // 4 : y : x
+ fld %st(1) // y : 4 : y : x
+ fabs // |y| : 4 : y : x
+ fcomip %st(1), %st // 4 : y : x
fstp %st(0) // y : x
jnc 3f
mov -8(%rsp),%eax
@@ -184,9 +184,15 @@ ENTRY(__ieee754_powl)
30: fldt 8(%rsp) // x : y
fldl MO(one) // 1.0 : x : y
fucomip %st(1),%st // x : y
- je 31f
- fxch // y : x
-31: fstp %st(1)
+ je 32f
+31: /* At least one argument NaN, and result should be NaN. */
+ faddp
+ ret
+32: jc 31b
+ /* pow (1, NaN); check if the NaN is signaling. */
+ testb $0x40, 31(%rsp)
+ jz 31b
+ fstp %st(1)
ret
.align ALIGNARG(4)
@@ -217,12 +223,24 @@ ENTRY(__ieee754_powl)
cfi_adjust_cfa_offset (-40)
ret
- // pow(x,±0) = 1
+ // pow(x,±0) = 1, unless x is sNaN
.align ALIGNARG(4)
11: fstp %st(0) // pop y
+ fldt 8(%rsp) // x
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x01, %ah
+ je 112f // x is NaN
+111: fstp %st(0)
fldl MO(one)
ret
+112: testb $0x40, 15(%rsp)
+ jnz 111b
+ fadd %st(0)
+ ret
+
// y == ±inf
.align ALIGNARG(4)
12: fstp %st(0) // pop y
@@ -255,6 +273,7 @@ ENTRY(__ieee754_powl)
.align ALIGNARG(4)
13: fldt 8(%rsp) // load x == NaN
+ fadd %st(0)
ret
.align ALIGNARG(4)
diff --git a/sysdeps/x86_64/fpu/e_scalbl.S b/sysdeps/x86_64/fpu/e_scalbl.S
index 331bee580c..2982dc3b9e 100644
--- a/sysdeps/x86_64/fpu/e_scalbl.S
+++ b/sysdeps/x86_64/fpu/e_scalbl.S
@@ -44,7 +44,7 @@ ENTRY(__ieee754_scalbl)
fnstsw
andl $0x4500, %eax
cmpl $0x0100, %eax
- je 3f
+ je 2f
fld %st(1)
frndint
fcomip %st(2), %st
@@ -75,15 +75,8 @@ ENTRY(__ieee754_scalbl)
#endif
ret
- /* The result is NaN, but we must not raise an exception.
- So use a variable. */
-2: fstp %st
- fstp %st
- fldl MO(nan)
- ret
-
- /* The first parameter is a NaN. Return it. */
-3: fstp %st(1)
+ /* The result is NaN; raise an exception for sNaN arguments. */
+2: faddp
ret
/* Return NaN and raise the invalid exception. */
diff --git a/sysdeps/x86_64/fpu/e_sqrt.c b/sysdeps/x86_64/fpu/e_sqrt.c
index 4b86434913..f4c2e5fd7c 100644
--- a/sysdeps/x86_64/fpu/e_sqrt.c
+++ b/sysdeps/x86_64/fpu/e_sqrt.c
@@ -1,5 +1,5 @@
/* Square root of floating point number.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/e_sqrtf.c b/sysdeps/x86_64/fpu/e_sqrtf.c
index 639137b735..8f76ccb530 100644
--- a/sysdeps/x86_64/fpu/e_sqrtf.c
+++ b/sysdeps/x86_64/fpu/e_sqrtf.c
@@ -1,5 +1,5 @@
/* Square root of floating point number.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fclrexcpt.c b/sysdeps/x86_64/fpu/fclrexcpt.c
index a8e00c0141..e7f6aa341f 100644
--- a/sysdeps/x86_64/fpu/fclrexcpt.c
+++ b/sysdeps/x86_64/fpu/fclrexcpt.c
@@ -1,5 +1,5 @@
/* Clear given exceptions in current floating-point environment.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fedisblxcpt.c b/sysdeps/x86_64/fpu/fedisblxcpt.c
index f1ea6cfa97..9153f997ed 100644
--- a/sysdeps/x86_64/fpu/fedisblxcpt.c
+++ b/sysdeps/x86_64/fpu/fedisblxcpt.c
@@ -1,5 +1,5 @@
/* Disable floating-point exceptions.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
diff --git a/sysdeps/x86_64/fpu/feenablxcpt.c b/sysdeps/x86_64/fpu/feenablxcpt.c
index df4c628b8d..7a3e26b3f9 100644
--- a/sysdeps/x86_64/fpu/feenablxcpt.c
+++ b/sysdeps/x86_64/fpu/feenablxcpt.c
@@ -1,5 +1,5 @@
/* Enable floating-point exceptions.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
diff --git a/sysdeps/x86_64/fpu/fegetenv.c b/sysdeps/x86_64/fpu/fegetenv.c
index a28efb36f3..9461af7575 100644
--- a/sysdeps/x86_64/fpu/fegetenv.c
+++ b/sysdeps/x86_64/fpu/fegetenv.c
@@ -1,5 +1,5 @@
/* Store current floating-point environment.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fegetexcept.c b/sysdeps/x86_64/fpu/fegetexcept.c
index 8acd0382a0..ce54c251ba 100644
--- a/sysdeps/x86_64/fpu/fegetexcept.c
+++ b/sysdeps/x86_64/fpu/fegetexcept.c
@@ -1,5 +1,5 @@
/* Get enabled floating-point exceptions.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
diff --git a/sysdeps/x86_64/fpu/fegetmode.c b/sysdeps/x86_64/fpu/fegetmode.c
new file mode 100644
index 0000000000..cc4f12649b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/fegetmode.c
@@ -0,0 +1,28 @@
+/* Store current floating-point control modes. x86_64 version.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+fegetmode (femode_t *modep) /* Save the x87 control word and MXCSR in *MODEP. */
+{
+ _FPU_GETCW (modep->__control_word); /* x87 control word, read via the <fpu_control.h> macro. */
+ __asm__ ("stmxcsr %0" : "=m" (modep->__mxcsr)); /* Whole MXCSR register; no bits are masked off here. */
+ return 0; /* No failure path: always returns 0. */
+}
diff --git a/sysdeps/x86_64/fpu/fegetround.c b/sysdeps/x86_64/fpu/fegetround.c
index 296d366560..0f31cafedd 100644
--- a/sysdeps/x86_64/fpu/fegetround.c
+++ b/sysdeps/x86_64/fpu/fegetround.c
@@ -1,5 +1,5 @@
/* Return current rounding direction.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
diff --git a/sysdeps/x86_64/fpu/feholdexcpt.c b/sysdeps/x86_64/fpu/feholdexcpt.c
index a040c3dea5..dec689beb2 100644
--- a/sysdeps/x86_64/fpu/feholdexcpt.c
+++ b/sysdeps/x86_64/fpu/feholdexcpt.c
@@ -1,5 +1,5 @@
/* Store current floating-point environment and clear exceptions.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fesetenv.c b/sysdeps/x86_64/fpu/fesetenv.c
index 355d02aaa6..c12dba5101 100644
--- a/sysdeps/x86_64/fpu/fesetenv.c
+++ b/sysdeps/x86_64/fpu/fesetenv.c
@@ -1,5 +1,5 @@
/* Install given floating-point environment.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fesetexcept.c b/sysdeps/x86_64/fpu/fesetexcept.c
new file mode 100644
index 0000000000..122a7629dc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/fesetexcept.c
@@ -0,0 +1,31 @@
+/* Set given exception flags. x86_64 version.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+
+int
+fesetexcept (int excepts) /* OR the FE_ALL_EXCEPT bits of EXCEPTS into MXCSR. */
+{
+ unsigned int mxcsr;
+
+ __asm__ ("stmxcsr %0" : "=m" (*&mxcsr)); /* Read the current MXCSR. */
+ mxcsr |= excepts & FE_ALL_EXCEPT; /* Bits of EXCEPTS outside FE_ALL_EXCEPT are ignored. */
+ __asm__ ("ldmxcsr %0" : : "m" (*&mxcsr)); /* Write MXCSR back; no x87 state is modified in this function. */
+
+ return 0; /* No failure path: always returns 0. */
+}
diff --git a/sysdeps/x86_64/fpu/fesetmode.c b/sysdeps/x86_64/fpu/fesetmode.c
new file mode 100644
index 0000000000..0771e4c10a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/fesetmode.c
@@ -0,0 +1,50 @@
+/* Install given floating-point control modes. x86_64 version.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+/* All exceptions, including the x86-specific "denormal operand"
+ exception. */
+#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM)
+
+int
+fesetmode (const femode_t *modep) /* Install the control modes from *MODEP, or the defaults when MODEP is FE_DFL_MODE. */
+{
+ fpu_control_t cw;
+ unsigned int mxcsr;
+ __asm__ ("stmxcsr %0" : "=m" (mxcsr)); /* Read the current MXCSR. */
+ /* Preserve SSE exception flags but restore other state in
+ MXCSR. */
+ mxcsr &= FE_ALL_EXCEPT_X86; /* Keep only the flag bits; every other bit is rebuilt below. */
+ if (modep == FE_DFL_MODE)
+ {
+ cw = _FPU_DEFAULT;
+ /* Default MXCSR state has all bits zero except for those
+ masking exceptions. */
+ mxcsr |= FE_ALL_EXCEPT_X86 << 7; /* Shifting the flag bits left by 7 yields the matching mask bits. */
+ }
+ else
+ {
+ cw = modep->__control_word;
+ mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86; /* Saved MXCSR with its exception-flag bits dropped. */
+ }
+ _FPU_SETCW (cw); /* Install the chosen x87 control word. */
+ __asm__ ("ldmxcsr %0" : : "m" (mxcsr)); /* Install the rebuilt MXCSR. */
+ return 0; /* No failure path: always returns 0. */
+}
diff --git a/sysdeps/x86_64/fpu/fesetround.c b/sysdeps/x86_64/fpu/fesetround.c
index 475d63f4db..e5afc1d57a 100644
--- a/sysdeps/x86_64/fpu/fesetround.c
+++ b/sysdeps/x86_64/fpu/fesetround.c
@@ -1,5 +1,5 @@
/* Set current rounding direction.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/feupdateenv.c b/sysdeps/x86_64/fpu/feupdateenv.c
index f035d57ca8..00da535e64 100644
--- a/sysdeps/x86_64/fpu/feupdateenv.c
+++ b/sysdeps/x86_64/fpu/feupdateenv.c
@@ -1,5 +1,5 @@
/* Install given floating-point environment and raise exceptions.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
diff --git a/sysdeps/x86_64/fpu/fgetexcptflg.c b/sysdeps/x86_64/fpu/fgetexcptflg.c
index 938cf3e62b..16719ceb5e 100644
--- a/sysdeps/x86_64/fpu/fgetexcptflg.c
+++ b/sysdeps/x86_64/fpu/fgetexcptflg.c
@@ -1,5 +1,5 @@
/* Store current representation for exceptions.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c
index e2abbbec33..ca1c223053 100644
--- a/sysdeps/x86_64/fpu/fraiseexcpt.c
+++ b/sysdeps/x86_64/fpu/fraiseexcpt.c
@@ -1,5 +1,5 @@
/* Raise given exceptions.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fsetexcptflg.c b/sysdeps/x86_64/fpu/fsetexcptflg.c
index 76f7bad9a8..821dd9d786 100644
--- a/sysdeps/x86_64/fpu/fsetexcptflg.c
+++ b/sysdeps/x86_64/fpu/fsetexcptflg.c
@@ -1,5 +1,5 @@
/* Set floating-point environment exception handling.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/ftestexcept.c b/sysdeps/x86_64/fpu/ftestexcept.c
index c8f2c01c67..63167c68df 100644
--- a/sysdeps/x86_64/fpu/ftestexcept.c
+++ b/sysdeps/x86_64/fpu/ftestexcept.c
@@ -1,5 +1,5 @@
/* Test exception in current environment.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/k_rem_pio2l.c b/sysdeps/x86_64/fpu/k_rem_pio2l.c
deleted file mode 100644
index eea55a98d2..0000000000
--- a/sysdeps/x86_64/fpu/k_rem_pio2l.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed. */
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 445b47527d..912db318b6 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -3,1015 +3,1293 @@
# Maximal error of functions:
Function: "acos":
float: 1
+float128: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "acos_downward":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "acos_towardzero":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "acos_upward":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "acosh":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "acosh_downward":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: "acosh_towardzero":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: "acosh_upward":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: "asin":
float: 1
+float128: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "asin_downward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "asin_towardzero":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "asin_upward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "asinh":
double: 1
float: 1
+float128: 3
idouble: 1
ifloat: 1
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: "asinh_downward":
double: 3
float: 3
+float128: 4
idouble: 3
ifloat: 3
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: "asinh_towardzero":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: "asinh_upward":
double: 3
float: 3
+float128: 4
idouble: 3
ifloat: 3
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: "atan":
float: 1
+float128: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "atan2":
float: 1
+float128: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "atan2_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "atan2_towardzero":
double: 1
float: 2
+float128: 3
idouble: 1
ifloat: 2
+ifloat128: 3
ildouble: 1
ldouble: 1
Function: "atan2_upward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "atan_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "atan_towardzero":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "atan_upward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "atanh":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: "atanh_downward":
double: 3
float: 3
+float128: 4
idouble: 3
ifloat: 3
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: "atanh_towardzero":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: "atanh_upward":
double: 3
float: 3
+float128: 4
idouble: 3
ifloat: 3
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: "cabs":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "cabs_downward":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "cabs_towardzero":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "cabs_upward":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Real part of "cacos":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Imaginary part of "cacos":
-double: 1
+double: 2
float: 2
-idouble: 1
+float128: 2
+idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Real part of "cacos_downward":
-double: 2
+double: 3
float: 2
-idouble: 2
+float128: 3
+idouble: 3
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "cacos_downward":
double: 5
float: 3
+float128: 6
idouble: 5
ifloat: 3
-ildouble: 5
-ldouble: 5
+ifloat128: 6
+ildouble: 6
+ldouble: 6
Function: Real part of "cacos_towardzero":
-double: 2
+double: 3
float: 2
-idouble: 2
+float128: 3
+idouble: 3
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "cacos_towardzero":
double: 5
float: 3
+float128: 5
idouble: 5
ifloat: 3
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: Real part of "cacos_upward":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "cacos_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
+double: 5
+float: 7
+float128: 7
+idouble: 5
+ifloat: 7
+ifloat128: 7
+ildouble: 7
+ldouble: 7
Function: Real part of "cacosh":
-double: 1
+double: 2
float: 2
-idouble: 1
+float128: 2
+idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Imaginary part of "cacosh":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Real part of "cacosh_downward":
double: 5
float: 3
+float128: 5
idouble: 5
ifloat: 3
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: Imaginary part of "cacosh_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
+double: 3
+float: 3
+float128: 4
+idouble: 3
+ifloat: 3
+ifloat128: 4
+ildouble: 3
+ldouble: 3
Function: Real part of "cacosh_towardzero":
double: 5
float: 3
+float128: 5
idouble: 5
ifloat: 3
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: Imaginary part of "cacosh_towardzero":
-double: 2
+double: 3
float: 2
-idouble: 2
+float128: 3
+idouble: 3
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Real part of "cacosh_upward":
double: 4
float: 4
+float128: 6
idouble: 4
ifloat: 4
+ifloat128: 6
ildouble: 5
ldouble: 5
Function: Imaginary part of "cacosh_upward":
-double: 2
+double: 3
float: 2
-idouble: 2
+float128: 4
+idouble: 3
ifloat: 2
-ildouble: 2
-ldouble: 2
+ifloat128: 4
+ildouble: 3
+ldouble: 3
Function: "carg":
float: 1
+float128: 2
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "carg_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "carg_towardzero":
double: 1
float: 2
+float128: 3
idouble: 1
ifloat: 2
+ifloat128: 3
ildouble: 1
ldouble: 1
Function: "carg_upward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Real part of "casin":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Imaginary part of "casin":
-double: 1
+double: 2
float: 2
-idouble: 1
+float128: 2
+idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Real part of "casin_downward":
double: 3
-float: 1
+float: 2
+float128: 3
idouble: 3
-ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat: 2
+ifloat128: 3
+ildouble: 3
+ldouble: 3
Function: Imaginary part of "casin_downward":
double: 5
float: 3
+float128: 6
idouble: 5
ifloat: 3
-ildouble: 5
-ldouble: 5
+ifloat128: 6
+ildouble: 6
+ldouble: 6
Function: Real part of "casin_towardzero":
double: 3
float: 1
+float128: 3
idouble: 3
ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat128: 3
+ildouble: 3
+ldouble: 3
Function: Imaginary part of "casin_towardzero":
double: 5
float: 3
+float128: 5
idouble: 5
ifloat: 3
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: Real part of "casin_upward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
+double: 3
+float: 2
+float128: 3
+idouble: 3
+ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "casin_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
+double: 5
+float: 7
+float128: 7
+idouble: 5
+ifloat: 7
+ifloat128: 7
+ildouble: 7
+ldouble: 7
Function: Real part of "casinh":
-double: 1
+double: 2
float: 2
-idouble: 1
+float128: 2
+idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Imaginary part of "casinh":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Real part of "casinh_downward":
double: 5
float: 3
+float128: 6
idouble: 5
ifloat: 3
-ildouble: 5
-ldouble: 5
+ifloat128: 6
+ildouble: 6
+ldouble: 6
Function: Imaginary part of "casinh_downward":
double: 3
-float: 1
+float: 2
+float128: 3
idouble: 3
-ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat: 2
+ifloat128: 3
+ildouble: 3
+ldouble: 3
Function: Real part of "casinh_towardzero":
double: 5
float: 3
+float128: 5
idouble: 5
ifloat: 3
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: Imaginary part of "casinh_towardzero":
double: 3
float: 1
+float128: 3
idouble: 3
ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat128: 3
+ildouble: 3
+ldouble: 3
Function: Real part of "casinh_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
+double: 5
+float: 7
+float128: 7
+idouble: 5
+ifloat: 7
+ifloat128: 7
+ildouble: 7
+ldouble: 7
Function: Imaginary part of "casinh_upward":
-double: 2
+double: 3
float: 2
-idouble: 2
+float128: 3
+idouble: 3
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Real part of "catan":
+double: 1
float: 1
+float128: 1
+idouble: 1
ifloat: 1
+ifloat128: 1
+ildouble: 1
+ldouble: 1
Function: Imaginary part of "catan":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Real part of "catan_downward":
double: 1
-float: 1
+float: 2
+float128: 2
idouble: 1
-ifloat: 1
+ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Imaginary part of "catan_downward":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: Real part of "catan_towardzero":
double: 1
-float: 1
+float: 2
+float128: 2
idouble: 1
-ifloat: 1
+ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Imaginary part of "catan_towardzero":
double: 2
-float: 1
+float: 2
+float128: 2
idouble: 2
-ifloat: 1
+ifloat: 2
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: Real part of "catan_upward":
+double: 1
float: 1
+float128: 2
+idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Imaginary part of "catan_upward":
double: 3
float: 3
+float128: 3
idouble: 3
ifloat: 3
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: Real part of "catanh":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Imaginary part of "catanh":
+double: 1
float: 1
+float128: 1
+idouble: 1
ifloat: 1
+ifloat128: 1
+ildouble: 1
+ldouble: 1
Function: Real part of "catanh_downward":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: Imaginary part of "catanh_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Real part of "catanh_towardzero":
double: 2
-float: 1
+float: 2
+float128: 2
idouble: 2
-ifloat: 1
+ifloat: 2
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: Imaginary part of "catanh_towardzero":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Real part of "catanh_upward":
double: 4
-float: 3
+float: 4
+float128: 4
idouble: 4
-ifloat: 3
+ifloat: 4
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: Imaginary part of "catanh_upward":
+double: 1
float: 1
+float128: 2
+idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "cbrt":
double: 3
float: 1
+float128: 1
idouble: 3
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "cbrt_downward":
double: 4
float: 1
+float128: 1
idouble: 4
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "cbrt_towardzero":
double: 3
float: 1
+float128: 1
idouble: 3
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "cbrt_upward":
double: 5
float: 1
+float128: 1
idouble: 5
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Real part of "ccos":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Imaginary part of "ccos":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Real part of "ccos_downward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "ccos_downward":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "ccos_towardzero":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "ccos_towardzero":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "ccos_upward":
double: 1
float: 2
+float128: 3
idouble: 1
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "ccos_upward":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Real part of "ccosh":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Imaginary part of "ccosh":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Real part of "ccosh_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "ccosh_downward":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "ccosh_towardzero":
double: 1
float: 3
+float128: 2
idouble: 1
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "ccosh_towardzero":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "ccosh_upward":
double: 1
float: 2
+float128: 3
idouble: 1
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "ccosh_upward":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Real part of "cexp":
double: 2
float: 1
+float128: 1
idouble: 2
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Imaginary part of "cexp":
double: 1
float: 2
+float128: 1
idouble: 1
ifloat: 2
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Real part of "cexp_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "cexp_downward":
double: 1
float: 3
+float128: 2
idouble: 1
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "cexp_towardzero":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "cexp_towardzero":
double: 1
float: 3
+float128: 2
idouble: 1
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "cexp_upward":
double: 1
float: 2
+float128: 3
idouble: 1
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "cexp_upward":
double: 1
float: 2
+float128: 3
idouble: 1
ifloat: 2
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: Real part of "clog":
double: 3
float: 3
+float128: 2
idouble: 3
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "clog":
float: 1
+float128: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Real part of "clog10":
double: 3
float: 4
+float128: 2
idouble: 3
ifloat: 4
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: Imaginary part of "clog10":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Real part of "clog10_downward":
double: 5
-float: 4
+float: 5
+float128: 3
idouble: 5
-ifloat: 4
+ifloat: 5
+ifloat128: 3
ildouble: 8
ldouble: 8
Function: Imaginary part of "clog10_downward":
double: 2
float: 4
+float128: 3
idouble: 2
ifloat: 4
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: Real part of "clog10_towardzero":
double: 5
float: 5
+float128: 4
idouble: 5
ifloat: 5
+ifloat128: 4
ildouble: 8
ldouble: 8
Function: Imaginary part of "clog10_towardzero":
double: 2
float: 4
+float128: 3
idouble: 2
ifloat: 4
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: Real part of "clog10_upward":
double: 6
float: 5
+float128: 4
idouble: 6
ifloat: 5
+ifloat128: 4
ildouble: 8
ldouble: 8
Function: Imaginary part of "clog10_upward":
double: 2
float: 4
+float128: 3
idouble: 2
ifloat: 4
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: Real part of "clog_downward":
double: 4
float: 3
+float128: 3
idouble: 4
ifloat: 3
+ifloat128: 3
ildouble: 5
ldouble: 5
Function: Imaginary part of "clog_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Real part of "clog_towardzero":
double: 4
float: 4
+float128: 3
idouble: 4
ifloat: 4
+ifloat128: 3
ildouble: 5
ldouble: 5
Function: Imaginary part of "clog_towardzero":
double: 1
float: 3
+float128: 2
idouble: 1
ifloat: 3
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Real part of "clog_upward":
double: 4
float: 3
+float128: 4
idouble: 4
ifloat: 3
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: Imaginary part of "clog_upward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "cos":
+double: 1
+float128: 1
+idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "cos_downward":
double: 1
+float128: 3
idouble: 1
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: "cos_towardzero":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "cos_upward":
double: 1
+float128: 2
idouble: 1
+ifloat128: 2
ildouble: 2
ldouble: 2
@@ -1029,7 +1307,7 @@ Function: "cos_vlen4_avx2":
double: 2
Function: "cos_vlen8":
-double: 1
+double: 2
float: 1
Function: "cos_vlen8_avx2":
@@ -1038,546 +1316,690 @@ float: 1
Function: "cosh":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "cosh_downward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 3
Function: "cosh_towardzero":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "cosh_upward":
double: 1
float: 2
+float128: 3
idouble: 1
ifloat: 2
+ifloat128: 1
ildouble: 2
ldouble: 3
Function: Real part of "cpow":
double: 2
float: 5
+float128: 4
idouble: 2
ifloat: 5
+ifloat128: 4
ildouble: 3
ldouble: 3
Function: Imaginary part of "cpow":
float: 2
+float128: 1
ifloat: 2
+ifloat128: 1
ildouble: 4
ldouble: 4
Function: Real part of "cpow_downward":
double: 4
float: 8
+float128: 6
idouble: 4
ifloat: 8
+ifloat128: 6
ildouble: 7
ldouble: 7
Function: Imaginary part of "cpow_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Real part of "cpow_towardzero":
double: 4
float: 8
+float128: 6
idouble: 4
ifloat: 8
+ifloat128: 6
ildouble: 7
ldouble: 7
Function: Imaginary part of "cpow_towardzero":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: Real part of "cpow_upward":
double: 4
float: 1
+float128: 3
idouble: 4
ifloat: 1
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "cpow_upward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Real part of "csin":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
+Function: Imaginary part of "csin":
+float128: 1
+ifloat128: 1
+
Function: Real part of "csin_downward":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "csin_downward":
double: 1
float: 2
+float128: 2
idouble: 1
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "csin_towardzero":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "csin_towardzero":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "csin_upward":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "csin_upward":
double: 1
float: 3
+float128: 3
idouble: 1
ifloat: 3
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: Real part of "csinh":
float: 1
+float128: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Imaginary part of "csinh":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: Real part of "csinh_downward":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "csinh_downward":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "csinh_towardzero":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Imaginary part of "csinh_towardzero":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "csinh_upward":
double: 1
float: 3
+float128: 3
idouble: 1
ifloat: 3
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: Imaginary part of "csinh_upward":
double: 2
float: 3
+float128: 2
idouble: 2
ifloat: 3
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: Real part of "csqrt":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Imaginary part of "csqrt":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: Real part of "csqrt_downward":
double: 5
float: 4
+float128: 4
idouble: 5
ifloat: 4
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: Imaginary part of "csqrt_downward":
double: 4
float: 3
+float128: 3
idouble: 4
ifloat: 3
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: Real part of "csqrt_towardzero":
double: 4
float: 3
+float128: 3
idouble: 4
ifloat: 3
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: Imaginary part of "csqrt_towardzero":
double: 4
float: 3
+float128: 3
idouble: 4
ifloat: 3
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: Real part of "csqrt_upward":
double: 5
float: 4
+float128: 4
idouble: 5
ifloat: 4
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: Imaginary part of "csqrt_upward":
double: 3
float: 3
+float128: 3
idouble: 3
ifloat: 3
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: Real part of "ctan":
double: 1
float: 1
+float128: 3
idouble: 1
ifloat: 1
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Imaginary part of "ctan":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 1
ldouble: 1
Function: Real part of "ctan_downward":
double: 6
float: 5
+float128: 4
idouble: 6
ifloat: 5
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: Imaginary part of "ctan_downward":
double: 2
float: 2
+float128: 5
idouble: 2
ifloat: 2
+ifloat128: 5
ildouble: 4
ldouble: 4
Function: Real part of "ctan_towardzero":
double: 5
float: 3
+float128: 4
idouble: 5
ifloat: 3
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: Imaginary part of "ctan_towardzero":
double: 2
float: 2
+float128: 5
idouble: 2
ifloat: 2
+ifloat128: 5
ildouble: 4
ldouble: 4
Function: Real part of "ctan_upward":
double: 2
float: 4
+float128: 5
idouble: 2
ifloat: 4
+ifloat128: 5
ildouble: 3
ldouble: 3
Function: Imaginary part of "ctan_upward":
double: 2
-float: 1
+float: 2
+float128: 5
idouble: 2
-ifloat: 1
+ifloat: 2
+ifloat128: 5
ildouble: 3
ldouble: 3
Function: Real part of "ctanh":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 1
ldouble: 1
Function: Imaginary part of "ctanh":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: Real part of "ctanh_downward":
double: 4
float: 2
+float128: 5
idouble: 4
ifloat: 2
+ifloat128: 5
ildouble: 4
ldouble: 4
Function: Imaginary part of "ctanh_downward":
double: 6
float: 5
+float128: 4
idouble: 6
ifloat: 5
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: Real part of "ctanh_towardzero":
double: 2
float: 2
+float128: 5
idouble: 2
ifloat: 2
+ifloat128: 5
ildouble: 4
ldouble: 4
Function: Imaginary part of "ctanh_towardzero":
double: 5
float: 3
+float128: 3
idouble: 5
ifloat: 3
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: Real part of "ctanh_upward":
double: 2
float: 2
+float128: 5
idouble: 2
ifloat: 2
+ifloat128: 5
ildouble: 3
ldouble: 3
Function: Imaginary part of "ctanh_upward":
double: 2
float: 3
+float128: 5
idouble: 2
ifloat: 3
+ifloat128: 5
ildouble: 3
ldouble: 3
Function: "erf":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "erf_downward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "erf_towardzero":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "erf_upward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "erfc":
double: 3
float: 2
+float128: 2
idouble: 3
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: "erfc_downward":
double: 5
float: 6
+float128: 5
idouble: 5
ifloat: 6
+ifloat128: 5
ildouble: 4
ldouble: 4
Function: "erfc_towardzero":
double: 3
float: 4
+float128: 4
idouble: 3
ifloat: 4
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: "erfc_upward":
double: 5
float: 6
+float128: 5
idouble: 5
ifloat: 6
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: "exp":
+float128: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "exp10":
double: 2
+float128: 2
idouble: 2
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "exp10_downward":
double: 2
float: 1
+float128: 3
idouble: 2
ifloat: 1
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: "exp10_towardzero":
double: 2
float: 1
+float128: 3
idouble: 2
ifloat: 1
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: "exp10_upward":
double: 2
float: 1
+float128: 3
idouble: 2
ifloat: 1
+ifloat128: 3
ildouble: 2
ldouble: 2
Function: "exp2":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "exp2_downward":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "exp2_towardzero":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "exp2_upward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "exp_downward":
double: 1
+float: 1
idouble: 1
+ifloat: 1
ildouble: 1
ldouble: 1
Function: "exp_towardzero":
double: 1
+float: 1
idouble: 1
+ifloat: 1
ildouble: 2
ldouble: 2
@@ -1612,32 +2034,40 @@ float: 1
Function: "expm1":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "expm1_downward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: "expm1_towardzero":
double: 1
float: 2
+float128: 4
idouble: 1
ifloat: 2
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: "expm1_upward":
double: 1
float: 1
+float128: 3
idouble: 1
ifloat: 1
+ifloat128: 3
ildouble: 4
ldouble: 4
@@ -1675,275 +2105,347 @@ ldouble: 6
Function: "hypot":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "hypot_downward":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "hypot_towardzero":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "hypot_upward":
double: 1
+float128: 1
idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "j0":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "j0_downward":
double: 2
float: 4
+float128: 4
idouble: 2
ifloat: 4
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: "j0_towardzero":
double: 3
float: 2
+float128: 2
idouble: 3
ifloat: 2
+ifloat128: 2
ildouble: 5
ldouble: 5
Function: "j0_upward":
double: 3
float: 2
+float128: 5
idouble: 3
ifloat: 2
+ifloat128: 5
ildouble: 4
ldouble: 4
Function: "j1":
double: 1
float: 2
+float128: 4
idouble: 1
ifloat: 2
+ifloat128: 4
ildouble: 1
ldouble: 1
Function: "j1_downward":
double: 3
float: 3
+float128: 4
idouble: 3
ifloat: 3
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: "j1_towardzero":
double: 3
float: 2
+float128: 4
idouble: 3
ifloat: 2
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: "j1_upward":
double: 3
float: 5
+float128: 3
idouble: 3
ifloat: 5
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: "jn":
double: 4
float: 4
+float128: 7
idouble: 4
ifloat: 4
+ifloat128: 7
ildouble: 4
ldouble: 4
Function: "jn_downward":
double: 5
float: 5
+float128: 8
idouble: 5
ifloat: 5
+ifloat128: 8
ildouble: 4
ldouble: 4
Function: "jn_towardzero":
double: 5
float: 5
+float128: 8
idouble: 5
ifloat: 5
+ifloat128: 8
ildouble: 5
ldouble: 5
Function: "jn_upward":
double: 5
float: 5
+float128: 7
idouble: 5
ifloat: 5
+ifloat128: 7
ildouble: 5
ldouble: 5
Function: "lgamma":
double: 4
float: 4
+float128: 5
idouble: 4
ifloat: 4
+ifloat128: 5
ildouble: 4
ldouble: 4
Function: "lgamma_downward":
double: 5
float: 4
+float128: 8
idouble: 5
ifloat: 4
+ifloat128: 8
ildouble: 7
ldouble: 7
Function: "lgamma_towardzero":
double: 5
float: 4
+float128: 5
idouble: 5
ifloat: 4
+ifloat128: 5
ildouble: 7
ldouble: 7
Function: "lgamma_upward":
double: 5
float: 5
+float128: 8
idouble: 5
ifloat: 5
+ifloat128: 8
ildouble: 6
ldouble: 6
Function: "log":
float: 1
+float128: 1
ifloat: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "log10":
double: 2
float: 2
+float128: 1
idouble: 2
ifloat: 2
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "log10_downward":
double: 2
float: 3
+float128: 1
idouble: 2
ifloat: 3
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "log10_towardzero":
double: 2
float: 2
+float128: 1
idouble: 2
ifloat: 2
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "log10_upward":
double: 2
float: 2
+float128: 1
idouble: 2
ifloat: 2
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "log1p":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "log1p_downward":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: "log1p_towardzero":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: "log1p_upward":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: "log2":
double: 2
float: 1
+float128: 2
idouble: 2
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
Function: "log2_downward":
double: 3
float: 3
+float128: 3
idouble: 3
ifloat: 3
+ifloat128: 3
ildouble: 1
ldouble: 1
Function: "log2_towardzero":
double: 2
float: 2
+float128: 1
idouble: 2
ifloat: 2
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "log2_upward":
double: 3
float: 3
+float128: 1
idouble: 3
ifloat: 3
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "log_downward":
float: 2
+float128: 1
ifloat: 2
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "log_towardzero":
float: 2
+float128: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "log_upward":
double: 1
float: 2
+float128: 1
idouble: 1
ifloat: 2
+ifloat128: 1
ildouble: 1
ldouble: 1
@@ -1965,67 +2467,47 @@ double: 1
float: 3
Function: "log_vlen8_avx2":
-float: 2
+float: 3
Function: "pow":
+double: 1
float: 1
+float128: 2
+idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 1
ldouble: 1
-Function: "pow10":
-double: 2
-idouble: 2
-ildouble: 1
-ldouble: 1
-
-Function: "pow10_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow10_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow10_upward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
Function: "pow_downward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
+ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_towardzero":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
-ildouble: 1
-ldouble: 1
+ifloat128: 2
+ildouble: 4
+ldouble: 4
Function: "pow_upward":
double: 1
float: 1
+float128: 2
idouble: 1
ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat128: 2
+ildouble: 4
+ldouble: 4
Function: "pow_vlen16":
float: 3
@@ -2048,24 +2530,34 @@ Function: "pow_vlen8_avx2":
float: 3
Function: "sin":
+double: 1
+float128: 1
+idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "sin_downward":
double: 1
+float128: 3
idouble: 1
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: "sin_towardzero":
double: 1
+float128: 2
idouble: 1
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "sin_upward":
double: 1
+float128: 3
idouble: 1
+ifloat128: 3
ildouble: 3
ldouble: 3
@@ -2090,24 +2582,34 @@ Function: "sin_vlen8_avx2":
float: 1
Function: "sincos":
+double: 1
+float128: 1
+idouble: 1
+ifloat128: 1
ildouble: 1
ldouble: 1
Function: "sincos_downward":
double: 1
+float128: 3
idouble: 1
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: "sincos_towardzero":
double: 1
+float128: 2
idouble: 1
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "sincos_upward":
double: 1
+float128: 3
idouble: 1
+ifloat128: 3
ildouble: 3
ldouble: 3
@@ -2125,7 +2627,7 @@ Function: "sincos_vlen4_avx2":
double: 2
Function: "sincos_vlen8":
-double: 1
+double: 2
float: 1
Function: "sincos_vlen8_avx2":
@@ -2134,222 +2636,278 @@ float: 1
Function: "sinh":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "sinh_downward":
double: 3
float: 3
+float128: 3
idouble: 3
ifloat: 3
+ifloat128: 3
ildouble: 5
ldouble: 5
Function: "sinh_towardzero":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: "sinh_upward":
double: 3
float: 3
+float128: 4
idouble: 3
ifloat: 3
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: "tan":
float: 1
+float128: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "tan_downward":
double: 1
float: 2
+float128: 1
idouble: 1
ifloat: 2
+ifloat128: 1
ildouble: 3
ldouble: 3
Function: "tan_towardzero":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 3
ldouble: 3
Function: "tan_upward":
double: 1
float: 1
+float128: 1
idouble: 1
ifloat: 1
+ifloat128: 1
ildouble: 2
ldouble: 2
Function: "tanh":
double: 2
float: 2
+float128: 2
idouble: 2
ifloat: 2
+ifloat128: 2
ildouble: 3
ldouble: 3
Function: "tanh_downward":
double: 3
float: 3
+float128: 4
idouble: 3
ifloat: 3
+ifloat128: 4
ildouble: 4
ldouble: 4
Function: "tanh_towardzero":
double: 2
float: 2
+float128: 3
idouble: 2
ifloat: 2
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: "tanh_upward":
double: 3
float: 3
+float128: 3
idouble: 3
ifloat: 3
+ifloat128: 3
ildouble: 4
ldouble: 4
Function: "tgamma":
double: 5
float: 5
+float128: 4
idouble: 5
ifloat: 5
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: "tgamma_downward":
double: 5
float: 5
+float128: 5
idouble: 5
ifloat: 5
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: "tgamma_towardzero":
double: 5
float: 5
+float128: 5
idouble: 5
ifloat: 5
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: "tgamma_upward":
double: 5
float: 5
+float128: 4
idouble: 5
ifloat: 5
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: "y0":
double: 2
float: 1
+float128: 3
idouble: 2
ifloat: 1
+ifloat128: 3
ildouble: 1
ldouble: 1
Function: "y0_downward":
double: 3
float: 4
+float128: 4
idouble: 3
ifloat: 4
+ifloat128: 4
ildouble: 5
ldouble: 5
Function: "y0_towardzero":
double: 3
float: 3
+float128: 3
idouble: 3
ifloat: 3
+ifloat128: 3
ildouble: 5
ldouble: 5
Function: "y0_upward":
double: 3
float: 5
+float128: 3
idouble: 3
ifloat: 5
+ifloat128: 3
ildouble: 3
ldouble: 3
Function: "y1":
double: 3
float: 2
+float128: 2
idouble: 3
ifloat: 2
+ifloat128: 2
ildouble: 2
ldouble: 2
Function: "y1_downward":
double: 3
float: 2
+float128: 4
idouble: 3
ifloat: 2
+ifloat128: 4
ildouble: 7
ldouble: 7
Function: "y1_towardzero":
double: 3
float: 2
+float128: 2
idouble: 3
ifloat: 2
+ifloat128: 2
ildouble: 5
ldouble: 5
Function: "y1_upward":
double: 7
float: 2
+float128: 5
idouble: 7
ifloat: 2
+ifloat128: 5
ildouble: 7
ldouble: 7
Function: "yn":
double: 3
float: 3
+float128: 5
idouble: 3
ifloat: 3
+ifloat128: 5
ildouble: 4
ldouble: 4
Function: "yn_downward":
double: 3
float: 4
+float128: 5
idouble: 3
ifloat: 4
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: "yn_towardzero":
double: 3
float: 3
+float128: 5
idouble: 3
ifloat: 3
+ifloat128: 5
ildouble: 5
ldouble: 5
Function: "yn_upward":
double: 4
float: 5
+float128: 5
idouble: 4
ifloat: 5
+ifloat128: 5
ildouble: 4
ldouble: 4
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps-name b/sysdeps/x86_64/fpu/libm-test-ulps-name
new file mode 100644
index 0000000000..1c09346681
--- /dev/null
+++ b/sysdeps/x86_64/fpu/libm-test-ulps-name
@@ -0,0 +1 @@
+x86_64
diff --git a/sysdeps/x86_64/fpu/math-tests-arch.h b/sysdeps/x86_64/fpu/math-tests-arch.h
index 867152046e..a5df133292 100644
--- a/sysdeps/x86_64/fpu/math-tests-arch.h
+++ b/sysdeps/x86_64/fpu/math-tests-arch.h
@@ -1,5 +1,5 @@
/* Runtime architecture check for math tests. x86_64 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,11 +16,11 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <cpu-features.h>
+
#if defined REQUIRE_AVX
-# include <init-arch.h>
# define INIT_ARCH_EXT
-
# define CHECK_ARCH_EXT \
do \
{ \
@@ -29,10 +29,8 @@
while (0)
#elif defined REQUIRE_AVX2
-# include <init-arch.h>
# define INIT_ARCH_EXT
-
# define CHECK_ARCH_EXT \
do \
{ \
@@ -41,10 +39,8 @@
while (0)
#elif defined REQUIRE_AVX512F
-# include <init-arch.h>
# define INIT_ARCH_EXT
-
# define CHECK_ARCH_EXT \
do \
{ \
diff --git a/sysdeps/x86_64/fpu/math_ldbl.h b/sysdeps/x86_64/fpu/math_ldbl.h
index b9ff8dadaf..27f8fce904 100644
--- a/sysdeps/x86_64/fpu/math_ldbl.h
+++ b/sysdeps/x86_64/fpu/math_ldbl.h
@@ -1,6 +1,25 @@
-#ifndef _MATH_PRIVATE_H_
-#error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
-#endif
+/* Manipulation of the bit representation of 'long double' quantities.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MATH_LDBL_H_
+#define _MATH_LDBL_H_ 1
+
+#include <stdint.h>
/* A union which permits us to convert between a long double and
three 32 bit ints. */
@@ -10,8 +29,8 @@ typedef union
long double value;
struct
{
- u_int32_t lsw;
- u_int32_t msw;
+ uint32_t lsw;
+ uint32_t msw;
int sign_exponent:16;
unsigned int empty1:16;
unsigned int empty0:32;
@@ -77,3 +96,5 @@ do { \
se_u.parts.sign_exponent = (exp); \
(d) = se_u.value; \
} while (0)
+
+#endif /* math_ldbl.h */
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 027a6a3a4d..13052893ef 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -48,38 +48,6 @@
#include <sysdeps/i386/fpu/fenv_private.h>
#include_next <math_private.h>
-extern __always_inline double
-__ieee754_sqrt (double d)
-{
- double res;
-#if defined __AVX__ || defined SSE2AVX
- asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d));
-#else
- asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d));
-#endif
- return res;
-}
-
-extern __always_inline float
-__ieee754_sqrtf (float d)
-{
- float res;
-#if defined __AVX__ || defined SSE2AVX
- asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d));
-#else
- asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d));
-#endif
- return res;
-}
-
-extern __always_inline long double
-__ieee754_sqrtl (long double d)
-{
- long double res;
- asm ("fsqrt" : "=t" (res) : "0" (d));
- return res;
-}
-
#ifdef __SSE4_1__
extern __always_inline double
__rint (double d)
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 34542155aa..9f387248aa 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -1,12 +1,54 @@
ifeq ($(subdir),math)
libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
- s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
+ s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c \
+ s_trunc-c s_truncf-c
+
+libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \
+ s_floorf-sse4_1 s_nearbyint-sse4_1 \
+ s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 \
+ s_trunc-sse4_1 s_truncf-sse4_1
+
+libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \
+ e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \
+ mpa-fma \
+ sincos32-fma doasin-fma dosincos-fma \
+ mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma
+
+CFLAGS-doasin-fma.c = -mfma -mavx2
+CFLAGS-dosincos-fma.c = -mfma -mavx2
+CFLAGS-e_asin-fma.c = -mfma -mavx2
+CFLAGS-e_atan2-fma.c = -mfma -mavx2
+CFLAGS-e_exp-fma.c = -mfma -mavx2
+CFLAGS-e_log-fma.c = -mfma -mavx2
+CFLAGS-e_pow-fma.c = -mfma -mavx2 $(config-cflags-nofma)
+CFLAGS-mpa-fma.c = -mfma -mavx2
+CFLAGS-mpatan-fma.c = -mfma -mavx2
+CFLAGS-mpatan2-fma.c = -mfma -mavx2
+CFLAGS-mpsqrt-fma.c = -mfma -mavx2
+CFLAGS-mptan-fma.c = -mfma -mavx2
+CFLAGS-s_atan-fma.c = -mfma -mavx2
+CFLAGS-sincos32-fma.c = -mfma -mavx2
+CFLAGS-s_sin-fma.c = -mfma -mavx2
+CFLAGS-s_tan-fma.c = -mfma -mavx2
+
+libm-sysdep_routines += s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
+
+libm-sysdep_routines += e_exp2f-fma e_expf-fma e_log2f-fma e_logf-fma \
+ e_powf-fma s_sinf-fma s_cosf-fma s_sincosf-fma
+
+CFLAGS-e_exp2f-fma.c = -mfma -mavx2
+CFLAGS-e_expf-fma.c = -mfma -mavx2
+CFLAGS-e_log2f-fma.c = -mfma -mavx2
+CFLAGS-e_logf-fma.c = -mfma -mavx2
+CFLAGS-e_powf-fma.c = -mfma -mavx2
+CFLAGS-s_sinf-fma.c = -mfma -mavx2
+CFLAGS-s_cosf-fma.c = -mfma -mavx2
+CFLAGS-s_sincosf-fma.c = -mfma -mavx2
libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
- mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
+ mpa-fma4 \
sincos32-fma4 doasin-fma4 dosincos-fma4 \
- halfulp-fma4 mpexp-fma4 \
mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4
CFLAGS-doasin-fma4.c = -mfma4
@@ -16,35 +58,26 @@ CFLAGS-e_atan2-fma4.c = -mfma4
CFLAGS-e_exp-fma4.c = -mfma4
CFLAGS-e_log-fma4.c = -mfma4
CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma)
-CFLAGS-halfulp-fma4.c = -mfma4
CFLAGS-mpa-fma4.c = -mfma4
CFLAGS-mpatan-fma4.c = -mfma4
CFLAGS-mpatan2-fma4.c = -mfma4
-CFLAGS-mpexp-fma4.c = -mfma4
-CFLAGS-mplog-fma4.c = -mfma4
CFLAGS-mpsqrt-fma4.c = -mfma4
CFLAGS-mptan-fma4.c = -mfma4
CFLAGS-s_atan-fma4.c = -mfma4
CFLAGS-sincos32-fma4.c = -mfma4
-CFLAGS-slowexp-fma4.c = -mfma4
-CFLAGS-slowpow-fma4.c = -mfma4
CFLAGS-s_sin-fma4.c = -mfma4
CFLAGS-s_tan-fma4.c = -mfma4
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
e_atan2-avx s_sin-avx s_tan-avx \
- mplog-avx mpa-avx slowexp-avx \
- mpexp-avx
+ mpa-avx
CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
-CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
-CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
-CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
endif
@@ -66,5 +99,35 @@ libmvec-sysdep_routines += svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 \
svml_d_pow4_core_avx2 svml_d_pow8_core_avx512 \
svml_s_powf4_core_sse4 svml_s_powf8_core_avx2 \
svml_s_powf16_core_avx512 svml_s_sincosf4_core_sse4 \
- svml_s_sincosf8_core_avx2 svml_s_sincosf16_core_avx512
+ svml_s_sincosf8_core_avx2 \
+ svml_s_sincosf16_core_avx512 \
+ svml_d_cos2_core-sse2 svml_d_cos4_core-sse \
+ svml_d_cos8_core-avx2 svml_d_exp2_core-sse2 \
+ svml_d_exp4_core-sse svml_d_exp8_core-avx2 \
+ svml_d_log2_core-sse2 svml_d_log4_core-sse \
+ svml_d_log8_core-avx2 svml_d_pow2_core-sse2 \
+ svml_d_pow4_core-sse svml_d_pow8_core-avx2 \
+ svml_d_sin2_core-sse2 svml_d_sin4_core-sse \
+ svml_d_sin8_core-avx2 \
+ svml_d_sincos2_core-sse2 \
+ svml_d_sincos4_core-sse \
+ svml_d_sincos8_core-avx2 \
+ svml_s_cosf16_core-avx2 \
+ svml_s_cosf4_core-sse2 \
+ svml_s_cosf8_core-sse \
+ svml_s_expf16_core-avx2 \
+ svml_s_expf4_core-sse2 \
+ svml_s_expf8_core-sse \
+ svml_s_logf16_core-avx2 \
+ svml_s_logf4_core-sse2 \
+ svml_s_logf8_core-sse \
+ svml_s_powf16_core-avx2 \
+ svml_s_powf4_core-sse2 \
+ svml_s_powf8_core-sse \
+ svml_s_sincosf16_core-avx2 \
+ svml_s_sincosf4_core-sse2 \
+ svml_s_sincosf8_core-sse \
+ svml_s_sinf16_core-avx2 \
+ svml_s_sinf4_core-sse2 \
+ svml_s_sinf8_core-sse
endif
diff --git a/sysdeps/x86_64/fpu/multiarch/doasin-fma.c b/sysdeps/x86_64/fpu/multiarch/doasin-fma.c
new file mode 100644
index 0000000000..7a09865fca
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/doasin-fma.c
@@ -0,0 +1,4 @@
+#define __doasin __doasin_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/doasin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c b/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c
new file mode 100644
index 0000000000..5744586bdb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c
@@ -0,0 +1,6 @@
+#define __docos __docos_fma
+#define __dubcos __dubcos_fma
+#define __dubsin __dubsin_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/dosincos.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c b/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c
new file mode 100644
index 0000000000..50e9c64247
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c
@@ -0,0 +1,11 @@
+#define __ieee754_acos __ieee754_acos_fma
+#define __ieee754_asin __ieee754_asin_fma
+#define __cos32 __cos32_fma
+#define __doasin __doasin_fma
+#define __docos __docos_fma
+#define __dubcos __dubcos_fma
+#define __dubsin __dubsin_fma
+#define __sin32 __sin32_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
index 111a5b99bd..8d47004e4f 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
@@ -1,26 +1,40 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
-
-extern double __ieee754_acos_sse2 (double);
-extern double __ieee754_asin_sse2 (double);
-extern double __ieee754_acos_fma4 (double);
-extern double __ieee754_asin_fma4 (double);
-
-libm_ifunc (__ieee754_acos,
- HAS_ARCH_FEATURE (FMA4_Usable)
- ? __ieee754_acos_fma4
- : __ieee754_acos_sse2);
-strong_alias (__ieee754_acos, __acos_finite)
+/* Multiple versions of IEEE 754 asin and acos.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern double __redirect_ieee754_asin (double);
+extern double __redirect_ieee754_acos (double);
+
+#define SYMBOL_NAME ieee754_asin
+#include "ifunc-fma4.h"
-libm_ifunc (__ieee754_asin,
- HAS_ARCH_FEATURE (FMA4_Usable)
- ? __ieee754_asin_fma4
- : __ieee754_asin_sse2);
+libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
+ IFUNC_SELECTOR ());
strong_alias (__ieee754_asin, __asin_finite)
-#define __ieee754_acos __ieee754_acos_sse2
-#define __ieee754_asin __ieee754_asin_sse2
+#undef SYMBOL_NAME
+#define SYMBOL_NAME ieee754_acos
+#include "ifunc-fma4.h"
+
+libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
+ IFUNC_SELECTOR ());
+strong_alias (__ieee754_acos, __acos_finite)
+#define __ieee754_acos __ieee754_acos_sse2
+#define __ieee754_asin __ieee754_asin_sse2
#include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c
new file mode 100644
index 0000000000..caba686496
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c
@@ -0,0 +1,10 @@
+#define __ieee754_atan2 __ieee754_atan2_fma
+#define __add __add_fma
+#define __dbl_mp __dbl_mp_fma
+#define __dvd __dvd_fma
+#define __mpatan2 __mpatan2_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 9ca3c02a44..6c2dd5af37 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -1,18 +1,29 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
+/* Multiple versions of IEEE 754 atan2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-extern double __ieee754_atan2_sse2 (double, double);
-extern double __ieee754_atan2_avx (double, double);
-extern double __ieee754_atan2_fma4 (double, double);
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
-libm_ifunc (__ieee754_atan2,
- HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_atan2_fma4
- : (HAS_ARCH_FEATURE (AVX_Usable)
- ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
-strong_alias (__ieee754_atan2, __atan2_finite)
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
-#define __ieee754_atan2 __ieee754_atan2_sse2
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern double __redirect_ieee754_atan2 (double, double);
+#define SYMBOL_NAME ieee754_atan2
+#include "ifunc-avx-fma4.h"
+libc_ifunc_redirected (__redirect_ieee754_atan2,
+ __ieee754_atan2, IFUNC_SELECTOR ());
+strong_alias (__ieee754_atan2, __atan2_finite)
+
+#define __ieee754_atan2 __ieee754_atan2_sse2
#include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c
index ee5dd6d2dc..afd917442a 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c
@@ -1,6 +1,5 @@
#define __ieee754_exp __ieee754_exp_avx
#define __exp1 __exp1_avx
-#define __slowexp __slowexp_avx
#define SECTION __attribute__ ((section (".text.avx")))
#include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c
new file mode 100644
index 0000000000..765b1b9dd3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c
@@ -0,0 +1,5 @@
+#define __ieee754_exp __ieee754_exp_fma
+#define __exp1 __exp1_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
index ae6eb67603..9ac7acad28 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
@@ -1,6 +1,5 @@
#define __ieee754_exp __ieee754_exp_fma4
#define __exp1 __exp1_fma4
-#define __slowexp __slowexp_fma4
#define SECTION __attribute__ ((section (".text.fma4")))
#include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index b7d7b5ff27..7cd7d1729c 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -1,18 +1,29 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
+/* Multiple versions of IEEE 754 exp.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-extern double __ieee754_exp_sse2 (double);
-extern double __ieee754_exp_avx (double);
-extern double __ieee754_exp_fma4 (double);
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
-libm_ifunc (__ieee754_exp,
- HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_exp_fma4
- : (HAS_ARCH_FEATURE (AVX_Usable)
- ? __ieee754_exp_avx : __ieee754_exp_sse2));
-strong_alias (__ieee754_exp, __exp_finite)
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
-#define __ieee754_exp __ieee754_exp_sse2
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern double __redirect_ieee754_exp (double);
+#define SYMBOL_NAME ieee754_exp
+#include "ifunc-avx-fma4.h"
+libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
+ IFUNC_SELECTOR ());
+strong_alias (__ieee754_exp, __exp_finite)
+
+#define __ieee754_exp __ieee754_exp_sse2
#include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c
new file mode 100644
index 0000000000..c915a50794
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c
@@ -0,0 +1,3 @@
+#define __exp2f __exp2f_fma
+
+#include <sysdeps/ieee754/flt-32/e_exp2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
new file mode 100644
index 0000000000..e3a0706839
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
@@ -0,0 +1,40 @@
+/* Multiple versions of exp2f.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_exp2f (float);
+
+#define SYMBOL_NAME exp2f
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+# include <shlib-compat.h>
+versioned_symbol (libm, __exp2f, exp2f, GLIBC_2_27);
+libm_alias_float_other (__exp2, exp2)
+#else
+libm_alias_float (__exp2, exp2)
+#endif
+
+strong_alias (__exp2f, __ieee754_exp2f)
+strong_alias (__exp2f, __exp2f_finite)
+
+#define __exp2f __exp2f_sse2
+#include <sysdeps/ieee754/flt-32/e_exp2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c
new file mode 100644
index 0000000000..4e01cd6a82
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c
@@ -0,0 +1,3 @@
+#define __expf __expf_fma
+
+#include <sysdeps/ieee754/flt-32/e_expf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
new file mode 100644
index 0000000000..2b7c7ccbd0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
@@ -0,0 +1,43 @@
+/* Multiple versions of expf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_expf (float);
+
+#define SYMBOL_NAME expf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__expf, __GI___expf, __redirect_expf)
+ __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __expf, expf, GLIBC_2_27);
+libm_alias_float_other (__exp, exp)
+#else
+libm_alias_float (__exp, exp)
+#endif
+
+strong_alias (__expf, __ieee754_expf)
+strong_alias (__expf, __expf_finite)
+
+#define __expf __expf_sse2
+#include <sysdeps/ieee754/flt-32/e_expf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-avx.c b/sysdeps/x86_64/fpu/multiarch/e_log-avx.c
index c669019bc2..b22a5767be 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log-avx.c
@@ -1,8 +1,4 @@
#define __ieee754_log __ieee754_log_avx
-#define __mplog __mplog_avx
-#define __add __add_avx
-#define __dbl_mp __dbl_mp_avx
-#define __sub __sub_avx
#define SECTION __attribute__ ((section (".text.avx")))
#include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma.c
new file mode 100644
index 0000000000..bce0ee03c2
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma.c
@@ -0,0 +1,4 @@
+#define __ieee754_log __ieee754_log_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
index a2346cc618..f458f9c23c 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
@@ -1,8 +1,4 @@
#define __ieee754_log __ieee754_log_fma4
-#define __mplog __mplog_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __sub __sub_fma4
#define SECTION __attribute__ ((section (".text.fma4")))
#include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index cf9533d6c0..e0a1b02fae 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -1,18 +1,29 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
+/* Multiple versions of IEEE 754 log.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-extern double __ieee754_log_sse2 (double);
-extern double __ieee754_log_avx (double);
-extern double __ieee754_log_fma4 (double);
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
-libm_ifunc (__ieee754_log,
- HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_log_fma4
- : (HAS_ARCH_FEATURE (AVX_Usable)
- ? __ieee754_log_avx : __ieee754_log_sse2));
-strong_alias (__ieee754_log, __log_finite)
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
-#define __ieee754_log __ieee754_log_sse2
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern double __redirect_ieee754_log (double);
+#define SYMBOL_NAME ieee754_log
+#include "ifunc-avx-fma4.h"
+libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
+ IFUNC_SELECTOR ());
+strong_alias (__ieee754_log, __log_finite)
+
+#define __ieee754_log __ieee754_log_sse2
#include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c b/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c
new file mode 100644
index 0000000000..8a76b836fb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c
@@ -0,0 +1,3 @@
+#define __log2f __log2f_fma
+
+#include <sysdeps/ieee754/flt-32/e_log2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
new file mode 100644
index 0000000000..12d0c30dd3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
@@ -0,0 +1,43 @@
+/* Multiple versions of log2f.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_log2f (float);
+
+#define SYMBOL_NAME log2f
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
+ __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __log2f, log2f, GLIBC_2_27);
+libm_alias_float_other (__log2, log2)
+#else
+libm_alias_float (__log2, log2)
+#endif
+
+strong_alias (__log2f, __ieee754_log2f)
+strong_alias (__log2f, __log2f_finite)
+
+#define __log2f __log2f_sse2
+#include <sysdeps/ieee754/flt-32/e_log2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c
new file mode 100644
index 0000000000..a47fd8195f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c
@@ -0,0 +1,3 @@
+#define __logf __logf_fma
+
+#include <sysdeps/ieee754/flt-32/e_logf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
new file mode 100644
index 0000000000..224d40a1e4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
@@ -0,0 +1,43 @@
+/* Multiple versions of logf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_logf (float);
+
+#define SYMBOL_NAME logf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__logf, __GI___logf, __redirect_logf)
+ __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __logf, logf, GLIBC_2_27);
+libm_alias_float_other (__log, log)
+#else
+libm_alias_float (__log, log)
+#endif
+
+strong_alias (__logf, __ieee754_logf)
+strong_alias (__logf, __logf_finite)
+
+#define __logf __logf_sse2
+#include <sysdeps/ieee754/flt-32/e_logf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
new file mode 100644
index 0000000000..73c1e7fb89
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
@@ -0,0 +1,5 @@
+#define __ieee754_pow __ieee754_pow_fma
+#define __exp1 __exp1_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
index 5b3ea8e103..8971b655ca 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
@@ -1,6 +1,5 @@
#define __ieee754_pow __ieee754_pow_fma4
#define __exp1 __exp1_fma4
-#define __slowpow __slowpow_fma4
#define SECTION __attribute__ ((section (".text.fma4")))
#include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
index a5c5d89c3e..084073c936 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@@ -1,17 +1,29 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
+/* Multiple versions of IEEE 754 pow.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-extern double __ieee754_pow_sse2 (double, double);
-extern double __ieee754_pow_fma4 (double, double);
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
-libm_ifunc (__ieee754_pow,
- HAS_ARCH_FEATURE (FMA4_Usable)
- ? __ieee754_pow_fma4
- : __ieee754_pow_sse2);
-strong_alias (__ieee754_pow, __pow_finite)
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
-#define __ieee754_pow __ieee754_pow_sse2
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern double __redirect_ieee754_pow (double, double);
+#define SYMBOL_NAME ieee754_pow
+#include "ifunc-fma4.h"
+libc_ifunc_redirected (__redirect_ieee754_pow,
+ __ieee754_pow, IFUNC_SELECTOR ());
+strong_alias (__ieee754_pow, __pow_finite)
+
+#define __ieee754_pow __ieee754_pow_sse2
#include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c
new file mode 100644
index 0000000000..fdf5dcc56a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c
@@ -0,0 +1,3 @@
+#define __powf __powf_fma
+
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
new file mode 100644
index 0000000000..a185006f40
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
@@ -0,0 +1,46 @@
+/* Multiple versions of powf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+#define powf __redirect_powf
+#define __DECL_SIMD___redirect_powf
+#include <math.h>
+#undef powf
+
+#define SYMBOL_NAME powf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__powf, __GI___powf, __redirect_powf)
+ __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __powf, powf, GLIBC_2_27);
+libm_alias_float_other (__pow, pow)
+#else
+libm_alias_float (__pow, pow)
+#endif
+
+strong_alias (__powf, __ieee754_powf)
+strong_alias (__powf, __powf_finite)
+
+#define __powf __powf_sse2
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
deleted file mode 100644
index a00c17c016..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define __halfulp __halfulp_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/halfulp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h
new file mode 100644
index 0000000000..a5f9375afc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h
@@ -0,0 +1,43 @@
+/* Common definition for ifunc selections optimized with AVX, AVX2/FMA
+ and FMA4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ return OPTIMIZE (fma);
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable))
+ return OPTIMIZE (fma4);
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable))
+ return OPTIMIZE (avx);
+
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
index 9a06a5c174..63a8cd221f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
@@ -1,6 +1,6 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Common definition for ifunc selections optimized with AVX2/FMA.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -16,23 +16,19 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <machine/asm.h>
#include <init-arch.h>
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
-ENTRY(__ceilf)
- .type __ceilf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __ceilf_sse41(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jnz 2f
- leaq __ceilf_c(%rip), %rax
-2: ret
-END(__ceilf)
-weak_alias (__ceilf, ceilf)
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ return OPTIMIZE (fma);
-ENTRY(__ceilf_sse41)
- roundss $2, %xmm0, %xmm0
- ret
-END(__ceilf_sse41)
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
new file mode 100644
index 0000000000..a2526a2ee0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
@@ -0,0 +1,39 @@
+/* Common definition for ifunc selections optimized with AVX2/FMA and
+ FMA4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ return OPTIMIZE (fma);
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable))
+ return OPTIMIZE (fma4);
+
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h
new file mode 100644
index 0000000000..bd2d32e418
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h
@@ -0,0 +1,39 @@
+/* Common definition for libmathvec ifunc selections optimized with
+ AVX2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+#undef PASTER2
+#define PASTER2(x,y) x##_##y
+
+extern void REDIRECT_NAME (void);
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse_wrapper) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ return OPTIMIZE (avx2);
+
+ return OPTIMIZE (sse_wrapper);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h
new file mode 100644
index 0000000000..174e462cfb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h
@@ -0,0 +1,45 @@
+/* Common definition for libmathvec ifunc selections optimized with
+ AVX512.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+#undef PASTER2
+#define PASTER2(x,y) x##_##y
+
+extern void REDIRECT_NAME (void);
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_wrapper) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (knl) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (skx) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, MathVec_Prefer_No_AVX512))
+ {
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512DQ_Usable))
+ return OPTIMIZE (skx);
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable))
+ return OPTIMIZE (knl);
+ }
+
+ return OPTIMIZE (avx2_wrapper);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h
new file mode 100644
index 0000000000..c1e70ebfc1
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h
@@ -0,0 +1,38 @@
+/* Common definition for libmathvec ifunc selections optimized with
+ SSE4.1.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+#undef PASTER2
+#define PASTER2(x,y) x##_##y
+
+extern void REDIRECT_NAME (void);
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+ return OPTIMIZE (sse4);
+
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h
index 40fa729955..a8710ba802 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.S
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h
@@ -1,6 +1,6 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Common definition for ifunc selections optimized with SSE4.1.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -16,23 +16,18 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <machine/asm.h>
#include <init-arch.h>
+extern __typeof (REDIRECT_NAME) OPTIMIZE (c) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden;
-ENTRY(__ceil)
- .type __ceil, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __ceil_sse41(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jnz 2f
- leaq __ceil_c(%rip), %rax
-2: ret
-END(__ceil)
-weak_alias (__ceil, ceil)
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+ return OPTIMIZE (sse41);
-ENTRY(__ceil_sse41)
- roundsd $2, %xmm0, %xmm0
- ret
-END(__ceil_sse41)
+ return OPTIMIZE (c);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma.c
new file mode 100644
index 0000000000..177cc2517f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpa-fma.c
@@ -0,0 +1,14 @@
+#define __add __add_fma
+#define __mul __mul_fma
+#define __sqr __sqr_fma
+#define __sub __sub_fma
+#define __dbl_mp __dbl_mp_fma
+#define __dvd __dvd_fma
+
+#define NO___CPY 1
+#define NO___MP_DBL 1
+#define NO___ACR 1
+#define NO__CONST 1
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/mpa.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c b/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c
new file mode 100644
index 0000000000..d216f9142d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c
@@ -0,0 +1,10 @@
+#define __mpatan __mpatan_fma
+#define __add __add_fma
+#define __dvd __dvd_fma
+#define __mpsqrt __mpsqrt_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define AVOID_MPATAN_H 1
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/mpatan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c
new file mode 100644
index 0000000000..98df336f79
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c
@@ -0,0 +1,9 @@
+#define __mpatan2 __mpatan2_fma
+#define __add __add_fma
+#define __dvd __dvd_fma
+#define __mpatan __mpatan_fma
+#define __mpsqrt __mpsqrt_fma
+#define __mul __mul_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/mpatan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c b/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c
deleted file mode 100644
index 87f29c96c9..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#define __mpexp __mpexp_avx
-#define __add __add_avx
-#define __dbl_mp __dbl_mp_avx
-#define __dvd __dvd_avx
-#define __mul __mul_avx
-#define AVOID_MPEXP_H 1
-#define SECTION __attribute__ ((section (".text.avx")))
-
-#include <sysdeps/ieee754/dbl-64/mpexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
deleted file mode 100644
index 07ca6e9ad0..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#define __mpexp __mpexp_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __dvd __dvd_fma4
-#define __mul __mul_fma4
-#define AVOID_MPEXP_H 1
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/mpexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-avx.c b/sysdeps/x86_64/fpu/multiarch/mplog-avx.c
deleted file mode 100644
index fd783d9a67..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/mplog-avx.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define __mplog __mplog_avx
-#define __add __add_avx
-#define __mpexp __mpexp_avx
-#define __mul __mul_avx
-#define __sub __sub_avx
-#define SECTION __attribute__ ((section (".text.avx")))
-
-#include <sysdeps/ieee754/dbl-64/mplog.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
deleted file mode 100644
index b4733118d7..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define __mplog __mplog_fma4
-#define __add __add_fma4
-#define __mpexp __mpexp_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/mplog.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c
new file mode 100644
index 0000000000..44d7a23ae3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c
@@ -0,0 +1,8 @@
+#define __mpsqrt __mpsqrt_fma
+#define __dbl_mp __dbl_mp_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define AVOID_MPSQRT_H 1
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/mpsqrt.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mptan-fma.c b/sysdeps/x86_64/fpu/multiarch/mptan-fma.c
new file mode 100644
index 0000000000..d1a691413c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mptan-fma.c
@@ -0,0 +1,7 @@
+#define __mptan __mptan_fma
+#define __c32 __c32_fma
+#define __dvd __dvd_fma
+#define __mpranred __mpranred_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/mptan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c b/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c
index b5cb9c3a75..41816bfe6c 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c
@@ -1,4 +1,4 @@
-#define atan __atan_avx
+#define __atan __atan_avx
#define __add __add_avx
#define __dbl_mp __dbl_mp_avx
#define __mul __mul_avx
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c
new file mode 100644
index 0000000000..363e32bcbd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c
@@ -0,0 +1,9 @@
+#define __atan __atan_fma
+#define __add __add_fma
+#define __dbl_mp __dbl_mp_fma
+#define __mpatan __mpatan_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
index 9e83e6cdab..ad8d3af579 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
@@ -1,4 +1,4 @@
-#define atan __atan_fma4
+#define __atan __atan_fma4
#define __add __add_fma4
#define __dbl_mp __dbl_mp_fma4
#define __mpatan __mpatan_fma4
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index 742e95cb96..f9ce8549ab 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -1,15 +1,30 @@
-#include <init-arch.h>
-#include <math.h>
+/* Multiple versions of atan.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-extern double __atan_sse2 (double);
-extern double __atan_avx (double);
-extern double __atan_fma4 (double);
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
-libm_ifunc (atan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __atan_fma4 :
- HAS_ARCH_FEATURE (AVX_Usable)
- ? __atan_avx : __atan_sse2));
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
-#define atan __atan_sse2
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+#include <libm-alias-double.h>
+extern double __redirect_atan (double);
+
+#define SYMBOL_NAME atan
+#include "ifunc-avx-fma4.h"
+
+libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
+libm_alias_double (__atan, atan)
+
+#define __atan __atan_sse2
#include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
index fcc0945ea7..e90f05b42f 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
@@ -1,6 +1,6 @@
-/* mempcpy optimized with AVX512 for KNL hardware.
- Copyright (C) 2016 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -16,7 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#define USE_AS_MEMPCPY
-#define MEMCPY __mempcpy_avx512_no_vzeroupper
-#define MEMCPY_CHK __mempcpy_chk_avx512_no_vzeroupper
-#include "memcpy-avx512-no-vzeroupper.S"
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__ceil_sse41)
+ roundsd $10, %xmm0, %xmm0
+ ret
+END(__ceil_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
new file mode 100644
index 0000000000..070fcdddea
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __ceil.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-double.h>
+
+#define ceil __redirect_ceil
+#define __ceil __redirect___ceil
+#include <math.h>
+#undef ceil
+#undef __ceil
+
+#define SYMBOL_NAME ceil
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
+libm_alias_double (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
new file mode 100644
index 0000000000..c3bd24c5ae
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__ceilf_sse41)
+ roundss $10, %xmm0, %xmm0
+ ret
+END(__ceilf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
new file mode 100644
index 0000000000..db0c6c4bc3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __ceilf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+#define ceilf __redirect_ceilf
+#define __ceilf __redirect___ceilf
+#include <math.h>
+#undef ceilf
+#undef __ceilf
+
+#define SYMBOL_NAME ceilf
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
+libm_alias_float (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c
new file mode 100644
index 0000000000..5f9191aef9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c
@@ -0,0 +1,2 @@
+#define COSF __cosf_fma
+#include <sysdeps/ieee754/flt-32/s_cosf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c b/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c
new file mode 100644
index 0000000000..87cf42a82a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c
@@ -0,0 +1,2 @@
+#define COSF __cosf_sse2
+#include <sysdeps/ieee754/flt-32/s_cosf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
new file mode 100644
index 0000000000..33959d3d01
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
@@ -0,0 +1,28 @@
+/* Multiple versions of cosf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_cosf (float);
+
+#define SYMBOL_NAME cosf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
+
+libm_alias_float (__cos, cos)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
new file mode 100644
index 0000000000..b3c7aa29ff
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__floor_sse41)
+ roundsd $9, %xmm0, %xmm0
+ ret
+END(__floor_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
new file mode 100644
index 0000000000..58f8ed8eaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __floor.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-double.h>
+
+#define floor __redirect_floor
+#define __floor __redirect___floor
+#include <math.h>
+#undef floor
+#undef __floor
+
+#define SYMBOL_NAME floor
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
+libm_alias_double (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
new file mode 100644
index 0000000000..43461d3e6b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__floorf_sse41)
+ roundss $9, %xmm0, %xmm0
+ ret
+END(__floorf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
new file mode 100644
index 0000000000..5ef2fec2e3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __floorf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+#define floorf __redirect_floorf /* While <math.h> is read, its floorf declarations get the __redirect_ names. */
+#define __floorf __redirect___floorf
+#include <math.h>
+#undef floorf /* Restore the plain names for the definitions below. */
+#undef __floorf
+
+#define SYMBOL_NAME floorf /* Read by ifunc-sse4_1.h (not shown here); presumably names the candidates for IFUNC_SELECTOR -- confirm. */
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ()); /* Defines __floorf through the selector; macro is declared outside this diff. */
+libm_alias_float (__floor, floor) /* Stem without 'f' is intentional: the macro presumably appends the float suffix (s_fmaf.c uses it in place of weak_alias (__fmaf, fmaf)). */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 1de1a84cbe..875c76d372 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -1,5 +1,5 @@
/* FMA version of fma.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -20,6 +20,7 @@
#include <config.h>
#include <math.h>
#include <init-arch.h>
+#include <libm-alias-double.h>
extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
@@ -43,7 +44,7 @@ __fma_fma4 (double x, double y, double z)
libm_ifunc (__fma, HAS_ARCH_FEATURE (FMA_Usable)
? __fma_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
? __fma_fma4 : __fma_sse2));
-weak_alias (__fma, fma)
+libm_alias_double (__fma, fma)
#define __fma __fma_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 8905e4b54f..5f4c2ec0be 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -1,5 +1,5 @@
/* FMA version of fmaf.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,6 +19,7 @@
#include <config.h>
#include <math.h>
#include <init-arch.h>
+#include <libm-alias-float.h>
extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
@@ -42,7 +43,7 @@ __fmaf_fma4 (float x, float y, float z)
libm_ifunc (__fmaf, HAS_ARCH_FEATURE (FMA_Usable)
? __fmaf_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
? __fmaf_fma4 : __fmaf_sse2));
-weak_alias (__fmaf, fmaf)
+libm_alias_float (__fma, fma)
#define __fmaf __fmaf_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
new file mode 100644
index 0000000000..f9ac36e4f0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__nearbyint_sse41) /* SSE4.1 nearbyint: rounds the scalar double in the low lane of %xmm0 in place. */
+ roundsd $0xc, %xmm0, %xmm0 /* imm8 0xc = 0b1100: use the MXCSR rounding mode, precision (inexact) exception suppressed. */
+ ret
+END(__nearbyint_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
deleted file mode 100644
index 5091cf5813..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <machine/asm.h>
-#include <init-arch.h>
-
-
-ENTRY(__nearbyint)
- .type __nearbyint, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __nearbyint_sse41(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jnz 2f
- leaq __nearbyint_c(%rip), %rax
-2: ret
-END(__nearbyint)
-weak_alias (__nearbyint, nearbyint)
-
-
-ENTRY(__nearbyint_sse41)
- roundsd $0xc, %xmm0, %xmm0
- ret
-END(__nearbyint_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
new file mode 100644
index 0000000000..d92945fd14
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
@@ -0,0 +1,32 @@
+/* Multiple versions of __nearbyint.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-double.h>
+
+#define nearbyint __redirect_nearbyint /* While <math.h> is read, its nearbyint declarations get the __redirect_ names. */
+#define __nearbyint __redirect___nearbyint
+#include <math.h>
+#undef nearbyint /* Restore the plain names for the definitions below. */
+#undef __nearbyint
+
+#define SYMBOL_NAME nearbyint /* Read by ifunc-sse4_1.h (not shown here); presumably names the candidates for IFUNC_SELECTOR -- confirm. */
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_nearbyint, __nearbyint, /* Defines __nearbyint through the selector; macro is declared outside this diff. */
+ IFUNC_SELECTOR ());
+libm_alias_double (__nearbyint, nearbyint) /* Publishes the public nearbyint name(s) for __nearbyint. */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
new file mode 100644
index 0000000000..2f427da778
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__nearbyintf_sse41) /* SSE4.1 nearbyintf: rounds the scalar float in the low lane of %xmm0 in place. */
+ roundss $0xc, %xmm0, %xmm0 /* imm8 0xc = 0b1100: use the MXCSR rounding mode, precision (inexact) exception suppressed. */
+ ret
+END(__nearbyintf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
deleted file mode 100644
index 4a13700001..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <machine/asm.h>
-#include <init-arch.h>
-
-
-ENTRY(__nearbyintf)
- .type __nearbyintf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __nearbyintf_sse41(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jnz 2f
- leaq __nearbyintf_c(%rip), %rax
-2: ret
-END(__nearbyintf)
-weak_alias (__nearbyintf, nearbyintf)
-
-
-ENTRY(__nearbyintf_sse41)
- roundss $0xc, %xmm0, %xmm0
- ret
-END(__nearbyintf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
index 1c0d1e14b7..ba7be27956 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
@@ -1,6 +1,6 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Multiple versions of __nearbyintf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -16,23 +16,17 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <machine/asm.h>
-#include <init-arch.h>
+#include <libm-alias-float.h>
+#define nearbyintf __redirect_nearbyintf
+#define __nearbyintf __redirect___nearbyintf
+#include <math.h>
+#undef nearbyintf
+#undef __nearbyintf
-ENTRY(__rint)
- .type __rint, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __rint_sse41(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jnz 2f
- leaq __rint_c(%rip), %rax
-2: ret
-END(__rint)
-weak_alias (__rint, rint)
+#define SYMBOL_NAME nearbyintf
+#include "ifunc-sse4_1.h"
-
-ENTRY(__rint_sse41)
- roundsd $4, %xmm0, %xmm0
- ret
-END(__rint_sse41)
+libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
+ IFUNC_SELECTOR ());
+libm_alias_float (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
new file mode 100644
index 0000000000..7d7568a1a0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__rint_sse41) /* SSE4.1 rint: rounds the scalar double in the low lane of %xmm0 in place. */
+ roundsd $4, %xmm0, %xmm0 /* imm8 4 = 0b0100: use the MXCSR rounding mode; the precision (inexact) exception is not suppressed. */
+ ret
+END(__rint_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c
new file mode 100644
index 0000000000..f1cb2fed0c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __rint.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-double.h>
+
+#define rint __redirect_rint /* While <math.h> is read, its rint declarations get the __redirect_ names. */
+#define __rint __redirect___rint
+#include <math.h>
+#undef rint /* Restore the plain names for the definitions below. */
+#undef __rint
+
+#define SYMBOL_NAME rint /* Read by ifunc-sse4_1.h (not shown here); presumably names the candidates for IFUNC_SELECTOR -- confirm. */
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ()); /* Defines __rint through the selector; macro is declared outside this diff. */
+libm_alias_double (__rint, rint) /* Publishes the public rint name(s) for __rint. */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
new file mode 100644
index 0000000000..ef5d896f55
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__rintf_sse41) /* SSE4.1 rintf: rounds the scalar float in the low lane of %xmm0 in place. */
+ roundss $4, %xmm0, %xmm0 /* imm8 4 = 0b0100: use the MXCSR rounding mode; the precision (inexact) exception is not suppressed. */
+ ret
+END(__rintf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.S b/sysdeps/x86_64/fpu/multiarch/s_rintf.S
deleted file mode 100644
index 8e42fa561f..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <machine/asm.h>
-#include <init-arch.h>
-
-
-ENTRY(__rintf)
- .type __rintf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __rintf_sse41(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jnz 2f
- leaq __rintf_c(%rip), %rax
-2: ret
-END(__rintf)
-weak_alias (__rintf, rintf)
-
-
-ENTRY(__rintf_sse41)
- roundss $4, %xmm0, %xmm0
- ret
-END(__rintf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
new file mode 100644
index 0000000000..41323b3b5b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __rintf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+#define rintf __redirect_rintf /* While <math.h> is read, its rintf declarations get the __redirect_ names. */
+#define __rintf __redirect___rintf
+#include <math.h>
+#undef rintf /* Restore the plain names for the definitions below. */
+#undef __rintf
+
+#define SYMBOL_NAME rintf /* Read by ifunc-sse4_1.h (not shown here); presumably names the candidates for IFUNC_SELECTOR -- confirm. */
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ()); /* Defines __rintf through the selector; macro is declared outside this diff. */
+libm_alias_float (__rint, rint) /* Stem without 'f' is intentional: the macro presumably appends the float suffix (s_fmaf.c uses it in place of weak_alias (__fmaf, fmaf)). */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c
new file mode 100644
index 0000000000..15f3c394d5
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c
@@ -0,0 +1,11 @@
+#define __cos __cos_fma /* Rename every symbol below so the generic source included at the end is compiled under _fma names. */
+#define __sin __sin_fma
+#define __docos __docos_fma
+#define __dubsin __dubsin_fma
+#define __mpcos __mpcos_fma
+#define __mpcos1 __mpcos1_fma
+#define __mpsin __mpsin_fma
+#define __mpsin1 __mpsin1_fma
+#define SECTION __attribute__ ((section (".text.fma"))) /* Section attribute; presumably applied by the included source to its functions -- confirm. */
+
+#include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index 8ffd3e7125..b289269240 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -1,26 +1,39 @@
-#include <init-arch.h>
-#include <math.h>
-#undef NAN
-
-extern double __cos_sse2 (double);
-extern double __sin_sse2 (double);
-extern double __cos_avx (double);
-extern double __sin_avx (double);
-extern double __cos_fma4 (double);
-extern double __sin_fma4 (double);
-
-libm_ifunc (__cos, (HAS_ARCH_FEATURE (FMA4_Usable) ? __cos_fma4 :
- HAS_ARCH_FEATURE (AVX_Usable)
- ? __cos_avx : __cos_sse2));
-weak_alias (__cos, cos)
-
-libm_ifunc (__sin, (HAS_ARCH_FEATURE (FMA4_Usable) ? __sin_fma4 :
- HAS_ARCH_FEATURE (AVX_Usable)
- ? __sin_avx : __sin_sse2));
-weak_alias (__sin, sin)
+/* Multiple versions of sin and cos.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-#define __cos __cos_sse2
-#define __sin __sin_sse2
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-double.h>
+
+extern double __redirect_sin (double); /* Prototype-only names handed to libc_ifunc_redirected below. */
+extern double __redirect_cos (double);
+#define SYMBOL_NAME sin /* Read by ifunc-avx-fma4.h (not shown here); presumably names the candidates for IFUNC_SELECTOR -- confirm. */
+#include "ifunc-avx-fma4.h"
+libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ()); /* Defines __sin through the selector; macro is declared outside this diff. */
+libm_alias_double (__sin, sin)
+
+#undef SYMBOL_NAME /* Re-include the same header a second time, now for cos. */
+#define SYMBOL_NAME cos
+#include "ifunc-avx-fma4.h"
+
+libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ()); /* Defines __cos through the selector. */
+libm_alias_double (__cos, cos)
+
+#define __cos __cos_sse2 /* The generic s_sin.c included next is compiled under the _sse2 names. */
+#define __sin __sin_sse2
#include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
new file mode 100644
index 0000000000..64abe7abca
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
@@ -0,0 +1,240 @@
+/* Compute sine and cosine of argument optimized with vector.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+#include <x86intrin.h>
+#include <libm-alias-float.h>
+#include "s_sincosf.h"
+
+#define SINCOSF __sincosf_fma /* Name under which this file's entry point is emitted. */
+
+#ifndef SINCOSF /* Never true in this file: SINCOSF is defined just above. */
+# define SINCOSF_FUNC __sincosf
+#else
+# define SINCOSF_FUNC SINCOSF /* Branch taken here: SINCOSF_FUNC expands to __sincosf_fma. */
+#endif
+
+/* Chebyshev constants for sin and cos, range -PI/4 - PI/4. Each vector is { sine coefficient, cosine coefficient }. */
+static const __v2df V0 = { -0x1.5555555551cd9p-3, -0x1.ffffffffe98aep-2};
+static const __v2df V1 = { 0x1.1111110c2688bp-7, 0x1.55555545c50c7p-5 };
+static const __v2df V2 = { -0x1.a019f8b4bd1f9p-13, -0x1.6c16b348b6874p-10 };
+static const __v2df V3 = { 0x1.71d7264e6b5b4p-19, 0x1.a00eb9ac43ccp-16 };
+static const __v2df V4 = { -0x1.a947e1674b58ap-26, -0x1.23c97dd8844d7p-22 };
+
+/* Chebyshev constants for sin and cos, range 2^-27 - 2^-5. Same { sine, cosine } lane layout. */
+static const __v2df VC0 = { -0x1.555555543d49dp-3, -0x1.fffffff5cc6fdp-2 };
+static const __v2df VC1 = { 0x1.110f475cec8c5p-7, 0x1.55514b178dac5p-5 };
+
+static const __v2df v2ones = { 1.0, 1.0 }; /* 1.0 in both lanes; added to the polynomial to form the cosine series. */
+
+/* Compute the sine and cosine values using Chebyshev polynomials and
+ store them, converted to float, in *SINX and *COSX, where
+ THETA is the range reduced absolute value of the input (below Pi/4),
+ N is calculated as trunc(|x|/(Pi/4)) + 1 and it is used to decide
+ whether a sine or cosine approximation is more accurate and
+ SIGNBIT is used to add the correct sign after the Chebyshev
+ polynomial is computed. */
+static void
+reduced_sincos (const double theta, const unsigned int n,
+ const unsigned int signbit, float *sinx, float *cosx)
+{
+ __v2df v2x, v2sx, v2cx;
+ const __v2df v2theta = { theta, theta }; /* THETA broadcast to both lanes. */
+ const __v2df v2theta2 = v2theta * v2theta;
+ /* Both series are evaluated at once: lane 0 with the sine coefficients,
+ lane 1 with the cosine ones. Sine: x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */
+ v2x = V3 + v2theta2 * V4; /* S3+x^2*S4. */
+ v2x = V2 + v2theta2 * v2x; /* S2+x^2*(S3+x^2*S4). */
+ v2x = V1 + v2theta2 * v2x; /* S1+x^2*(S2+x^2*(S3+x^2*S4)). */
+ v2x = V0 + v2theta2 * v2x; /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))). */
+ v2x = v2theta2 * v2x; /* x^2 times the polynomial, in each lane. */
+ v2cx = v2ones + v2x; /* Cosine form 1+x^2*(...); only lane 1 is kept. */
+ v2sx = v2theta + v2theta * v2x; /* Sine form x+x^3*(...); only lane 0 is kept. */
+ /* We are operating on |x|, so we need to add back the original
+ signbit for sinf. */
+ /* Determine positive or negative primary interval. */
+ /* Are we in the primary interval of sin or cos? */
+ if ((n & 2) == 0)
+ {
+ const __v2df v2sign = /* NOTE(review): ones[] is defined outside this hunk; presumably { 1.0, -1.0 } -- confirm. */
+ {
+ ones[((n >> 2) & 1) ^ signbit], /* Sign factor for the sine result. */
+ ones[((n + 2) >> 2) & 1] /* Sign factor for the cosine result. */
+ };
+ v2cx[0] = v2sx[0]; /* Pack as { sine series, cosine series }. */
+ v2cx *= v2sign;
+ __v4sf v4sx = _mm_cvtpd_ps (v2cx); /* Convert both doubles to float. */
+ *sinx = v4sx[0];
+ *cosx = v4sx[1];
+ }
+ else
+ {
+ const __v2df v2sign = /* Same sign factors as above, with the lanes exchanged. */
+ {
+ ones[((n + 2) >> 2) & 1],
+ ones[((n >> 2) & 1) ^ signbit]
+ };
+ v2cx[0] = v2sx[0]; /* Pack as { sine series, cosine series }. */
+ v2cx *= v2sign;
+ __v4sf v4sx = _mm_cvtpd_ps (v2cx); /* Convert both doubles to float. */
+ *sinx = v4sx[1]; /* Outputs are swapped here: *SINX takes the cosine series. */
+ *cosx = v4sx[0]; /* ... and *COSX takes the sine series. */
+ }
+}
+
+void /* Store the sine of X in *SINX and the cosine of X in *COSX, both as float. */
+SINCOSF_FUNC (float x, float *sinx, float *cosx)
+{
+ double theta = x; /* The arithmetic below is done in double. */
+ double abstheta = fabs (theta);
+ uint32_t ix, xi;
+ GET_FLOAT_WORD (xi, x); /* Macro not shown here; presumably copies the raw bits of X into XI -- confirm. */
+ /* Bit pattern of |x|: mask off the sign bit. */
+ ix = xi & 0x7fffffff;
+ /* If |x|< Pi/4. */
+ if (ix < 0x3f490fdb)
+ {
+ if (ix >= 0x3d000000) /* |x| >= 2^-5. */
+ {
+ __v2df v2x, v2sx, v2cx;
+ const __v2df v2theta = { theta, theta };
+ const __v2df v2theta2 = v2theta * v2theta;
+ /* Evaluate the sin (lane 0) and cos (lane 1) Chebyshev polynomials. */
+ v2x = V3 + v2theta2 * V4;
+ v2x = V2 + v2theta2 * v2x;
+ v2x = V1 + v2theta2 * v2x;
+ v2x = V0 + v2theta2 * v2x;
+ v2x = v2theta2 * v2x;
+ v2cx = v2ones + v2x; /* Cosine form; lane 1 is kept. */
+ v2sx = v2theta + v2theta * v2x; /* Sine form; lane 0 is kept. THETA is signed here, so no sign fix-up follows. */
+ v2cx[0] = v2sx[0]; /* Pack as { sine, cosine }. */
+ __v4sf v4sx = _mm_cvtpd_ps (v2cx); /* Convert both doubles to float. */
+ *sinx = v4sx[0];
+ *cosx = v4sx[1];
+ }
+ else if (ix >= 0x32000000) /* |x| >= 2^-27. */
+ {
+ /* A simpler Chebyshev approximation is close enough for this range:
+ for sin: x+x^3*(SS0+x^2*SS1)
+ for cos: 1.0+x^2*(CC0+x^3*CC1). */
+ __v2df v2x, v2sx, v2cx;
+ const __v2df v2theta = { theta, theta };
+ const __v2df v2theta2 = v2theta * v2theta;
+ v2x = VC0 + v2theta * v2theta2 * VC1; /* NOTE(review): both lanes use x^3*VC1, while the sine form above says x^2*SS1 -- confirm intended. */
+ v2x = v2theta2 * v2x;
+ v2cx = v2ones + v2x;
+ v2sx = v2theta + v2theta * v2x;
+ v2cx[0] = v2sx[0]; /* Pack as { sine, cosine }. */
+ __v4sf v4sx = _mm_cvtpd_ps (v2cx); /* Convert both doubles to float. */
+ *sinx = v4sx[0];
+ *cosx = v4sx[1];
+ }
+ else
+ {
+ /* |x| < 2^-27, including zero: no polynomial is evaluated. */
+ if (ix) /* Nonzero input. */
+ *sinx = theta - (theta * SMALL); /* NOTE(review): SMALL is defined outside this hunk -- confirm its value and purpose. */
+ else
+ *sinx = theta; /* Zero input is stored unchanged. */
+ *cosx = 1.0 - abstheta;
+ }
+ }
+ else /* |x| >= Pi/4. */
+ {
+ unsigned int signbit = xi >> 31; /* Top bit of the float word: 1 when the sign bit of X is set. */
+ if (ix < 0x40e231d6) /* |x| < 9*Pi/4. */
+ {
+ /* There are cases where FE_UPWARD rounding mode can
+ produce a result of abstheta * inv_PI_4 == 9,
+ where abstheta < 9pi/4, so the domain for
+ pio2_table must go to 5 (9 / 2 + 1). */
+ unsigned int n = (abstheta * inv_PI_4) + 1; /* Converted to unsigned int on assignment. */
+ theta = abstheta - pio2_table[n / 2]; /* NOTE(review): pio2_table is defined outside this hunk; presumably multiples of Pi/2 -- confirm. */
+ reduced_sincos (theta, n, signbit, sinx, cosx);
+ }
+ else if (ix < 0x7f800000)
+ {
+ if (ix < 0x4b000000) /* |x| < 2^23. */
+ {
+ unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1;
+ double x = n / 2; /* Integer division; this double X shadows the float parameter inside this block. */
+ theta = (abstheta - x * PI_2_hi) - x * PI_2_lo; /* Subtract x*Pi/2 in two steps (hi part, then lo part). */
+ /* Argument reduction needed. */
+ reduced_sincos (theta, n, signbit, sinx, cosx);
+ }
+ else /* |x| >= 2^23. */
+ {
+ x = fabsf (x); /* Here X is the float parameter again. */
+ int exponent
+ = (ix >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS;
+ exponent += 3;
+ exponent /= 28; /* EXPONENT now indexes invpio4_table (defined outside this hunk). */
+ double a = invpio4_table[exponent] * x; /* Four consecutive table entries, each scaled by |x|. */
+ double b = invpio4_table[exponent + 1] * x;
+ double c = invpio4_table[exponent + 2] * x;
+ double d = invpio4_table[exponent + 3] * x;
+ uint64_t l = a; /* Integer part of A. */
+ l &= ~0x7; /* Clear the low three bits: L is a multiple of 8. */
+ a -= l; /* Drop that multiple of 8 from A. */
+ double e = a + b;
+ l = e; /* Integer part of A + B. */
+ e = a - l;
+ if (l & 1) /* Odd L. */
+ {
+ e -= 1.0;
+ e += b;
+ e += c;
+ e += d;
+ e *= M_PI_4;
+ reduced_sincos (e, l + 1, signbit, sinx, cosx);
+ }
+ else
+ {
+ e += b;
+ e += c;
+ e += d;
+ if (e <= 1.0)
+ {
+ e *= M_PI_4;
+ reduced_sincos (e, l + 1, signbit, sinx, cosx);
+ }
+ else
+ {
+ l++; /* Move to the next Pi/4 interval and re-centre E. */
+ e -= 2.0;
+ e *= M_PI_4;
+ reduced_sincos (e, l + 1, signbit, sinx, cosx);
+ }
+ }
+ }
+ }
+ else
+ {
+ if (ix == 0x7f800000) /* Infinity only; NaN inputs leave errno untouched. */
+ __set_errno (EDOM);
+ /* sin/cos(Inf or NaN) is NaN. */
+ *sinx = *cosx = x - x;
+ }
+ }
+}
+
+#ifndef SINCOSF
+libm_alias_float (__sincos, sincos)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S b/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S
new file mode 100644
index 0000000000..51d012bb12
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S
@@ -0,0 +1,2 @@
+#define __sincosf __sincosf_sse2
+#include <sysdeps/x86_64/fpu/s_sincosf.S>
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
index f6a4cf5c1e..6cb4295558 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen4.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
@@ -1,5 +1,5 @@
-/* Tests for SSE ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* Multiple versions of sincosf.
+ Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "test-float-vlen4.h"
+#include <libm-alias-float.h>
-#define TEST_VECTOR_cosf 1
-#define TEST_VECTOR_sinf 1
-#define TEST_VECTOR_sincosf 1
-#define TEST_VECTOR_logf 1
-#define TEST_VECTOR_expf 1
-#define TEST_VECTOR_powf 1
+extern void __redirect_sincosf (float, float *, float *);
-#include "libm-test.c"
+#define SYMBOL_NAME sincosf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
+
+libm_alias_float (__sincos, sincos)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c
new file mode 100644
index 0000000000..34440ebf4a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c
@@ -0,0 +1,2 @@
+#define SINF __sinf_fma
+#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c
new file mode 100644
index 0000000000..74e32c98db
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c
@@ -0,0 +1,2 @@
+#define SINF __sinf_sse2
+#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
new file mode 100644
index 0000000000..4fdfbd8d3e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
@@ -0,0 +1,28 @@
+/* Multiple versions of sinf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_sinf (float); /* Prototype-only name handed to libc_ifunc_redirected below. */
+
+#define SYMBOL_NAME sinf /* Read by ifunc-fma.h (not shown here); presumably names the candidates for IFUNC_SELECTOR -- confirm. */
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ()); /* Defines __sinf through the selector; macro is declared outside this diff. */
+
+libm_alias_float (__sin, sin) /* Stem without 'f' is intentional: the macro presumably appends the float suffix (s_fmaf.c uses it in place of weak_alias (__fmaf, fmaf)). */
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c b/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c
index 53de5d3c98..5ee29a9a06 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c
@@ -1,4 +1,4 @@
-#define tan __tan_avx
+#define __tan __tan_avx
#define __dbl_mp __dbl_mp_avx
#define __sub __sub_avx
#define SECTION __attribute__ ((section (".text.avx")))
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c
new file mode 100644
index 0000000000..1a1b9d2490
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c
@@ -0,0 +1,8 @@
+#define __tan __tan_fma /* Rename every symbol below so the generic source included at the end is compiled under _fma names. */
+#define __dbl_mp __dbl_mp_fma
+#define __mpranred __mpranred_fma
+#define __mptan __mptan_fma
+#define __sub __sub_fma
+#define SECTION __attribute__ ((section (".text.fma"))) /* Section attribute; presumably applied by the included source to its functions -- confirm. */
+
+#include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
index a805440b46..e4e9f6cb85 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
@@ -1,4 +1,4 @@
-#define tan __tan_fma4
+#define __tan __tan_fma4
#define __dbl_mp __dbl_mp_fma4
#define __mpranred __mpranred_fma4
#define __mptan __mptan_fma4
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index 25f3bca07e..bb75d8d0bc 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -1,15 +1,30 @@
-#include <init-arch.h>
-#include <math.h>
+/* Multiple versions of tan.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-extern double __tan_sse2 (double);
-extern double __tan_avx (double);
-extern double __tan_fma4 (double);
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
-libm_ifunc (tan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __tan_fma4 :
- HAS_ARCH_FEATURE (AVX_Usable)
- ? __tan_avx : __tan_sse2));
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
-#define tan __tan_sse2
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+#include <libm-alias-double.h>
+extern double __redirect_tan (double);
+
+#define SYMBOL_NAME tan
+#include "ifunc-avx-fma4.h"
+
+libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
+libm_alias_double (__tan, tan)
+
+#define __tan __tan_sse2
#include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c
new file mode 100644
index 0000000000..6204ae3c77
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c
@@ -0,0 +1,2 @@
+#define __trunc __trunc_c
+#include <sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c>
diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
index 241378e770..b8046bfa0c 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
@@ -1,5 +1,5 @@
-/* mempcpy with AVX
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* trunc for SSE4.1.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,7 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#define USE_AS_MEMPCPY
-#define MEMCPY __mempcpy_avx_unaligned
-#define MEMCPY_CHK __mempcpy_chk_avx_unaligned
-#include "memcpy-avx-unaligned.S"
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__trunc_sse41)
+ roundsd $11, %xmm0, %xmm0
+ ret
+END(__trunc_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
new file mode 100644
index 0000000000..a1b0c60630
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __trunc.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-double.h>
+
+#define trunc __redirect_trunc
+#define __trunc __redirect___trunc
+#include <math.h>
+#undef trunc
+#undef __trunc
+
+#define SYMBOL_NAME trunc
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
+libm_alias_double (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c
new file mode 100644
index 0000000000..7a5ac7da1f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c
@@ -0,0 +1,2 @@
+#define __truncf __truncf_c
+#include <sysdeps/ieee754/flt-32/s_truncf.c>
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
index 75e35f2957..2dabc0be57 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
@@ -1,5 +1,5 @@
-/* memmove with AVX
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* truncf for SSE4.1.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,7 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#define USE_AS_MEMMOVE
-#define MEMCPY __memmove_avx_unaligned
-#define MEMCPY_CHK __memmove_chk_avx_unaligned
-#include "memcpy-avx-unaligned.S"
+#include <sysdep.h>
+
+ .section .text.sse4.1,"ax",@progbits
+ENTRY(__truncf_sse41)
+ roundss $11, %xmm0, %xmm0
+ ret
+END(__truncf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
new file mode 100644
index 0000000000..a7e220bd0c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __truncf.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <libm-alias-float.h>
+
+#define truncf __redirect_truncf
+#define __truncf __redirect___truncf
+#include <math.h>
+#undef truncf
+#undef __truncf
+
+#define SYMBOL_NAME truncf
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
+libm_alias_float (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c b/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c
new file mode 100644
index 0000000000..dcd44bc5e8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c
@@ -0,0 +1,15 @@
+#define __cos32 __cos32_fma
+#define __sin32 __sin32_fma
+#define __c32 __c32_fma
+#define __mpsin __mpsin_fma
+#define __mpsin1 __mpsin1_fma
+#define __mpcos __mpcos_fma
+#define __mpcos1 __mpcos1_fma
+#define __mpranred __mpranred_fma
+#define __add __add_fma
+#define __dbl_mp __dbl_mp_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/sincos32.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c b/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c
deleted file mode 100644
index d01c6d71a4..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#define __slowexp __slowexp_avx
-#define __add __add_avx
-#define __dbl_mp __dbl_mp_avx
-#define __mpexp __mpexp_avx
-#define __mul __mul_avx
-#define __sub __sub_avx
-#define SECTION __attribute__ ((section (".text.avx")))
-
-#include <sysdeps/ieee754/dbl-64/slowexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
deleted file mode 100644
index 3bcde84233..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#define __slowexp __slowexp_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __mpexp __mpexp_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/slowexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
deleted file mode 100644
index 69d69823bb..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#define __slowpow __slowpow_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __mpexp __mpexp_fma4
-#define __mplog __mplog_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define __halfulp __halfulp_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/slowpow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S
new file mode 100644
index 0000000000..a85729807f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized cos, vector length is 2.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_cos _ZGVbN2v_cos_sse2
+#include "../svml_d_cos2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
deleted file mode 100644
index 7d720e2fcb..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized cos, vector length is 2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN2v_cos)
- .type _ZGVbN2v_cos, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN2v_cos_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN2v_cos_sse2(%rip), %rax
- ret
-END (_ZGVbN2v_cos)
-libmvec_hidden_def (_ZGVbN2v_cos)
-
-#define _ZGVbN2v_cos _ZGVbN2v_cos_sse2
-#include "../svml_d_cos2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c
new file mode 100644
index 0000000000..3ff39eecd7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized cos, vector length is 2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_cos
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_cos, __GI__ZGVbN2v_cos, __redirect__ZGVbN2v_cos)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
index 088fcae067..10be76e207 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -205,7 +205,7 @@ ENTRY (_ZGVbN2v_cos_sse4)
shlq $4, %r15
movsd 200(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
movsd %xmm0, 264(%rsp,%r15)
jmp .LBL_1_8
@@ -215,7 +215,7 @@ ENTRY (_ZGVbN2v_cos_sse4)
shlq $4, %r15
movsd 192(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
movsd %xmm0, 256(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S
new file mode 100644
index 0000000000..9f406ea7c9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized cos, vector length is 4.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN4v_cos _ZGVdN4v_cos_sse_wrapper
+#include "../svml_d_cos4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
deleted file mode 100644
index 65a3570d2e..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized cos, vector length is 4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN4v_cos)
- .type _ZGVdN4v_cos, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN4v_cos_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN4v_cos_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN4v_cos)
-libmvec_hidden_def (_ZGVdN4v_cos)
-
-#define _ZGVdN4v_cos _ZGVdN4v_cos_sse_wrapper
-#include "../svml_d_cos4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c
new file mode 100644
index 0000000000..cb8405201a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized cos, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN4v_cos
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_cos, __GI__ZGVdN4v_cos, __redirect__ZGVdN4v_cos)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
index 4e653216d9..38cdc6bb03 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -188,7 +188,7 @@ ENTRY (_ZGVdN4v_cos_avx2)
vmovsd 328(%rsp,%r15), %xmm0
vzeroupper
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 392(%rsp,%r15)
jmp .LBL_1_8
@@ -199,7 +199,7 @@ ENTRY (_ZGVdN4v_cos_avx2)
vmovsd 320(%rsp,%r15), %xmm0
vzeroupper
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 384(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S
new file mode 100644
index 0000000000..081baeeff5
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized cos, vector length is 8.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper
+#include "../svml_d_cos8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
deleted file mode 100644
index 3e7f16d44e..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized cos, vector length is 8.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN8v_cos)
- .type _ZGVeN8v_cos, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
-1: leaq _ZGVeN8v_cos_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN8v_cos_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN8v_cos_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN8v_cos)
-
-#define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper
-#include "../svml_d_cos8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c
new file mode 100644
index 0000000000..4aa12595bc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized cos, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN8v_cos
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_cos, __GI__ZGVeN8v_cos, __redirect__ZGVeN8v_cos)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
index 1cac1d827a..24e3b36357 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with AVX-512, KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVeN8v_cos_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
#else
/*
@@ -221,7 +221,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
movzbl %r12b, %r15d
shlq $4, %r15
vmovsd 1160(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 1224(%rsp,%r15)
jmp .LBL_1_8
@@ -229,14 +229,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
movzbl %r12b, %r15d
shlq $4, %r15
vmovsd 1152(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 1216(%rsp,%r15)
jmp .LBL_1_7
#endif
END (_ZGVeN8v_cos_knl)
ENTRY (_ZGVeN8v_cos_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
#else
/*
@@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
vzeroupper
vmovsd 1160(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 1224(%rsp,%r15)
jmp .LBL_2_8
@@ -450,7 +450,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
vzeroupper
vmovsd 1152(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 1216(%rsp,%r15)
jmp .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S
new file mode 100644
index 0000000000..3591eb1f19
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized exp, vector length is 2.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_exp _ZGVbN2v_exp_sse2
+#include "../svml_d_exp2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
deleted file mode 100644
index 136c67a550..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized exp.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN2v_exp)
- .type _ZGVbN2v_exp, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN2v_exp_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN2v_exp_sse2(%rip), %rax
- ret
-END (_ZGVbN2v_exp)
-libmvec_hidden_def (_ZGVbN2v_exp)
-
-#define _ZGVbN2v_exp _ZGVbN2v_exp_sse2
-#include "../svml_d_exp2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c
new file mode 100644
index 0000000000..2cfe8937c9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized exp, vector length is 2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_exp
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_exp, __GI__ZGVbN2v_exp, __redirect__ZGVbN2v_exp)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
index 445b230152..e98d11b311 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -207,7 +207,7 @@ ENTRY (_ZGVbN2v_exp_sse4)
shlq $4, %r15
movsd 200(%rsp,%r15), %xmm0
- call exp@PLT
+ call JUMPTARGET(__exp_finite)
movsd %xmm0, 264(%rsp,%r15)
jmp .LBL_1_8
@@ -217,7 +217,7 @@ ENTRY (_ZGVbN2v_exp_sse4)
shlq $4, %r15
movsd 192(%rsp,%r15), %xmm0
- call exp@PLT
+ call JUMPTARGET(__exp_finite)
movsd %xmm0, 256(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S
new file mode 100644
index 0000000000..f8e0b5517a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized exp, vector length is 4.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN4v_exp _ZGVdN4v_exp_sse_wrapper
+#include "../svml_d_exp4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
deleted file mode 100644
index 9d6a47be0a..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized exp.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN4v_exp)
- .type _ZGVdN4v_exp, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN4v_exp_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN4v_exp_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN4v_exp)
-libmvec_hidden_def (_ZGVdN4v_exp)
-
-#define _ZGVdN4v_exp _ZGVdN4v_exp_sse_wrapper
-#include "../svml_d_exp4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c
new file mode 100644
index 0000000000..59bb36984a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized exp, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN4v_exp
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_exp, __GI__ZGVdN4v_exp, __redirect__ZGVdN4v_exp)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
index 25f9e28941..87990f8ad7 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -193,7 +193,7 @@ ENTRY (_ZGVdN4v_exp_avx2)
vmovsd 328(%rsp,%r15), %xmm0
vzeroupper
- call exp@PLT
+ call JUMPTARGET(__exp_finite)
vmovsd %xmm0, 392(%rsp,%r15)
jmp .LBL_1_8
@@ -204,7 +204,7 @@ ENTRY (_ZGVdN4v_exp_avx2)
vmovsd 320(%rsp,%r15), %xmm0
vzeroupper
- call exp@PLT
+ call JUMPTARGET(__exp_finite)
vmovsd %xmm0, 384(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S
new file mode 100644
index 0000000000..b1d3cad0e1
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized exp.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper
+#include "../svml_d_exp8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
deleted file mode 100644
index 317ee36e61..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized exp.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN8v_exp)
- .type _ZGVeN8v_exp, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN8v_exp_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN8v_exp_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN8v_exp_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN8v_exp)
-
-#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper
-#include "../svml_d_exp8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c
new file mode 100644
index 0000000000..cfdc96ec86
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized exp, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN8v_exp
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_exp, __GI__ZGVeN8v_exp, __redirect__ZGVeN8v_exp)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
index 74f1d2ce7b..8dd8a03e4b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVeN8v_exp_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
#else
/*
@@ -223,7 +223,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
movzbl %r12b, %r15d
shlq $4, %r15
vmovsd 1160(%rsp,%r15), %xmm0
- call exp@PLT
+ call JUMPTARGET(__exp_finite)
vmovsd %xmm0, 1224(%rsp,%r15)
jmp .LBL_1_8
@@ -231,14 +231,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
movzbl %r12b, %r15d
shlq $4, %r15
vmovsd 1152(%rsp,%r15), %xmm0
- call exp@PLT
+ call JUMPTARGET(__exp_finite)
vmovsd %xmm0, 1216(%rsp,%r15)
jmp .LBL_1_7
#endif
END (_ZGVeN8v_exp_knl)
ENTRY (_ZGVeN8v_exp_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
#else
/*
@@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
vmovsd 1160(%rsp,%r15), %xmm0
vzeroupper
vmovsd 1160(%rsp,%r15), %xmm0
- call exp@PLT
+ call JUMPTARGET(__exp_finite)
vmovsd %xmm0, 1224(%rsp,%r15)
jmp .LBL_2_8
@@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
vmovsd 1152(%rsp,%r15), %xmm0
vzeroupper
vmovsd 1152(%rsp,%r15), %xmm0
- call exp@PLT
+ call JUMPTARGET(__exp_finite)
vmovsd %xmm0, 1216(%rsp,%r15)
jmp .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S
new file mode 100644
index 0000000000..761a1a537d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized log.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_log _ZGVbN2v_log_sse2
+#include "../svml_d_log2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
deleted file mode 100644
index 03d86a3e63..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized log.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN2v_log)
- .type _ZGVbN2v_log, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN2v_log_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN2v_log_sse2(%rip), %rax
- ret
-END (_ZGVbN2v_log)
-libmvec_hidden_def (_ZGVbN2v_log)
-
-#define _ZGVbN2v_log _ZGVbN2v_log_sse2
-#include "../svml_d_log2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c
new file mode 100644
index 0000000000..c24437a3be
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized log, vector length is 2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_log
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_log, __GI__ZGVbN2v_log, __redirect__ZGVbN2v_log)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
index 5d254288f6..eb854c68d6 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function log vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_log_sse4)
shlq $4, %r15
movsd 200(%rsp,%r15), %xmm0
- call log@PLT
+ call JUMPTARGET(__log_finite)
movsd %xmm0, 264(%rsp,%r15)
jmp .LBL_1_8
@@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_log_sse4)
shlq $4, %r15
movsd 192(%rsp,%r15), %xmm0
- call log@PLT
+ call JUMPTARGET(__log_finite)
movsd %xmm0, 256(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S
new file mode 100644
index 0000000000..2460512f78
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized log.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN4v_log _ZGVdN4v_log_sse_wrapper
+#include "../svml_d_log4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
deleted file mode 100644
index 9f6ddbef15..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized log.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN4v_log)
- .type _ZGVdN4v_log, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN4v_log_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN4v_log_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN4v_log)
-libmvec_hidden_def (_ZGVdN4v_log)
-
-#define _ZGVdN4v_log _ZGVdN4v_log_sse_wrapper
-#include "../svml_d_log4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c
new file mode 100644
index 0000000000..5751370d65
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized log, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN4v_log
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_log, __GI__ZGVdN4v_log, __redirect__ZGVdN4v_log)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
index 5da298747d..81515850e1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function log vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_log_avx2)
vmovsd 328(%rsp,%r15), %xmm0
vzeroupper
- call log@PLT
+ call JUMPTARGET(__log_finite)
vmovsd %xmm0, 392(%rsp,%r15)
jmp .LBL_1_8
@@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_log_avx2)
vmovsd 320(%rsp,%r15), %xmm0
vzeroupper
- call log@PLT
+ call JUMPTARGET(__log_finite)
vmovsd %xmm0, 384(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S
new file mode 100644
index 0000000000..ecfbeafb23
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized log.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper
+#include "../svml_d_log8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
deleted file mode 100644
index 2e1a1da1a5..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized log.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN8v_log)
- .type _ZGVeN8v_log, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN8v_log_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN8v_log_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN8v_log_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN8v_log)
-
-#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper
-#include "../svml_d_log8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c
new file mode 100644
index 0000000000..1e796dcfdd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized log, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN8v_log
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_log, __GI__ZGVeN8v_log, __redirect__ZGVeN8v_log)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
index dca8e61f34..ae8af8d861 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function log vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVeN8v_log_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN4v_log
#else
/*
@@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log
movzbl %r12b, %r15d
shlq $4, %r15
vmovsd 1160(%rsp,%r15), %xmm0
- call log@PLT
+ call JUMPTARGET(__log_finite)
vmovsd %xmm0, 1224(%rsp,%r15)
jmp .LBL_1_8
@@ -230,14 +230,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log
movzbl %r12b, %r15d
shlq $4, %r15
vmovsd 1152(%rsp,%r15), %xmm0
- call log@PLT
+ call JUMPTARGET(__log_finite)
vmovsd %xmm0, 1216(%rsp,%r15)
jmp .LBL_1_7
#endif
END (_ZGVeN8v_log_knl)
ENTRY (_ZGVeN8v_log_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN4v_log
#else
/*
@@ -443,7 +443,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log
vzeroupper
vmovsd 1160(%rsp,%r15), %xmm0
- call log@PLT
+ call JUMPTARGET(__log_finite)
vmovsd %xmm0, 1224(%rsp,%r15)
jmp .LBL_2_8
@@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log
vzeroupper
vmovsd 1152(%rsp,%r15), %xmm0
- call log@PLT
+ call JUMPTARGET(__log_finite)
vmovsd %xmm0, 1216(%rsp,%r15)
jmp .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S
new file mode 100644
index 0000000000..2d8ad50681
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized pow.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2vv_pow _ZGVbN2vv_pow_sse2
+#include "../svml_d_pow2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
deleted file mode 100644
index 4a50246889..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized pow.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN2vv_pow)
- .type _ZGVbN2vv_pow, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN2vv_pow_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN2vv_pow_sse2(%rip), %rax
- ret
-END (_ZGVbN2vv_pow)
-libmvec_hidden_def (_ZGVbN2vv_pow)
-
-#define _ZGVbN2vv_pow _ZGVbN2vv_pow_sse2
-#include "../svml_d_pow2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c
new file mode 100644
index 0000000000..3424c0e326
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized pow, vector length is 2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2vv_pow
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2vv_pow, __GI__ZGVbN2vv_pow,
+ __redirect__ZGVbN2vv_pow)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
index 064d170878..77828b44d5 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -413,7 +413,7 @@ ENTRY (_ZGVbN2vv_pow_sse4)
movsd 72(%rsp,%r15), %xmm0
movsd 136(%rsp,%r15), %xmm1
- call pow@PLT
+ call JUMPTARGET(__pow_finite)
movsd %xmm0, 200(%rsp,%r15)
jmp .LBL_1_8
@@ -424,7 +424,7 @@ ENTRY (_ZGVbN2vv_pow_sse4)
movsd 64(%rsp,%r15), %xmm0
movsd 128(%rsp,%r15), %xmm1
- call pow@PLT
+ call JUMPTARGET(__pow_finite)
movsd %xmm0, 192(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S
new file mode 100644
index 0000000000..4dcd14ff20
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized pow.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN4vv_pow _ZGVdN4vv_pow_sse_wrapper
+#include "../svml_d_pow4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c
new file mode 100644
index 0000000000..447be39401
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized pow, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN4vv_pow
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4vv_pow, __GI__ZGVdN4vv_pow,
+ __redirect__ZGVdN4vv_pow)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
index f2a73ffe1e..c43d62f202 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -367,7 +367,7 @@ ENTRY (_ZGVdN4vv_pow_avx2)
vmovsd 264(%rsp,%r15), %xmm1
vzeroupper
- call pow@PLT
+ call JUMPTARGET(__pow_finite)
vmovsd %xmm0, 328(%rsp,%r15)
jmp .LBL_1_8
@@ -379,7 +379,7 @@ ENTRY (_ZGVdN4vv_pow_avx2)
vmovsd 256(%rsp,%r15), %xmm1
vzeroupper
- call pow@PLT
+ call JUMPTARGET(__pow_finite)
vmovsd %xmm0, 320(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S
new file mode 100644
index 0000000000..8acf700e76
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized pow.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper
+#include "../svml_d_pow8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
deleted file mode 100644
index 30bc53f2f7..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized pow.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN8vv_pow)
- .type _ZGVeN8vv_pow, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN8vv_pow_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN8vv_pow_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN8vv_pow_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN8vv_pow)
-
-#define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper
-#include "../svml_d_pow8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c
new file mode 100644
index 0000000000..62f96965bb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized pow, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN8vv_pow /* Name passed to libc_ifunc_redirected below. */
+#include "ifunc-mathvec-avx512.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR -- confirm in that header. */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): presumably emits SYMBOL_NAME as an IFUNC resolved by IFUNC_SELECTOR -- confirm against the macro. */
+
+#ifdef SHARED /* Shared-library builds only. */
+__hidden_ver1 (_ZGVeN8vv_pow, __GI__ZGVeN8vv_pow,
+ __redirect__ZGVeN8vv_pow)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
index 4a515233fc..a28c39b73d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -82,7 +82,7 @@
.text
ENTRY (_ZGVeN8vv_pow_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
#else
pushq %rbp
@@ -392,7 +392,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
shlq $4, %r15
vmovsd 1160(%rsp,%r15), %xmm0
vmovsd 1224(%rsp,%r15), %xmm1
- call pow@PLT
+ call JUMPTARGET(__pow_finite)
vmovsd %xmm0, 1288(%rsp,%r15)
jmp .LBL_1_8
@@ -401,7 +401,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
shlq $4, %r15
vmovsd 1152(%rsp,%r15), %xmm0
vmovsd 1216(%rsp,%r15), %xmm1
- call pow@PLT
+ call JUMPTARGET(__pow_finite)
vmovsd %xmm0, 1280(%rsp,%r15)
jmp .LBL_1_7
@@ -409,7 +409,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
END (_ZGVeN8vv_pow_knl)
ENTRY (_ZGVeN8vv_pow_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
#else
pushq %rbp
@@ -720,7 +720,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
vzeroupper
vmovsd 1160(%rsp,%r15), %xmm0
- call pow@PLT
+ call JUMPTARGET(__pow_finite)
vmovsd %xmm0, 1288(%rsp,%r15)
jmp .LBL_2_8
@@ -732,7 +732,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
vzeroupper
vmovsd 1152(%rsp,%r15), %xmm0
- call pow@PLT
+ call JUMPTARGET(__pow_finite)
vmovsd %xmm0, 1280(%rsp,%r15)
jmp .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S
new file mode 100644
index 0000000000..cb7b31aa1c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized sin.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2v_sin _ZGVbN2v_sin_sse2
+#include "../svml_d_sin2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
deleted file mode 100644
index 112bec2224..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sin.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN2v_sin)
- .type _ZGVbN2v_sin, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN2v_sin_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN2v_sin_sse2(%rip), %rax
- ret
-END (_ZGVbN2v_sin)
-libmvec_hidden_def (_ZGVbN2v_sin)
-
-#define _ZGVbN2v_sin _ZGVbN2v_sin_sse2
-#include "../svml_d_sin2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c
new file mode 100644
index 0000000000..1c5788f205
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized sin, vector length is 2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2v_sin /* Name passed to libc_ifunc_redirected below. */
+#include "ifunc-mathvec-sse4_1.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR -- confirm in that header. */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): presumably emits SYMBOL_NAME as an IFUNC resolved by IFUNC_SELECTOR -- confirm against the macro. */
+
+#ifdef SHARED /* Shared-library builds only. */
+__hidden_ver1 (_ZGVbN2v_sin, __GI__ZGVbN2v_sin, __redirect__ZGVbN2v_sin)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
index 5755ce6f74..15980e9eeb 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_sin_sse4)
shlq $4, %r15
movsd 200(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
movsd %xmm0, 264(%rsp,%r15)
jmp .LBL_1_8
@@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_sin_sse4)
shlq $4, %r15
movsd 192(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
movsd %xmm0, 256(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S
new file mode 100644
index 0000000000..07fae6f3b4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized sin, vector length is 4.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN4v_sin _ZGVdN4v_sin_sse_wrapper
+#include "../svml_d_sin4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
deleted file mode 100644
index 700a1c629d..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sin, vector length is 4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN4v_sin)
- .type _ZGVdN4v_sin, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN4v_sin_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN4v_sin_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN4v_sin)
-libmvec_hidden_def (_ZGVdN4v_sin)
-
-#define _ZGVdN4v_sin _ZGVdN4v_sin_sse_wrapper
-#include "../svml_d_sin4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c
new file mode 100644
index 0000000000..b5933914aa
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized sin, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN4v_sin /* Name passed to libc_ifunc_redirected below. */
+#include "ifunc-mathvec-avx2.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR -- confirm in that header. */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): presumably emits SYMBOL_NAME as an IFUNC resolved by IFUNC_SELECTOR -- confirm against the macro. */
+
+#ifdef SHARED /* Shared-library builds only. */
+__hidden_ver1 (_ZGVdN4v_sin, __GI__ZGVdN4v_sin, __redirect__ZGVdN4v_sin)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
index 46b557158a..4f0917c56d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_sin_avx2)
vmovsd 328(%rsp,%r15), %xmm0
vzeroupper
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 392(%rsp,%r15)
jmp .LBL_1_8
@@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_sin_avx2)
vmovsd 320(%rsp,%r15), %xmm0
vzeroupper
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 384(%rsp,%r15)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S
new file mode 100644
index 0000000000..b64c3390d6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S
@@ -0,0 +1,23 @@
+/* AVX2 version of vectorized sin.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#define _ZGVeN8v_sin _ZGVeN8v_sin_avx2_wrapper
+#include "../svml_d_sin8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
deleted file mode 100644
index 5afce0ed88..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized sin.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN8v_sin)
- .type _ZGVeN8v_sin, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN8v_sin_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN8v_sin_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN8v_sin_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN8v_sin)
-
-#define _ZGVeN8v_sin _ZGVeN8v_sin_avx2_wrapper
-#include "../svml_d_sin8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c
new file mode 100644
index 0000000000..57023d8494
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized sin, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN8v_sin /* Name passed to libc_ifunc_redirected below. */
+#include "ifunc-mathvec-avx512.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR -- confirm in that header. */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): presumably emits SYMBOL_NAME as an IFUNC resolved by IFUNC_SELECTOR -- confirm against the macro. */
+
+#ifdef SHARED /* Shared-library builds only. */
+__hidden_ver1 (_ZGVeN8v_sin, __GI__ZGVeN8v_sin, __redirect__ZGVeN8v_sin)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
index 6c565f3861..2d4b14fd1b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with AVX-512, KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVeN8v_sin_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
#else
/*
@@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
movzbl %r12b, %r15d
shlq $4, %r15
vmovsd 1160(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 1224(%rsp,%r15)
jmp .LBL_1_8
@@ -230,14 +230,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
movzbl %r12b, %r15d
shlq $4, %r15
vmovsd 1152(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 1216(%rsp,%r15)
jmp .LBL_1_7
#endif
END (_ZGVeN8v_sin_knl)
ENTRY (_ZGVeN8v_sin_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
#else
/*
@@ -440,7 +440,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
vzeroupper
vmovsd 1160(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 1224(%rsp,%r15)
jmp .LBL_2_8
@@ -452,7 +452,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
vzeroupper
vmovsd 1152(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 1216(%rsp,%r15)
jmp .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S
new file mode 100644
index 0000000000..ab7f9c500d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized sincos.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN2vvv_sincos _ZGVbN2vvv_sincos_sse2
+#include "../svml_d_sincos2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
deleted file mode 100644
index 883d7d33a4..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sincos.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN2vvv_sincos)
- .type _ZGVbN2vvv_sincos, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN2vvv_sincos_sse2(%rip), %rax
- ret
-END (_ZGVbN2vvv_sincos)
-libmvec_hidden_def (_ZGVbN2vvv_sincos)
-
-#define _ZGVbN2vvv_sincos _ZGVbN2vvv_sincos_sse2
-#include "../svml_d_sincos2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c
new file mode 100644
index 0000000000..f373bb40a3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincos, vector length is 2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN2vvv_sincos /* Name passed to libc_ifunc_redirected below. */
+#include "ifunc-mathvec-sse4_1.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR -- confirm in that header. */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): presumably emits SYMBOL_NAME as an IFUNC resolved by IFUNC_SELECTOR -- confirm against the macro. */
+
+#ifdef SHARED /* Shared-library builds only. */
+__hidden_ver1 (_ZGVbN2vvv_sincos, __GI__ZGVbN2vvv_sincos,
+ __redirect__ZGVbN2vvv_sincos)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
index 65ad540122..b4dfa37898 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
#include "svml_d_trig_data.h"
.text
-ENTRY (_ZGVbN2vvv_sincos_sse4)
+ENTRY (_ZGVbN2vl8l8_sincos_sse4)
/*
ALGORITHM DESCRIPTION:
@@ -287,12 +287,12 @@ ENTRY (_ZGVbN2vvv_sincos_sse4)
shlq $4, %r15
movsd 136(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
movsd %xmm0, 200(%rsp,%r15)
movsd 136(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
movsd %xmm0, 264(%rsp,%r15)
jmp .LBL_1_8
@@ -302,13 +302,67 @@ ENTRY (_ZGVbN2vvv_sincos_sse4)
shlq $4, %r15
movsd 128(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
movsd %xmm0, 192(%rsp,%r15)
movsd 128(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
movsd %xmm0, 256(%rsp,%r15)
jmp .LBL_1_7
+END (_ZGVbN2vl8l8_sincos_sse4)
+libmvec_hidden_def(_ZGVbN2vl8l8_sincos_sse4)
+
+/* vvv version implemented with wrapper to vl8l8 variant: the pointer vectors in %xmm1/%xmm2 are spilled, the vl8l8 routine is handed two stack buffers (expected to be filled by it), and the buffered values are then stored through the spilled pointers. NOTE(review): presumably buffer 1 = sin results, buffer 2 = cos results -- confirm against the vector ABI. */
+ENTRY (_ZGVbN2vvv_sincos_sse4)
+#ifndef __ILP32__
+ subq $72, %rsp /* Scratch frame; offsets 0-63 are used below. */
+ .cfi_def_cfa_offset 80
+ movdqu %xmm1, 32(%rsp) /* Spill first pointer vector (two 64-bit pointers). */
+ lea (%rsp), %rdi /* Argument 1: first output buffer at 0(%rsp). */
+ movdqu %xmm2, 48(%rdi) /* Spill second pointer vector; %rdi == %rsp here. */
+ lea 16(%rsp), %rsi /* Argument 2: second output buffer at 16(%rsp). */
+ call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4)
+ movq 32(%rsp), %rdx /* Pointer 0 from spilled %xmm1. */
+ movq 48(%rsp), %rsi /* Pointer 0 from spilled %xmm2. */
+ movq 40(%rsp), %r8 /* Pointer 1 from spilled %xmm1. */
+ movq 56(%rsp), %r10 /* Pointer 1 from spilled %xmm2. */
+ movq (%rsp), %rax /* First buffer, element 0. */
+ movq 16(%rsp), %rcx /* Second buffer, element 0. */
+ movq 8(%rsp), %rdi /* First buffer, element 1. */
+ movq 24(%rsp), %r9 /* Second buffer, element 1. */
+ movq %rax, (%rdx) /* Scatter: first buffer -> %xmm1 pointers, second buffer -> %xmm2 pointers. */
+ movq %rcx, (%rsi)
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ addq $72, %rsp /* Release the scratch frame. */
+ .cfi_def_cfa_offset 8
+ ret
+#else
+ subl $72, %esp /* Scratch frame; offsets 0-63 are used below. */
+ .cfi_def_cfa_offset 80
+ leal 48(%rsp), %esi /* Argument 2: second output buffer at 48(%esp). */
+ movaps %xmm1, 16(%esp) /* Spill first pointer vector. */
+ leal 32(%rsp), %edi /* Argument 1: first output buffer at 32(%esp). */
+ movaps %xmm2, (%esp) /* Spill second pointer vector. */
+ call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4)
+ movdqa 16(%esp), %xmm1 /* Reload first pointer vector. */
+ movsd 32(%esp), %xmm0 /* First buffer, element 0. */
+ movq %xmm1, %rax /* Low 64 bits of %xmm1; %eax is then its 32-bit pointer 0. */
+ movdqa (%esp), %xmm2 /* Reload second pointer vector. */
+ movsd %xmm0, (%eax)
+ movsd 40(%esp), %xmm0 /* First buffer, element 1. */
+ pextrd $1, %xmm1, %eax /* 32-bit pointer 1 from %xmm1. */
+ movsd %xmm0, (%eax)
+ movsd 48(%esp), %xmm0 /* Second buffer, element 0. */
+ movq %xmm2, %rax /* Low 64 bits of %xmm2; %eax is then its 32-bit pointer 0. */
+ movsd %xmm0, (%eax)
+ movsd 56(%esp), %xmm0 /* Second buffer, element 1. */
+ pextrd $1, %xmm2, %eax /* 32-bit pointer 1 from %xmm2. */
+ movsd %xmm0, (%eax)
+ addl $72, %esp /* Release the scratch frame. */
+ .cfi_def_cfa_offset 8
+ ret
+#endif
END (_ZGVbN2vvv_sincos_sse4)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S
new file mode 100644
index 0000000000..10b4a2cf16
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized sincos.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN4vvv_sincos _ZGVdN4vvv_sincos_sse_wrapper
+#include "../svml_d_sincos4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
deleted file mode 100644
index 69a3f74650..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sincos.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN4vvv_sincos)
- .type _ZGVdN4vvv_sincos, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN4vvv_sincos_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN4vvv_sincos)
-libmvec_hidden_def (_ZGVdN4vvv_sincos)
-
-#define _ZGVdN4vvv_sincos _ZGVdN4vvv_sincos_sse_wrapper
-#include "../svml_d_sincos4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c
new file mode 100644
index 0000000000..1fabd7b471
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincos, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN4vvv_sincos /* Name passed to libc_ifunc_redirected below. */
+#include "ifunc-mathvec-avx2.h" /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR -- confirm in that header. */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); /* NOTE(review): presumably emits SYMBOL_NAME as an IFUNC resolved by IFUNC_SELECTOR -- confirm against the macro. */
+
+#ifdef SHARED /* Shared-library builds only. */
+__hidden_ver1 (_ZGVdN4vvv_sincos, __GI__ZGVdN4vvv_sincos,
+ __redirect__ZGVdN4vvv_sincos)
+ __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
index 60d03e9f8b..d56aa96ac9 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
#include "svml_d_trig_data.h"
.text
-ENTRY (_ZGVdN4vvv_sincos_avx2)
+ENTRY (_ZGVdN4vl8l8_sincos_avx2)
/*
ALGORITHM DESCRIPTION:
@@ -248,12 +248,12 @@ ENTRY (_ZGVdN4vvv_sincos_avx2)
vmovsd 264(%rsp,%r15), %xmm0
vzeroupper
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 328(%rsp,%r15)
vmovsd 264(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 392(%rsp,%r15)
jmp .LBL_1_8
@@ -264,14 +264,110 @@ ENTRY (_ZGVdN4vvv_sincos_avx2)
vmovsd 256(%rsp,%r15), %xmm0
vzeroupper
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 320(%rsp,%r15)
vmovsd 256(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 384(%rsp,%r15)
jmp .LBL_1_7
+END (_ZGVdN4vl8l8_sincos_avx2)
+libmvec_hidden_def(_ZGVdN4vl8l8_sincos_avx2)
+
+/* vvv version implemented with wrapper to vl8l8 variant. */
+ENTRY (_ZGVdN4vvv_sincos_avx2)
+#ifndef __ILP32__
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-32, %rsp
+ subq $128, %rsp
+ vmovdqu %ymm1, 64(%rsp)
+ lea (%rsp), %rdi
+ vmovdqu %ymm2, 96(%rdi)
+ lea 32(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2)
+ movq 64(%rsp), %rdx
+ movq 96(%rsp), %rsi
+ movq 72(%rsp), %r8
+ movq 104(%rsp), %r10
+ movq (%rsp), %rax
+ movq 32(%rsp), %rcx
+ movq 8(%rsp), %rdi
+ movq 40(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq 80(%rsp), %rax
+ movq 112(%rsp), %rcx
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ movq 88(%rsp), %rdi
+ movq 120(%rsp), %r9
+ movq 16(%rsp), %r11
+ movq 48(%rsp), %rdx
+ movq 24(%rsp), %rsi
+ movq 56(%rsp), %r8
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq %rsi, (%rdi)
+ movq %r8, (%r9)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-32, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x78,0x6
+ leal -48(%rbp), %esi
+ leal -80(%rbp), %edi
+ subl $104, %esp
+ vmovaps %xmm1, -96(%ebp)
+ vmovaps %xmm2, -112(%ebp)
+ call HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2)
+ movl -96(%ebp), %eax
+ vmovsd -80(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -92(%ebp), %eax
+ vmovsd -72(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -88(%ebp), %eax
+ vmovsd -64(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -84(%ebp), %eax
+ vmovsd -56(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -112(%ebp), %eax
+ vmovsd -48(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -108(%ebp), %eax
+ vmovsd -40(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -104(%ebp), %eax
+ vmovsd -32(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -100(%ebp), %eax
+ vmovsd -24(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ addl $104, %esp
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
END (_ZGVdN4vvv_sincos_avx2)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S
new file mode 100644
index 0000000000..8cf88f6461
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized sincos.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper
+#include "../svml_d_sincos8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
deleted file mode 100644
index 64cb08c5d1..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized sincos.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN8vvv_sincos)
- .type _ZGVeN8vvv_sincos, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN8vvv_sincos_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN8vvv_sincos_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN8vvv_sincos_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN8vvv_sincos)
-
-#define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper
-#include "../svml_d_sincos8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c
new file mode 100644
index 0000000000..1409872ed2
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincos, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN8vvv_sincos
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8vvv_sincos, __GI__ZGVeN8vvv_sincos,
+ __redirect__ZGVeN8vvv_sincos)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
index 44700f90b8..2df626c0c1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -36,9 +36,9 @@
sin(R), sin(R') are approximated by corresponding polynomial. */
.text
-ENTRY (_ZGVeN8vvv_sincos_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+ENTRY (_ZGVeN8vl8l8_sincos_knl)
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
#else
pushq %rbp
cfi_adjust_cfa_offset (8)
@@ -278,12 +278,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
shlq $4, %r15
vmovsd 1160(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 1224(%rsp,%r15)
vmovsd 1160(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 1288(%rsp,%r15)
jmp .LBL_1_8
@@ -293,22 +293,23 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
shlq $4, %r15
vmovsd 1152(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 1216(%rsp,%r15)
vmovsd 1152(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 1280(%rsp,%r15)
jmp .LBL_1_7
#endif
-END (_ZGVeN8vvv_sincos_knl)
+END (_ZGVeN8vl8l8_sincos_knl)
+libmvec_hidden_def(_ZGVeN8vl8l8_sincos_knl)
-ENTRY (_ZGVeN8vvv_sincos_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+ENTRY (_ZGVeN8vl8l8_sincos_skx)
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
#else
pushq %rbp
cfi_adjust_cfa_offset (8)
@@ -557,12 +558,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
vzeroupper
vmovsd 1160(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 1224(%rsp,%r15)
vmovsd 1160(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 1288(%rsp,%r15)
jmp .LBL_2_8
@@ -574,17 +575,171 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
vzeroupper
vmovsd 1152(%rsp,%r15), %xmm0
- call sin@PLT
+ call JUMPTARGET(sin)
vmovsd %xmm0, 1216(%rsp,%r15)
vmovsd 1152(%rsp,%r15), %xmm0
- call cos@PLT
+ call JUMPTARGET(cos)
vmovsd %xmm0, 1280(%rsp,%r15)
jmp .LBL_2_7
#endif
+END (_ZGVeN8vl8l8_sincos_skx)
+libmvec_hidden_def(_ZGVeN8vl8l8_sincos_skx)
+
+/* Wrapper between vvv and vl8l8 vector variants. */
+.macro WRAPPER_AVX512_vvv_vl8l8 callee
+#ifndef __ILP32__
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp
+ subq $256, %rsp
+ vmovups %zmm1, 128(%rsp)
+ lea (%rsp), %rdi
+ vmovups %zmm2, 192(%rdi)
+ lea 64(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq 128(%rsp), %rdx
+ movq 136(%rsp), %rsi
+ movq 144(%rsp), %r8
+ movq 152(%rsp), %r10
+ movq (%rsp), %rax
+ movq 8(%rsp), %rcx
+ movq 16(%rsp), %rdi
+ movq 24(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq 160(%rsp), %rax
+ movq 168(%rsp), %rcx
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ movq 176(%rsp), %rdi
+ movq 184(%rsp), %r9
+ movq 32(%rsp), %r11
+ movq 40(%rsp), %rdx
+ movq 48(%rsp), %rsi
+ movq 56(%rsp), %r8
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq 192(%rsp), %r11
+ movq 200(%rsp), %rdx
+ movq %rsi, (%rdi)
+ movq %r8, (%r9)
+ movq 208(%rsp), %rsi
+ movq 216(%rsp), %r8
+ movq 64(%rsp), %r10
+ movq 72(%rsp), %rax
+ movq 80(%rsp), %rcx
+ movq 88(%rsp), %rdi
+ movq %r10, (%r11)
+ movq %rax, (%rdx)
+ movq 224(%rsp), %r10
+ movq 232(%rsp), %rax
+ movq %rcx, (%rsi)
+ movq %rdi, (%r8)
+ movq 240(%rsp), %rcx
+ movq 248(%rsp), %rdi
+ movq 96(%rsp), %r9
+ movq 104(%rsp), %r11
+ movq 112(%rsp), %rdx
+ movq 120(%rsp), %rsi
+ movq %r9, (%r10)
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq %rsi, (%rdi)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-64, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x78,0x6
+ leal -112(%rbp), %esi
+ leal -176(%rbp), %edi
+ subl $232, %esp
+ vmovdqa %ymm1, -208(%ebp)
+ vmovdqa %ymm2, -240(%ebp)
+ call HIDDEN_JUMPTARGET(\callee)
+ vmovdqa -208(%ebp), %xmm0
+ vmovq %xmm0, %rax
+ vmovsd -176(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ shrq $32, %rax
+ vmovsd -168(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movq -200(%ebp), %rax
+ vmovsd -160(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ shrq $32, %rax
+ vmovsd -152(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movq -192(%ebp), %rax
+ vmovsd -144(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ shrq $32, %rax
+ vmovsd -136(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movq -184(%ebp), %rax
+ vmovsd -128(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ shrq $32, %rax
+ vmovsd -120(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ vmovdqa -240(%ebp), %xmm0
+ vmovq %xmm0, %rax
+ vmovsd -112(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ shrq $32, %rax
+ vmovsd -104(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movq -232(%ebp), %rax
+ vmovsd -96(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ shrq $32, %rax
+ vmovsd -88(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movq -224(%ebp), %rax
+ vmovsd -80(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ shrq $32, %rax
+ vmovsd -72(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movq -216(%ebp), %rax
+ vmovsd -64(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ shrq $32, %rax
+ vmovsd -56(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ addl $232, %esp
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+.endm
+
+ENTRY (_ZGVeN8vvv_sincos_knl)
+WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_knl
+END (_ZGVeN8vvv_sincos_knl)
+
+ENTRY (_ZGVeN8vvv_sincos_skx)
+WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_skx
END (_ZGVeN8vvv_sincos_skx)
.section .rodata, "a"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S
new file mode 100644
index 0000000000..f01f89f294
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized cosf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper
+#include "../svml_s_cosf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
deleted file mode 100644
index 755254a280..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized cosf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN16v_cosf)
- .type _ZGVeN16v_cosf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN16v_cosf_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN16v_cosf_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN16v_cosf)
-
-#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper
-#include "../svml_s_cosf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c
new file mode 100644
index 0000000000..5bd0441b16
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized cosf, vector length is 16.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN16v_cosf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_cosf, __GI__ZGVeN16v_cosf,
+ __redirect__ZGVeN16v_cosf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
index 5004cd4758..6ea1137b42 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVeN16v_cosf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
#else
/*
@@ -225,21 +225,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
cfi_restore_state
movzbl %r12b, %r15d
vmovss 1156(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_1_8
.LBL_1_12:
movzbl %r12b, %r15d
vmovss 1152(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_1_7
#endif
END (_ZGVeN16v_cosf_knl)
ENTRY (_ZGVeN16v_cosf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
#else
/*
@@ -440,7 +440,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
vmovss 1156(%rsp,%r15,8), %xmm0
vzeroupper
vmovss 1156(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_2_8
.LBL_2_12:
@@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
vmovss 1152(%rsp,%r15,8), %xmm0
vzeroupper
vmovss 1152(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_2_7
#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S
new file mode 100644
index 0000000000..727189f8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized cosf, vector length is 4.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2
+#include "../svml_s_cosf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
deleted file mode 100644
index ad7de18851..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized cosf, vector length is 4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN4v_cosf)
- .type _ZGVbN4v_cosf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN4v_cosf_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN4v_cosf_sse2(%rip), %rax
- ret
-END (_ZGVbN4v_cosf)
-libmvec_hidden_def (_ZGVbN4v_cosf)
-
-#define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2
-#include "../svml_s_cosf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c
new file mode 100644
index 0000000000..dde470af5d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized cosf, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN4v_cosf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_cosf, __GI__ZGVbN4v_cosf,
+ __redirect__ZGVbN4v_cosf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
index d23ff72a30..f4e0553bb3 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -211,7 +211,7 @@ ENTRY (_ZGVbN4v_cosf_sse4)
movzbl %r12b, %r15d
movss 196(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
movss %xmm0, 260(%rsp,%r15,8)
jmp .LBL_1_8
@@ -220,7 +220,7 @@ ENTRY (_ZGVbN4v_cosf_sse4)
movzbl %r12b, %r15d
movss 192(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
movss %xmm0, 256(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S
new file mode 100644
index 0000000000..1e1a5540c3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized cosf, vector length is 8.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN8v_cosf _ZGVdN8v_cosf_sse_wrapper
+#include "../svml_s_cosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
deleted file mode 100644
index 602c70e324..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized cosf, vector length is 8.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN8v_cosf)
- .type _ZGVdN8v_cosf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN8v_cosf_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN8v_cosf_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN8v_cosf)
-libmvec_hidden_def (_ZGVdN8v_cosf)
-
-#define _ZGVdN8v_cosf _ZGVdN8v_cosf_sse_wrapper
-#include "../svml_s_cosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c
new file mode 100644
index 0000000000..56531b215a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized cosf, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN8v_cosf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_cosf, __GI__ZGVdN8v_cosf,
+ __redirect__ZGVdN8v_cosf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
index 513f3c0a29..dbff4a7b7e 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -197,7 +197,7 @@ ENTRY (_ZGVdN8v_cosf_avx2)
vmovss 324(%rsp,%r15,8), %xmm0
vzeroupper
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 388(%rsp,%r15,8)
jmp .LBL_1_8
@@ -207,7 +207,7 @@ ENTRY (_ZGVdN8v_cosf_avx2)
vmovss 320(%rsp,%r15,8), %xmm0
vzeroupper
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 384(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S
new file mode 100644
index 0000000000..e0b7fd787f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S
@@ -0,0 +1,23 @@
+/* AVX2 version of vectorized expf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper
+#include "../svml_s_expf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
deleted file mode 100644
index f990d36483..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized expf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN16v_expf)
- .type _ZGVeN16v_expf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN16v_expf_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN16v_expf_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN16v_expf_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN16v_expf)
-
-#define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper
-#include "../svml_s_expf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c
new file mode 100644
index 0000000000..d358d93546
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expf, vector length is 16.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN16v_expf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_expf, __GI__ZGVeN16v_expf,
+ __redirect__ZGVeN16v_expf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
index 7eb7a1b775..89ba0df28f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVeN16v_expf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
#else
/*
@@ -212,14 +212,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
cfi_restore_state
movzbl %r12b, %r15d
vmovss 1156(%rsp,%r15,8), %xmm0
- call expf@PLT
+ call JUMPTARGET(__expf_finite)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_1_8
.LBL_1_12:
movzbl %r12b, %r15d
vmovss 1152(%rsp,%r15,8), %xmm0
- call expf@PLT
+ call JUMPTARGET(__expf_finite)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_1_7
@@ -227,7 +227,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
END (_ZGVeN16v_expf_knl)
ENTRY (_ZGVeN16v_expf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
#else
/*
@@ -422,7 +422,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
vzeroupper
vmovss 1156(%rsp,%r15,8), %xmm0
- call expf@PLT
+ call JUMPTARGET(__expf_finite)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_2_8
@@ -433,7 +433,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
vzeroupper
vmovss 1152(%rsp,%r15,8), %xmm0
- call expf@PLT
+ call JUMPTARGET(__expf_finite)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S
new file mode 100644
index 0000000000..8f57e4bbd9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized expf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN4v_expf _ZGVbN4v_expf_sse2
+#include "../svml_s_expf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
deleted file mode 100644
index 2fbe6d475e..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized expf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN4v_expf)
- .type _ZGVbN4v_expf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN4v_expf_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN4v_expf_sse2(%rip), %rax
- ret
-END (_ZGVbN4v_expf)
-libmvec_hidden_def (_ZGVbN4v_expf)
-
-#define _ZGVbN4v_expf _ZGVbN4v_expf_sse2
-#include "../svml_s_expf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c
new file mode 100644
index 0000000000..82befe0b5d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expf, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN4v_expf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_expf, __GI__ZGVbN4v_expf,
+ __redirect__ZGVbN4v_expf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
index c6f91e8dc1..254ec94096 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -195,7 +195,7 @@ ENTRY (_ZGVbN4v_expf_sse4)
movzbl %r12b, %r15d
movss 196(%rsp,%r15,8), %xmm0
- call expf@PLT
+ call JUMPTARGET(__expf_finite)
movss %xmm0, 260(%rsp,%r15,8)
jmp .LBL_1_8
@@ -204,7 +204,7 @@ ENTRY (_ZGVbN4v_expf_sse4)
movzbl %r12b, %r15d
movss 192(%rsp,%r15,8), %xmm0
- call expf@PLT
+ call JUMPTARGET(__expf_finite)
movss %xmm0, 256(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S
new file mode 100644
index 0000000000..459699c80c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized expf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN8v_expf _ZGVdN8v_expf_sse_wrapper
+#include "../svml_s_expf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
deleted file mode 100644
index 7d19bb423d..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized expf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN8v_expf)
- .type _ZGVdN8v_expf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN8v_expf_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN8v_expf_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN8v_expf)
-libmvec_hidden_def (_ZGVdN8v_expf)
-
-#define _ZGVdN8v_expf _ZGVdN8v_expf_sse_wrapper
-#include "../svml_s_expf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c
new file mode 100644
index 0000000000..0b8a47ede0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expf, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN8v_expf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_expf, __GI__ZGVdN8v_expf,
+ __redirect__ZGVdN8v_expf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
index c6be6954f7..ae1d5317e4 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -184,7 +184,7 @@ ENTRY(_ZGVdN8v_expf_avx2)
vmovss 324(%rsp,%r15,8), %xmm0
vzeroupper
- call expf@PLT
+ call JUMPTARGET(__expf_finite)
vmovss %xmm0, 388(%rsp,%r15,8)
jmp .LBL_1_8
@@ -194,7 +194,7 @@ ENTRY(_ZGVdN8v_expf_avx2)
vmovss 320(%rsp,%r15,8), %xmm0
vzeroupper
- call expf@PLT
+ call JUMPTARGET(__expf_finite)
vmovss %xmm0, 384(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S
new file mode 100644
index 0000000000..b23bd12fa0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized logf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper
+#include "../svml_s_logf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
deleted file mode 100644
index 9efb2fb7df..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized logf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN16v_logf)
- .type _ZGVeN16v_logf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN16v_logf_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN16v_logf_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN16v_logf_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN16v_logf)
-
-#define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper
-#include "../svml_s_logf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c
new file mode 100644
index 0000000000..fec61883b4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized logf, vector length is 16.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN16v_logf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_logf, __GI__ZGVeN16v_logf,
+ __redirect__ZGVeN16v_logf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
index 6209058381..4cf0a96fe4 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVeN16v_logf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
#else
/*
@@ -197,21 +197,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
cfi_restore_state
movzbl %r12b, %r15d
vmovss 1156(%rsp,%r15,8), %xmm0
- call logf@PLT
+ call JUMPTARGET(__logf_finite)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_1_8
.LBL_1_12:
movzbl %r12b, %r15d
vmovss 1152(%rsp,%r15,8), %xmm0
- call logf@PLT
+ call JUMPTARGET(__logf_finite)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_1_7
#endif
END (_ZGVeN16v_logf_knl)
ENTRY (_ZGVeN16v_logf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
#else
/*
@@ -391,7 +391,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
vzeroupper
vmovss 1156(%rsp,%r15,8), %xmm0
- call logf@PLT
+ call JUMPTARGET(__logf_finite)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_2_8
@@ -402,7 +402,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
vzeroupper
vmovss 1152(%rsp,%r15,8), %xmm0
- call logf@PLT
+ call JUMPTARGET(__logf_finite)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S
new file mode 100644
index 0000000000..2c2331e1d8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized logf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN4v_logf _ZGVbN4v_logf_sse2
+#include "../svml_s_logf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
deleted file mode 100644
index c85615ac25..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized logf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN4v_logf)
- .type _ZGVbN4v_logf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN4v_logf_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN4v_logf_sse2(%rip), %rax
- ret
-END (_ZGVbN4v_logf)
-libmvec_hidden_def (_ZGVbN4v_logf)
-
-#define _ZGVbN4v_logf _ZGVbN4v_logf_sse2
-#include "../svml_s_logf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c
new file mode 100644
index 0000000000..f249c351bd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized logf, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN4v_logf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_logf, __GI__ZGVbN4v_logf,
+ __redirect__ZGVbN4v_logf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
index 1ce9838513..651eb5eb1a 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -177,7 +177,7 @@ ENTRY (_ZGVbN4v_logf_sse4)
movzbl %r12b, %r15d
movss 196(%rsp,%r15,8), %xmm0
- call logf@PLT
+ call JUMPTARGET(__logf_finite)
movss %xmm0, 260(%rsp,%r15,8)
jmp .LBL_1_8
@@ -186,7 +186,7 @@ ENTRY (_ZGVbN4v_logf_sse4)
movzbl %r12b, %r15d
movss 192(%rsp,%r15,8), %xmm0
- call logf@PLT
+ call JUMPTARGET(__logf_finite)
movss %xmm0, 256(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S
new file mode 100644
index 0000000000..862379277b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized logf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN8v_logf _ZGVdN8v_logf_sse_wrapper
+#include "../svml_s_logf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
deleted file mode 100644
index 8f6d83dd56..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized logf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN8v_logf)
- .type _ZGVdN8v_logf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN8v_logf_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN8v_logf_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN8v_logf)
-libmvec_hidden_def (_ZGVdN8v_logf)
-
-#define _ZGVdN8v_logf _ZGVdN8v_logf_sse_wrapper
-#include "../svml_s_logf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c
new file mode 100644
index 0000000000..dbd29657ca
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized logf, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN8v_logf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_logf, __GI__ZGVdN8v_logf,
+ __redirect__ZGVdN8v_logf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
index 91fb549ce6..c7f5448fcb 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -166,7 +166,7 @@ ENTRY(_ZGVdN8v_logf_avx2)
vmovss 324(%rsp,%r15,8), %xmm0
vzeroupper
- call logf@PLT
+ call JUMPTARGET(__logf_finite)
vmovss %xmm0, 388(%rsp,%r15,8)
jmp .LBL_1_8
@@ -176,7 +176,7 @@ ENTRY(_ZGVdN8v_logf_avx2)
vmovss 320(%rsp,%r15,8), %xmm0
vzeroupper
- call logf@PLT
+ call JUMPTARGET(__logf_finite)
vmovss %xmm0, 384(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S
new file mode 100644
index 0000000000..de705c8632
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized powf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper
+#include "../svml_s_powf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
deleted file mode 100644
index 80048ce977..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized powf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN16vv_powf)
- .type _ZGVeN16vv_powf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN16vv_powf_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN16vv_powf_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN16vv_powf_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN16vv_powf)
-
-#define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper
-#include "../svml_s_powf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c
new file mode 100644
index 0000000000..91ea810441
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized powf, vector length is 16.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN16vv_powf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16vv_powf, __GI__ZGVeN16vv_powf,
+ __redirect__ZGVeN16vv_powf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
index 45d48723af..bdcd50afe1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -82,7 +82,7 @@
.text
ENTRY (_ZGVeN16vv_powf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
#else
pushq %rbp
@@ -344,7 +344,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
movzbl %r12b, %r15d
vmovss 1156(%rsp,%r15,8), %xmm0
vmovss 1220(%rsp,%r15,8), %xmm1
- call powf@PLT
+ call JUMPTARGET(__powf_finite)
vmovss %xmm0, 1284(%rsp,%r15,8)
jmp .LBL_1_8
@@ -352,14 +352,14 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
movzbl %r12b, %r15d
vmovss 1152(%rsp,%r15,8), %xmm0
vmovss 1216(%rsp,%r15,8), %xmm1
- call powf@PLT
+ call JUMPTARGET(__powf_finite)
vmovss %xmm0, 1280(%rsp,%r15,8)
jmp .LBL_1_7
#endif
END (_ZGVeN16vv_powf_knl)
ENTRY (_ZGVeN16vv_powf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
#else
pushq %rbp
@@ -629,7 +629,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
vmovss 1156(%rsp,%r15,8), %xmm1
vzeroupper
vmovss 1092(%rsp,%r15,8), %xmm0
- call powf@PLT
+ call JUMPTARGET(__powf_finite)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_2_8
@@ -638,7 +638,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
vmovss 1152(%rsp,%r15,8), %xmm1
vzeroupper
vmovss 1088(%rsp,%r15,8), %xmm0
- call powf@PLT
+ call JUMPTARGET(__powf_finite)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_2_7
#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S
new file mode 100644
index 0000000000..b6789a621d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized powf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN4vv_powf _ZGVbN4vv_powf_sse2
+#include "../svml_s_powf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
deleted file mode 100644
index b46821189b..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized powf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN4vv_powf)
- .type _ZGVbN4vv_powf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN4vv_powf_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN4vv_powf_sse2(%rip), %rax
- ret
-END (_ZGVbN4vv_powf)
-libmvec_hidden_def (_ZGVbN4vv_powf)
-
-#define _ZGVbN4vv_powf _ZGVbN4vv_powf_sse2
-#include "../svml_s_powf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c
new file mode 100644
index 0000000000..8149d7c991
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized powf, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN4vv_powf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4vv_powf, __GI__ZGVbN4vv_powf,
+ __redirect__ZGVbN4vv_powf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
index 420f98c6a6..bc59545c98 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -356,7 +356,7 @@ ENTRY (_ZGVbN4vv_powf_sse4)
movss 68(%rsp,%r15,8), %xmm0
movss 132(%rsp,%r15,8), %xmm1
- call powf@PLT
+ call JUMPTARGET(__powf_finite)
movss %xmm0, 196(%rsp,%r15,8)
jmp .LBL_1_8
@@ -366,7 +366,7 @@ ENTRY (_ZGVbN4vv_powf_sse4)
movss 64(%rsp,%r15,8), %xmm0
movss 128(%rsp,%r15,8), %xmm1
- call powf@PLT
+ call JUMPTARGET(__powf_finite)
movss %xmm0, 192(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S
new file mode 100644
index 0000000000..48da6d25c7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized powf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN8vv_powf _ZGVdN8vv_powf_sse_wrapper
+#include "../svml_s_powf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
deleted file mode 100644
index 945908a2ff..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized powf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN8vv_powf)
- .type _ZGVdN8vv_powf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN8vv_powf_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN8vv_powf_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN8vv_powf)
-libmvec_hidden_def (_ZGVdN8vv_powf)
-
-#define _ZGVdN8vv_powf _ZGVdN8vv_powf_sse_wrapper
-#include "../svml_s_powf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c
new file mode 100644
index 0000000000..0da188180e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized powf, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN8vv_powf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8vv_powf, __GI__ZGVdN8vv_powf,
+ __redirect__ZGVdN8vv_powf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
index 4446859130..53a4b4bc2b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -317,7 +317,7 @@ ENTRY(_ZGVdN8vv_powf_avx2)
vmovss 132(%rsp,%r15,8), %xmm1
vzeroupper
- call powf@PLT
+ call JUMPTARGET(__powf_finite)
vmovss %xmm0, 196(%rsp,%r15,8)
jmp .LBL_1_8
@@ -328,7 +328,7 @@ ENTRY(_ZGVdN8vv_powf_avx2)
vmovss 128(%rsp,%r15,8), %xmm1
vzeroupper
- call powf@PLT
+ call JUMPTARGET(__powf_finite)
vmovss %xmm0, 192(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S
new file mode 100644
index 0000000000..c677e3f1cf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized sincosf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper
+#include "../svml_s_sincosf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
deleted file mode 100644
index 16cee0c676..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized sincosf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN16vvv_sincosf)
- .type _ZGVeN16vvv_sincosf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN16vvv_sincosf_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN16vvv_sincosf_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN16vvv_sincosf)
-
-#define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper
-#include "../svml_s_sincosf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c
new file mode 100644
index 0000000000..b753be6bbd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincosf, vector length is 16.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN16vvv_sincosf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16vvv_sincosf, __GI__ZGVeN16vvv_sincosf,
+ __redirect__ZGVeN16vvv_sincosf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
index 758aeeaeed..5fa4bc412a 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -41,7 +41,7 @@
b) Calculate 2 polynomials for sin and cos:
RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3))));
RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4))));
- c) Swap RS & RC if if first bit of obtained value after
+ c) Swap RS & RC if first bit of obtained value after
Right Shifting is set to 1. Using And, Andnot & Or operations.
3) Destination sign setting
a) Set shifted destination sign using XOR operation:
@@ -49,9 +49,9 @@
R2 = XOR( RC, SC ). */
.text
-ENTRY (_ZGVeN16vvv_sincosf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
+ENTRY (_ZGVeN16vl4l4_sincosf_knl)
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
+WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
#else
pushq %rbp
cfi_adjust_cfa_offset (8)
@@ -243,12 +243,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
movzbl %r12b, %r15d
vmovss 1156(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 1220(%rsp,%r15,8)
vmovss 1156(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 1284(%rsp,%r15,8)
jmp .LBL_1_8
@@ -257,20 +257,21 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
movzbl %r12b, %r15d
vmovss 1152(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 1216(%rsp,%r15,8)
vmovss 1152(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 1280(%rsp,%r15,8)
jmp .LBL_1_7
#endif
-END (_ZGVeN16vvv_sincosf_knl)
+END (_ZGVeN16vl4l4_sincosf_knl)
+libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_knl)
-ENTRY (_ZGVeN16vvv_sincosf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+ENTRY (_ZGVeN16vl4l4_sincosf_skx)
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
#else
pushq %rbp
@@ -470,12 +471,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
vzeroupper
vmovss 1156(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 1220(%rsp,%r15,8)
vmovss 1156(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 1284(%rsp,%r15,8)
jmp .LBL_2_8
@@ -486,16 +487,266 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
vzeroupper
vmovss 1152(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 1216(%rsp,%r15,8)
vmovss 1152(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 1280(%rsp,%r15,8)
jmp .LBL_2_7
#endif
+END (_ZGVeN16vl4l4_sincosf_skx)
+libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
+
+/* Wrapper between vvv and vl4l4 vector variants. */
+.macro WRAPPER_AVX512_vvv_vl4l4 callee
+#ifndef __ILP32__
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp
+ subq $384, %rsp
+ vmovups %zmm1, 128(%rsp)
+ lea (%rsp), %rdi
+ vmovups %zmm2, 192(%rdi)
+ vmovups %zmm3, 256(%rdi)
+ vmovups %zmm4, 320(%rdi)
+ lea 64(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq 128(%rsp), %rdx
+ movq 136(%rsp), %rsi
+ movq 144(%rsp), %r8
+ movq 152(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 160(%rsp), %rax
+ movq 168(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 176(%rsp), %rdi
+ movq 184(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 192(%rsp), %r11
+ movq 200(%rsp), %rdx
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 208(%rsp), %rsi
+ movq 216(%rsp), %r8
+ movl 32(%rsp), %r10d
+ movl 36(%rsp), %eax
+ movl 40(%rsp), %ecx
+ movl 44(%rsp), %edi
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 224(%rsp), %r10
+ movq 232(%rsp), %rax
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movq 240(%rsp), %rcx
+ movq 248(%rsp), %rdi
+ movl 48(%rsp), %r9d
+ movl 52(%rsp), %r11d
+ movl 56(%rsp), %edx
+ movl 60(%rsp), %esi
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movq 256(%rsp), %r9
+ movq 264(%rsp), %r11
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movq 272(%rsp), %rdx
+ movq 280(%rsp), %rsi
+ movl 64(%rsp), %r8d
+ movl 68(%rsp), %r10d
+ movl 72(%rsp), %eax
+ movl 76(%rsp), %ecx
+ movl %r8d, (%r9)
+ movl %r10d, (%r11)
+ movq 288(%rsp), %r8
+ movq 296(%rsp), %r10
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 304(%rsp), %rax
+ movq 312(%rsp), %rcx
+ movl 80(%rsp), %edi
+ movl 84(%rsp), %r9d
+ movl 88(%rsp), %r11d
+ movl 92(%rsp), %edx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 320(%rsp), %rdi
+ movq 328(%rsp), %r9
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 336(%rsp), %r11
+ movq 344(%rsp), %rdx
+ movl 96(%rsp), %esi
+ movl 100(%rsp), %r8d
+ movl 104(%rsp), %r10d
+ movl 108(%rsp), %eax
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 352(%rsp), %rsi
+ movq 360(%rsp), %r8
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 368(%rsp), %r10
+ movq 376(%rsp), %rax
+ movl 112(%rsp), %ecx
+ movl 116(%rsp), %edi
+ movl 120(%rsp), %r9d
+ movl 124(%rsp), %r11d
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-64, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x78,0x6
+ leal -112(%rbp), %esi
+ leal -176(%rbp), %edi
+ subl $296, %esp
+ vmovdqa64 %zmm1, -240(%ebp)
+ vmovdqa64 %zmm2, -304(%ebp)
+ call HIDDEN_JUMPTARGET(\callee)
+ movl -240(%ebp), %eax
+ vmovss -176(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -236(%ebp), %eax
+ vmovss -172(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -232(%ebp), %eax
+ vmovss -168(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -228(%ebp), %eax
+ vmovss -164(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -224(%ebp), %eax
+ vmovss -160(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -220(%ebp), %eax
+ vmovss -156(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -216(%ebp), %eax
+ vmovss -152(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -212(%ebp), %eax
+ vmovss -148(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -208(%ebp), %eax
+ vmovss -144(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -204(%ebp), %eax
+ vmovss -140(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -200(%ebp), %eax
+ vmovss -136(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -196(%ebp), %eax
+ vmovss -132(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -192(%ebp), %eax
+ vmovss -128(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -188(%ebp), %eax
+ vmovss -124(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -184(%ebp), %eax
+ vmovss -120(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -180(%ebp), %eax
+ vmovss -116(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -304(%ebp), %eax
+ vmovss -112(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -300(%ebp), %eax
+ vmovss -108(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -296(%ebp), %eax
+ vmovss -104(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -292(%ebp), %eax
+ vmovss -100(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -288(%ebp), %eax
+ vmovss -96(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -284(%ebp), %eax
+ vmovss -92(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -280(%ebp), %eax
+ vmovss -88(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -276(%ebp), %eax
+ vmovss -84(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -272(%ebp), %eax
+ vmovss -80(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -268(%ebp), %eax
+ vmovss -76(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -264(%ebp), %eax
+ vmovss -72(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -260(%ebp), %eax
+ vmovss -68(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -256(%ebp), %eax
+ vmovss -64(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -252(%ebp), %eax
+ vmovss -60(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -248(%ebp), %eax
+ vmovss -56(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -244(%ebp), %eax
+ vmovss -52(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ addl $296, %esp
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+.endm
+
+ENTRY (_ZGVeN16vvv_sincosf_knl)
+WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_knl
+END (_ZGVeN16vvv_sincosf_knl)
+
+ENTRY (_ZGVeN16vvv_sincosf_skx)
+WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_skx
END (_ZGVeN16vvv_sincosf_skx)
.section .rodata, "a"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S
new file mode 100644
index 0000000000..cc718b3a2e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized sincosf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN4vvv_sincosf _ZGVbN4vvv_sincosf_sse2
+#include "../svml_s_sincosf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
deleted file mode 100644
index d72b4049e2..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sincosf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN4vvv_sincosf)
- .type _ZGVbN4vvv_sincosf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN4vvv_sincosf_sse2(%rip), %rax
- ret
-END (_ZGVbN4vvv_sincosf)
-libmvec_hidden_def (_ZGVbN4vvv_sincosf)
-
-#define _ZGVbN4vvv_sincosf _ZGVbN4vvv_sincosf_sse2
-#include "../svml_s_sincosf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c
new file mode 100644
index 0000000000..705d96a8fb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincosf, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN4vvv_sincosf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4vvv_sincosf, __GI__ZGVbN4vvv_sincosf,
+ __redirect__ZGVbN4vvv_sincosf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
index 643fc0ca3b..d758ceeb30 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
#include "svml_s_trig_data.h"
.text
-ENTRY (_ZGVbN4vvv_sincosf_sse4)
+ENTRY (_ZGVbN4vl4l4_sincosf_sse4)
/*
ALGORITHM DESCRIPTION:
@@ -42,7 +42,7 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4)
b) Calculate 2 polynomials for sin and cos:
RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3))));
RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4))));
- c) Swap RS & RC if if first bit of obtained value after
+ c) Swap RS & RC if first bit of obtained value after
Right Shifting is set to 1. Using And, Andnot & Or operations.
3) Destination sign setting
a) Set shifted destination sign using XOR operation:
@@ -241,12 +241,12 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4)
movzbl %r12b, %r15d
movss 132(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
movss %xmm0, 196(%rsp,%r15,8)
movss 132(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
movss %xmm0, 260(%rsp,%r15,8)
jmp .LBL_1_8
@@ -255,14 +255,92 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4)
movzbl %r12b, %r15d
movss 128(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
movss %xmm0, 192(%rsp,%r15,8)
movss 128(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
movss %xmm0, 256(%rsp,%r15,8)
jmp .LBL_1_7
+END (_ZGVbN4vl4l4_sincosf_sse4)
+libmvec_hidden_def(_ZGVbN4vl4l4_sincosf_sse4)
+
+/* vvv version implemented with wrapper to vl4l4 variant. */
+ENTRY (_ZGVbN4vvv_sincosf_sse4)
+#ifndef __ILP32__
+ subq $104, %rsp
+ .cfi_def_cfa_offset 112
+ movdqu %xmm1, 32(%rsp)
+ lea (%rsp), %rdi
+ movdqu %xmm2, 48(%rdi)
+ lea 16(%rsp), %rsi
+ movdqu %xmm3, 48(%rsi)
+ movdqu %xmm4, 64(%rsi)
+ call HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4)
+ movq 32(%rsp), %rdx
+ movq 40(%rsp), %rsi
+ movq 48(%rsp), %r8
+ movq 56(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 64(%rsp), %rax
+ movq 72(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 80(%rsp), %rdi
+ movq 88(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ addq $104, %rsp
+ .cfi_def_cfa_offset 8
+ ret
+#else
+ subl $72, %esp
+ .cfi_def_cfa_offset 80
+ leal 48(%rsp), %esi
+ movaps %xmm1, 16(%esp)
+ leal 32(%rsp), %edi
+ movaps %xmm2, (%esp)
+ call HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4)
+ movl 16(%esp), %eax
+ movss 32(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movl 20(%esp), %eax
+ movss 36(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movl 24(%esp), %eax
+ movss 40(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movl 28(%esp), %eax
+ movss 44(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movl (%esp), %eax
+ movss 48(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movl 4(%esp), %eax
+ movss 52(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movl 8(%esp), %eax
+ movss 56(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movl 12(%esp), %eax
+ movss 60(%esp), %xmm0
+ movss %xmm0, (%eax)
+ addl $72, %esp
+ .cfi_def_cfa_offset 8
+ ret
+#endif
END (_ZGVbN4vvv_sincosf_sse4)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S
new file mode 100644
index 0000000000..348d1e6619
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized sincosf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN8vvv_sincosf _ZGVdN8vvv_sincosf_sse_wrapper
+#include "../svml_s_sincosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
deleted file mode 100644
index 0123b8024e..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sincosf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN8vvv_sincosf)
- .type _ZGVdN8vvv_sincosf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN8vvv_sincosf_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN8vvv_sincosf)
-libmvec_hidden_def (_ZGVdN8vvv_sincosf)
-
-#define _ZGVdN8vvv_sincosf _ZGVdN8vvv_sincosf_sse_wrapper
-#include "../svml_s_sincosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c
new file mode 100644
index 0000000000..74f3d3f041
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincosf, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN8vvv_sincosf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8vvv_sincosf, __GI__ZGVdN8vvv_sincosf,
+ __redirect__ZGVdN8vvv_sincosf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
index f2a0ba7116..8b4b92dd94 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
#include "svml_s_trig_data.h"
.text
-ENTRY(_ZGVdN8vvv_sincosf_avx2)
+ENTRY (_ZGVdN8vl4l4_sincosf_avx2)
/*
ALGORITHM DESCRIPTION:
@@ -42,7 +42,7 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2)
b) Calculate 2 polynomials for sin and cos:
RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3))));
RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4))));
- c) Swap RS & RC if if first bit of obtained value after
+ c) Swap RS & RC if first bit of obtained value after
Right Shifting is set to 1. Using And, Andnot & Or operations.
3) Destination sign setting
a) Set shifted destination sign using XOR operation:
@@ -213,12 +213,12 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2)
vmovss 260(%rsp,%r15,8), %xmm0
vzeroupper
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 324(%rsp,%r15,8)
vmovss 260(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 388(%rsp,%r15,8)
jmp .LBL_1_8
@@ -228,14 +228,162 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2)
vmovss 256(%rsp,%r15,8), %xmm0
vzeroupper
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 320(%rsp,%r15,8)
vmovss 256(%rsp,%r15,8), %xmm0
- call cosf@PLT
+ call JUMPTARGET(cosf)
vmovss %xmm0, 384(%rsp,%r15,8)
jmp .LBL_1_7
-END(_ZGVdN8vvv_sincosf_avx2)
+END (_ZGVdN8vl4l4_sincosf_avx2)
+libmvec_hidden_def(_ZGVdN8vl4l4_sincosf_avx2)
+
+/* vvv version implemented with wrapper to vl4l4 variant. */
+ENTRY (_ZGVdN8vvv_sincosf_avx2)
+#ifndef __ILP32__
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-32, %rsp
+ subq $192, %rsp
+ vmovdqu %ymm1, 64(%rsp)
+ lea (%rsp), %rdi
+ vmovdqu %ymm2, 96(%rdi)
+ vmovdqu %ymm3, 128(%rdi)
+ vmovdqu %ymm4, 160(%rdi)
+ lea 32(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2)
+ movq 64(%rsp), %rdx
+ movq 72(%rsp), %rsi
+ movq 80(%rsp), %r8
+ movq 88(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 96(%rsp), %rax
+ movq 104(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 112(%rsp), %rdi
+ movq 120(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 128(%rsp), %r11
+ movq 136(%rsp), %rdx
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 144(%rsp), %rsi
+ movq 152(%rsp), %r8
+ movl 32(%rsp), %r10d
+ movl 36(%rsp), %eax
+ movl 40(%rsp), %ecx
+ movl 44(%rsp), %edi
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 160(%rsp), %r10
+ movq 168(%rsp), %rax
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movq 176(%rsp), %rcx
+ movq 184(%rsp), %rdi
+ movl 48(%rsp), %r9d
+ movl 52(%rsp), %r11d
+ movl 56(%rsp), %edx
+ movl 60(%rsp), %esi
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-32, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x78,0x6
+ leal -48(%rbp), %esi
+ leal -80(%rbp), %edi
+ subl $136, %esp
+ vmovdqa %ymm1, -112(%ebp)
+ vmovdqa %ymm2, -144(%ebp)
+ call HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2)
+ vmovdqa -112(%ebp), %xmm0
+ vmovq %xmm0, %rax
+ vmovss -80(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -76(%ebp), %xmm0
+ shrq $32, %rax
+ vmovss %xmm0, (%eax)
+ movq -104(%ebp), %rax
+ vmovss -72(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -68(%ebp), %xmm0
+ shrq $32, %rax
+ vmovss %xmm0, (%eax)
+ movq -96(%ebp), %rax
+ vmovss -64(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -60(%ebp), %xmm0
+ shrq $32, %rax
+ vmovss %xmm0, (%eax)
+ movq -88(%ebp), %rax
+ vmovss -56(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -52(%ebp), %xmm0
+ shrq $32, %rax
+ vmovss %xmm0, (%eax)
+ vmovdqa -144(%ebp), %xmm0
+ vmovq %xmm0, %rax
+ vmovss -48(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -44(%ebp), %xmm0
+ shrq $32, %rax
+ vmovss %xmm0, (%eax)
+ movq -136(%ebp), %rax
+ vmovss -40(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -36(%ebp), %xmm0
+ shrq $32, %rax
+ vmovss %xmm0, (%eax)
+ movq -128(%ebp), %rax
+ vmovss -32(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -28(%ebp), %xmm0
+ shrq $32, %rax
+ vmovss %xmm0, (%eax)
+ movq -120(%ebp), %rax
+ vmovss -24(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -20(%ebp), %xmm0
+ shrq $32, %rax
+ vmovss %xmm0, (%eax)
+ addl $136, %esp
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+END (_ZGVdN8vvv_sincosf_avx2)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S
new file mode 100644
index 0000000000..fa521b9dac
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized sinf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper
+#include "../svml_s_sinf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
deleted file mode 100644
index 2212cdd94d..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized sinf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVeN16v_sinf)
- .type _ZGVeN16v_sinf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVeN16v_sinf_skx(%rip), %rax
- HAS_ARCH_FEATURE (AVX512DQ_Usable)
- jnz 2f
- leaq _ZGVeN16v_sinf_knl(%rip), %rax
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jnz 2f
- leaq _ZGVeN16v_sinf_avx2_wrapper(%rip), %rax
-2: ret
-END (_ZGVeN16v_sinf)
-
-#define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper
-#include "../svml_s_sinf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c
new file mode 100644
index 0000000000..97e5b58284
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sinf, vector length is 16.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVeN16v_sinf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_sinf, __GI__ZGVeN16v_sinf,
+ __redirect__ZGVeN16v_sinf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
index 61d8d3793a..141f747eb5 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY(_ZGVeN16v_sinf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
#else
/*
@@ -229,21 +229,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
cfi_restore_state
movzbl %r12b, %r15d
vmovss 1156(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_1_8
.LBL_1_12:
movzbl %r12b, %r15d
vmovss 1152(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_1_7
#endif
END(_ZGVeN16v_sinf_knl)
ENTRY (_ZGVeN16v_sinf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
#else
/*
@@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
vzeroupper
vmovss 1156(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 1220(%rsp,%r15,8)
jmp .LBL_2_8
@@ -466,7 +466,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
vzeroupper
vmovss 1152(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 1216(%rsp,%r15,8)
jmp .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S
new file mode 100644
index 0000000000..1d2e65c39d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized sinf.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVbN4v_sinf _ZGVbN4v_sinf_sse2
+#include "../svml_s_sinf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
deleted file mode 100644
index b31554730d..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sinf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVbN4v_sinf)
- .type _ZGVbN4v_sinf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVbN4v_sinf_sse4(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jz 2f
- ret
-2: leaq _ZGVbN4v_sinf_sse2(%rip), %rax
- ret
-END (_ZGVbN4v_sinf)
-libmvec_hidden_def (_ZGVbN4v_sinf)
-
-#define _ZGVbN4v_sinf _ZGVbN4v_sinf_sse2
-#include "../svml_s_sinf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c
new file mode 100644
index 0000000000..93b8bfebbf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sinf, vector length is 4.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVbN4v_sinf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_sinf, __GI__ZGVbN4v_sinf,
+ __redirect__ZGVbN4v_sinf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
index 5268ab1f09..39a4c92235 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with SSE4.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -207,7 +207,7 @@ ENTRY(_ZGVbN4v_sinf_sse4)
movzbl %r12b, %r15d
movss 196(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
movss %xmm0, 260(%rsp,%r15,8)
jmp .LBL_1_8
@@ -216,7 +216,7 @@ ENTRY(_ZGVbN4v_sinf_sse4)
movzbl %r12b, %r15d
movss 192(%rsp,%r15,8), %xmm0
- call sinf@PLT
+ call JUMPTARGET(sinf)
movss %xmm0, 256(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S
new file mode 100644
index 0000000000..f2af3a0b4b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized sinf, vector length is 8.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _ZGVdN8v_sinf _ZGVdN8v_sinf_sse_wrapper
+#include "../svml_s_sinf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
deleted file mode 100644
index 47fe0a4adc..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sinf, vector length is 8.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN8v_sinf)
- .type _ZGVdN8v_sinf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
-1: leaq _ZGVdN8v_sinf_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN8v_sinf_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN8v_sinf)
-libmvec_hidden_def (_ZGVdN8v_sinf)
-
-#define _ZGVdN8v_sinf _ZGVdN8v_sinf_sse_wrapper
-#include "../svml_s_sinf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c
new file mode 100644
index 0000000000..cf13b6647c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sinf, vector length is 8.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SYMBOL_NAME _ZGVdN8v_sinf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_sinf, __GI__ZGVdN8v_sinf,
+ __redirect__ZGVdN8v_sinf)
+ __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
index 9fdaadb2e8..5f7a95e9ad 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -201,7 +201,7 @@ ENTRY(_ZGVdN8v_sinf_avx2)
vmovss 324(%rsp,%r15,8), %xmm0
vzeroupper
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 388(%rsp,%r15,8)
jmp .LBL_1_8
@@ -211,7 +211,7 @@ ENTRY(_ZGVdN8v_sinf_avx2)
vmovss 320(%rsp,%r15,8), %xmm0
vzeroupper
- call sinf@PLT
+ call JUMPTARGET(sinf)
vmovss %xmm0, 384(%rsp,%r15,8)
jmp .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/printf_fphex.c b/sysdeps/x86_64/fpu/printf_fphex.c
index 0fbaa3748e..62efed10da 100644
--- a/sysdeps/x86_64/fpu/printf_fphex.c
+++ b/sysdeps/x86_64/fpu/printf_fphex.c
@@ -1,5 +1,5 @@
/* Print floating point number in hexadecimal notation according to ISO C99.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/s_ceill.S b/sysdeps/x86_64/fpu/s_ceill.S
index 910c371d58..8f2bd351f6 100644
--- a/sysdeps/x86_64/fpu/s_ceill.S
+++ b/sysdeps/x86_64/fpu/s_ceill.S
@@ -5,27 +5,33 @@
* Public domain.
*/
+#include <libm-alias-ldouble.h>
#include <machine/asm.h>
ENTRY(__ceill)
fldt 8(%rsp)
- fstcw -4(%rsp) /* store fpu control word */
+ fnstenv -28(%rsp) /* store fpu environment */
/* We use here %edx although only the low 1 bits are defined.
But none of the operations should care and they are faster
than the 16 bit operations. */
movl $0x0800,%edx /* round towards +oo */
- orl -4(%rsp),%edx
+ orl -28(%rsp),%edx
andl $0xfbff,%edx
- movl %edx,-8(%rsp)
- fldcw -8(%rsp) /* load modified control word */
+ movl %edx,-32(%rsp)
+ fldcw -32(%rsp) /* load modified control word */
frndint /* round */
- fldcw -4(%rsp) /* restore original control word */
+ /* Preserve "invalid" exceptions from sNaN input. */
+ fnstsw
+ andl $0x1, %eax
+ orl %eax, -24(%rsp)
+
+ fldenv -28(%rsp) /* restore original environment */
ret
END (__ceill)
-weak_alias (__ceill, ceill)
+libm_alias_ldouble (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/s_copysign.S b/sysdeps/x86_64/fpu/s_copysign.S
index 18f568f46f..e2921ce770 100644
--- a/sysdeps/x86_64/fpu/s_copysign.S
+++ b/sysdeps/x86_64/fpu/s_copysign.S
@@ -1,5 +1,5 @@
/* copy sign, double version.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <machine/asm.h>
+#include <libm-alias-double.h>
.section .rodata.cst16,"aM",@progbits,16
@@ -47,4 +48,4 @@ ENTRY(__copysign)
ret
END (__copysign)
-weak_alias (__copysign, copysign)
+libm_alias_double (__copysign, copysign)
diff --git a/sysdeps/x86_64/fpu/s_copysignf.S b/sysdeps/x86_64/fpu/s_copysignf.S
index 00a1fabaee..4093e781fe 100644
--- a/sysdeps/x86_64/fpu/s_copysignf.S
+++ b/sysdeps/x86_64/fpu/s_copysignf.S
@@ -1,5 +1,5 @@
/* copy sign, double version.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <machine/asm.h>
+#include <libm-alias-float.h>
.section .rodata
@@ -42,4 +43,4 @@ ENTRY(__copysignf)
retq
END (__copysignf)
-weak_alias (__copysignf, copysignf)
+libm_alias_float (__copysign, copysign)
diff --git a/sysdeps/x86_64/fpu/s_copysignl.S b/sysdeps/x86_64/fpu/s_copysignl.S
index 2ffd612d65..8616205d38 100644
--- a/sysdeps/x86_64/fpu/s_copysignl.S
+++ b/sysdeps/x86_64/fpu/s_copysignl.S
@@ -5,6 +5,7 @@
* Public domain.
*/
+#include <libm-alias-ldouble.h>
#include <machine/asm.h>
RCSID("$NetBSD: $")
@@ -19,4 +20,4 @@ ENTRY(__copysignl)
fldt 8(%rsp)
ret
END (__copysignl)
-weak_alias (__copysignl, copysignl)
+libm_alias_ldouble (__copysign, copysign)
diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S
deleted file mode 100644
index 31968e498f..0000000000
--- a/sysdeps/x86_64/fpu/s_cosf.S
+++ /dev/null
@@ -1,533 +0,0 @@
-/* Optimized cosf function.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- * 1) if |x| == 0: return 1.0-|x|.
- * 2) if |x| < 2^-27: return 1.0-|x|.
- * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1.
- * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
- * 5) if |x| < 9*Pi/4:
- * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
- * t=|x|-j*Pi/4.
- * 5.2) Reconstruction:
- * s = (-1.0)^((n>>2)&1)
- * if(n&2 != 0) {
- * using cos(t) polynomial for |t|<Pi/4, result is
- * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
- * } else {
- * using sin(t) polynomial for |t|<Pi/4, result is
- * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
- * }
- * 6) if |x| < 2^23, large args:
- * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
- * t=|x|-j*Pi/4.
- * 6.2) Reconstruction same as (5.2).
- * 7) if |x| >= 2^23, very large args:
- * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
- * t=|x|-j*Pi/4.
- * 7.2) Reconstruction same as (5.2).
- * 8) if x is Inf, return x-x, and set errno=EDOM.
- * 9) if x is NaN, return x-x.
- *
- * Special cases:
- * cos(+-0) = 1 not raising inexact,
- * cos(subnormal) raises inexact,
- * cos(min_normalized) raises inexact,
- * cos(normalized) raises inexact,
- * cos(Inf) = NaN, raises invalid, sets errno to EDOM,
- * cos(NaN) = NaN.
- */
-
- .text
-ENTRY(__cosf)
- /* Input: single precision x in %xmm0 */
-
- movd %xmm0, %eax /* Bits of x */
- movaps %xmm0, %xmm7 /* Copy of x */
- cvtss2sd %xmm0, %xmm0 /* DP x */
- movss L(SP_ABS_MASK)(%rip), %xmm3
- andl $0x7fffffff, %eax /* |x| */
-
- cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
- jb L(arg_less_pio4)
-
- /* Here if |x|>=Pi/4 */
- andps %xmm7, %xmm3 /* SP |x| */
- andpd L(DP_ABS_MASK)(%rip), %xmm0 /* DP |x| */
- movss L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */
-
- cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
- jae L(large_args)
-
- /* Here if Pi/4<=|x|<9*Pi/4 */
- mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
- cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
- lea L(PIO4J)(%rip), %rsi
- addl $1, %eax /* k+1 */
- movl $0x0e, %edx
- andl %eax, %edx /* j = (k+1)&0x0e */
- addl $2, %eax /* n */
- subsd (%rsi,%rdx,8), %xmm0 /* t = |x| - j * Pi/4 */
-
-L(reconstruction):
- /* Input: %eax=n, %xmm0=t */
- testl $2, %eax /* n&2 != 0? */
- jz L(sin_poly)
-
-/*L(cos_poly):*/
- /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
- * y = t*t; z = y*y;
- * s = (-1.0)^((n>>2)&1)
- * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
- */
- shrl $2, %eax /* n>>2 */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- andl $1, %eax /* (n>>2)&1 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=t^4 */
-
- movsd L(DP_C4)(%rip), %xmm4 /* C4 */
- mulsd %xmm0, %xmm4 /* z*C4 */
- movsd L(DP_C3)(%rip), %xmm3 /* C3 */
- mulsd %xmm0, %xmm3 /* z*C3 */
- lea L(DP_ONES)(%rip), %rsi
- addsd L(DP_C2)(%rip), %xmm4 /* C2+z*C4 */
- mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
- addsd L(DP_C1)(%rip), %xmm3 /* C1+z*C3 */
- mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
- addsd L(DP_C0)(%rip), %xmm4 /* C0+z*(C2+z*C4) */
- mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
-
- addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- addsd L(DP_ONES)(%rip), %xmm3
-
- mulsd (%rsi,%rax,8), %xmm3 /* DP result */
- cvtsd2ss %xmm3, %xmm0 /* SP result */
- ret
-
- .p2align 4
-L(sin_poly):
- /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
- * y = t*t; z = y*y;
- * s = (-1.0)^((n>>2)&1)
- * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
- */
-
- movaps %xmm0, %xmm4 /* t */
- shrl $2, %eax /* n>>2 */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- andl $1, %eax /* (n>>2)&1 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=t^4 */
-
- movsd L(DP_S4)(%rip), %xmm2 /* S4 */
- mulsd %xmm0, %xmm2 /* z*S4 */
- movsd L(DP_S3)(%rip), %xmm3 /* S3 */
- mulsd %xmm0, %xmm3 /* z*S3 */
- lea L(DP_ONES)(%rip), %rsi
- addsd L(DP_S2)(%rip), %xmm2 /* S2+z*S4 */
- mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
- addsd L(DP_S1)(%rip), %xmm3 /* S1+z*S3 */
- mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
- addsd L(DP_S0)(%rip), %xmm2 /* S0+z*(S2+z*S4) */
- mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
- /* t*s, where s = (-1.0)^((n>>2)&1) */
- mulsd (%rsi,%rax,8), %xmm4
- /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm2, %xmm3
- /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- mulsd %xmm4, %xmm3
- /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm4, %xmm3
- cvtsd2ss %xmm3, %xmm0 /* SP result */
- ret
-
- .p2align 4
-L(large_args):
- /* Here if |x|>=9*Pi/4 */
- cmpl $0x7f800000, %eax /* x is Inf or NaN? */
- jae L(arg_inf_or_nan)
-
- /* Here if finite |x|>=9*Pi/4 */
- cmpl $0x4b000000, %eax /* |x|<2^23? */
- jae L(very_large_args)
-
- /* Here if 9*Pi/4<=|x|<2^23 */
- movsd L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
- mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
- cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
- addl $1, %eax /* k+1 */
- movl %eax, %edx
- andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
- cvtsi2sdl %edx, %xmm4 /* DP j */
- movsd L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
- mulsd %xmm4, %xmm2 /* -j*PIO4HI */
- movsd L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
- addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
- addl $2, %eax /* n */
- mulsd %xmm3, %xmm4 /* j*PIO4LO */
- addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
- jmp L(reconstruction)
-
- .p2align 4
-L(very_large_args):
- /* Here if finite |x|>=2^23 */
-
- /* bitpos = (ix>>23) - BIAS_32 + 59; */
- shrl $23, %eax /* eb = biased exponent of x */
- /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
- subl $68, %eax
- movl $28, %ecx /* %cl=28 */
- movl %eax, %edx /* bitpos copy */
-
- /* j = bitpos/28; */
- div %cl /* j in register %al=%ax/%cl */
- movapd %xmm0, %xmm3 /* |x| */
- /* clear unneeded remainder from %ah */
- andl $0xff, %eax
-
- imull $28, %eax, %ecx /* j*28 */
- lea L(_FPI)(%rip), %rsi
- movsd L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
- movapd %xmm0, %xmm5 /* |x| */
- mulsd -16(%rsi,%rax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
- movapd %xmm0, %xmm1 /* |x| */
- mulsd -8(%rsi,%rax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
- mulsd (%rsi,%rax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
- addl $19, %ecx /* j*28+19 */
- mulsd 8(%rsi,%rax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
- cmpl %ecx, %edx /* bitpos>=j*28+19? */
- jl L(very_large_skip1)
-
- /* Here if bitpos>=j*28+19 */
- andpd %xmm3, %xmm4 /* HI(tmp3) */
- subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
- movsd L(DP_2POW52)(%rip), %xmm6
- movapd %xmm5, %xmm2 /* tmp2 copy */
- addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
- movl $1, %edx
- addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
- movsd 8+L(DP_2POW52)(%rip), %xmm4
- movd %xmm6, %eax /* k = I64_LO(tmp6); */
- addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
- comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
- jbe L(very_large_skip2)
-
- /* Here if tmp4 > tmp5 */
- subl $1, %eax /* k-- */
- addsd 8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
-L(very_large_skip2):
-
- andl %eax, %edx /* k&1 */
- lea L(DP_ZERONE)(%rip), %rsi
- subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
- addsd (%rsi,%rdx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
- addsd %xmm2, %xmm3 /* t += tmp2 */
- addsd %xmm3, %xmm0 /* t += tmp0 */
- addl $3, %eax /* n=k+3 */
- addsd %xmm1, %xmm0 /* t += tmp1 */
- mulsd L(DP_PIO4)(%rip), %xmm0 /* t *= PIO4 */
-
- jmp L(reconstruction) /* end of very_large_args path */
-
- .p2align 4
-L(arg_less_pio4):
- /* Here if |x|<Pi/4 */
- cmpl $0x3d000000, %eax /* |x|<2^-5? */
- jl L(arg_less_2pn5)
-
- /* Here if 2^-5<=|x|<Pi/4 */
- mulsd %xmm0, %xmm0 /* y=x^2 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=x^4 */
- movsd L(DP_C4)(%rip), %xmm3 /* C4 */
- mulsd %xmm0, %xmm3 /* z*C4 */
- movsd L(DP_C3)(%rip), %xmm5 /* C3 */
- mulsd %xmm0, %xmm5 /* z*C3 */
- addsd L(DP_C2)(%rip), %xmm3 /* C2+z*C4 */
- mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */
- addsd L(DP_C1)(%rip), %xmm5 /* C1+z*C3 */
- mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */
- addsd L(DP_C0)(%rip), %xmm3 /* C0+z*(C2+z*C4) */
- mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */
- /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- addsd %xmm5, %xmm3
- /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- addsd L(DP_ONES)(%rip), %xmm3
- cvtsd2ss %xmm3, %xmm0 /* SP result */
- ret
-
- .p2align 4
-L(arg_less_2pn5):
- /* Here if |x|<2^-5 */
- cmpl $0x32000000, %eax /* |x|<2^-27? */
- jl L(arg_less_2pn27)
-
- /* Here if 2^-27<=|x|<2^-5 */
- mulsd %xmm0, %xmm0 /* DP x^2 */
- movsd L(DP_COS2_1)(%rip), %xmm3 /* DP DP_COS2_1 */
- mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */
- addsd L(DP_COS2_0)(%rip), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */
- mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
- /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
- addsd L(DP_ONES)(%rip), %xmm3
- cvtsd2ss %xmm3, %xmm0 /* SP result */
- ret
-
- .p2align 4
-L(arg_less_2pn27):
- /* Here if |x|<2^-27 */
- andps L(SP_ABS_MASK)(%rip),%xmm7 /* |x| */
- movss L(SP_ONE)(%rip), %xmm0 /* 1.0 */
- subss %xmm7, %xmm0 /* result is 1.0-|x| */
- ret
-
- .p2align 4
-L(arg_inf_or_nan):
- /* Here if |x| is Inf or NAN */
- jne L(skip_errno_setting) /* in case of x is NaN */
-
- /* Align stack to 16 bytes. */
- subq $8, %rsp
- cfi_adjust_cfa_offset (8)
- /* Here if x is Inf. Set errno to EDOM. */
- call JUMPTARGET(__errno_location)
- addq $8, %rsp
- cfi_adjust_cfa_offset (-8)
-
- movl $EDOM, (%rax)
-
- .p2align 4
-L(skip_errno_setting):
- /* Here if |x| is Inf or NAN. Continued. */
- movaps %xmm7, %xmm0 /* load x */
- subss %xmm0, %xmm0 /* Result is NaN */
- ret
-END(__cosf)
-
- .section .rodata, "a"
- .p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
- .long 0x00000000,0x00000000
- .long 0x54442d18,0x3fe921fb
- .long 0x54442d18,0x3ff921fb
- .long 0x7f3321d2,0x4002d97c
- .long 0x54442d18,0x400921fb
- .long 0x2955385e,0x400f6a7a
- .long 0x7f3321d2,0x4012d97c
- .long 0xe9bba775,0x4015fdbb
- .long 0x54442d18,0x401921fb
- .long 0xbeccb2bb,0x401c463a
- .long 0x2955385e,0x401f6a7a
- .type L(PIO4J), @object
- ASM_SIZE_DIRECTIVE(L(PIO4J))
-
- .p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
- .long 0x00000000,0x00000000
- .long 0x6c000000,0x3ff45f30
- .long 0x2a000000,0x3e3c9c88
- .long 0xa8000000,0x3c54fe13
- .long 0xd0000000,0x3aaf47d4
- .long 0x6c000000,0x38fbb81b
- .long 0xe0000000,0x3714acc9
- .long 0x7c000000,0x3560e410
- .long 0x56000000,0x33bca2c7
- .long 0xac000000,0x31fbd778
- .long 0xe0000000,0x300b7246
- .long 0xe8000000,0x2e5d2126
- .long 0x48000000,0x2c970032
- .long 0xe8000000,0x2ad77504
- .long 0xe0000000,0x290921cf
- .long 0xb0000000,0x274deb1c
- .long 0xe0000000,0x25829a73
- .long 0xbe000000,0x23fd1046
- .long 0x10000000,0x2224baed
- .long 0x8e000000,0x20709d33
- .long 0x80000000,0x1e535a2f
- .long 0x64000000,0x1cef904e
- .long 0x30000000,0x1b0d6398
- .long 0x24000000,0x1964ce7d
- .long 0x16000000,0x17b908bf
- .type L(_FPI), @object
- ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomial
- for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5. */
- .p2align 3
-L(DP_COS2_0):
- .long 0xff5cc6fd,0xbfdfffff
- .type L(DP_COS2_0), @object
- ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
-
- .p2align 3
-L(DP_COS2_1):
- .long 0xb178dac5,0x3fa55514
- .type L(DP_COS2_1), @object
- ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
-
- .p2align 3
-L(DP_ZERONE):
- .long 0x00000000,0x00000000 /* 0.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ZERONE), @object
- ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
- .p2align 3
-L(DP_ONES):
- .long 0x00000000,0x3ff00000 /* +1.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ONES), @object
- ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomial
- for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */
- .p2align 3
-L(DP_S3):
- .long 0x64e6b5b4,0x3ec71d72
- .type L(DP_S3), @object
- ASM_SIZE_DIRECTIVE(L(DP_S3))
-
- .p2align 3
-L(DP_S1):
- .long 0x10c2688b,0x3f811111
- .type L(DP_S1), @object
- ASM_SIZE_DIRECTIVE(L(DP_S1))
-
- .p2align 3
-L(DP_S4):
- .long 0x1674b58a,0xbe5a947e
- .type L(DP_S4), @object
- ASM_SIZE_DIRECTIVE(L(DP_S4))
-
- .p2align 3
-L(DP_S2):
- .long 0x8b4bd1f9,0xbf2a019f
- .type L(DP_S2),@object
- ASM_SIZE_DIRECTIVE(L(DP_S2))
-
- .p2align 3
-L(DP_S0):
- .long 0x55551cd9,0xbfc55555
- .type L(DP_S0), @object
- ASM_SIZE_DIRECTIVE(L(DP_S0))
-
-/* Coefficients of polynomial
- for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */
- .p2align 3
-L(DP_C3):
- .long 0x9ac43cc0,0x3efa00eb
- .type L(DP_C3), @object
- ASM_SIZE_DIRECTIVE(L(DP_C3))
-
- .p2align 3
-L(DP_C1):
- .long 0x545c50c7,0x3fa55555
- .type L(DP_C1), @object
- ASM_SIZE_DIRECTIVE(L(DP_C1))
-
- .p2align 3
-L(DP_C4):
- .long 0xdd8844d7,0xbe923c97
- .type L(DP_C4), @object
- ASM_SIZE_DIRECTIVE(L(DP_C4))
-
- .p2align 3
-L(DP_C2):
- .long 0x348b6874,0xbf56c16b
- .type L(DP_C2), @object
- ASM_SIZE_DIRECTIVE(L(DP_C2))
-
- .p2align 3
-L(DP_C0):
- .long 0xfffe98ae,0xbfdfffff
- .type L(DP_C0), @object
- ASM_SIZE_DIRECTIVE(L(DP_C0))
-
- .p2align 3
-L(DP_PIO4):
- .long 0x54442d18,0x3fe921fb /* Pi/4 */
- .type L(DP_PIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
- .p2align 3
-L(DP_2POW52):
- .long 0x00000000,0x43300000 /* +2^52 */
- .long 0x00000000,0xc3300000 /* -2^52 */
- .type L(DP_2POW52), @object
- ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
- .p2align 3
-L(DP_INVPIO4):
- .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
- .type L(DP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
- .p2align 3
-L(DP_PIO4HI):
- .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
- .type L(DP_PIO4HI), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
- .p2align 3
-L(DP_PIO4LO):
- .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
- .type L(DP_PIO4LO), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
- .p2align 2
-L(SP_INVPIO4):
- .long 0x3fa2f983 /* 4/Pi */
- .type L(SP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
- .p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
- .long 0xffffffff,0x7fffffff
- .long 0xffffffff,0x7fffffff
- .type L(DP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
- .p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
- .long 0x00000000,0xffffffff
- .type L(DP_HI_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
- .p2align 4
-L(SP_ABS_MASK): /* Mask for getting SP absolute value */
- .long 0x7fffffff,0x7fffffff
- .long 0x7fffffff,0x7fffffff
- .type L(SP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
-
- .p2align 2
-L(SP_ONE):
- .long 0x3f800000 /* 1.0 */
- .type L(SP_ONE), @object
- ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-weak_alias(__cosf, cosf)
diff --git a/sysdeps/x86_64/fpu/s_fabs.c b/sysdeps/x86_64/fpu/s_fabs.c
index d3a313fdf5..d1e17878d4 100644
--- a/sysdeps/x86_64/fpu/s_fabs.c
+++ b/sysdeps/x86_64/fpu/s_fabs.c
@@ -1,5 +1,5 @@
/* Absolute value of floating point number.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,10 +17,11 @@
<http://www.gnu.org/licenses/>. */
#include <math.h>
+#include <libm-alias-double.h>
double
__fabs (double x)
{
return __builtin_fabs (x);
}
-weak_alias (__fabs, fabs)
+libm_alias_double (__fabs, fabs)
diff --git a/sysdeps/x86_64/fpu/s_fabsf.c b/sysdeps/x86_64/fpu/s_fabsf.c
index e6dcda9433..2f39228560 100644
--- a/sysdeps/x86_64/fpu/s_fabsf.c
+++ b/sysdeps/x86_64/fpu/s_fabsf.c
@@ -1,5 +1,5 @@
/* Absolute value of floating point number.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,10 +17,11 @@
<http://www.gnu.org/licenses/>. */
#include <math.h>
+#include <libm-alias-float.h>
float
__fabsf (float x)
{
return __builtin_fabsf (x);
}
-weak_alias (__fabsf, fabsf)
+libm_alias_float (__fabs, fabs)
diff --git a/sysdeps/x86_64/fpu/s_fabsl.S b/sysdeps/x86_64/fpu/s_fabsl.S
index 6881ff11c7..7f03ecdccb 100644
--- a/sysdeps/x86_64/fpu/s_fabsl.S
+++ b/sysdeps/x86_64/fpu/s_fabsl.S
@@ -1,5 +1,5 @@
/* Absolute value of floating point number.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,6 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-ldouble.h>
.text
ENTRY(__fabsl)
@@ -24,4 +25,4 @@ ENTRY(__fabsl)
fabs
ret
END(__fabsl)
-weak_alias (__fabsl, fabsl)
+libm_alias_ldouble (__fabs, fabs)
diff --git a/sysdeps/x86_64/fpu/s_floorl.S b/sysdeps/x86_64/fpu/s_floorl.S
index f9ecc388df..75f8255648 100644
--- a/sysdeps/x86_64/fpu/s_floorl.S
+++ b/sysdeps/x86_64/fpu/s_floorl.S
@@ -5,26 +5,32 @@
* Public domain.
*/
+#include <libm-alias-ldouble.h>
#include <machine/asm.h>
ENTRY(__floorl)
fldt 8(%rsp)
- fstcw -4(%rsp) /* store fpu control word */
+ fnstenv -28(%rsp) /* store fpu environment */
/* We use here %edx although only the low 16 bits are defined.
But none of the operations should care and they are faster
than the 16 bit operations. */
movl $0x400,%edx /* round towards -oo */
- orl -4(%rsp),%edx
+ orl -28(%rsp),%edx
andl $0xf7ff,%edx
- movl %edx,-8(%rsp)
- fldcw -8(%rsp) /* load modified control word */
+ movl %edx,-32(%rsp)
+ fldcw -32(%rsp) /* load modified control word */
frndint /* round */
- fldcw -4(%rsp) /* restore original control word */
+ /* Preserve "invalid" exceptions from sNaN input. */
+ fnstsw
+ andl $0x1, %eax
+ orl %eax, -24(%rsp)
+
+ fldenv -28(%rsp) /* restore original environment */
ret
END (__floorl)
-weak_alias (__floorl, floorl)
+libm_alias_ldouble (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/s_fmax.S b/sysdeps/x86_64/fpu/s_fmax.S
index 02096c0aea..7cd8f1ed10 100644
--- a/sysdeps/x86_64/fpu/s_fmax.S
+++ b/sysdeps/x86_64/fpu/s_fmax.S
@@ -1,5 +1,5 @@
/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-double.h>
.text
ENTRY(__fmax)
@@ -27,9 +28,26 @@ ENTRY(__fmax)
jmp 2f
1: ucomisd %xmm1, %xmm1 // Is xmm1 a NaN?
- jp 2f // then return xmm0
+ jp 3f
+ // xmm0 is a NaN; xmm1 is not. Test if xmm0 is signaling.
+ movsd %xmm0, -8(%rsp)
+ testb $0x8, -2(%rsp)
+ jz 4f
movsd %xmm1, %xmm0 // otherwise return xmm1
+ ret
+
+3: // xmm1 is a NaN; xmm0 may or may not be.
+ ucomisd %xmm0, %xmm0
+ jp 4f
+ // xmm1 is a NaN; xmm0 is not. Test if xmm1 is signaling.
+ movsd %xmm1, -8(%rsp)
+ testb $0x8, -2(%rsp)
+ jz 4f
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ addsd %xmm1, %xmm0
2: ret
END(__fmax)
-weak_alias (__fmax, fmax)
+libm_alias_double (__fmax, fmax)
diff --git a/sysdeps/x86_64/fpu/s_fmaxf.S b/sysdeps/x86_64/fpu/s_fmaxf.S
index 28e129701e..9b932fddc2 100644
--- a/sysdeps/x86_64/fpu/s_fmaxf.S
+++ b/sysdeps/x86_64/fpu/s_fmaxf.S
@@ -1,5 +1,5 @@
/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-float.h>
.text
ENTRY(__fmaxf)
@@ -27,9 +28,26 @@ ENTRY(__fmaxf)
jmp 2f
1: ucomiss %xmm1, %xmm1 // Is xmm1 a NaN?
- jp 2f // then return xmm0
+ jp 3f
+ // xmm0 is a NaN; xmm1 is not. Test if xmm0 is signaling.
+ movss %xmm0, -4(%rsp)
+ testb $0x40, -2(%rsp)
+ jz 4f
movss %xmm1, %xmm0 // otherwise return xmm1
+ ret
+
+3: // xmm1 is a NaN; xmm0 may or may not be.
+ ucomiss %xmm0, %xmm0
+ jp 4f
+ // xmm1 is a NaN; xmm0 is not. Test if xmm1 is signaling.
+ movss %xmm1, -4(%rsp)
+ testb $0x40, -2(%rsp)
+ jz 4f
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ addss %xmm1, %xmm0
2: ret
END(__fmaxf)
-weak_alias (__fmaxf, fmaxf)
+libm_alias_float (__fmax, fmax)
diff --git a/sysdeps/x86_64/fpu/s_fmaxl.S b/sysdeps/x86_64/fpu/s_fmaxl.S
index f0c2bc0d56..3463a07083 100644
--- a/sysdeps/x86_64/fpu/s_fmaxl.S
+++ b/sysdeps/x86_64/fpu/s_fmaxl.S
@@ -1,5 +1,5 @@
/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -18,22 +18,42 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-ldouble.h>
.text
ENTRY(__fmaxl)
fldt 8(%rsp) // x
fldt 24(%rsp) // x : y
- fucomi %st(0), %st
- fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
-
- fxch
-
fucomi %st(1), %st
+ jp 2f
fcmovb %st(1), %st
fstp %st(1)
ret
+
+2: // Unordered.
+ fucomi %st(0), %st
+ jp 3f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 15(%rsp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+3: // st(0) is a NaN; st(1) may or may not be.
+ fxch
+ fucomi %st(0), %st
+ jp 4f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 31(%rsp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ faddp
+ ret
END(__fmaxl)
-weak_alias (__fmaxl, fmaxl)
+libm_alias_ldouble (__fmax, fmax)
diff --git a/sysdeps/x86_64/fpu/s_fmin.S b/sysdeps/x86_64/fpu/s_fmin.S
index fb14e2f3ed..15b6eaed90 100644
--- a/sysdeps/x86_64/fpu/s_fmin.S
+++ b/sysdeps/x86_64/fpu/s_fmin.S
@@ -1,5 +1,5 @@
/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-double.h>
.text
ENTRY(__fmin)
@@ -27,9 +28,26 @@ ENTRY(__fmin)
jmp 2f
1: ucomisd %xmm1, %xmm1 // Is xmm1 a NaN?
- jp 2f // then return xmm0
+ jp 3f
+ // xmm0 is a NaN; xmm1 is not. Test if xmm0 is signaling.
+ movsd %xmm0, -8(%rsp)
+ testb $0x8, -2(%rsp)
+ jz 4f
movsd %xmm1, %xmm0 // otherwise return xmm1
+ ret
+
+3: // xmm1 is a NaN; xmm0 may or may not be.
+ ucomisd %xmm0, %xmm0
+ jp 4f
+ // xmm1 is a NaN; xmm0 is not. Test if xmm1 is signaling.
+ movsd %xmm1, -8(%rsp)
+ testb $0x8, -2(%rsp)
+ jz 4f
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ addsd %xmm1, %xmm0
2: ret
END(__fmin)
-weak_alias (__fmin, fmin)
+libm_alias_double (__fmin, fmin)
diff --git a/sysdeps/x86_64/fpu/s_fminf.S b/sysdeps/x86_64/fpu/s_fminf.S
index c8d6d0fd33..28e26aead5 100644
--- a/sysdeps/x86_64/fpu/s_fminf.S
+++ b/sysdeps/x86_64/fpu/s_fminf.S
@@ -1,5 +1,5 @@
/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-float.h>
.text
ENTRY(__fminf)
@@ -27,9 +28,26 @@ ENTRY(__fminf)
jmp 2f
1: ucomiss %xmm1, %xmm1 // Is xmm1 a NaN?
- jp 2f // then return xmm0
+ jp 3f
+ // xmm0 is a NaN; xmm1 is not. Test if xmm0 is signaling.
+ movss %xmm0, -4(%rsp)
+ testb $0x40, -2(%rsp)
+ jz 4f
movss %xmm1, %xmm0 // otherwise return xmm1
+ ret
+
+3: // xmm1 is a NaN; xmm0 may or may not be.
+ ucomiss %xmm0, %xmm0
+ jp 4f
+ // xmm1 is a NaN; xmm0 is not. Test if xmm1 is signaling.
+ movss %xmm1, -4(%rsp)
+ testb $0x40, -2(%rsp)
+ jz 4f
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ addss %xmm1, %xmm0
2: ret
END(__fminf)
-weak_alias (__fminf, fminf)
+libm_alias_float (__fmin, fmin)
diff --git a/sysdeps/x86_64/fpu/s_fminl.S b/sysdeps/x86_64/fpu/s_fminl.S
index f1a06d29d7..df81762449 100644
--- a/sysdeps/x86_64/fpu/s_fminl.S
+++ b/sysdeps/x86_64/fpu/s_fminl.S
@@ -1,5 +1,5 @@
/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -18,20 +18,42 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-ldouble.h>
.text
ENTRY(__fminl)
fldt 8(%rsp) // x
fldt 24(%rsp) // x : y
- fucomi %st(0), %st
- fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
-
fucomi %st(1), %st
+ jp 2f
fcmovnb %st(1), %st
fstp %st(1)
ret
+
+2: // Unordered.
+ fucomi %st(0), %st
+ jp 3f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 15(%rsp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+3: // st(0) is a NaN; st(1) may or may not be.
+ fxch
+ fucomi %st(0), %st
+ jp 4f
+ // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
+ testb $0x40, 31(%rsp)
+ jz 4f
+ fstp %st(1)
+ ret
+
+4: // Both arguments are NaNs, or one is a signaling NaN.
+ faddp
+ ret
END(__fminl)
-weak_alias (__fminl, fminl)
+libm_alias_ldouble (__fmin, fmin)
diff --git a/sysdeps/x86_64/fpu/s_llrint.S b/sysdeps/x86_64/fpu/s_llrint.S
index 6634c653ea..7b93724e46 100644
--- a/sysdeps/x86_64/fpu/s_llrint.S
+++ b/sysdeps/x86_64/fpu/s_llrint.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -19,14 +19,15 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-double.h>
.text
ENTRY(__llrint)
cvtsd2si %xmm0,%rax
ret
END(__llrint)
-weak_alias (__llrint, llrint)
+libm_alias_double (__llrint, llrint)
#ifndef __ILP32__
strong_alias (__llrint, __lrint)
-weak_alias (__llrint, lrint)
+libm_alias_double (__llrint, lrint)
#endif
diff --git a/sysdeps/x86_64/fpu/s_llrintf.S b/sysdeps/x86_64/fpu/s_llrintf.S
index 5ac03dffd9..b6088de1ff 100644
--- a/sysdeps/x86_64/fpu/s_llrintf.S
+++ b/sysdeps/x86_64/fpu/s_llrintf.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -19,14 +19,15 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-float.h>
.text
ENTRY(__llrintf)
cvtss2si %xmm0,%rax
ret
END(__llrintf)
-weak_alias (__llrintf, llrintf)
+libm_alias_float (__llrint, llrint)
#ifndef __ILP32__
strong_alias (__llrintf, __lrintf)
-weak_alias (__llrintf, lrintf)
+libm_alias_float (__llrint, lrint)
#endif
diff --git a/sysdeps/x86_64/fpu/s_llrintl.S b/sysdeps/x86_64/fpu/s_llrintl.S
index 5f4d827dff..49f6ff1961 100644
--- a/sysdeps/x86_64/fpu/s_llrintl.S
+++ b/sysdeps/x86_64/fpu/s_llrintl.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-ldouble.h>
.text
ENTRY(__llrintl)
@@ -27,8 +28,8 @@ ENTRY(__llrintl)
movq -8(%rsp),%rax
ret
END(__llrintl)
-weak_alias (__llrintl, llrintl)
+libm_alias_ldouble (__llrint, llrint)
#ifndef __ILP32__
strong_alias (__llrintl, __lrintl)
-weak_alias (__llrintl, lrintl)
+libm_alias_ldouble (__llrint, lrint)
#endif
diff --git a/sysdeps/x86_64/fpu/s_log1pl.S b/sysdeps/x86_64/fpu/s_log1pl.S
index e83f64d3c0..947e5e4552 100644
--- a/sysdeps/x86_64/fpu/s_log1pl.S
+++ b/sysdeps/x86_64/fpu/s_log1pl.S
@@ -68,6 +68,7 @@ ENTRY(__log1pl)
jnz 4b // in case x is ±Inf
fstp %st(1)
fstp %st(1)
+ fadd %st(0)
ret
END (__log1pl)
diff --git a/sysdeps/x86_64/fpu/s_nearbyintl.S b/sysdeps/x86_64/fpu/s_nearbyintl.S
index 76d41bdd52..80508bdbee 100644
--- a/sysdeps/x86_64/fpu/s_nearbyintl.S
+++ b/sysdeps/x86_64/fpu/s_nearbyintl.S
@@ -4,15 +4,12 @@
*/
/* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>. */
+#include <libm-alias-ldouble.h>
#include <machine/asm.h>
ENTRY(__nearbyintl)
fldt 8(%rsp)
fnstenv -28(%rsp)
- movl -28(%rsp), %eax
- orl $0x20, %eax
- movl %eax, -32(%rsp)
- fldcw -32(%rsp)
frndint
fnstsw
andl $0x1, %eax
@@ -20,4 +17,4 @@ ENTRY(__nearbyintl)
fldenv -28(%rsp)
ret
END (__nearbyintl)
-weak_alias (__nearbyintl, nearbyintl)
+libm_alias_ldouble (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/s_signbit.S b/sysdeps/x86_64/fpu/s_signbit.S
index 92a79d3123..becfc646cb 100644
--- a/sysdeps/x86_64/fpu/s_signbit.S
+++ b/sysdeps/x86_64/fpu/s_signbit.S
@@ -1,5 +1,5 @@
/* Return nonzero value if number is negative.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redha.com>, 2009.
diff --git a/sysdeps/x86_64/fpu/s_signbitf.S b/sysdeps/x86_64/fpu/s_signbitf.S
index 885645372e..c7be6a6329 100644
--- a/sysdeps/x86_64/fpu/s_signbitf.S
+++ b/sysdeps/x86_64/fpu/s_signbitf.S
@@ -1,5 +1,5 @@
/* Return nonzero value if number is negative.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redha.com>, 2009.
diff --git a/sysdeps/x86_64/fpu/s_sincosf.S b/sysdeps/x86_64/fpu/s_sincosf.S
index 5e7cbe57e3..2086e8ca5c 100644
--- a/sysdeps/x86_64/fpu/s_sincosf.S
+++ b/sysdeps/x86_64/fpu/s_sincosf.S
@@ -1,5 +1,5 @@
/* Optimized sincosf function.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,8 +17,8 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
+#include <errno.h>
+#include <libm-alias-float.h>
/* Short algorithm description:
*
@@ -561,4 +561,6 @@ L(SP_ONE):
.type L(SP_ONE), @object
ASM_SIZE_DIRECTIVE(L(SP_ONE))
-weak_alias(__sincosf, sincosf)
+#ifndef __sincosf
+libm_alias_float (__sincos, sincos)
+#endif
diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S
deleted file mode 100644
index c980c6e207..0000000000
--- a/sysdeps/x86_64/fpu/s_sinf.S
+++ /dev/null
@@ -1,559 +0,0 @@
-/* Optimized sinf function.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- * 1) if |x| == 0: return x.
- * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed.
- * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
- * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
- * 5) if |x| < 9*Pi/4:
- * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
- * t=|x|-j*Pi/4.
- * 5.2) Reconstruction:
- * s = sign(x) * (-1.0)^((n>>2)&1)
- * if(n&2 != 0) {
- * using cos(t) polynomial for |t|<Pi/4, result is
- * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
- * } else {
- * using sin(t) polynomial for |t|<Pi/4, result is
- * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
- * }
- * 6) if |x| < 2^23, large args:
- * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
- * t=|x|-j*Pi/4.
- * 6.2) Reconstruction same as (5.2).
- * 7) if |x| >= 2^23, very large args:
- * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
- * t=|x|-j*Pi/4.
- * 7.2) Reconstruction same as (5.2).
- * 8) if x is Inf, return x-x, and set errno=EDOM.
- * 9) if x is NaN, return x-x.
- *
- * Special cases:
- * sin(+-0) = +-0 not raising inexact/underflow,
- * sin(subnormal) raises inexact/underflow,
- * sin(min_normalized) raises inexact/underflow,
- * sin(normalized) raises inexact,
- * sin(Inf) = NaN, raises invalid, sets errno to EDOM,
- * sin(NaN) = NaN.
- */
-
- .text
-ENTRY(__sinf)
- /* Input: single precision x in %xmm0 */
-
- movd %xmm0, %eax /* Bits of x */
- movaps %xmm0, %xmm7 /* Copy of x */
- cvtss2sd %xmm0, %xmm0 /* DP x */
- movss L(SP_ABS_MASK)(%rip), %xmm3
- movl %eax, %edi /* Copy of x bits */
- andl $0x7fffffff, %eax /* |x| */
-
- cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
- jb L(arg_less_pio4)
-
- /* Here if |x|>=Pi/4 */
- andps %xmm7, %xmm3 /* SP |x| */
- andpd L(DP_ABS_MASK)(%rip),%xmm0 /* DP |x| */
- movss L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */
-
- cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
- jae L(large_args)
-
- /* Here if Pi/4<=|x|<9*Pi/4 */
- mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
- movl %edi, %ecx /* Load x */
- cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
- lea L(PIO4J)(%rip), %rsi
- shrl $31, %ecx /* sign of x */
- addl $1, %eax /* k+1 */
- movl $0x0e, %edx
- andl %eax, %edx /* j = (k+1)&0x0e */
- subsd (%rsi,%rdx,8), %xmm0 /* t = |x| - j * Pi/4 */
-
-L(reconstruction):
- /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
- testl $2, %eax /* n&2 != 0? */
- jz L(sin_poly)
-
-/*L(cos_poly):*/
- /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
- * y = t*t; z = y*y;
- * s = sign(x) * (-1.0)^((n>>2)&1)
- * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
- */
- shrl $2, %eax /* n>>2 */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- andl $1, %eax /* (n>>2)&1 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=t^4 */
-
- movsd L(DP_C4)(%rip), %xmm4 /* C4 */
- mulsd %xmm0, %xmm4 /* z*C4 */
- xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */
- movsd L(DP_C3)(%rip), %xmm3 /* C3 */
- mulsd %xmm0, %xmm3 /* z*C3 */
- lea L(DP_ONES)(%rip), %rsi
- addsd L(DP_C2)(%rip), %xmm4 /* C2+z*C4 */
- mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
- addsd L(DP_C1)(%rip), %xmm3 /* C1+z*C3 */
- mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
- addsd L(DP_C0)(%rip), %xmm4 /* C0+z*(C2+z*C4) */
- mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
-
- /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- addsd %xmm4, %xmm3
- /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- addsd L(DP_ONES)(%rip), %xmm3
-
- mulsd (%rsi,%rcx,8), %xmm3 /* DP result */
- cvtsd2ss %xmm3, %xmm0 /* SP result */
- ret
-
- .p2align 4
-L(sin_poly):
- /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
- * y = t*t; z = y*y;
- * s = sign(x) * (-1.0)^((n>>2)&1)
- * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
- */
-
- movaps %xmm0, %xmm4 /* t */
- shrl $2, %eax /* n>>2 */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- andl $1, %eax /* (n>>2)&1 */
- movaps %xmm0, %xmm1 /* y */
- xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */
- mulsd %xmm0, %xmm0 /* z=t^4 */
-
- movsd L(DP_S4)(%rip), %xmm2 /* S4 */
- mulsd %xmm0, %xmm2 /* z*S4 */
- movsd L(DP_S3)(%rip), %xmm3 /* S3 */
- mulsd %xmm0, %xmm3 /* z*S3 */
- lea L(DP_ONES)(%rip), %rsi
- addsd L(DP_S2)(%rip), %xmm2 /* S2+z*S4 */
- mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
- addsd L(DP_S1)(%rip), %xmm3 /* S1+z*S3 */
- mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
- addsd L(DP_S0)(%rip), %xmm2 /* S0+z*(S2+z*S4) */
- mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
- /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
- mulsd (%rsi,%rcx,8), %xmm4
- /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm2, %xmm3
- /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- mulsd %xmm4, %xmm3
- /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm4, %xmm3
- cvtsd2ss %xmm3, %xmm0 /* SP result */
- ret
-
- .p2align 4
-L(large_args):
- /* Here if |x|>=9*Pi/4 */
- cmpl $0x7f800000, %eax /* x is Inf or NaN? */
- jae L(arg_inf_or_nan)
-
- /* Here if finite |x|>=9*Pi/4 */
- cmpl $0x4b000000, %eax /* |x|<2^23? */
- jae L(very_large_args)
-
- /* Here if 9*Pi/4<=|x|<2^23 */
- movsd L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
- mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
- cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
- addl $1, %eax /* k+1 */
- movl %eax, %edx
- andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
- cvtsi2sdl %edx, %xmm4 /* DP j */
- movl %edi, %ecx /* Load x */
- movsd L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
- shrl $31, %ecx /* sign bit of x */
- mulsd %xmm4, %xmm2 /* -j*PIO4HI */
- movsd L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
- addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
- mulsd %xmm3, %xmm4 /* j*PIO4LO */
- addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
- jmp L(reconstruction)
-
- .p2align 4
-L(very_large_args):
- /* Here if finite |x|>=2^23 */
-
- /* bitpos = (ix>>23) - BIAS_32 + 59; */
- shrl $23, %eax /* eb = biased exponent of x */
- /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
- subl $68, %eax
- movl $28, %ecx /* %cl=28 */
- movl %eax, %edx /* bitpos copy */
-
- /* j = bitpos/28; */
- div %cl /* j in register %al=%ax/%cl */
- movapd %xmm0, %xmm3 /* |x| */
- /* clear unneeded remainder from %ah */
- andl $0xff, %eax
-
- imull $28, %eax, %ecx /* j*28 */
- lea L(_FPI)(%rip), %rsi
- movsd L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
- movapd %xmm0, %xmm5 /* |x| */
- mulsd -16(%rsi,%rax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
- movapd %xmm0, %xmm1 /* |x| */
- mulsd -8(%rsi,%rax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
- mulsd (%rsi,%rax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
- addl $19, %ecx /* j*28+19 */
- mulsd 8(%rsi,%rax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
- cmpl %ecx, %edx /* bitpos>=j*28+19? */
- jl L(very_large_skip1)
-
- /* Here if bitpos>=j*28+19 */
- andpd %xmm3, %xmm4 /* HI(tmp3) */
- subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
- movsd L(DP_2POW52)(%rip), %xmm6
- movapd %xmm5, %xmm2 /* tmp2 copy */
- addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
- movl $1, %edx
- addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
- movsd 8+L(DP_2POW52)(%rip), %xmm4
- movd %xmm6, %eax /* k = I64_LO(tmp6); */
- addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
- movl %edi, %ecx /* Load x */
- comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
- jbe L(very_large_skip2)
-
- /* Here if tmp4 > tmp5 */
- subl $1, %eax /* k-- */
- addsd 8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
-L(very_large_skip2):
-
- andl %eax, %edx /* k&1 */
- lea L(DP_ZERONE)(%rip), %rsi
- subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
- addsd (%rsi,%rdx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
- addsd %xmm2, %xmm3 /* t += tmp2 */
- shrl $31, %ecx /* sign of x */
- addsd %xmm3, %xmm0 /* t += tmp0 */
- addl $1, %eax /* n=k+1 */
- addsd %xmm1, %xmm0 /* t += tmp1 */
- mulsd L(DP_PIO4)(%rip), %xmm0 /* t *= PI04 */
-
- jmp L(reconstruction) /* end of very_large_args peth */
-
- .p2align 4
-L(arg_less_pio4):
- /* Here if |x|<Pi/4 */
- cmpl $0x3d000000, %eax /* |x|<2^-5? */
- jl L(arg_less_2pn5)
-
- /* Here if 2^-5<=|x|<Pi/4 */
- movaps %xmm0, %xmm3 /* x */
- mulsd %xmm0, %xmm0 /* y=x^2 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=x^4 */
- movsd L(DP_S4)(%rip), %xmm4 /* S4 */
- mulsd %xmm0, %xmm4 /* z*S4 */
- movsd L(DP_S3)(%rip), %xmm5 /* S3 */
- mulsd %xmm0, %xmm5 /* z*S3 */
- addsd L(DP_S2)(%rip), %xmm4 /* S2+z*S4 */
- mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */
- addsd L(DP_S1)(%rip), %xmm5 /* S1+z*S3 */
- mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */
- addsd L(DP_S0)(%rip), %xmm4 /* S0+z*(S2+z*S4) */
- mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
- mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
- mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
- /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm5, %xmm4
- /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm4, %xmm3
- cvtsd2ss %xmm3, %xmm0 /* SP result */
- ret
-
- .p2align 4
-L(arg_less_2pn5):
- /* Here if |x|<2^-5 */
- cmpl $0x32000000, %eax /* |x|<2^-27? */
- jl L(arg_less_2pn27)
-
- /* Here if 2^-27<=|x|<2^-5 */
- movaps %xmm0, %xmm1 /* DP x */
- mulsd %xmm0, %xmm0 /* DP x^2 */
- movsd L(DP_SIN2_1)(%rip), %xmm3 /* DP DP_SIN2_1 */
- mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
- addsd L(DP_SIN2_0)(%rip), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
- mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
- mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
- addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
- cvtsd2ss %xmm3, %xmm0 /* SP result */
- ret
-
- .p2align 4
-L(arg_less_2pn27):
- cmpl $0, %eax /* x=0? */
- je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */
- /* Here if |x|<2^-27 */
- /*
- * Special cases here:
- * sin(subnormal) raises inexact/underflow
- * sin(min_normalized) raises inexact/underflow
- * sin(normalized) raises inexact
- */
- movaps %xmm0, %xmm3 /* Copy of DP x */
- mulsd L(DP_SMALL)(%rip), %xmm0 /* x*DP_SMALL */
- subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */
- cvtsd2ss %xmm3, %xmm0 /* Result converted to SP */
- ret
-
- .p2align 4
-L(arg_zero):
- movaps %xmm7, %xmm0 /* SP x */
- ret
-
- .p2align 4
-L(arg_inf_or_nan):
- /* Here if |x| is Inf or NAN */
- jne L(skip_errno_setting) /* in case of x is NaN */
-
- /* Align stack to 16 bytes. */
- subq $8, %rsp
- cfi_adjust_cfa_offset (8)
- /* Here if x is Inf. Set errno to EDOM. */
- call JUMPTARGET(__errno_location)
- addq $8, %rsp
- cfi_adjust_cfa_offset (-8)
-
- movl $EDOM, (%rax)
-
- .p2align 4
-L(skip_errno_setting):
- /* Here if |x| is Inf or NAN. Continued. */
- movaps %xmm7, %xmm0 /* load x */
- subss %xmm0, %xmm0 /* Result is NaN */
- ret
-END(__sinf)
-
- .section .rodata, "a"
- .p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
- .long 0x00000000,0x00000000
- .long 0x54442d18,0x3fe921fb
- .long 0x54442d18,0x3ff921fb
- .long 0x7f3321d2,0x4002d97c
- .long 0x54442d18,0x400921fb
- .long 0x2955385e,0x400f6a7a
- .long 0x7f3321d2,0x4012d97c
- .long 0xe9bba775,0x4015fdbb
- .long 0x54442d18,0x401921fb
- .long 0xbeccb2bb,0x401c463a
- .long 0x2955385e,0x401f6a7a
- .type L(PIO4J), @object
- ASM_SIZE_DIRECTIVE(L(PIO4J))
-
- .p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
- .long 0x00000000,0x00000000
- .long 0x6c000000,0x3ff45f30
- .long 0x2a000000,0x3e3c9c88
- .long 0xa8000000,0x3c54fe13
- .long 0xd0000000,0x3aaf47d4
- .long 0x6c000000,0x38fbb81b
- .long 0xe0000000,0x3714acc9
- .long 0x7c000000,0x3560e410
- .long 0x56000000,0x33bca2c7
- .long 0xac000000,0x31fbd778
- .long 0xe0000000,0x300b7246
- .long 0xe8000000,0x2e5d2126
- .long 0x48000000,0x2c970032
- .long 0xe8000000,0x2ad77504
- .long 0xe0000000,0x290921cf
- .long 0xb0000000,0x274deb1c
- .long 0xe0000000,0x25829a73
- .long 0xbe000000,0x23fd1046
- .long 0x10000000,0x2224baed
- .long 0x8e000000,0x20709d33
- .long 0x80000000,0x1e535a2f
- .long 0x64000000,0x1cef904e
- .long 0x30000000,0x1b0d6398
- .long 0x24000000,0x1964ce7d
- .long 0x16000000,0x17b908bf
- .type L(_FPI), @object
- ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomial
- for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5. */
- .p2align 3
-L(DP_SIN2_0):
- .long 0x5543d49d,0xbfc55555
- .type L(DP_SIN2_0), @object
- ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
-
- .p2align 3
-L(DP_SIN2_1):
- .long 0x75cec8c5,0x3f8110f4
- .type L(DP_SIN2_1), @object
- ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
-
- .p2align 3
-L(DP_ZERONE):
- .long 0x00000000,0x00000000 /* 0.0 */
- .long 0x00000000,0xbff00000 /* 1.0 */
- .type L(DP_ZERONE), @object
- ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
- .p2align 3
-L(DP_ONES):
- .long 0x00000000,0x3ff00000 /* +1.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ONES), @object
- ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomial
- for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */
- .p2align 3
-L(DP_S3):
- .long 0x64e6b5b4,0x3ec71d72
- .type L(DP_S3), @object
- ASM_SIZE_DIRECTIVE(L(DP_S3))
-
- .p2align 3
-L(DP_S1):
- .long 0x10c2688b,0x3f811111
- .type L(DP_S1), @object
- ASM_SIZE_DIRECTIVE(L(DP_S1))
-
- .p2align 3
-L(DP_S4):
- .long 0x1674b58a,0xbe5a947e
- .type L(DP_S4), @object
- ASM_SIZE_DIRECTIVE(L(DP_S4))
-
- .p2align 3
-L(DP_S2):
- .long 0x8b4bd1f9,0xbf2a019f
- .type L(DP_S2), @object
- ASM_SIZE_DIRECTIVE(L(DP_S2))
-
- .p2align 3
-L(DP_S0):
- .long 0x55551cd9,0xbfc55555
- .type L(DP_S0), @object
- ASM_SIZE_DIRECTIVE(L(DP_S0))
-
- .p2align 3
-L(DP_SMALL):
- .long 0x00000000,0x3cd00000 /* 2^(-50) */
- .type L(DP_SMALL), @object
- ASM_SIZE_DIRECTIVE(L(DP_SMALL))
-
-/* Coefficients of polynomial
- for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */
- .p2align 3
-L(DP_C3):
- .long 0x9ac43cc0,0x3efa00eb
- .type L(DP_C3), @object
- ASM_SIZE_DIRECTIVE(L(DP_C3))
-
- .p2align 3
-L(DP_C1):
- .long 0x545c50c7,0x3fa55555
- .type L(DP_C1), @object
- ASM_SIZE_DIRECTIVE(L(DP_C1))
-
- .p2align 3
-L(DP_C4):
- .long 0xdd8844d7,0xbe923c97
- .type L(DP_C4), @object
- ASM_SIZE_DIRECTIVE(L(DP_C4))
-
- .p2align 3
-L(DP_C2):
- .long 0x348b6874,0xbf56c16b
- .type L(DP_C2), @object
- ASM_SIZE_DIRECTIVE(L(DP_C2))
-
- .p2align 3
-L(DP_C0):
- .long 0xfffe98ae,0xbfdfffff
- .type L(DP_C0), @object
- ASM_SIZE_DIRECTIVE(L(DP_C0))
-
- .p2align 3
-L(DP_PIO4):
- .long 0x54442d18,0x3fe921fb /* Pi/4 */
- .type L(DP_PIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
- .p2align 3
-L(DP_2POW52):
- .long 0x00000000,0x43300000 /* +2^52 */
- .long 0x00000000,0xc3300000 /* -2^52 */
- .type L(DP_2POW52), @object
- ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
- .p2align 3
-L(DP_INVPIO4):
- .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
- .type L(DP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
- .p2align 3
-L(DP_PIO4HI):
- .long 0x54000000,0xbfe921fb /* High part of Pi/4 */
- .type L(DP_PIO4HI), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
- .p2align 3
-L(DP_PIO4LO):
- .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */
- .type L(DP_PIO4LO), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
- .p2align 2
-L(SP_INVPIO4):
- .long 0x3fa2f983 /* 4/Pi */
- .type L(SP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
- .p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
- .long 0xffffffff,0x7fffffff
- .long 0xffffffff,0x7fffffff
- .type L(DP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
- .p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
- .long 0x00000000,0xffffffff
- .type L(DP_HI_MASK),@object
- ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
- .p2align 4
-L(SP_ABS_MASK): /* Mask for getting SP absolute value */
- .long 0x7fffffff,0x7fffffff
- .long 0x7fffffff,0x7fffffff
- .type L(SP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
-
-weak_alias(__sinf, sinf)
diff --git a/sysdeps/x86_64/fpu/s_truncl.S b/sysdeps/x86_64/fpu/s_truncl.S
index c37cf00241..22427ece00 100644
--- a/sysdeps/x86_64/fpu/s_truncl.S
+++ b/sysdeps/x86_64/fpu/s_truncl.S
@@ -1,5 +1,5 @@
/* Truncate long double value.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -17,17 +17,21 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <libm-alias-ldouble.h>
#include <machine/asm.h>
ENTRY(__truncl)
fldt 8(%rsp)
- fstcw -4(%rsp)
+ fnstenv -28(%rsp)
movl $0xc00, %edx
- orl -4(%rsp), %edx
- movl %edx, -8(%rsp)
- fldcw -8(%rsp)
+ orl -28(%rsp), %edx
+ movl %edx, -32(%rsp)
+ fldcw -32(%rsp)
frndint
- fldcw -4(%rsp)
+ fnstsw
+ andl $0x1, %eax
+ orl %eax, -24(%rsp)
+ fldenv -28(%rsp)
ret
END(__truncl)
-weak_alias (__truncl, truncl)
+libm_alias_ldouble (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/svml_d_cos2_core.S
index 7f62d29917..111548367b 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos2_core.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
index b92ff13b86..28b31d510c 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
index a3da721e35..988d0650ca 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
@@ -1,5 +1,5 @@
/* Function cos vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/svml_d_cos8_core.S
index e5d986d11a..830776b5d2 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos8_core.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with AVX-512, wrapper to AVX2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/svml_d_exp2_core.S
index 9e511037a1..e19ddb7f3b 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp2_core.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,7 +21,7 @@
.text
ENTRY (_ZGVbN2v_exp)
-WRAPPER_IMPL_SSE2 exp
+WRAPPER_IMPL_SSE2 __exp_finite
END (_ZGVbN2v_exp)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/svml_d_exp4_core.S
index 8cac8adbc7..341fea8f30 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp4_core.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
index 1a0fbf574a..39e6fcf228 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
@@ -1,5 +1,5 @@
/* Function exp vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/svml_d_exp8_core.S
index 2486e888a4..94edc01fcb 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp8_core.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.S b/sysdeps/x86_64/fpu/svml_d_exp_data.S
index 6d1acbdd21..5e229c9bcc 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp_data.S
@@ -1,5 +1,5 @@
/* Data for vector function exp.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.h b/sysdeps/x86_64/fpu/svml_d_exp_data.h
index f993403d47..a3721ce137 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_exp_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for function exp.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log2_core.S b/sysdeps/x86_64/fpu/svml_d_log2_core.S
index 8ea40fee56..41522f2069 100644
--- a/sysdeps/x86_64/fpu/svml_d_log2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log2_core.S
@@ -1,5 +1,5 @@
/* Function log vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,7 +21,7 @@
.text
ENTRY (_ZGVbN2v_log)
-WRAPPER_IMPL_SSE2 log
+WRAPPER_IMPL_SSE2 __log_finite
END (_ZGVbN2v_log)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core.S b/sysdeps/x86_64/fpu/svml_d_log4_core.S
index 72813d8921..5857b45aa0 100644
--- a/sysdeps/x86_64/fpu/svml_d_log4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log4_core.S
@@ -1,5 +1,5 @@
/* Function log vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
index 6ca1139931..bab3ba9877 100644
--- a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
@@ -1,5 +1,5 @@
/* Function log vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log8_core.S b/sysdeps/x86_64/fpu/svml_d_log8_core.S
index 6850fd9a44..bb3523ee0d 100644
--- a/sysdeps/x86_64/fpu/svml_d_log8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log8_core.S
@@ -1,5 +1,5 @@
/* Function log vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.S b/sysdeps/x86_64/fpu/svml_d_log_data.S
index 9ab541b23f..0514551ccf 100644
--- a/sysdeps/x86_64/fpu/svml_d_log_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_log_data.S
@@ -1,5 +1,5 @@
/* Data for function log.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.h b/sysdeps/x86_64/fpu/svml_d_log_data.h
index 30c2b54a4b..a317c7b845 100644
--- a/sysdeps/x86_64/fpu/svml_d_log_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_log_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for function log.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/svml_d_pow2_core.S
index b25515c825..b2451b2ed5 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow2_core.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,7 +21,7 @@
.text
ENTRY (_ZGVbN2vv_pow)
-WRAPPER_IMPL_SSE2_ff pow
+WRAPPER_IMPL_SSE2_ff __pow_finite
END (_ZGVbN2vv_pow)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/svml_d_pow4_core.S
index 547993799e..1520ba1d45 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow4_core.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
index 4e4e9867b4..d4b265c91a 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
@@ -1,5 +1,5 @@
/* Function pow vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/svml_d_pow8_core.S
index 372e5a9c83..15292ccebd 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow8_core.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.S b/sysdeps/x86_64/fpu/svml_d_pow_data.S
index 8481f95455..9e5f99c25e 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow_data.S
@@ -1,5 +1,5 @@
/* Data for function pow.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.h b/sysdeps/x86_64/fpu/svml_d_pow_data.h
index 239ba96984..55b573b2a7 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_pow_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for function pow.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/svml_d_sin2_core.S
index f6ec13104b..6485e0819f 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin2_core.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/svml_d_sin4_core.S
index 95a1dec6f6..7c7c426451 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin4_core.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
index 29d1526a12..a8200dfc58 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
@@ -1,5 +1,5 @@
/* Function sin vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/svml_d_sin8_core.S
index abd86b3d98..7f07a41ba1 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin8_core.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with AVX-512, wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
index 74afa0a677..ebf9e25aca 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,89 @@
#include "svml_d_wrapper_impl.h"
.text
-ENTRY (_ZGVbN2vvv_sincos)
+ENTRY (_ZGVbN2vl8l8_sincos)
WRAPPER_IMPL_SSE2_fFF sincos
+END (_ZGVbN2vl8l8_sincos)
+libmvec_hidden_def (_ZGVbN2vl8l8_sincos)
+
+/* SSE2 ISA version as wrapper to scalar (for vector
+ function declared with #pragma omp declare simd notinbranch). */
+.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
+#ifndef __ILP32__
+ subq $88, %rsp
+ cfi_adjust_cfa_offset(88)
+ movaps %xmm0, 64(%rsp)
+ lea (%rsp), %rdi
+ movdqa %xmm1, 32(%rdi)
+ lea 16(%rsp), %rsi
+ movdqa %xmm2, 32(%rsi)
+ call JUMPTARGET(\callee)
+ movsd 72(%rsp), %xmm0
+ lea 8(%rsp), %rdi
+ lea 24(%rsp), %rsi
+ call JUMPTARGET(\callee)
+ movq 32(%rsp), %rdx
+ movq 48(%rsp), %rsi
+ movq 40(%rsp), %r8
+ movq 56(%rsp), %r10
+ movq (%rsp), %rax
+ movq 16(%rsp), %rcx
+ movq 8(%rsp), %rdi
+ movq 24(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ addq $88, %rsp
+ cfi_adjust_cfa_offset(-88)
+ ret
+#else
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset 6, -16
+ pushq %rbx
+ .cfi_def_cfa_offset 24
+ .cfi_offset 3, -24
+ subl $88, %esp
+ .cfi_def_cfa_offset 112
+ leal 64(%rsp), %esi
+ movaps %xmm1, 32(%esp)
+ leal 48(%rsp), %edi
+ movaps %xmm2, 16(%esp)
+ movq %rsi, %rbp
+ movq %rdi, %rbx
+ movaps %xmm0, (%esp)
+ call JUMPTARGET(\callee)
+ movupd 8(%esp), %xmm0
+ leal 8(%rbp), %esi
+ leal 8(%rbx), %edi
+ call JUMPTARGET(\callee)
+ movdqa 32(%esp), %xmm1
+ movsd 48(%esp), %xmm0
+ movq %xmm1, %rax
+ movdqa 16(%esp), %xmm2
+ movsd %xmm0, (%eax)
+ movsd 56(%esp), %xmm0
+ pextrd $1, %xmm1, %eax
+ movsd %xmm0, (%eax)
+ movsd 64(%esp), %xmm0
+ movq %xmm2, %rax
+ movsd %xmm0, (%eax)
+ movsd 72(%esp), %xmm0
+ pextrd $1, %xmm2, %eax
+ movsd %xmm0, (%eax)
+ addl $88, %esp
+ .cfi_def_cfa_offset 24
+ popq %rbx
+ .cfi_def_cfa_offset 16
+ popq %rbp
+ .cfi_def_cfa_offset 8
+ ret
+#endif
+.endm
+
+ENTRY (_ZGVbN2vvv_sincos)
+WRAPPER_IMPL_SSE2_fFF_vvv sincos
END (_ZGVbN2vvv_sincos)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
index 2c0b011fb3..626a2b3a7b 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,131 @@
#include "svml_d_wrapper_impl.h"
.text
+ENTRY (_ZGVdN4vl8l8_sincos)
+WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
+END (_ZGVdN4vl8l8_sincos)
+libmvec_hidden_def (_ZGVdN4vl8l8_sincos)
+
+/* AVX2 ISA version as wrapper to SSE ISA version (for vector
+ function declared with #pragma omp declare simd notinbranch). */
+.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
+#ifndef __ILP32__
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-32, %rsp
+ subq $160, %rsp
+ vmovupd %ymm0, 128(%rsp)
+ lea (%rsp), %rdi
+ vmovdqu %ymm1, 64(%rdi)
+ vmovdqu %ymm2, 96(%rdi)
+ lea 32(%rsp), %rsi
+ vzeroupper
+ call HIDDEN_JUMPTARGET(\callee)
+ vmovupd 144(%rsp), %xmm0
+ lea 16(%rsp), %rdi
+ lea 48(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq 64(%rsp), %rdx
+ movq 96(%rsp), %rsi
+ movq 72(%rsp), %r8
+ movq 104(%rsp), %r10
+ movq (%rsp), %rax
+ movq 32(%rsp), %rcx
+ movq 8(%rsp), %rdi
+ movq 40(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq 80(%rsp), %rax
+ movq 112(%rsp), %rcx
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ movq 88(%rsp), %rdi
+ movq 120(%rsp), %r9
+ movq 16(%rsp), %r11
+ movq 48(%rsp), %rdx
+ movq 24(%rsp), %rsi
+ movq 56(%rsp), %r8
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq %rsi, (%rdi)
+ movq %r8, (%r9)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-32, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r12
+ leal -80(%rbp), %esi
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x70,0x6
+ .cfi_escape 0x10,0xc,0x2,0x76,0x78
+ leal -112(%rbp), %edi
+ movq %rsi, %r12
+ pushq %rbx
+ .cfi_escape 0x10,0x3,0x2,0x76,0x68
+ movq %rdi, %rbx
+ subl $152, %esp
+ vmovaps %xmm1, -128(%ebp)
+ vmovaps %xmm2, -144(%ebp)
+ vmovapd %ymm0, -176(%ebp)
+ vzeroupper
+ call HIDDEN_JUMPTARGET(\callee)
+ leal 16(%r12), %esi
+ vmovapd -160(%ebp), %xmm0
+ leal 16(%rbx), %edi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq -128(%ebp), %rax
+ vmovsd -112(%ebp), %xmm0
+ vmovdqa -128(%ebp), %xmm5
+ vmovdqa -144(%ebp), %xmm1
+ vmovsd %xmm0, (%eax)
+ vmovsd -104(%ebp), %xmm0
+ vpextrd $1, %xmm5, %eax
+ vmovsd %xmm0, (%eax)
+ movq -120(%ebp), %rax
+ vmovsd -96(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ vmovsd -88(%ebp), %xmm0
+ vpextrd $3, %xmm5, %eax
+ vmovsd %xmm0, (%eax)
+ movq -144(%ebp), %rax
+ vmovsd -80(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ vmovsd -72(%ebp), %xmm0
+ vpextrd $1, %xmm1, %eax
+ vmovsd %xmm0, (%eax)
+ movq -136(%ebp), %rax
+ vmovsd -64(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ vmovsd -56(%ebp), %xmm0
+ vpextrd $3, %xmm1, %eax
+ vmovsd %xmm0, (%eax)
+ addl $152, %esp
+ popq %rbx
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %r12
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+.endm
+
ENTRY (_ZGVdN4vvv_sincos)
-WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos
+WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVdN4vvv_sincos)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
index e4320a97c7..4a5d4f637a 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,124 @@
#include "svml_d_wrapper_impl.h"
.text
+ENTRY (_ZGVcN4vl8l8_sincos)
+WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
+END (_ZGVcN4vl8l8_sincos)
+
+/* AVX ISA version as wrapper to SSE ISA version (for vector
+ function declared with #pragma omp declare simd notinbranch). */
+.macro WRAPPER_IMPL_AVX_fFF_vvv callee
+#ifndef __ILP32__
+ pushq %rbp
+ movq %rsp, %rbp
+ andq $-32, %rsp
+ subq $160, %rsp
+ vmovupd %ymm0, 64(%rsp)
+ lea (%rsp), %rdi
+ vmovdqu %xmm1, 96(%rdi)
+ vmovdqu %xmm2, 112(%rdi)
+ vmovdqu %xmm3, 128(%rdi)
+ vmovdqu %xmm4, 144(%rdi)
+ lea 32(%rsp), %rsi
+ vzeroupper
+ call HIDDEN_JUMPTARGET(\callee)
+ vmovdqu 80(%rsp), %xmm0
+ lea 16(%rsp), %rdi
+ lea 48(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq 96(%rsp), %rdx
+ movq 104(%rsp), %rsi
+ movq 112(%rsp), %r8
+ movq 120(%rsp), %r10
+ movq (%rsp), %rax
+ movq 8(%rsp), %rcx
+ movq 16(%rsp), %rdi
+ movq 24(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq 128(%rsp), %rax
+ movq 136(%rsp), %rcx
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ movq 144(%rsp), %rdi
+ movq 152(%rsp), %r9
+ movq 32(%rsp), %r11
+ movq 40(%rsp), %rdx
+ movq 48(%rsp), %rsi
+ movq 56(%rsp), %r8
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq %rsi, (%rdi)
+ movq %r8, (%r9)
+ movq %rbp, %rsp
+ popq %rbp
+ ret
+#else
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-32, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r12
+ leal -80(%rbp), %esi
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x70,0x6
+ .cfi_escape 0x10,0xc,0x2,0x76,0x78
+ leal -112(%rbp), %edi
+ movq %rsi, %r12
+ pushq %rbx
+ .cfi_escape 0x10,0x3,0x2,0x76,0x68
+ movq %rdi, %rbx
+ subl $152, %esp
+ vmovaps %xmm1, -128(%ebp)
+ vmovaps %xmm2, -144(%ebp)
+ vmovapd %ymm0, -176(%ebp)
+ vzeroupper
+ call HIDDEN_JUMPTARGET(\callee)
+ leal 16(%r12), %esi
+ vmovupd -160(%ebp), %xmm0
+ leal 16(%rbx), %edi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq -128(%ebp), %rax
+ vmovsd -112(%ebp), %xmm0
+ vmovdqa -128(%ebp), %xmm5
+ vmovdqa -144(%ebp), %xmm1
+ vmovsd %xmm0, (%eax)
+ vmovsd -104(%ebp), %xmm0
+ vpextrd $1, %xmm5, %eax
+ vmovsd %xmm0, (%eax)
+ movq -120(%ebp), %rax
+ vmovsd -96(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ vmovsd -88(%ebp), %xmm0
+ vpextrd $3, %xmm5, %eax
+ vmovsd %xmm0, (%eax)
+ movq -144(%ebp), %rax
+ vmovsd -80(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ vmovsd -72(%ebp), %xmm0
+ vpextrd $1, %xmm1, %eax
+ vmovsd %xmm0, (%eax)
+ movq -136(%ebp), %rax
+ vmovsd -64(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ vmovsd -56(%ebp), %xmm0
+ vpextrd $3, %xmm1, %eax
+ vmovsd %xmm0, (%eax)
+ addl $152, %esp
+ popq %rbx
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %r12
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+.endm
+
ENTRY (_ZGVcN4vvv_sincos)
-WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos
+WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVcN4vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
index 68d490e5bc..7cf453872b 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,172 @@
#include "svml_d_wrapper_impl.h"
.text
+ENTRY (_ZGVeN8vl8l8_sincos)
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
+END (_ZGVeN8vl8l8_sincos)
+
+/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
+ function declared with #pragma omp declare simd notinbranch). */
+.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
+#ifndef __ILP32__
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp
+ subq $320, %rsp
+ vmovups %zmm0, 256(%rsp)
+ lea (%rsp), %rdi
+ vmovups %zmm1, 128(%rdi)
+ vmovups %zmm2, 192(%rdi)
+ lea 64(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ vmovdqu 288(%rsp), %ymm0
+ lea 32(%rsp), %rdi
+ lea 96(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq 128(%rsp), %rdx
+ movq 192(%rsp), %rsi
+ movq 136(%rsp), %r8
+ movq 200(%rsp), %r10
+ movq (%rsp), %rax
+ movq 64(%rsp), %rcx
+ movq 8(%rsp), %rdi
+ movq 72(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq 144(%rsp), %rax
+ movq 208(%rsp), %rcx
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ movq 152(%rsp), %rdi
+ movq 216(%rsp), %r9
+ movq 16(%rsp), %r11
+ movq 80(%rsp), %rdx
+ movq 24(%rsp), %rsi
+ movq 88(%rsp), %r8
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq 160(%rsp), %r11
+ movq 224(%rsp), %rdx
+ movq %rsi, (%rdi)
+ movq %r8, (%r9)
+ movq 168(%rsp), %rsi
+ movq 232(%rsp), %r8
+ movq 32(%rsp), %r10
+ movq 96(%rsp), %rax
+ movq 40(%rsp), %rcx
+ movq 104(%rsp), %rdi
+ movq %r10, (%r11)
+ movq %rax, (%rdx)
+ movq 176(%rsp), %r10
+ movq 240(%rsp), %rax
+ movq %rcx, (%rsi)
+ movq %rdi, (%r8)
+ movq 184(%rsp), %rcx
+ movq 248(%rsp), %rdi
+ movq 48(%rsp), %r9
+ movq 112(%rsp), %r11
+ movq 56(%rsp), %rdx
+ movq 120(%rsp), %rsi
+ movq %r9, (%r10)
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq %rsi, (%rdi)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-64, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r12
+ leal -112(%rbp), %esi
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x70,0x6
+ .cfi_escape 0x10,0xc,0x2,0x76,0x78
+ leal -176(%rbp), %edi
+ movq %rsi, %r12
+ pushq %rbx
+ .cfi_escape 0x10,0x3,0x2,0x76,0x68
+ movq %rdi, %rbx
+ subl $280, %esp
+ vmovdqa %ymm1, -208(%ebp)
+ vmovdqa %ymm2, -240(%ebp)
+ vmovapd %zmm0, -304(%ebp)
+ call HIDDEN_JUMPTARGET(\callee)
+ leal 32(%r12), %esi
+ vmovupd -272(%ebp), %ymm0
+ leal 32(%rbx), %edi
+ call HIDDEN_JUMPTARGET(\callee)
+ movl -208(%ebp), %eax
+ vmovsd -176(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -204(%ebp), %eax
+ vmovsd -168(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -200(%ebp), %eax
+ vmovsd -160(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -196(%ebp), %eax
+ vmovsd -152(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -192(%ebp), %eax
+ vmovsd -144(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -188(%ebp), %eax
+ vmovsd -136(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -184(%ebp), %eax
+ vmovsd -128(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -180(%ebp), %eax
+ vmovsd -120(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -240(%ebp), %eax
+ vmovsd -112(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -236(%ebp), %eax
+ vmovsd -104(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -232(%ebp), %eax
+ vmovsd -96(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -228(%ebp), %eax
+ vmovsd -88(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -224(%ebp), %eax
+ vmovsd -80(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -220(%ebp), %eax
+ vmovsd -72(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -216(%ebp), %eax
+ vmovsd -64(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ movl -212(%ebp), %eax
+ vmovsd -56(%ebp), %xmm0
+ vmovsd %xmm0, (%eax)
+ addl $280, %esp
+ popq %rbx
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %r12
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+.endm
+
ENTRY (_ZGVeN8vvv_sincos)
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
END (_ZGVeN8vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.S b/sysdeps/x86_64/fpu/svml_d_trig_data.S
index 887dacee91..2b148325fc 100644
--- a/sysdeps/x86_64/fpu/svml_d_trig_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_trig_data.S
@@ -1,5 +1,5 @@
/* Data for vectorized sin, cos, sincos.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.h b/sysdeps/x86_64/fpu/svml_d_trig_data.h
index 4617b5e0c3..b9bb5dc6af 100644
--- a/sysdeps/x86_64/fpu/svml_d_trig_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_trig_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for vectorized sin, cos, sincos.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
index 54f4f58371..d8452e0c2b 100644
--- a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
@@ -1,5 +1,5 @@
/* Wrapper implementations of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,10 +21,10 @@
subq $40, %rsp
cfi_adjust_cfa_offset(40)
movaps %xmm0, (%rsp)
- call \callee@PLT
+ call JUMPTARGET(\callee)
movsd %xmm0, 16(%rsp)
movsd 8(%rsp), %xmm0
- call \callee@PLT
+ call JUMPTARGET(\callee)
movsd 16(%rsp), %xmm1
movsd %xmm0, 24(%rsp)
unpcklpd %xmm0, %xmm1
@@ -40,11 +40,11 @@
cfi_adjust_cfa_offset(56)
movaps %xmm0, (%rsp)
movaps %xmm1, 16(%rsp)
- call \callee@PLT
+ call JUMPTARGET(\callee)
movsd %xmm0, 32(%rsp)
movsd 8(%rsp), %xmm0
movsd 24(%rsp), %xmm1
- call \callee@PLT
+ call JUMPTARGET(\callee)
movsd 32(%rsp), %xmm1
movsd %xmm0, 40(%rsp)
unpcklpd %xmm0, %xmm1
@@ -69,7 +69,7 @@
leaq 16(%rsp), %rsi
leaq 24(%rsp), %rdi
movaps %xmm0, (%rsp)
- call \callee@PLT
+ call JUMPTARGET(\callee)
leaq 16(%rsp), %rsi
leaq 24(%rsp), %rdi
movsd 24(%rsp), %xmm0
@@ -79,7 +79,7 @@
movsd 16(%rsp), %xmm0
movsd %xmm0, (%rbx)
movapd %xmm1, %xmm0
- call \callee@PLT
+ call JUMPTARGET(\callee)
movsd 24(%rsp), %xmm0
movsd %xmm0, 8(%rbp)
movsd 16(%rsp), %xmm0
@@ -201,29 +201,14 @@
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
+ vmovups %zmm0, (%rsp)
vmovupd (%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 64(%rsp)
vmovupd 32(%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0. */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x10
- .byte 0x44
- .byte 0x24
- .byte 0x01
+ vmovups 64(%rsp), %zmm0
movq %rbp, %rsp
cfi_def_cfa_register (%rsp)
popq %rbp
@@ -241,23 +226,8 @@
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x4c
- .byte 0x24
- .byte 0x01
+ vmovups %zmm0, (%rsp)
+ vmovups %zmm1, 64(%rsp)
vmovupd (%rsp), %ymm0
vmovupd 64(%rsp), %ymm1
call HIDDEN_JUMPTARGET(\callee)
@@ -266,15 +236,7 @@
vmovupd 96(%rsp), %ymm1
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0. */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x10
- .byte 0x44
- .byte 0x24
- .byte 0x02
+ vmovups 128(%rsp), %zmm0
movq %rbp, %rsp
cfi_def_cfa_register (%rsp)
popq %rbp
@@ -299,14 +261,7 @@
cfi_rel_offset (%r13, 0)
subq $176, %rsp
movq %rsi, %r13
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
+ vmovups %zmm0, (%rsp)
movq %rdi, %r12
vmovupd (%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
diff --git a/sysdeps/x86_64/fpu/svml_finite_alias.S b/sysdeps/x86_64/fpu/svml_finite_alias.S
index 2dcfc37590..21a9d6d2ee 100644
--- a/sysdeps/x86_64/fpu/svml_finite_alias.S
+++ b/sysdeps/x86_64/fpu/svml_finite_alias.S
@@ -2,7 +2,7 @@
aliases in libmvec.so while compiler creates the vector names
based on scalar asm name. Corresponding discussion is at
<https://gcc.gnu.org/ml/gcc/2015-06/msg00173.html>.
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
index 9ca4fbfaa8..d1a4647082 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
index 363090c54a..d58ccecc09 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with SSE2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
index 26a6a4e4d6..f9dc74fc49 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
index 6c210d98ce..45f14e23df 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/svml_s_expf16_core.S
index d8eecac674..4e18b6f544 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf16_core.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/svml_s_expf4_core.S
index 65b5d1a3ce..a2a6209621 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf4_core.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVbN4v_expf)
-WRAPPER_IMPL_SSE2 expf
+WRAPPER_IMPL_SSE2 __expf_finite
END (_ZGVbN4v_expf)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/svml_s_expf8_core.S
index e3cf975bf6..46297208cd 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf8_core.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
index 90469d7dcf..1210dcf885 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function expf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.S b/sysdeps/x86_64/fpu/svml_s_expf_data.S
index 4b644082b6..a1cb6e7591 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf_data.S
@@ -1,5 +1,5 @@
/* Data for function expf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.h b/sysdeps/x86_64/fpu/svml_s_expf_data.h
index 3610633c96..56a1d8bdf6 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_expf_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for vector function expf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/svml_s_logf16_core.S
index cc2e97df78..e1f4b0cf0c 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf16_core.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/svml_s_logf4_core.S
index 195f328d92..496b93ffa6 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf4_core.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
.text
ENTRY (_ZGVbN4v_logf)
-WRAPPER_IMPL_SSE2 logf
+WRAPPER_IMPL_SSE2 __logf_finite
END (_ZGVbN4v_logf)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/svml_s_logf8_core.S
index 8bb6926667..f0ccee7205 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf8_core.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
index c2efba23f2..1ddd0381cd 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function logf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.S b/sysdeps/x86_64/fpu/svml_s_logf_data.S
index a5675f5c7a..154f98c2e0 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf_data.S
@@ -1,5 +1,5 @@
/* Data for vector function logf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.h b/sysdeps/x86_64/fpu/svml_s_logf_data.h
index 619d5c4bd1..82a9903b10 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_logf_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for vectorized function logf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/svml_s_powf16_core.S
index cb52af0c6b..0859996d0a 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf16_core.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/svml_s_powf4_core.S
index 88fae60892..4276e6ea28 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf4_core.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,7 +21,7 @@
.text
ENTRY (_ZGVbN4vv_powf)
-WRAPPER_IMPL_SSE2_ff powf
+WRAPPER_IMPL_SSE2_ff __powf_finite
END (_ZGVbN4vv_powf)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/svml_s_powf8_core.S
index 8ea44897c1..764dc99ee7 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf8_core.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
index b5e4e5e6ef..8bb1ef22fd 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function powf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.S b/sysdeps/x86_64/fpu/svml_s_powf_data.S
index fc1a3d9390..74a31abd1e 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf_data.S
@@ -1,5 +1,5 @@
/* Data for function powf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.h b/sysdeps/x86_64/fpu/svml_s_powf_data.h
index 514004238a..5d3270cf27 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_powf_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for function powf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
index 5cbf10b8da..40eb974a74 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,270 @@
#include "svml_s_wrapper_impl.h"
.text
+/* Entry point _ZGVeN16vl4l4_sincosf: its whole body is the expansion of
+   WRAPPER_IMPL_AVX512_fFF with _ZGVdN8vl4l4_sincosf as the callee.  */
+ENTRY (_ZGVeN16vl4l4_sincosf)
+WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
+END (_ZGVeN16vl4l4_sincosf)
+
+/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
+ function declared with #pragma omp declare simd notinbranch).
+
+ \callee is invoked twice, once per 256-bit half of %zmm0, each time
+ with %rdi/%rsi (%edi/%esi on ILP32) pointing at two result buffers
+ in this function's own frame.  Afterwards every 4-byte result is
+ stored through a per-lane address taken from the spilled copies of
+ %zmm1-%zmm4 (LP64) or %zmm1-%zmm2 (ILP32).  */
+.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
+#ifndef __ILP32__
+ /* LP64 frame, 64-byte aligned, 448 bytes, offsets from %rsp:
+      0..63     first result buffer (passed in %rdi)
+      64..127   second result buffer (passed in %rsi)
+      128..383  copies of %zmm1-%zmm4, read back below as 32
+                eight-byte store addresses
+      384..447  copy of the input vector %zmm0.  */
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp
+ subq $448, %rsp
+ vmovups %zmm0, 384(%rsp)
+ lea (%rsp), %rdi
+ vmovups %zmm1, 128(%rdi)
+ vmovups %zmm2, 192(%rdi)
+ vmovups %zmm3, 256(%rdi)
+ vmovups %zmm4, 320(%rdi)
+ lea 64(%rsp), %rsi
+ /* First call: %ymm0 is left untouched, i.e. the low half of the input.  */
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Second call: upper half of the saved input, both result buffers
+    advanced by 32 bytes.  */
+ vmovdqu 416(%rsp), %ymm0
+ lea 32(%rsp), %rdi
+ lea 96(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Scatter: the 4-byte value at 4*k(%rsp) is stored through the
+    address held at 128+8*k(%rsp), for k = 0..31.  Loads and stores
+    are interleaved and registers are reused, so the order matters.  */
+ movq 128(%rsp), %rdx
+ movq 136(%rsp), %rsi
+ movq 144(%rsp), %r8
+ movq 152(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 160(%rsp), %rax
+ movq 168(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 176(%rsp), %rdi
+ movq 184(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 192(%rsp), %r11
+ movq 200(%rsp), %rdx
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 208(%rsp), %rsi
+ movq 216(%rsp), %r8
+ movl 32(%rsp), %r10d
+ movl 36(%rsp), %eax
+ movl 40(%rsp), %ecx
+ movl 44(%rsp), %edi
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 224(%rsp), %r10
+ movq 232(%rsp), %rax
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movq 240(%rsp), %rcx
+ movq 248(%rsp), %rdi
+ movl 48(%rsp), %r9d
+ movl 52(%rsp), %r11d
+ movl 56(%rsp), %edx
+ movl 60(%rsp), %esi
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movq 256(%rsp), %r9
+ movq 264(%rsp), %r11
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movq 272(%rsp), %rdx
+ movq 280(%rsp), %rsi
+ movl 64(%rsp), %r8d
+ movl 68(%rsp), %r10d
+ movl 72(%rsp), %eax
+ movl 76(%rsp), %ecx
+ movl %r8d, (%r9)
+ movl %r10d, (%r11)
+ movq 288(%rsp), %r8
+ movq 296(%rsp), %r10
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 304(%rsp), %rax
+ movq 312(%rsp), %rcx
+ movl 80(%rsp), %edi
+ movl 84(%rsp), %r9d
+ movl 88(%rsp), %r11d
+ movl 92(%rsp), %edx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 320(%rsp), %rdi
+ movq 328(%rsp), %r9
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 336(%rsp), %r11
+ movq 344(%rsp), %rdx
+ movl 96(%rsp), %esi
+ movl 100(%rsp), %r8d
+ movl 104(%rsp), %r10d
+ movl 108(%rsp), %eax
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 352(%rsp), %rsi
+ movq 360(%rsp), %r8
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 368(%rsp), %r10
+ movq 376(%rsp), %rax
+ movl 112(%rsp), %ecx
+ movl 116(%rsp), %edi
+ movl 120(%rsp), %r9d
+ movl 124(%rsp), %r11d
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ /* Drop the aligned frame and restore the caller's %rbp.  */
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ /* ILP32: %r10 keeps the incoming stack address (+8) while %esp is
+    realigned to 64 bytes; the return address is pushed again, then
+    %rbp, %r12, %r10 and %rbx are saved.  The .cfi_escape lines are raw
+    DWARF call-frame bytes.  Offsets from %ebp:
+      -368  copy of the input vector %zmm0
+      -304  copy of %zmm2, read back as sixteen 4-byte store addresses
+      -240  copy of %zmm1, read back as sixteen 4-byte store addresses
+      -176  first result buffer (%edi, also kept in %rbx)
+      -112  second result buffer (%esi, also kept in %r12).  */
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-64, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r12
+ leal -112(%rbp), %esi
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x70,0x6
+ .cfi_escape 0x10,0xc,0x2,0x76,0x78
+ leal -176(%rbp), %edi
+ movq %rsi, %r12
+ pushq %rbx
+ .cfi_escape 0x10,0x3,0x2,0x76,0x68
+ movq %rdi, %rbx
+ subl $344, %esp
+ vmovdqa64 %zmm1, -240(%ebp)
+ vmovdqa64 %zmm2, -304(%ebp)
+ vmovaps %zmm0, -368(%ebp)
+ /* First call: low half of the input is still in %ymm0.  */
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Second call: upper half of the saved input, result buffers
+    advanced by 32 bytes.  */
+ leal 32(%r12), %esi
+ vmovups -336(%ebp), %ymm0
+ leal 32(%rbx), %edi
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Store the first result buffer through the addresses saved from
+    %zmm1.  */
+ movl -240(%ebp), %eax
+ vmovss -176(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -236(%ebp), %eax
+ vmovss -172(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -232(%ebp), %eax
+ vmovss -168(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -228(%ebp), %eax
+ vmovss -164(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -224(%ebp), %eax
+ vmovss -160(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -220(%ebp), %eax
+ vmovss -156(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -216(%ebp), %eax
+ vmovss -152(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -212(%ebp), %eax
+ vmovss -148(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -208(%ebp), %eax
+ vmovss -144(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -204(%ebp), %eax
+ vmovss -140(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -200(%ebp), %eax
+ vmovss -136(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -196(%ebp), %eax
+ vmovss -132(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -192(%ebp), %eax
+ vmovss -128(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -188(%ebp), %eax
+ vmovss -124(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -184(%ebp), %eax
+ vmovss -120(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -180(%ebp), %eax
+ vmovss -116(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ /* Store the second result buffer through the addresses saved from
+    %zmm2.  */
+ movl -304(%ebp), %eax
+ vmovss -112(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -300(%ebp), %eax
+ vmovss -108(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -296(%ebp), %eax
+ vmovss -104(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -292(%ebp), %eax
+ vmovss -100(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -288(%ebp), %eax
+ vmovss -96(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -284(%ebp), %eax
+ vmovss -92(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -280(%ebp), %eax
+ vmovss -88(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -276(%ebp), %eax
+ vmovss -84(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -272(%ebp), %eax
+ vmovss -80(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -268(%ebp), %eax
+ vmovss -76(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -264(%ebp), %eax
+ vmovss -72(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -260(%ebp), %eax
+ vmovss -68(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -256(%ebp), %eax
+ vmovss -64(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -252(%ebp), %eax
+ vmovss -60(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -248(%ebp), %eax
+ vmovss -56(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -244(%ebp), %eax
+ vmovss -52(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ /* Release the locals, pop the saved registers and rebuild the
+    incoming %esp from %r10.  */
+ addl $344, %esp
+ popq %rbx
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %r12
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+.endm
+
+/* Entry point _ZGVeN16vvv_sincosf: expands WRAPPER_IMPL_AVX512_fFF_vvv
+   with _ZGVdN8vl4l4_sincosf as the callee.  */
ENTRY (_ZGVeN16vvv_sincosf)
-WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
+WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN8vl4l4_sincosf
END (_ZGVeN16vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
index 1a7d2733af..5daa5118d6 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,135 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
-ENTRY (_ZGVbN4vvv_sincosf)
+/* Entry point _ZGVbN4vl4l4_sincosf: expands WRAPPER_IMPL_SSE2_fFF with
+   the scalar sincosf as the callee.  The libmvec_hidden_def line goes
+   with the HIDDEN_JUMPTARGET calls the wider wrappers make to this
+   symbol.  */
+ENTRY (_ZGVbN4vl4l4_sincosf)
WRAPPER_IMPL_SSE2_fFF sincosf
+END (_ZGVbN4vl4l4_sincosf)
+libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
+
+/* SSE2 ISA version as wrapper to scalar (for vector
+ function declared with #pragma omp declare simd notinbranch).
+
+ \callee is invoked four times, once per 4-byte lane of %xmm0, each
+ time with %rdi/%rsi (%edi/%esi on ILP32) pointing at one slot of two
+ result buffers in this function's own frame.  Afterwards every result
+ is stored through a per-lane address taken from the spilled copies of
+ %xmm1-%xmm4 (LP64) or %xmm1-%xmm2 (ILP32).  */
+.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
+#ifndef __ILP32__
+ /* LP64 frame, 120 bytes, offsets from %rsp:
+      0..15    first result buffer (slot passed in %rdi)
+      16..31   second result buffer (slot passed in %rsi)
+      32..95   copies of %xmm1-%xmm4, read back below as eight
+               eight-byte store addresses
+      96..111  copy of the input vector %xmm0.  */
+ subq $120, %rsp
+ cfi_adjust_cfa_offset(120)
+ movaps %xmm0, 96(%rsp)
+ lea (%rsp), %rdi
+ movdqa %xmm1, 32(%rdi)
+ lea 16(%rsp), %rsi
+ movdqa %xmm2, 32(%rsi)
+ movdqa %xmm3, 48(%rsi)
+ movdqa %xmm4, 64(%rsi)
+ /* Lane 0 is still in the low element of %xmm0.  */
+ call JUMPTARGET(\callee)
+ /* Lanes 1..3: reload the scalar from the saved input and advance
+    both result slots by 4 bytes per call.  */
+ movss 100(%rsp), %xmm0
+ lea 4(%rsp), %rdi
+ lea 20(%rsp), %rsi
+ call JUMPTARGET(\callee)
+ movss 104(%rsp), %xmm0
+ lea 8(%rsp), %rdi
+ lea 24(%rsp), %rsi
+ call JUMPTARGET(\callee)
+ movss 108(%rsp), %xmm0
+ lea 12(%rsp), %rdi
+ lea 28(%rsp), %rsi
+ call JUMPTARGET(\callee)
+ /* Scatter: the 4-byte value at 4*k(%rsp) is stored through the
+    address held at 32+8*k(%rsp), for k = 0..7.  */
+ movq 32(%rsp), %rdx
+ movq 40(%rsp), %rsi
+ movq 48(%rsp), %r8
+ movq 56(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 64(%rsp), %rax
+ movq 72(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 80(%rsp), %rdi
+ movq 88(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ addq $120, %rsp
+ cfi_adjust_cfa_offset(-120)
+ ret
+#else
+ /* ILP32: %rbp and %rbx are saved and reused to hold the two result
+    buffer addresses across the calls.  Frame offsets from %esp:
+      0..15   copy of %xmm1, read back as four 4-byte store addresses
+      16..31  copy of %xmm2, read back as four 4-byte store addresses
+      32..47  copy of the input vector %xmm0
+      48..63  first result buffer (%edi, also kept in %rbx)
+      64..79  second result buffer (%esi, also kept in %rbp).  */
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset 6, -16
+ pushq %rbx
+ .cfi_def_cfa_offset 24
+ .cfi_offset 3, -24
+ subl $88, %esp
+ .cfi_def_cfa_offset 112
+ leal 64(%rsp), %esi
+ movaps %xmm1, (%esp)
+ leal 48(%rsp), %edi
+ movaps %xmm2, 16(%esp)
+ movq %rsi, %rbp
+ movq %rdi, %rbx
+ movaps %xmm0, 32(%esp)
+ call JUMPTARGET(\callee)
+ /* The unaligned 16-byte loads below put lane 1, 2 and 3 of the saved
+    input into the low element of %xmm0.  */
+ movups 36(%esp), %xmm0
+ leal 4(%rbp), %esi
+ leal 4(%rbx), %edi
+ call JUMPTARGET(\callee)
+ movups 40(%esp), %xmm0
+ leal 8(%rbp), %esi
+ leal 8(%rbx), %edi
+ call JUMPTARGET(\callee)
+ movups 44(%esp), %xmm0
+ leal 12(%rbp), %esi
+ leal 12(%rbx), %edi
+ call JUMPTARGET(\callee)
+ /* Scatter.  Even-numbered addresses come from the low half of an
+    8-byte load (only %eax is used), odd-numbered ones from pextrd on
+    the reloaded address vectors.  */
+ movq (%esp), %rax
+ movss 48(%esp), %xmm0
+ movdqa (%esp), %xmm4
+ movdqa 16(%esp), %xmm7
+ movss %xmm0, (%eax)
+ movss 52(%esp), %xmm0
+ pextrd $1, %xmm4, %eax
+ movss %xmm0, (%eax)
+ movq 8(%esp), %rax
+ movss 56(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movss 60(%esp), %xmm0
+ pextrd $3, %xmm4, %eax
+ movss %xmm0, (%eax)
+ movq 16(%esp), %rax
+ movss 64(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movss 68(%esp), %xmm0
+ pextrd $1, %xmm7, %eax
+ movss %xmm0, (%eax)
+ movq 24(%esp), %rax
+ movss 72(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movss 76(%esp), %xmm0
+ pextrd $3, %xmm7, %eax
+ movss %xmm0, (%eax)
+ addl $88, %esp
+ .cfi_def_cfa_offset 24
+ popq %rbx
+ .cfi_def_cfa_offset 16
+ popq %rbp
+ .cfi_def_cfa_offset 8
+ ret
+#endif
+.endm
+
+/* Entry point _ZGVbN4vvv_sincosf: expands WRAPPER_IMPL_SSE2_fFF_vvv with
+   the scalar sincosf as the callee.  */
+ENTRY (_ZGVbN4vvv_sincosf)
+WRAPPER_IMPL_SSE2_fFF_vvv sincosf
END (_ZGVbN4vvv_sincosf)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
index 74d1dfd1a8..d6d4600d10 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,179 @@
#include "svml_s_wrapper_impl.h"
.text
+/* Entry point _ZGVdN8vl4l4_sincosf: expands WRAPPER_IMPL_AVX_fFF with
+   _ZGVbN4vl4l4_sincosf as the callee.  The libmvec_hidden_def line goes
+   with the HIDDEN_JUMPTARGET calls made to this symbol by wider
+   wrappers.  */
+ENTRY (_ZGVdN8vl4l4_sincosf)
+WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
+END (_ZGVdN8vl4l4_sincosf)
+libmvec_hidden_def (_ZGVdN8vl4l4_sincosf)
+
+/* AVX2 ISA version as wrapper to SSE ISA version (for vector
+ function declared with #pragma omp declare simd notinbranch).
+
+ \callee is invoked twice, once per 128-bit half of %ymm0, each time
+ with %rdi/%rsi (%edi/%esi on ILP32) pointing at two result buffers
+ in this function's own frame.  Afterwards every 4-byte result is
+ stored through a per-lane address taken from the spilled copies of
+ %ymm1-%ymm4 (LP64) or %ymm1-%ymm2 (ILP32).  */
+.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
+#ifndef __ILP32__
+ /* LP64 frame, 32-byte aligned, 224 bytes, offsets from %rsp:
+      0..31     first result buffer (passed in %rdi)
+      32..63    second result buffer (passed in %rsi)
+      64..191   copies of %ymm1-%ymm4, read back below as 16
+                eight-byte store addresses
+      192..223  copy of the input vector %ymm0.  */
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-32, %rsp
+ subq $224, %rsp
+ vmovups %ymm0, 192(%rsp)
+ lea (%rsp), %rdi
+ vmovdqu %ymm1, 64(%rdi)
+ vmovdqu %ymm2, 96(%rdi)
+ vmovdqu %ymm3, 128(%rdi)
+ vmovdqu %ymm4, 160(%rdi)
+ lea 32(%rsp), %rsi
+ /* Upper register halves are cleared before calling the 128-bit
+    callee; the low half of the input stays in %xmm0.  */
+ vzeroupper
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Second call: upper half of the saved input, result buffers
+    advanced by 16 bytes.  */
+ vmovups 208(%rsp), %xmm0
+ lea 16(%rsp), %rdi
+ lea 48(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Scatter: the 4-byte value at 4*k(%rsp) is stored through the
+    address held at 64+8*k(%rsp), for k = 0..15.  */
+ movq 64(%rsp), %rdx
+ movq 72(%rsp), %rsi
+ movq 80(%rsp), %r8
+ movq 88(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 96(%rsp), %rax
+ movq 104(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 112(%rsp), %rdi
+ movq 120(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 128(%rsp), %r11
+ movq 136(%rsp), %rdx
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 144(%rsp), %rsi
+ movq 152(%rsp), %r8
+ movl 32(%rsp), %r10d
+ movl 36(%rsp), %eax
+ movl 40(%rsp), %ecx
+ movl 44(%rsp), %edi
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 160(%rsp), %r10
+ movq 168(%rsp), %rax
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movq 176(%rsp), %rcx
+ movq 184(%rsp), %rdi
+ movl 48(%rsp), %r9d
+ movl 52(%rsp), %r11d
+ movl 56(%rsp), %edx
+ movl 60(%rsp), %esi
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ /* Drop the aligned frame and restore the caller's %rbp.  */
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ /* ILP32: %r10 keeps the incoming stack address (+8) while %esp is
+    realigned to 32 bytes; the return address is pushed again, then
+    %rbp, %r12, %r10 and %rbx are saved.  The .cfi_escape lines are raw
+    DWARF call-frame bytes.  Offsets from %ebp:
+      -208  copy of the input vector %ymm0
+      -176  copy of %ymm2, read back as eight 4-byte store addresses
+      -144  copy of %ymm1, read back as eight 4-byte store addresses
+      -112  first result buffer (%edi, also kept in %rbx)
+      -80   second result buffer (%esi, also kept in %r12).  */
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-32, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r12
+ leal -80(%rbp), %esi
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x70,0x6
+ .cfi_escape 0x10,0xc,0x2,0x76,0x78
+ leal -112(%rbp), %edi
+ movq %rsi, %r12
+ pushq %rbx
+ .cfi_escape 0x10,0x3,0x2,0x76,0x68
+ movq %rdi, %rbx
+ subl $184, %esp
+ vmovdqa %ymm1, -144(%ebp)
+ vmovdqa %ymm2, -176(%ebp)
+ vmovaps %ymm0, -208(%ebp)
+ vzeroupper
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Second call: upper half of the saved input, result buffers
+    advanced by 16 bytes.  */
+ leal 16(%r12), %esi
+ vmovups -192(%ebp), %xmm0
+ leal 16(%rbx), %edi
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Store the first result buffer through the addresses saved from
+    %ymm1.  */
+ movl -144(%ebp), %eax
+ vmovss -112(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -140(%ebp), %eax
+ vmovss -108(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -136(%ebp), %eax
+ vmovss -104(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -132(%ebp), %eax
+ vmovss -100(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -128(%ebp), %eax
+ vmovss -96(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -124(%ebp), %eax
+ vmovss -92(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -120(%ebp), %eax
+ vmovss -88(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -116(%ebp), %eax
+ vmovss -84(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ /* Store the second result buffer through the addresses saved from
+    %ymm2.  */
+ movl -176(%ebp), %eax
+ vmovss -80(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -172(%ebp), %eax
+ vmovss -76(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -168(%ebp), %eax
+ vmovss -72(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -164(%ebp), %eax
+ vmovss -68(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -160(%ebp), %eax
+ vmovss -64(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -156(%ebp), %eax
+ vmovss -60(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -152(%ebp), %eax
+ vmovss -56(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ movl -148(%ebp), %eax
+ vmovss -52(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ /* Release the locals, pop the saved registers and rebuild the
+    incoming %esp from %r10.  */
+ addl $184, %esp
+ popq %rbx
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %r12
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+.endm
+
+/* Entry point _ZGVdN8vvv_sincosf: expands WRAPPER_IMPL_AVX2_fFF_vvv with
+   _ZGVbN4vl4l4_sincosf as the callee.  */
ENTRY (_ZGVdN8vvv_sincosf)
-WRAPPER_IMPL_AVX_fFF _ZGVbN4vvv_sincosf
+WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN4vl4l4_sincosf
END (_ZGVdN8vvv_sincosf)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
index 55b8b2d768..585e6d87c4 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,179 @@
#include "svml_s_wrapper_impl.h"
.text
-ENTRY(_ZGVcN8vvv_sincosf)
-WRAPPER_IMPL_AVX_fFF _ZGVbN4vvv_sincosf
-END(_ZGVcN8vvv_sincosf)
+/* Entry point _ZGVcN8vl4l4_sincosf: expands WRAPPER_IMPL_AVX_fFF with
+   _ZGVbN4vl4l4_sincosf as the callee.  */
+ENTRY (_ZGVcN8vl4l4_sincosf)
+WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
+END (_ZGVcN8vl4l4_sincosf)
+
+/* AVX ISA version as wrapper to SSE ISA version (for vector
+ function declared with #pragma omp declare simd notinbranch).
+
+ \callee is invoked twice, once per 128-bit half of %ymm0, each time
+ with %rdi/%rsi (%edi/%esi on ILP32) pointing at two result buffers
+ in this function's own frame.  Afterwards every 4-byte result is
+ stored through a per-lane address.  On LP64 the addresses come from
+ %xmm1-%xmm7 plus two 8-byte values read from the caller's stack at
+ 16(%rbp) and 24(%rbp); on ILP32 they come from %xmm1-%xmm4.  */
+.macro WRAPPER_IMPL_AVX_fFF_vvv callee
+#ifndef __ILP32__
+ /* LP64 frame, 32-byte aligned, 224 bytes, offsets from %rsp:
+      0..31    first result buffer (passed in %rdi)
+      32..63   second result buffer (passed in %rsi)
+      64..95   copy of the input vector %ymm0
+      96..207  copies of %xmm1-%xmm7, read back below as 14
+               eight-byte store addresses.
+    The prologue and epilogue carry the same CFI annotations as the
+    other %rbp-framed wrappers, so unwind information stays valid once
+    %rbp has been pushed.  */
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-32, %rsp
+ subq $224, %rsp
+ vmovups %ymm0, 64(%rsp)
+ lea (%rsp), %rdi
+ vmovdqu %xmm1, 96(%rdi)
+ vmovdqu %xmm2, 112(%rdi)
+ vmovdqu %xmm3, 128(%rdi)
+ vmovdqu %xmm4, 144(%rdi)
+ vmovdqu %xmm5, 160(%rdi)
+ lea 32(%rsp), %rsi
+ /* %rsi is %rsp + 32, so these two land at 176(%rsp) and 192(%rsp).  */
+ vmovdqu %xmm6, 144(%rsi)
+ vmovdqu %xmm7, 160(%rsi)
+ /* Upper register halves are cleared before calling the 128-bit
+    callee; the low half of the input stays in %xmm0.  */
+ vzeroupper
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Second call: upper half of the saved input, result buffers
+    advanced by 16 bytes.  */
+ vmovdqu 80(%rsp), %xmm0
+ lea 16(%rsp), %rdi
+ lea 48(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Scatter: the 4-byte value at 4*k(%rsp) is stored through the
+    address held at 96+8*k(%rsp) for k = 0..13; the last two addresses
+    are read from 16(%rbp) and 24(%rbp).  */
+ movq 96(%rsp), %rdx
+ movq 104(%rsp), %rsi
+ movq 112(%rsp), %r8
+ movq 120(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 128(%rsp), %rax
+ movq 136(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 144(%rsp), %rdi
+ movq 152(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 160(%rsp), %r11
+ movq 168(%rsp), %rdx
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 176(%rsp), %rsi
+ movq 184(%rsp), %r8
+ movl 32(%rsp), %r10d
+ movl 36(%rsp), %eax
+ movl 40(%rsp), %ecx
+ movl 44(%rsp), %edi
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 192(%rsp), %r10
+ movq 200(%rsp), %rax
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movq 16(%rbp), %rcx
+ movq 24(%rbp), %rdi
+ movl 48(%rsp), %r9d
+ movl 52(%rsp), %r11d
+ movl 56(%rsp), %edx
+ movl 60(%rsp), %esi
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ /* Drop the aligned frame and restore the caller's %rbp.  */
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+#else
+ /* ILP32: %r10 keeps the incoming stack address (+8) while %esp is
+    realigned to 32 bytes; the return address is pushed again, then
+    %rbp, %r12, %r10 and %rbx are saved.  The .cfi_escape lines are raw
+    DWARF call-frame bytes.  Offsets from %ebp:
+      -208  copy of the input vector %ymm0
+      -176  copy of %xmm4 (four 4-byte store addresses)
+      -160  copy of %xmm3 (four 4-byte store addresses)
+      -144  copy of %xmm2 (four 4-byte store addresses)
+      -128  copy of %xmm1 (four 4-byte store addresses)
+      -112  first result buffer (%edi, also kept in %rbx)
+      -80   second result buffer (%esi, also kept in %r12).  */
+ leal 8(%rsp), %r10d
+ .cfi_def_cfa 10, 0
+ andl $-32, %esp
+ pushq -8(%r10d)
+ pushq %rbp
+ .cfi_escape 0x10,0x6,0x2,0x76,0
+ movl %esp, %ebp
+ pushq %r12
+ leal -80(%rbp), %esi
+ pushq %r10
+ .cfi_escape 0xf,0x3,0x76,0x70,0x6
+ .cfi_escape 0x10,0xc,0x2,0x76,0x78
+ leal -112(%rbp), %edi
+ movq %rsi, %r12
+ pushq %rbx
+ .cfi_escape 0x10,0x3,0x2,0x76,0x68
+ movq %rdi, %rbx
+ subl $184, %esp
+ vmovaps %xmm1, -128(%ebp)
+ vmovaps %xmm2, -144(%ebp)
+ vmovaps %xmm3, -160(%ebp)
+ vmovaps %xmm4, -176(%ebp)
+ vmovaps %ymm0, -208(%ebp)
+ vzeroupper
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Second call: upper half of the saved input, result buffers
+    advanced by 16 bytes.  */
+ leal 16(%r12), %esi
+ vmovups -192(%ebp), %xmm0
+ leal 16(%rbx), %edi
+ call HIDDEN_JUMPTARGET(\callee)
+ /* Scatter.  Even-numbered addresses come from the low half of an
+    8-byte load (only %eax is used), odd-numbered ones from vpextrd on
+    the reloaded address vectors.  The first result buffer goes through
+    the %xmm1 and %xmm2 copies, the second through the %xmm3 and %xmm4
+    copies.  */
+ movq -128(%ebp), %rax
+ vmovss -112(%ebp), %xmm0
+ vmovdqa -128(%ebp), %xmm7
+ vmovdqa -144(%ebp), %xmm3
+ vmovss %xmm0, (%eax)
+ vmovss -108(%ebp), %xmm0
+ vpextrd $1, %xmm7, %eax
+ vmovss %xmm0, (%eax)
+ movq -120(%ebp), %rax
+ vmovss -104(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -100(%ebp), %xmm0
+ vpextrd $3, %xmm7, %eax
+ vmovdqa -160(%ebp), %xmm7
+ vmovss %xmm0, (%eax)
+ movq -144(%ebp), %rax
+ vmovss -96(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -92(%ebp), %xmm0
+ vpextrd $1, %xmm3, %eax
+ vmovss %xmm0, (%eax)
+ movq -136(%ebp), %rax
+ vmovss -88(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -84(%ebp), %xmm0
+ vpextrd $3, %xmm3, %eax
+ vmovss %xmm0, (%eax)
+ movq -160(%ebp), %rax
+ vmovss -80(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -76(%ebp), %xmm0
+ vpextrd $1, %xmm7, %eax
+ vmovss %xmm0, (%eax)
+ movq -152(%ebp), %rax
+ vmovss -72(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -68(%ebp), %xmm0
+ vpextrd $3, %xmm7, %eax
+ vmovss %xmm0, (%eax)
+ movq -176(%ebp), %rax
+ vmovss -64(%ebp), %xmm0
+ vmovdqa -176(%ebp), %xmm3
+ vmovss %xmm0, (%eax)
+ vmovss -60(%ebp), %xmm0
+ vpextrd $1, %xmm3, %eax
+ vmovss %xmm0, (%eax)
+ movq -168(%ebp), %rax
+ vmovss -56(%ebp), %xmm0
+ vmovss %xmm0, (%eax)
+ vmovss -52(%ebp), %xmm0
+ vpextrd $3, %xmm3, %eax
+ vmovss %xmm0, (%eax)
+ /* Release the locals, pop the saved registers and rebuild the
+    incoming %esp from %r10.  */
+ addl $184, %esp
+ popq %rbx
+ popq %r10
+ .cfi_def_cfa 10, 0
+ popq %r12
+ popq %rbp
+ leal -8(%r10), %esp
+ .cfi_def_cfa 7, 8
+ ret
+#endif
+.endm
+
+/* Entry point _ZGVcN8vvv_sincosf: expands WRAPPER_IMPL_AVX_fFF_vvv with
+   _ZGVbN4vl4l4_sincosf as the callee.  */
+ENTRY (_ZGVcN8vvv_sincosf)
+WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN4vl4l4_sincosf
+END (_ZGVcN8vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
index d7a31e1ea6..8c5547e26f 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
index 6f10137134..d56137b32a 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with SSE2.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
index c459658688..e39392243e 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
index 5e95aa2e02..9984e6f9f7 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.S b/sysdeps/x86_64/fpu/svml_s_trig_data.S
index b61aa6abb9..8f1e1f60b8 100644
--- a/sysdeps/x86_64/fpu/svml_s_trig_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_trig_data.S
@@ -1,5 +1,5 @@
/* Data for function cosf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.h b/sysdeps/x86_64/fpu/svml_s_trig_data.h
index 2e469a918a..0faf161c08 100644
--- a/sysdeps/x86_64/fpu/svml_s_trig_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_trig_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for vectorized sinf, cosf, sincosf.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
index b1a03be3d9..937afb5cbc 100644
--- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
@@ -1,5 +1,5 @@
/* Wrapper implementations of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,16 +21,16 @@
subq $40, %rsp
cfi_adjust_cfa_offset(40)
movaps %xmm0, (%rsp)
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss %xmm0, 16(%rsp)
movss 4(%rsp), %xmm0
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss %xmm0, 20(%rsp)
movss 8(%rsp), %xmm0
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss %xmm0, 24(%rsp)
movss 12(%rsp), %xmm0
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss 16(%rsp), %xmm3
movss 20(%rsp), %xmm2
movss 24(%rsp), %xmm1
@@ -50,19 +50,19 @@
cfi_adjust_cfa_offset(56)
movaps %xmm0, (%rsp)
movaps %xmm1, 16(%rsp)
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss %xmm0, 32(%rsp)
movss 4(%rsp), %xmm0
movss 20(%rsp), %xmm1
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss %xmm0, 36(%rsp)
movss 8(%rsp), %xmm0
movss 24(%rsp), %xmm1
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss %xmm0, 40(%rsp)
movss 12(%rsp), %xmm0
movss 28(%rsp), %xmm1
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss 32(%rsp), %xmm3
movss 36(%rsp), %xmm2
movss 40(%rsp), %xmm1
@@ -91,7 +91,7 @@
leaq 24(%rsp), %rsi
leaq 28(%rsp), %rdi
movaps %xmm0, (%rsp)
- call \callee@PLT
+ call JUMPTARGET(\callee)
leaq 24(%rsp), %rsi
leaq 28(%rsp), %rdi
movss 28(%rsp), %xmm0
@@ -101,7 +101,7 @@
movss %xmm0, (%rbx)
movaps %xmm1, %xmm0
shufps $85, %xmm1, %xmm0
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss 28(%rsp), %xmm0
leaq 24(%rsp), %rsi
movss %xmm0, 4(%rbp)
@@ -111,7 +111,7 @@
movss %xmm0, 4(%rbx)
movaps %xmm1, %xmm0
unpckhps %xmm1, %xmm0
- call \callee@PLT
+ call JUMPTARGET(\callee)
movaps (%rsp), %xmm1
leaq 24(%rsp), %rsi
leaq 28(%rsp), %rdi
@@ -121,7 +121,7 @@
movss 24(%rsp), %xmm0
movss %xmm0, 8(%rbx)
movaps %xmm1, %xmm0
- call \callee@PLT
+ call JUMPTARGET(\callee)
movss 28(%rsp), %xmm0
movss %xmm0, 12(%rbp)
movss 24(%rsp), %xmm0
@@ -246,29 +246,14 @@
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
+ vmovups %zmm0, (%rsp)
vmovupd (%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 64(%rsp)
vmovupd 32(%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0. */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x10
- .byte 0x44
- .byte 0x24
- .byte 0x01
+ vmovups 64(%rsp), %zmm0
movq %rbp, %rsp
cfi_def_cfa_register (%rsp)
popq %rbp
@@ -286,23 +271,8 @@
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x4c
- .byte 0x24
- .byte 0x01
+ vmovups %zmm0, (%rsp)
+ vmovups %zmm1, 64(%rsp)
vmovups (%rsp), %ymm0
vmovups 64(%rsp), %ymm1
call HIDDEN_JUMPTARGET(\callee)
@@ -311,15 +281,7 @@
vmovups 96(%rsp), %ymm1
call HIDDEN_JUMPTARGET(\callee)
vmovups %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0. */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x10
- .byte 0x44
- .byte 0x24
- .byte 0x02
+ vmovups 128(%rsp), %zmm0
movq %rbp, %rsp
cfi_def_cfa_register (%rsp)
popq %rbp
@@ -340,14 +302,7 @@
pushq %r13
subq $176, %rsp
movq %rsi, %r13
-/* Below is encoding for vmovaps %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x29
- .byte 0x04
- .byte 0x24
+ vmovaps %zmm0, (%rsp)
movq %rdi, %r12
vmovaps (%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c
new file mode 100644
index 0000000000..514883dcf9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c
new file mode 100644
index 0000000000..514883dcf9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c
new file mode 100644
index 0000000000..514883dcf9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c
new file mode 100644
index 0000000000..6f2e588021
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c
@@ -0,0 +1,25 @@
+/* Part of test to build shared library to ensure link against
+ *_finite aliases from libmvec.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <stdlib.h>
+#include <math-tests-arch.h>
+
+#include "test-double.h"
+#include "test-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias.c
new file mode 100644
index 0000000000..d38b49d6c8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias.c
@@ -0,0 +1,29 @@
+/* Part of test to ensure link against *_finite aliases from libmvec.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern int
+test_finite_alias (void);
+
+/* Test body: call test_finite_alias and pass its return value back
+   unchanged.  */
+static int
+do_test (void)
+{
+  int status = test_finite_alias ();
+
+  return status;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../../test-skeleton.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c
new file mode 100644
index 0000000000..fc2ffea314
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos-main.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c
new file mode 100644
index 0000000000..896f1bcbaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c
new file mode 100644
index 0000000000..fc2ffea314
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos-main.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c
new file mode 100644
index 0000000000..896f1bcbaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c
new file mode 100644
index 0000000000..fc2ffea314
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos-main.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c
new file mode 100644
index 0000000000..896f1bcbaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c
new file mode 100644
index 0000000000..2e52fddf5d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c
@@ -0,0 +1,43 @@
+/* Test for vector sincos ABI.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#define N 1000
+double x[N], s[N], c[N];
+double* s_ptrs[N];
+double* c_ptrs[N];
+
+int
+test_sincos_abi (void)
+{
+ int i;
+
+ for(i = 0; i < N; i++)
+ {
+ x[i] = i / 3;
+ s_ptrs[i] = &s[i];
+ c_ptrs[i] = &c[i];
+ }
+
+#pragma omp simd
+ for(i = 0; i < N; i++)
+ sincos (x[i], s_ptrs[i], c_ptrs[i]);
+
+ return 0;
+}
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c
new file mode 100644
index 0000000000..cffaa73135
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c
@@ -0,0 +1,44 @@
+/* Test for vector sincos ABI.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math-tests-arch.h>
+
+extern int test_sincos_abi (void);
+
+int arch_check = 1;
+
+static void
+check_arch (void)
+{
+ CHECK_ARCH_EXT;
+ arch_check = 0;
+}
+
+static int
+do_test (void)
+{
+ check_arch ();
+
+ if (arch_check)
+ return 77;
+
+ return test_sincos_abi ();
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../../test-skeleton.c"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
index a9d15979aa..4ff1439f9c 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for SSE ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,17 @@
<http://www.gnu.org/licenses/>. */
#include "test-double-vlen2.h"
+#include "test-math-vector-sincos.h"
#include <immintrin.h>
#define VEC_TYPE __m128d
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVbN2v_log)
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVbN2v_exp)
VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVbN2vv_pow)
+
+#define VEC_INT_TYPE __m128i
+
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
index eb6a531502..c7bdad517b 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX2 ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,6 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include "test-double-vlen4.h"
+#include "test-math-vector-sincos.h"
#include <immintrin.h>
#undef VEC_SUFF
@@ -26,7 +27,14 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVdN4v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVdN4v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVdN4vvv_sincos)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVdN4v_log)
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVdN4v_exp)
VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVdN4vv_pow)
+
+#ifndef __ILP32__
+# define VEC_INT_TYPE __m256i
+#else
+# define VEC_INT_TYPE __m128i
+#endif
+
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVdN4vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.h
index 0cadef03d6..4b196e66fc 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.h
@@ -1,5 +1,5 @@
/* Tests for AVX2 ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,18 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "test-double-vlen4.h"
+#include <test-double-vlen4.h>
#undef VEC_SUFF
#define VEC_SUFF _vlen4_avx2
-#define TEST_VECTOR_cos 1
-#define TEST_VECTOR_sin 1
-#define TEST_VECTOR_sincos 1
-#define TEST_VECTOR_log 1
-#define TEST_VECTOR_exp 1
-#define TEST_VECTOR_pow 1
-
+#undef REQUIRE_AVX
#define REQUIRE_AVX2
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
index 52b81da3ee..2bb0085700 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
<http://www.gnu.org/licenses/>. */
#include "test-double-vlen4.h"
+#include "test-math-vector-sincos.h"
#include <immintrin.h>
#define VEC_TYPE __m256d
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVcN4v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVcN4v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVcN4v_log)
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVcN4v_exp)
VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVcN4vv_pow)
+
+#define VEC_INT_TYPE __m128i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos)
+#else
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.h
index 9ae97f1388..316340cb59 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4.h
@@ -1,5 +1,5 @@
/* Tests for AVX ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,6 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "test-double-vlen4.h"
-
-#define TEST_VECTOR_cos 1
-#define TEST_VECTOR_sin 1
-#define TEST_VECTOR_sincos 1
-#define TEST_VECTOR_log 1
-#define TEST_VECTOR_exp 1
-#define TEST_VECTOR_pow 1
+#include_next <test-double-vlen4.h>
#define REQUIRE_AVX
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
index c10bb9cb4a..ea179284ed 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX-512 versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
<http://www.gnu.org/licenses/>. */
#include "test-double-vlen8.h"
+#include "test-math-vector-sincos.h"
#include <immintrin.h>
#define VEC_TYPE __m512d
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVeN8v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVeN8v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVeN8vvv_sincos)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVeN8v_log)
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVeN8v_exp)
VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVeN8vv_pow)
+
+#ifndef __ILP32__
+# define VEC_INT_TYPE __m512i
+#else
+# define VEC_INT_TYPE __m256i
+#endif
+
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVeN8vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8.c b/sysdeps/x86_64/fpu/test-double-vlen8.h
index 4fb6c8d196..41d188081e 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8.h
@@ -1,5 +1,5 @@
/* Tests for AVX-512 versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,6 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "test-double-vlen8.h"
-
-#define TEST_VECTOR_cos 1
-#define TEST_VECTOR_sin 1
-#define TEST_VECTOR_sincos 1
-#define TEST_VECTOR_log 1
-#define TEST_VECTOR_exp 1
-#define TEST_VECTOR_pow 1
+#include_next <test-double-vlen8.h>
#define REQUIRE_AVX512F
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c
new file mode 100644
index 0000000000..7fc3d8aedd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c
new file mode 100644
index 0000000000..7fc3d8aedd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c
new file mode 100644
index 0000000000..7fc3d8aedd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c
new file mode 100644
index 0000000000..5e6a587a94
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c
@@ -0,0 +1,25 @@
+/* Part of test to build shared library to ensure link against
+ *_finite aliases from libmvec.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <stdlib.h>
+#include <math-tests-arch.h>
+
+#include "test-float.h"
+#include "test-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias.c
new file mode 100644
index 0000000000..d38b49d6c8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias.c
@@ -0,0 +1,29 @@
+/* Part of test to ensure link against *_finite aliases from libmvec.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern int
+test_finite_alias (void);
+
+static int
+do_test (void)
+{
+ return test_finite_alias ();
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../../test-skeleton.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c
new file mode 100644
index 0000000000..558e2ac649
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf-main.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c
new file mode 100644
index 0000000000..5b45f0a055
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c
new file mode 100644
index 0000000000..558e2ac649
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf-main.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c
new file mode 100644
index 0000000000..5b45f0a055
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c
new file mode 100644
index 0000000000..558e2ac649
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf-main.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c
new file mode 100644
index 0000000000..5b45f0a055
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c
new file mode 100644
index 0000000000..ce1dd1a8a4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c
@@ -0,0 +1,42 @@
+/* Test for vector sincosf ABI.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#define N 1000
+float x[N], s[N], c[N];
+float *s_ptrs[N];
+float *c_ptrs[N];
+
+int
+test_sincosf_abi (void)
+{
+ int i;
+ for(i = 0; i < N; i++)
+ {
+ x[i] = i / 3;
+ s_ptrs[i] = &s[i];
+ c_ptrs[i] = &c[i];
+ }
+
+#pragma omp simd
+ for(i = 0; i < N; i++)
+ sincosf (x[i], s_ptrs[i], c_ptrs[i]);
+
+ return 0;
+}
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c
new file mode 100644
index 0000000000..a56d9680a0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c
@@ -0,0 +1,44 @@
+/* Test for vector sincosf ABI.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math-tests-arch.h>
+
+extern int test_sincosf_abi (void);
+
+int arch_check = 1;
+
+static void
+check_arch (void)
+{
+ CHECK_ARCH_EXT;
+ arch_check = 0;
+}
+
+static int
+do_test (void)
+{
+ check_arch ();
+
+ if (arch_check)
+ return 77;
+
+ return test_sincosf_abi ();
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../../test-skeleton.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
index dc09e4a338..d2a81ecf53 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX-512 ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
<http://www.gnu.org/licenses/>. */
#include "test-float-vlen16.h"
+#include "test-math-vector-sincos.h"
#include <immintrin.h>
#define VEC_TYPE __m512
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVeN16v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVeN16v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf)
VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVeN16v_logf)
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVeN16v_expf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVeN16vv_powf)
+
+#define VEC_INT_TYPE __m512i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf)
+#else
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16.c b/sysdeps/x86_64/fpu/test-float-vlen16.h
index 882bfc840d..ffe27866b5 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen16.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16.h
@@ -1,5 +1,5 @@
/* Tests for AVX-512 ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,6 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "test-float-vlen16.h"
-
-#define TEST_VECTOR_cosf 1
-#define TEST_VECTOR_sinf 1
-#define TEST_VECTOR_sincosf 1
-#define TEST_VECTOR_logf 1
-#define TEST_VECTOR_expf 1
-#define TEST_VECTOR_powf 1
+#include_next <test-float-vlen16.h>
#define REQUIRE_AVX512F
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
index 0bb9818146..afa7da26f6 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for SSE ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
<http://www.gnu.org/licenses/>. */
#include "test-float-vlen4.h"
+#include "test-math-vector-sincos.h"
#include <immintrin.h>
#define VEC_TYPE __m128
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVbN4v_logf)
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVbN4v_expf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVbN4vv_powf)
+
+#define VEC_INT_TYPE __m128i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
+#else
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
index 4985ac2379..d7e79a3f37 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX2 ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,6 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include "test-float-vlen8.h"
+#include "test-math-vector-sincos.h"
#include <immintrin.h>
#undef VEC_SUFF
@@ -26,7 +27,17 @@
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVdN8v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVdN8v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf)
VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVdN8v_logf)
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVdN8v_expf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVdN8vv_powf)
+
+/* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */
+#undef VECTOR_WRAPPER_fFF
+
+#define VEC_INT_TYPE __m256i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf)
+#else
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.h
index 7a416385b6..c468dd6e69 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.h
@@ -1,5 +1,5 @@
/* Tests for AVX2 ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,18 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "test-float-vlen8.h"
+#include <test-float-vlen8.h>
#undef VEC_SUFF
#define VEC_SUFF _vlen8_avx2
-#define TEST_VECTOR_cosf 1
-#define TEST_VECTOR_sinf 1
-#define TEST_VECTOR_sincosf 1
-#define TEST_VECTOR_logf 1
-#define TEST_VECTOR_expf 1
-#define TEST_VECTOR_powf 1
-
+#undef REQUIRE_AVX
#define REQUIRE_AVX2
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
index 9cc2883399..6f7869ba3d 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
<http://www.gnu.org/licenses/>. */
#include "test-float-vlen8.h"
+#include "test-math-vector-sincos.h"
#include <immintrin.h>
#define VEC_TYPE __m256
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVcN8v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVcN8v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf)
VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVcN8v_logf)
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVcN8v_expf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVcN8vv_powf)
+
+#define VEC_INT_TYPE __m128i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_4 (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf)
+#else
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8.c b/sysdeps/x86_64/fpu/test-float-vlen8.h
index c92a50ae7e..153820ecc2 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8.h
@@ -1,5 +1,5 @@
/* Tests for AVX ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,6 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "test-float-vlen8.h"
-
-#define TEST_VECTOR_cosf 1
-#define TEST_VECTOR_sinf 1
-#define TEST_VECTOR_sincosf 1
-#define TEST_VECTOR_logf 1
-#define TEST_VECTOR_expf 1
-#define TEST_VECTOR_powf 1
+#include_next <test-float-vlen8.h>
#define REQUIRE_AVX
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c
new file mode 100644
index 0000000000..6d70844147
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c
@@ -0,0 +1,66 @@
+/* Part of test to build shared library to ensure link against
+ *_finite aliases from libmvec.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define N 4000
+FLOAT log_arg[N];
+FLOAT exp_arg[N];
+FLOAT log_res[N];
+FLOAT exp_res[N];
+FLOAT pow_res[N];
+int arch_check = 1;
+
+static void
+init_arg (void)
+{
+ int i;
+
+ CHECK_ARCH_EXT;
+
+ arch_check = 0;
+
+ for (i = 0; i < N; i += 1)
+ {
+ log_arg[i] = 1.0;
+ exp_arg[i] = 0.0;
+ }
+}
+
+int
+test_finite_alias (void)
+{
+ int i;
+
+ init_arg ();
+
+ if (arch_check) return 77;
+
+#pragma omp simd
+ for (i = 0; i < N; i += 1)
+ {
+ log_res[i] = FUNC (log) (log_arg[i]);
+ exp_res[i] = FUNC (exp) (exp_arg[i]);
+ pow_res[i] = FUNC (pow) (log_arg[i], log_arg[i]);
+ }
+
+ if (log_res[0] != 0.0) return 1;
+ if (exp_res[0] != 1.0) return 1;
+ if (pow_res[0] != 1.0) return 1;
+
+ return 0;
+}
diff --git a/sysdeps/x86_64/fpu/x86_64-math-asm.h b/sysdeps/x86_64/fpu/x86_64-math-asm.h
index db3f9f78b0..597b967b7b 100644
--- a/sysdeps/x86_64/fpu/x86_64-math-asm.h
+++ b/sysdeps/x86_64/fpu/x86_64-math-asm.h
@@ -1,5 +1,5 @@
/* Helper macros for x86_64 libm functions.
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/hp-timing.h b/sysdeps/x86_64/hp-timing.h
index 65381b314d..ec543bef03 100644
--- a/sysdeps/x86_64/hp-timing.h
+++ b/sysdeps/x86_64/hp-timing.h
@@ -1,5 +1,5 @@
/* High precision, low overhead timing functions. x86-64 version.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/htonl.S b/sysdeps/x86_64/htonl.S
index c92fae8791..23e2046caa 100644
--- a/sysdeps/x86_64/htonl.S
+++ b/sysdeps/x86_64/htonl.S
@@ -1,5 +1,5 @@
/* Change byte order in word. For AMD x86-64.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/ifuncmain8.c b/sysdeps/x86_64/ifuncmain8.c
index 448ab96bfa..449998df50 100644
--- a/sysdeps/x86_64/ifuncmain8.c
+++ b/sysdeps/x86_64/ifuncmain8.c
@@ -1,5 +1,5 @@
/* Test IFUNC selector with floating-point parameters.
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/ifuncmod8.c b/sysdeps/x86_64/ifuncmod8.c
index c00436799c..8225c4da12 100644
--- a/sysdeps/x86_64/ifuncmod8.c
+++ b/sysdeps/x86_64/ifuncmod8.c
@@ -1,5 +1,5 @@
/* Test IFUNC selector with floating-point parameters.
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -28,6 +28,7 @@ foo_impl (float x)
}
void *
+inhibit_stack_protector
foo_ifunc (void)
{
__m128i xmm = _mm_set1_epi32 (-1);
diff --git a/sysdeps/x86_64/jmpbuf-offsets.h b/sysdeps/x86_64/jmpbuf-offsets.h
index da71e555f7..6d1ee5e812 100644
--- a/sysdeps/x86_64/jmpbuf-offsets.h
+++ b/sysdeps/x86_64/jmpbuf-offsets.h
@@ -1,5 +1,5 @@
/* Private macros for accessing __jmp_buf contents. x86-64 version.
- Copyright (C) 2006-2016 Free Software Foundation, Inc.
+ Copyright (C) 2006-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/jmpbuf-unwind.h b/sysdeps/x86_64/jmpbuf-unwind.h
index aa0642b54a..49208bdd9e 100644
--- a/sysdeps/x86_64/jmpbuf-unwind.h
+++ b/sysdeps/x86_64/jmpbuf-unwind.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
diff --git a/sysdeps/x86_64/ldsodefs.h b/sysdeps/x86_64/ldsodefs.h
deleted file mode 100644
index 6a96c53721..0000000000
--- a/sysdeps/x86_64/ldsodefs.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Run-time dynamic linker data structures for loaded ELF shared objects.
- Copyright (C) 1995-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _X86_64_LDSODEFS_H
-#define _X86_64_LDSODEFS_H 1
-
-#include <elf.h>
-#include <cpu-features.h>
-
-struct La_x86_64_regs;
-struct La_x86_64_retval;
-struct La_x32_regs;
-struct La_x32_retval;
-
-#define ARCH_PLTENTER_MEMBERS \
- Elf64_Addr (*x86_64_gnu_pltenter) (Elf64_Sym *, unsigned int, \
- uintptr_t *, \
- uintptr_t *, struct La_x86_64_regs *, \
- unsigned int *, const char *name, \
- long int *framesizep); \
- Elf32_Addr (*x32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \
- uintptr_t *, struct La_x32_regs *, \
- unsigned int *, const char *name, \
- long int *framesizep)
-
-#define ARCH_PLTEXIT_MEMBERS \
- unsigned int (*x86_64_gnu_pltexit) (Elf64_Sym *, unsigned int, \
- uintptr_t *, \
- uintptr_t *, \
- const struct La_x86_64_regs *, \
- struct La_x86_64_retval *, \
- const char *); \
- unsigned int (*x32_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *, \
- uintptr_t *, \
- const struct La_x32_regs *, \
- struct La_x86_64_retval *, \
- const char *)
-
-#include_next <ldsodefs.h>
-
-#endif
diff --git a/sysdeps/x86_64/localplt.data b/sysdeps/x86_64/localplt.data
index f168b143ff..c27a02b66a 100644
--- a/sysdeps/x86_64/localplt.data
+++ b/sysdeps/x86_64/localplt.data
@@ -8,12 +8,15 @@ libc.so: free + RELA R_X86_64_GLOB_DAT
libc.so: malloc + RELA R_X86_64_GLOB_DAT
libc.so: memalign + RELA R_X86_64_GLOB_DAT
libc.so: realloc + RELA R_X86_64_GLOB_DAT
-libm.so: matherr
-# The dynamic loader uses __libc_memalign internally to allocate aligned
-# TLS storage. The other malloc family of functions are expected to allow
-# user symbol interposition.
-ld.so: __libc_memalign + RELA R_X86_64_GLOB_DAT
+libm.so: matherr + RELA R_X86_64_GLOB_DAT
+# The main malloc is interposed into the dynamic linker, for
+# allocations after the initial link (when dlopen is used).
ld.so: malloc + RELA R_X86_64_GLOB_DAT
ld.so: calloc + RELA R_X86_64_GLOB_DAT
ld.so: realloc + RELA R_X86_64_GLOB_DAT
ld.so: free + RELA R_X86_64_GLOB_DAT
+# The TLS-enabled version of these functions is interposed from libc.so.
+ld.so: _dl_signal_error + RELA R_X86_64_GLOB_DAT
+ld.so: _dl_catch_error + RELA R_X86_64_GLOB_DAT
+ld.so: _dl_signal_exception + RELA R_X86_64_GLOB_DAT
+ld.so: _dl_catch_exception + RELA R_X86_64_GLOB_DAT
diff --git a/sysdeps/x86_64/lshift.S b/sysdeps/x86_64/lshift.S
index 49cbfbaf3d..af568768d0 100644
--- a/sysdeps/x86_64/lshift.S
+++ b/sysdeps/x86_64/lshift.S
@@ -1,5 +1,5 @@
/* x86-64 __mpn_lshift --
- Copyright (C) 2007-2016 Free Software Foundation, Inc.
+ Copyright (C) 2007-2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/machine-gmon.h b/sysdeps/x86_64/machine-gmon.h
index 3d9ce5c44e..8bc111612c 100644
--- a/sysdeps/x86_64/machine-gmon.h
+++ b/sysdeps/x86_64/machine-gmon.h
@@ -1,5 +1,5 @@
/* x86-64-specific implementation of profiling support.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 132eacba8f..feef5d4f24 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -18,26 +18,40 @@
#include <sysdep.h>
+#ifdef USE_AS_WMEMCHR
+# define MEMCHR wmemchr
+# define PCMPEQ pcmpeqd
+#else
+# define MEMCHR memchr
+# define PCMPEQ pcmpeqb
+#endif
+
/* fast SSE2 version with using pmaxub and 64 byte loop */
.text
-ENTRY(memchr)
- movd %rsi, %xmm1
- mov %rdi, %rcx
+ENTRY(MEMCHR)
+ movd %esi, %xmm1
+ mov %edi, %ecx
+#ifdef USE_AS_WMEMCHR
+ test %rdx, %rdx
+ jz L(return_null)
+ shl $2, %rdx
+#else
punpcklbw %xmm1, %xmm1
test %rdx, %rdx
jz L(return_null)
punpcklbw %xmm1, %xmm1
+#endif
- and $63, %rcx
+ and $63, %ecx
pshufd $0, %xmm1, %xmm1
- cmp $48, %rcx
+ cmp $48, %ecx
ja L(crosscache)
movdqu (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
@@ -45,7 +59,7 @@ ENTRY(memchr)
sub $16, %rdx
jbe L(return_null)
add $16, %rdi
- and $15, %rcx
+ and $15, %ecx
and $-16, %rdi
add %rcx, %rdx
sub $64, %rdx
@@ -54,11 +68,11 @@ ENTRY(memchr)
.p2align 4
L(crosscache):
- and $15, %rcx
+ and $15, %ecx
and $-16, %rdi
movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
@@ -76,8 +90,12 @@ L(crosscache):
.p2align 4
L(unaligned_no_match):
- add %rcx, %rdx
- sub $16, %rdx
+ /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
+ "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to avoid
+ possible addition overflow. */
+ neg %rcx
+ add $16, %rcx
+ sub %rcx, %rdx
jbe L(return_null)
add $16, %rdi
sub $64, %rdx
@@ -86,25 +104,25 @@ L(unaligned_no_match):
.p2align 4
L(loop_prolog):
movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
+ PCMPEQ %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
+ PCMPEQ %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
movdqa 48(%rdi), %xmm4
- pcmpeqb %xmm1, %xmm4
+ PCMPEQ %xmm1, %xmm4
add $64, %rdi
pmovmskb %xmm4, %eax
test %eax, %eax
@@ -117,25 +135,25 @@ L(loop_prolog):
jbe L(exit_loop)
movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
+ PCMPEQ %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
+ PCMPEQ %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
movdqa 48(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
+ PCMPEQ %xmm1, %xmm3
pmovmskb %xmm3, %eax
add $64, %rdi
@@ -144,7 +162,7 @@ L(loop_prolog):
mov %rdi, %rcx
and $-64, %rdi
- and $63, %rcx
+ and $63, %ecx
add %rcx, %rdx
.p2align 4
@@ -156,10 +174,10 @@ L(align64_loop):
movdqa 32(%rdi), %xmm3
movdqa 48(%rdi), %xmm4
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
+ PCMPEQ %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm2
+ PCMPEQ %xmm1, %xmm3
+ PCMPEQ %xmm1, %xmm4
pmaxub %xmm0, %xmm3
pmaxub %xmm2, %xmm4
@@ -182,9 +200,9 @@ L(align64_loop):
jnz L(matches16)
movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
+ PCMPEQ %xmm1, %xmm3
- pcmpeqb 48(%rdi), %xmm1
+ PCMPEQ 48(%rdi), %xmm1
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
@@ -196,52 +214,52 @@ L(align64_loop):
.p2align 4
L(exit_loop):
- add $32, %rdx
+ add $32, %edx
jle L(exit_loop_32)
movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
+ PCMPEQ %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
+ PCMPEQ %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32_1)
- sub $16, %rdx
+ sub $16, %edx
jle L(return_null)
- pcmpeqb 48(%rdi), %xmm1
+ PCMPEQ 48(%rdi), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(matches48_1)
- xor %rax, %rax
+ xor %eax, %eax
ret
.p2align 4
L(exit_loop_32):
- add $32, %rdx
+ add $32, %edx
movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches_1)
- sub $16, %rdx
+ sub $16, %edx
jbe L(return_null)
- pcmpeqb 16(%rdi), %xmm1
+ PCMPEQ 16(%rdi), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(matches16_1)
- xor %rax, %rax
+ xor %eax, %eax
ret
.p2align 4
@@ -302,10 +320,11 @@ L(matches48_1):
.p2align 4
L(return_null):
- xor %rax, %rax
+ xor %eax, %eax
ret
-END(memchr)
+END(MEMCHR)
+#ifndef USE_AS_WMEMCHR
strong_alias (memchr, __memchr)
-
libc_hidden_builtin_def(memchr)
+#endif
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index 3fb018a772..bcb4a2e88d 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -1,5 +1,5 @@
/* memcmp with SSE2
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/memcopy.h b/sysdeps/x86_64/memcopy.h
new file mode 100644
index 0000000000..590b6cb16b
--- /dev/null
+++ b/sysdeps/x86_64/memcopy.h
@@ -0,0 +1 @@
+/* X86-64 doesn't use memory copy functions. */
diff --git a/sysdeps/x86_64/memcpy.S b/sysdeps/x86_64/memcpy.S
index f6e3d9396c..d98500a78a 100644
--- a/sysdeps/x86_64/memcpy.S
+++ b/sysdeps/x86_64/memcpy.S
@@ -1,584 +1 @@
-/*
- Optimized memcpy for x86-64.
-
- Copyright (C) 2007-2016 Free Software Foundation, Inc.
- Contributed by Evandro Menezes <evandro.menezes@amd.com>, 2007.
-
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-/* Stack slots in the red-zone. */
-
-#ifdef USE_AS_MEMPCPY
-# define RETVAL (0)
-#else
-# define RETVAL (-8)
-# if defined SHARED && !defined USE_MULTIARCH && IS_IN (libc)
-# define memcpy __memcpy
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memcpy; __GI_memcpy = __memcpy
-# endif
-#endif
-#define SAVE0 (RETVAL - 8)
-#define SAVE1 (SAVE0 - 8)
-#define SAVE2 (SAVE1 - 8)
-#define SAVE3 (SAVE2 - 8)
-
- .text
-
-#if defined PIC && IS_IN (libc)
-ENTRY_CHK (__memcpy_chk)
-
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-
-END_CHK (__memcpy_chk)
-#endif
-
-ENTRY(memcpy) /* (void *, const void*, size_t) */
-
-/* Handle tiny blocks. */
-
-L(1try): /* up to 32B */
- cmpq $32, %rdx
-#ifndef USE_AS_MEMPCPY
- movq %rdi, %rax /* save return value */
-#endif
- jae L(1after)
-
-L(1): /* 1-byte once */
- testb $1, %dl
- jz L(1a)
-
- movzbl (%rsi), %ecx
- movb %cl, (%rdi)
-
- incq %rsi
- incq %rdi
-
- .p2align 4,, 4
-
-L(1a): /* 2-byte once */
- testb $2, %dl
- jz L(1b)
-
- movzwl (%rsi), %ecx
- movw %cx, (%rdi)
-
- addq $2, %rsi
- addq $2, %rdi
-
- .p2align 4,, 4
-
-L(1b): /* 4-byte once */
- testb $4, %dl
- jz L(1c)
-
- movl (%rsi), %ecx
- movl %ecx, (%rdi)
-
- addq $4, %rsi
- addq $4, %rdi
-
- .p2align 4,, 4
-
-L(1c): /* 8-byte once */
- testb $8, %dl
- jz L(1d)
-
- movq (%rsi), %rcx
- movq %rcx, (%rdi)
-
- addq $8, %rsi
- addq $8, %rdi
-
- .p2align 4,, 4
-
-L(1d): /* 16-byte loop */
- andl $0xf0, %edx
- jz L(exit)
-
- .p2align 4
-
-L(1loop):
- movq (%rsi), %rcx
- movq 8(%rsi), %r8
- movq %rcx, (%rdi)
- movq %r8, 8(%rdi)
-
- subl $16, %edx
-
- leaq 16(%rsi), %rsi
- leaq 16(%rdi), %rdi
-
- jnz L(1loop)
-
- .p2align 4,, 4
-
-L(exit): /* exit */
-#ifdef USE_AS_MEMPCPY
- movq %rdi, %rax /* return value */
-#else
- rep
-#endif
- retq
-
- .p2align 4
-
-L(1after):
-#ifndef USE_AS_MEMPCPY
- movq %rax, RETVAL(%rsp) /* save return value */
-#endif
-
-/* Align to the natural word size. */
-
-L(aligntry):
- movl %esi, %ecx /* align by source */
-
- andl $7, %ecx
- jz L(alignafter) /* already aligned */
-
-L(align): /* align */
- leaq -8(%rcx, %rdx), %rdx /* calculate remaining bytes */
- subl $8, %ecx
-
- .p2align 4
-
-L(alignloop): /* 1-byte alignment loop */
- movzbl (%rsi), %eax
- movb %al, (%rdi)
-
- incl %ecx
-
- leaq 1(%rsi), %rsi
- leaq 1(%rdi), %rdi
-
- jnz L(alignloop)
-
- .p2align 4
-
-L(alignafter):
-
-/* Handle mid-sized blocks. */
-
-L(32try): /* up to 1KB */
- cmpq $1024, %rdx
- ja L(32after)
-
-L(32): /* 32-byte loop */
- movl %edx, %ecx
- shrl $5, %ecx
- jz L(32skip)
-
- .p2align 4
-
-L(32loop):
- decl %ecx
-
- movq (%rsi), %rax
- movq 8(%rsi), %r8
- movq 16(%rsi), %r9
- movq 24(%rsi), %r10
-
- movq %rax, (%rdi)
- movq %r8, 8(%rdi)
- movq %r9, 16(%rdi)
- movq %r10, 24(%rdi)
-
- leaq 32(%rsi), %rsi
- leaq 32(%rdi), %rdi
-
- jz L(32skip) /* help out smaller blocks */
-
- decl %ecx
-
- movq (%rsi), %rax
- movq 8(%rsi), %r8
- movq 16(%rsi), %r9
- movq 24(%rsi), %r10
-
- movq %rax, (%rdi)
- movq %r8, 8(%rdi)
- movq %r9, 16(%rdi)
- movq %r10, 24(%rdi)
-
- leaq 32(%rsi), %rsi
- leaq 32(%rdi), %rdi
-
- jnz L(32loop)
-
- .p2align 4
-
-L(32skip):
- andl $31, %edx /* check for left overs */
-#ifdef USE_AS_MEMPCPY
- jnz L(1)
-
- movq %rdi, %rax
-#else
- movq RETVAL(%rsp), %rax
- jnz L(1)
-
- rep
-#endif
- retq /* exit */
-
- .p2align 4
-
-L(32after):
-
-/*
- In order to minimize code-size in RTLD, algorithms specific for
- larger blocks are excluded when building for RTLD.
-*/
-
-/* Handle blocks smaller than 1/2 L1. */
-
-L(fasttry): /* first 1/2 L1 */
-#if IS_IN (libc) /* only up to this algorithm outside of libc.so */
- mov __x86_data_cache_size_half(%rip), %R11_LP
- cmpq %rdx, %r11 /* calculate the smaller of */
- cmovaq %rdx, %r11 /* remaining bytes and 1/2 L1 */
-#endif
-
-L(fast): /* good ol' MOVS */
-#if IS_IN (libc)
- movq %r11, %rcx
- andq $-8, %r11
-#else
- movq %rdx, %rcx
-#endif
- shrq $3, %rcx
- jz L(fastskip)
-
- rep
- movsq
-
- .p2align 4,, 4
-
-L(fastskip):
-#if IS_IN (libc)
- subq %r11, %rdx /* check for more */
- testq $-8, %rdx
- jnz L(fastafter)
-#endif
-
- andl $7, %edx /* check for left overs */
-#ifdef USE_AS_MEMPCPY
- jnz L(1)
-
- movq %rdi, %rax
-#else
- movq RETVAL(%rsp), %rax
- jnz L(1)
-
- rep
-#endif
- retq /* exit */
-
-#if IS_IN (libc) /* none of the algorithms below for RTLD */
-
- .p2align 4
-
-L(fastafter):
-
-/* Handle large blocks smaller than 1/2 L2. */
-
-L(pretry): /* first 1/2 L2 */
- mov __x86_shared_cache_size_half (%rip), %R8_LP
- cmpq %rdx, %r8 /* calculate the lesser of */
- cmovaq %rdx, %r8 /* remaining bytes and 1/2 L2 */
-
-L(pre): /* 64-byte with prefetching */
- movq %r8, %rcx
- andq $-64, %r8
- shrq $6, %rcx
- jz L(preskip)
-
- movq %r14, SAVE0(%rsp)
- cfi_rel_offset (%r14, SAVE0)
- movq %r13, SAVE1(%rsp)
- cfi_rel_offset (%r13, SAVE1)
- movq %r12, SAVE2(%rsp)
- cfi_rel_offset (%r12, SAVE2)
- movq %rbx, SAVE3(%rsp)
- cfi_rel_offset (%rbx, SAVE3)
-
- cmpl $0, __x86_prefetchw(%rip)
- jz L(preloop) /* check if PREFETCHW OK */
-
- .p2align 4
-
-/* ... when PREFETCHW is available (less cache-probe traffic in MP systems). */
-
-L(prewloop): /* cache-line in state M */
- decq %rcx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %r9
- movq 24 (%rsi), %r10
- movq 32 (%rsi), %r11
- movq 40 (%rsi), %r12
- movq 48 (%rsi), %r13
- movq 56 (%rsi), %r14
-
- prefetcht0 0 + 896 (%rsi)
- prefetcht0 64 + 896 (%rsi)
-
- movq %rax, (%rdi)
- movq %rbx, 8(%rdi)
- movq %r9, 16(%rdi)
- movq %r10, 24(%rdi)
- movq %r11, 32(%rdi)
- movq %r12, 40(%rdi)
- movq %r13, 48(%rdi)
- movq %r14, 56(%rdi)
-
- leaq 64(%rsi), %rsi
- leaq 64(%rdi), %rdi
-
- jz L(prebail)
-
- decq %rcx
-
- movq (%rsi), %rax
- movq 8(%rsi), %rbx
- movq 16(%rsi), %r9
- movq 24(%rsi), %r10
- movq 32(%rsi), %r11
- movq 40(%rsi), %r12
- movq 48(%rsi), %r13
- movq 56(%rsi), %r14
-
- movq %rax, (%rdi)
- movq %rbx, 8(%rdi)
- movq %r9, 16(%rdi)
- movq %r10, 24(%rdi)
- movq %r11, 32(%rdi)
- movq %r12, 40(%rdi)
- movq %r13, 48(%rdi)
- movq %r14, 56(%rdi)
-
- prefetchw 896 - 64(%rdi)
- prefetchw 896 - 0(%rdi)
-
- leaq 64(%rsi), %rsi
- leaq 64(%rdi), %rdi
-
- jnz L(prewloop)
- jmp L(prebail)
-
- .p2align 4
-
-/* ... when PREFETCHW is not available. */
-
-L(preloop): /* cache-line in state E */
- decq %rcx
-
- movq (%rsi), %rax
- movq 8(%rsi), %rbx
- movq 16(%rsi), %r9
- movq 24(%rsi), %r10
- movq 32(%rsi), %r11
- movq 40(%rsi), %r12
- movq 48(%rsi), %r13
- movq 56(%rsi), %r14
-
- prefetcht0 896 + 0(%rsi)
- prefetcht0 896 + 64(%rsi)
-
- movq %rax, (%rdi)
- movq %rbx, 8(%rdi)
- movq %r9, 16(%rdi)
- movq %r10, 24(%rdi)
- movq %r11, 32(%rdi)
- movq %r12, 40(%rdi)
- movq %r13, 48(%rdi)
- movq %r14, 56(%rdi)
-
- leaq 64 (%rsi), %rsi
- leaq 64 (%rdi), %rdi
-
- jz L(prebail)
-
- decq %rcx
-
- movq (%rsi), %rax
- movq 8(%rsi), %rbx
- movq 16(%rsi), %r9
- movq 24(%rsi), %r10
- movq 32(%rsi), %r11
- movq 40(%rsi), %r12
- movq 48(%rsi), %r13
- movq 56(%rsi), %r14
-
- prefetcht0 896 - 64(%rdi)
- prefetcht0 896 - 0(%rdi)
-
- movq %rax, (%rdi)
- movq %rbx, 8(%rdi)
- movq %r9, 16(%rdi)
- movq %r10, 24(%rdi)
- movq %r11, 32(%rdi)
- movq %r12, 40(%rdi)
- movq %r13, 48(%rdi)
- movq %r14, 56(%rdi)
-
- leaq 64(%rsi), %rsi
- leaq 64(%rdi), %rdi
-
- jnz L(preloop)
-
-L(prebail):
- movq SAVE3(%rsp), %rbx
- cfi_restore (%rbx)
- movq SAVE2(%rsp), %r12
- cfi_restore (%r12)
- movq SAVE1(%rsp), %r13
- cfi_restore (%r13)
- movq SAVE0(%rsp), %r14
- cfi_restore (%r14)
-
-/* .p2align 4 */
-
-L(preskip):
- subq %r8, %rdx /* check for more */
- testq $-64, %rdx
- jnz L(preafter)
-
- andl $63, %edx /* check for left overs */
-#ifdef USE_AS_MEMPCPY
- jnz L(1)
-
- movq %rdi, %rax
-#else
- movq RETVAL(%rsp), %rax
- jnz L(1)
-
- rep
-#endif
- retq /* exit */
-
- .p2align 4
-
-L(preafter):
-
-/* Handle huge blocks. */
-
-L(NTtry):
-
-L(NT): /* non-temporal 128-byte */
- movq %rdx, %rcx
- shrq $7, %rcx
- jz L(NTskip)
-
- movq %r14, SAVE0(%rsp)
- cfi_rel_offset (%r14, SAVE0)
- movq %r13, SAVE1(%rsp)
- cfi_rel_offset (%r13, SAVE1)
- movq %r12, SAVE2(%rsp)
- cfi_rel_offset (%r12, SAVE2)
-
- .p2align 4
-
-L(NTloop):
- prefetchnta 768(%rsi)
- prefetchnta 832(%rsi)
-
- decq %rcx
-
- movq (%rsi), %rax
- movq 8(%rsi), %r8
- movq 16(%rsi), %r9
- movq 24(%rsi), %r10
- movq 32(%rsi), %r11
- movq 40(%rsi), %r12
- movq 48(%rsi), %r13
- movq 56(%rsi), %r14
-
- movntiq %rax, (%rdi)
- movntiq %r8, 8(%rdi)
- movntiq %r9, 16(%rdi)
- movntiq %r10, 24(%rdi)
- movntiq %r11, 32(%rdi)
- movntiq %r12, 40(%rdi)
- movntiq %r13, 48(%rdi)
- movntiq %r14, 56(%rdi)
-
- movq 64(%rsi), %rax
- movq 72(%rsi), %r8
- movq 80(%rsi), %r9
- movq 88(%rsi), %r10
- movq 96(%rsi), %r11
- movq 104(%rsi), %r12
- movq 112(%rsi), %r13
- movq 120(%rsi), %r14
-
- movntiq %rax, 64(%rdi)
- movntiq %r8, 72(%rdi)
- movntiq %r9, 80(%rdi)
- movntiq %r10, 88(%rdi)
- movntiq %r11, 96(%rdi)
- movntiq %r12, 104(%rdi)
- movntiq %r13, 112(%rdi)
- movntiq %r14, 120(%rdi)
-
- leaq 128(%rsi), %rsi
- leaq 128(%rdi), %rdi
-
- jnz L(NTloop)
-
- sfence /* serialize memory stores */
-
- movq SAVE2(%rsp), %r12
- cfi_restore (%r12)
- movq SAVE1(%rsp), %r13
- cfi_restore (%r13)
- movq SAVE0(%rsp), %r14
- cfi_restore (%r14)
-
-L(NTskip):
- andl $127, %edx /* check for left overs */
-#ifdef USE_AS_MEMPCPY
- jnz L(1)
-
- movq %rdi, %rax
-#else
- movq RETVAL(%rsp), %rax
- jnz L(1)
-
- rep
-#endif
- retq /* exit */
-
-#endif /* IS_IN (libc) */
-
-END(memcpy)
-
-#ifndef USE_AS_MEMPCPY
-libc_hidden_builtin_def (memcpy)
-# if defined SHARED && !defined USE_MULTIARCH && IS_IN (libc)
-# undef memcpy
-# include <shlib-compat.h>
-versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
-# endif
-#endif
+/* Implemented in memmove.S. */
diff --git a/sysdeps/x86_64/memcpy_chk.S b/sysdeps/x86_64/memcpy_chk.S
index 2296b55119..aa33cd5fc1 100644
--- a/sysdeps/x86_64/memcpy_chk.S
+++ b/sysdeps/x86_64/memcpy_chk.S
@@ -1,5 +1,5 @@
/* Checking memcpy for x86-64.
- Copyright (C) 2004-2016 Free Software Foundation, Inc.
+ Copyright (C) 2004-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,7 +19,7 @@
#include <sysdep.h>
#include "asm-syntax.h"
-#ifndef PIC
+#ifndef SHARED
/* For libc.so this is defined in memcpy.S.
For libc.a, this is a separate source to avoid
memcpy bringing in __chk_fail and all routines
diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
new file mode 100644
index 0000000000..9cc92ff9a9
--- /dev/null
+++ b/sysdeps/x86_64/memmove.S
@@ -0,0 +1,71 @@
+/* Optimized memmove for x86-64.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define VEC_SIZE 16 /* Vector width in bytes; matches the xmm registers selected by VEC. */
+#define VEC(i) xmm##i
+#define PREFETCHNT prefetchnta
+#define VMOVNT movntdq
+/* Use movups and movaps for smaller code sizes. */
+#define VMOVU movups
+#define VMOVA movaps
+
+#define SECTION(p) p
+ /* Pick the names under which the included code is emitted (presumably read by the template below -- confirm). */
+#ifdef USE_MULTIARCH
+# if !IS_IN (libc)
+# define MEMCPY_SYMBOL(p,s) memcpy
+# endif
+#else
+# if defined SHARED && IS_IN (libc)
+# define MEMCPY_SYMBOL(p,s) __memcpy /* Shared libc: memcpy itself is bound by the versioning macros below. */
+# else
+# define MEMCPY_SYMBOL(p,s) memcpy
+# endif
+#endif
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+# define MEMPCPY_SYMBOL(p,s) __mempcpy
+#endif
+#ifndef MEMMOVE_SYMBOL /* Defaults apply only when the including context has not set MEMMOVE_SYMBOL. */
+# define MEMMOVE_CHK_SYMBOL(p,s) p
+# define MEMMOVE_SYMBOL(p,s) memmove
+#endif
+ /* The macros above are defined before this include so that the shared template can use them. */
+#include "multiarch/memmove-vec-unaligned-erms.S"
+
+#ifndef USE_MULTIARCH /* Symbol aliases and versions for the non-multiarch build only. */
+libc_hidden_builtin_def (memmove)
+# if defined SHARED && IS_IN (libc)
+strong_alias (memmove, __memcpy) /* __memcpy becomes another name for memmove. */
+libc_hidden_ver (memmove, memcpy)
+# endif
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy) /* Public mempcpy is a weak alias of __mempcpy. */
+libc_hidden_builtin_def (mempcpy)
+ /* Shared libc only: attach symbol versions to memcpy. */
+# if defined SHARED && IS_IN (libc)
+# undef memcpy
+# include <shlib-compat.h>
+versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14); /* memcpy at GLIBC_2_14 is bound to __memcpy. */
+ /* When the GLIBC_2_2_5..GLIBC_2_14 compat range is enabled, memcpy at GLIBC_2_2_5 is bound to memmove. */
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
+# endif
+#endif
diff --git a/sysdeps/x86_64/fpu/s_fdiml.S b/sysdeps/x86_64/memmove_chk.S
index f9f1e20259..39b56dde65 100644
--- a/sysdeps/x86_64/fpu/s_fdiml.S
+++ b/sysdeps/x86_64/memmove_chk.S
@@ -1,7 +1,6 @@
-/* Compute positive difference.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+/* Checking memmove for x86-64.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -18,26 +17,17 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-
- .text
-ENTRY(__fdiml)
- fldt 8(%rsp) // x
- fldt 24(%rsp) // x : y
-
- fucomi %st(1), %st
- jp 1f
-
- jc 3f
- fstp %st(1)
- fldz
- jmp 2f
-
-3: fsubrp %st, %st(1)
- ret
-
-1: fucomi %st(0), %st
- fcmovnu %st(1), %st
-2: fstp %st(1)
- ret
-END(__fdiml)
-weak_alias (__fdiml, fdiml)
+#include "asm-syntax.h"
+
+#ifndef SHARED
+ /* For libc.so this is defined in memmove.S.
+ For libc.a, this is a separate source to avoid
+ memmove bringing in __chk_fail and all routines
+ it calls. */
+ .text
+ENTRY (__memmove_chk) /* Checked entry: %rdx/%rcx presumably hold the length and destination size -- confirm. */
+ cmpq %rdx, %rcx /* Sets flags from %rcx - %rdx (AT&T operand order). */
+ jb __chk_fail /* %rcx below %rdx (unsigned): jump to __chk_fail. */
+ jmp memmove /* Otherwise tail-jump to memmove; no argument register has been modified. */
+END (__memmove_chk)
+#endif
diff --git a/sysdeps/x86_64/mempcpy.S b/sysdeps/x86_64/mempcpy.S
index acee5e56b1..d98500a78a 100644
--- a/sysdeps/x86_64/mempcpy.S
+++ b/sysdeps/x86_64/mempcpy.S
@@ -1,8 +1 @@
-#define USE_AS_MEMPCPY
-#define memcpy __mempcpy
-#define __memcpy_chk __mempcpy_chk
-#include <sysdeps/x86_64/memcpy.S>
-
-libc_hidden_def (__mempcpy)
-weak_alias (__mempcpy, mempcpy)
-libc_hidden_builtin_def (mempcpy)
+/* Implemented in memmove.S. */
diff --git a/sysdeps/x86_64/mempcpy_chk.S b/sysdeps/x86_64/mempcpy_chk.S
index 390abc68dd..0e9e24db00 100644
--- a/sysdeps/x86_64/mempcpy_chk.S
+++ b/sysdeps/x86_64/mempcpy_chk.S
@@ -1,5 +1,5 @@
/* Checking mempcpy for x86-64.
- Copyright (C) 2004-2016 Free Software Foundation, Inc.
+ Copyright (C) 2004-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,7 +19,7 @@
#include <sysdep.h>
#include "asm-syntax.h"
-#ifndef PIC
+#ifndef SHARED
/* For libc.so this is defined in memcpy.S.
For libc.a, this is a separate source to avoid
mempcpy bringing in __chk_fail and all routines
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S
index 840de30cd7..b8e3fa1d87 100644
--- a/sysdeps/x86_64/memrchr.S
+++ b/sysdeps/x86_64/memrchr.S
@@ -1,6 +1,6 @@
/* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -22,7 +22,7 @@
.text
ENTRY (__memrchr)
- movd %rsi, %xmm1
+ movd %esi, %xmm1
sub $16, %rdx
jbe L(length_less16)
@@ -42,8 +42,8 @@ ENTRY (__memrchr)
jnz L(matches0)
sub $64, %rdi
- mov %rdi, %rcx
- and $15, %rcx
+ mov %edi, %ecx
+ and $15, %ecx
jz L(loop_prolog)
add $16, %rdi
@@ -108,8 +108,8 @@ L(loop_prolog):
test %eax, %eax
jnz L(matches0)
- mov %rdi, %rcx
- and $63, %rcx
+ mov %edi, %ecx
+ and $63, %ecx
jz L(align64_loop)
add $64, %rdi
@@ -166,8 +166,8 @@ L(align64_loop):
.p2align 4
L(exit_loop):
- add $64, %rdx
- cmp $32, %rdx
+ add $64, %edx
+ cmp $32, %edx
jbe L(exit_loop_32)
movdqa 48(%rdi), %xmm0
@@ -187,7 +187,7 @@ L(exit_loop):
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16_1)
- cmp $48, %rdx
+ cmp $48, %edx
jbe L(return_null)
pcmpeqb (%rdi), %xmm1
@@ -204,7 +204,7 @@ L(exit_loop_32):
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48_1)
- cmp $16, %rdx
+ cmp $16, %edx
jbe L(return_null)
pcmpeqb 32(%rdi), %xmm1
@@ -276,7 +276,7 @@ L(matches48_1):
.p2align 4
L(return_null):
- xor %rax, %rax
+ xor %eax, %eax
ret
.p2align 4
@@ -306,18 +306,16 @@ L(length_less16):
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
- add $16, %rdx
+ add $16, %edx
pshufd $0, %xmm1, %xmm1
- mov %rdi, %rcx
- and $15, %rcx
+ mov %edi, %ecx
+ and $15, %ecx
jz L(length_less16_offset0)
- mov %rdi, %rcx
- and $15, %rcx
mov %cl, %dh
- mov %rcx, %r8
+ mov %ecx, %esi
add %dl, %dh
and $-16, %rdi
@@ -340,7 +338,7 @@ L(length_less16):
bsr %eax, %eax
add %rdi, %rax
- add %r8, %rax
+ add %rsi, %rax
ret
.p2align 4
@@ -362,14 +360,14 @@ L(length_less16_part2):
pcmpeqb (%rdi), %xmm1
pmovmskb %xmm1, %eax
- mov %r8, %rcx
+ mov %esi, %ecx
sar %cl, %eax
test %eax, %eax
jz L(return_null)
bsr %eax, %eax
add %rdi, %rax
- add %r8, %rax
+ add %rsi, %rax
ret
.p2align 4
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 4cf0da0fb8..b342679576 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -1,6 +1,6 @@
/* memset/bzero -- set memory area to CH/0
Optimized version for x86-64.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,114 +19,43 @@
#include <sysdep.h>
- .text
-#if IS_IN (libc)
-ENTRY(__bzero)
- movq %rdi, %rax /* Set return value. */
- movq %rsi, %rdx /* Set n. */
- pxor %xmm0, %xmm0
- jmp L(entry_from_bzero)
-END(__bzero)
-weak_alias (__bzero, bzero)
-
-/* Like memset but takes additional parameter with return value. */
-ENTRY(__memset_tail)
- movq %rcx, %rax /* Set return value. */
-
- movd %esi, %xmm0
- punpcklbw %xmm0, %xmm0
- punpcklwd %xmm0, %xmm0
- pshufd $0, %xmm0, %xmm0
-
- jmp L(entry_from_bzero)
-END(__memset_tail)
+#define VEC_SIZE 16
+#define VEC(i) xmm##i
+/* Don't use movups and movaps since it will get larger nop paddings for
+ alignment. */
+#define VMOVU movdqu
+#define VMOVA movdqa
+
+#define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ movd d, %xmm0; \
+ movq r, %rax; \
+ punpcklbw %xmm0, %xmm0; \
+ punpcklwd %xmm0, %xmm0; \
+ pshufd $0, %xmm0, %xmm0
+
+#define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ movd d, %xmm0; \
+ movq r, %rax; \
+ pshufd $0, %xmm0, %xmm0
+
+#define SECTION(p) p
+
+#ifndef MEMSET_SYMBOL
+# define MEMSET_CHK_SYMBOL(p,s) p
+# define MEMSET_SYMBOL(p,s) memset
#endif
-#if defined PIC && IS_IN (libc)
-ENTRY_CHK (__memset_chk)
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END_CHK (__memset_chk)
+#ifndef WMEMSET_SYMBOL
+# define WMEMSET_CHK_SYMBOL(p,s) p
+# define WMEMSET_SYMBOL(p,s) __wmemset
#endif
-ENTRY (memset)
- movd %esi, %xmm0
- movq %rdi, %rax
- punpcklbw %xmm0, %xmm0
- punpcklwd %xmm0, %xmm0
- pshufd $0, %xmm0, %xmm0
-L(entry_from_bzero):
- cmpq $64, %rdx
- ja L(loop_start)
- cmpq $16, %rdx
- jbe L(less_16_bytes)
- cmpq $32, %rdx
- movdqu %xmm0, (%rdi)
- movdqu %xmm0, -16(%rdi,%rdx)
- ja L(between_32_64_bytes)
-L(return):
- rep
- ret
- .p2align 4
-L(between_32_64_bytes):
- movdqu %xmm0, 16(%rdi)
- movdqu %xmm0, -32(%rdi,%rdx)
- ret
- .p2align 4
-L(loop_start):
- leaq 64(%rdi), %rcx
- movdqu %xmm0, (%rdi)
- andq $-64, %rcx
- movdqu %xmm0, -16(%rdi,%rdx)
- movdqu %xmm0, 16(%rdi)
- movdqu %xmm0, -32(%rdi,%rdx)
- movdqu %xmm0, 32(%rdi)
- movdqu %xmm0, -48(%rdi,%rdx)
- movdqu %xmm0, 48(%rdi)
- movdqu %xmm0, -64(%rdi,%rdx)
- addq %rdi, %rdx
- andq $-64, %rdx
- cmpq %rdx, %rcx
- je L(return)
- .p2align 4
-L(loop):
- movdqa %xmm0, (%rcx)
- movdqa %xmm0, 16(%rcx)
- movdqa %xmm0, 32(%rcx)
- movdqa %xmm0, 48(%rcx)
- addq $64, %rcx
- cmpq %rcx, %rdx
- jne L(loop)
- rep
- ret
-L(less_16_bytes):
- movq %xmm0, %rcx
- testb $24, %dl
- jne L(between8_16bytes)
- testb $4, %dl
- jne L(between4_7bytes)
- testb $1, %dl
- je L(odd_byte)
- movb %cl, (%rdi)
-L(odd_byte):
- testb $2, %dl
- je L(return)
- movw %cx, -2(%rax,%rdx)
- ret
-L(between4_7bytes):
- movl %ecx, (%rdi)
- movl %ecx, -4(%rdi,%rdx)
- ret
-L(between8_16bytes):
- movq %rcx, (%rdi)
- movq %rcx, -8(%rdi,%rdx)
- ret
+#include "multiarch/memset-vec-unaligned-erms.S"
-END (memset)
libc_hidden_builtin_def (memset)
-#if defined PIC && IS_IN (libc) && !defined USE_MULTIARCH
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
- .section .gnu.warning.__memset_zero_constant_len_parameter
- .string "memset used with constant zero length parameter; this could be due to transposed parameters"
+#if IS_IN (libc)
+libc_hidden_def (__wmemset)
+weak_alias (__wmemset, wmemset)
+libc_hidden_weak (wmemset)
#endif
diff --git a/sysdeps/x86_64/memset_chk.S b/sysdeps/x86_64/memset_chk.S
index 95bb5d0e94..4ecf914fbe 100644
--- a/sysdeps/x86_64/memset_chk.S
+++ b/sysdeps/x86_64/memset_chk.S
@@ -1,5 +1,5 @@
/* Checking memset for x86-64.
- Copyright (C) 2004-2016 Free Software Foundation, Inc.
+ Copyright (C) 2004-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/memusage.h b/sysdeps/x86_64/memusage.h
index fc102c4252..45fd920b52 100644
--- a/sysdeps/x86_64/memusage.h
+++ b/sysdeps/x86_64/memusage.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/mul_1.S b/sysdeps/x86_64/mul_1.S
index 88b8f920a1..c38927b5a0 100644
--- a/sysdeps/x86_64/mul_1.S
+++ b/sysdeps/x86_64/mul_1.S
@@ -1,6 +1,6 @@
/* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
the result in a second limb vector.
- Copyright (C) 2003-2016 Free Software Foundation, Inc.
+ Copyright (C) 2003-2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index d234f4ab66..bb5e970735 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -1,26 +1,46 @@
ifeq ($(subdir),csu)
tests += test-multiarch
-gen-as-const-headers += ifunc-defines.sym
endif
ifeq ($(subdir),string)
-sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
- strcmp-sse2-unaligned strncmp-ssse3 \
- memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned \
- memcpy-avx512-no-vzeroupper mempcpy-ssse3 memmove-ssse3 \
- memcpy-ssse3-back mempcpy-ssse3-back memmove-avx-unaligned \
- memcpy-avx-unaligned mempcpy-avx-unaligned \
- mempcpy-avx512-no-vzeroupper memmove-ssse3-back \
- memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
- strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
+sysdep_routines += strncat-c stpncpy-c strncpy-c \
+ strcmp-sse2 strcmp-sse2-unaligned strcmp-ssse3 \
+ strcmp-sse4_2 strcmp-avx2 \
+ strncmp-sse2 strncmp-ssse3 strncmp-sse4_2 strncmp-avx2 \
+ memchr-sse2 rawmemchr-sse2 memchr-avx2 rawmemchr-avx2 \
+ memrchr-sse2 memrchr-avx2 \
+ memcmp-sse2 \
+ memcmp-avx2-movbe \
+ memcmp-sse4 memcpy-ssse3 \
+ memmove-ssse3 \
+ memcpy-ssse3-back \
+ memmove-ssse3-back \
+ memmove-avx512-no-vzeroupper \
+ strcasecmp_l-sse2 strcasecmp_l-ssse3 \
+ strcasecmp_l-sse4_2 strcasecmp_l-avx \
+ strncase_l-sse2 strncase_l-ssse3 \
+ strncase_l-sse4_2 strncase_l-avx \
+ strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \
+ strrchr-sse2 strrchr-avx2 \
+ strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \
+ strcat-ssse3 strncat-ssse3\
+ strcpy-sse2 stpcpy-sse2 \
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
+ strcat-sse2 \
strcat-sse2-unaligned strncat-sse2-unaligned \
strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
- strcspn-c strpbrk-c strspn-c varshift memset-avx2 \
- memset-avx512-no-vzeroupper
+ strcspn-sse2 strpbrk-sse2 strspn-sse2 \
+ strcspn-c strpbrk-c strspn-c varshift \
+ memset-avx512-no-vzeroupper \
+ memmove-sse2-unaligned-erms \
+ memmove-avx-unaligned-erms \
+ memmove-avx512-unaligned-erms \
+ memset-sse2-unaligned-erms \
+ memset-avx2-unaligned-erms \
+ memset-avx512-unaligned-erms
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
@@ -28,5 +48,20 @@ CFLAGS-strspn-c.c += -msse4
endif
ifeq ($(subdir),wcsmbs)
-sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
+sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
+ wmemcmp-avx2-movbe \
+ wmemchr-sse2 wmemchr-avx2 \
+ wcscmp-sse2 wcscmp-avx2 \
+ wcsncmp-sse2 wcsncmp-avx2 \
+ wcscpy-ssse3 wcscpy-c \
+ wcschr-sse2 wcschr-avx2 \
+ wcsrchr-sse2 wcsrchr-avx2 \
+ wcsnlen-sse4_1 wcsnlen-c \
+ wcslen-sse2 wcslen-avx2 wcsnlen-avx2
+endif
+
+ifeq ($(subdir),debug)
+sysdep_routines += memcpy_chk-nonshared mempcpy_chk-nonshared \
+ memmove_chk-nonshared memset_chk-nonshared \
+ wmemset_chk-nonshared
endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
new file mode 100644
index 0000000000..9cab837642
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -0,0 +1,36 @@
+/* Common definition for ifunc selections optimized with SSE2 and AVX2.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ return OPTIMIZE (avx2);
+
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
deleted file mode 100644
index 3df946f343..0000000000
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "init-arch.h"
-#include <stddef.h>
-
---
-
-CPU_FEATURES_SIZE sizeof (struct cpu_features)
-CPUID_OFFSET offsetof (struct cpu_features, cpuid)
-CPUID_SIZE sizeof (struct cpuid_registers)
-CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax)
-CPUID_EBX_OFFSET offsetof (struct cpuid_registers, ebx)
-CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx)
-CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx)
-FAMILY_OFFSET offsetof (struct cpu_features, family)
-MODEL_OFFSET offsetof (struct cpu_features, model)
-FEATURE_OFFSET offsetof (struct cpu_features, feature)
-FEATURE_SIZE sizeof (unsigned int)
-
-COMMON_CPUID_INDEX_1
-COMMON_CPUID_INDEX_7
-FEATURE_INDEX_1
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 188b6d36c6..9aaaef7251 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -1,5 +1,5 @@
/* Enumerate available IFUNC implementations of a function. x86-64 version.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -38,77 +38,164 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = 0;
- /* Support sysdeps/x86_64/multiarch/memcmp.S. */
+ /* Support sysdeps/x86_64/multiarch/memchr.c. */
+ IFUNC_IMPL (i, name, memchr,
+ IFUNC_IMPL_ADD (array, i, memchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __memchr_avx2)
+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/memcmp.c. */
IFUNC_IMPL (i, name, memcmp,
+ IFUNC_IMPL_ADD (array, i, memcmp,
+ (HAS_ARCH_FEATURE (AVX2_Usable)
+ && HAS_CPU_FEATURE (MOVBE)),
+ __memcmp_avx2_movbe)
IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1),
__memcmp_sse4_1)
IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
__memcmp_ssse3)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
+#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/memmove_chk.c. */
IFUNC_IMPL (i, name, __memmove_chk,
-#ifdef HAVE_AVX512_ASM_SUPPORT
IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
__memmove_chk_avx512_no_vzeroupper)
-#endif
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_ARCH_FEATURE (AVX_Usable),
__memmove_chk_avx_unaligned)
IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __memmove_chk_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_CPU_FEATURE (SSSE3),
__memmove_chk_ssse3_back)
IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_CPU_FEATURE (SSSE3),
__memmove_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_sse2))
+ __memmove_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+ __memmove_chk_sse2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+ __memmove_chk_erms))
+#endif
- /* Support sysdeps/x86_64/multiarch/memmove.S. */
+ /* Support sysdeps/x86_64/multiarch/memmove.c. */
IFUNC_IMPL (i, name, memmove,
IFUNC_IMPL_ADD (array, i, memmove,
HAS_ARCH_FEATURE (AVX_Usable),
__memmove_avx_unaligned)
-#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __memmove_avx_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memmove,
HAS_ARCH_FEATURE (AVX512F_Usable),
__memmove_avx512_no_vzeroupper)
-#endif
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
__memmove_ssse3_back)
IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
__memmove_ssse3)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_erms)
+ IFUNC_IMPL_ADD (array, i, memmove, 1,
+ __memmove_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, memmove, 1,
+ __memmove_sse2_unaligned_erms))
+
+ /* Support sysdeps/x86_64/multiarch/memrchr.c. */
+ IFUNC_IMPL (i, name, memrchr,
+ IFUNC_IMPL_ADD (array, i, memrchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __memrchr_avx2)
+ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
- /* Support sysdeps/x86_64/multiarch/memset_chk.S. */
+#ifdef SHARED
+ /* Support sysdeps/x86_64/multiarch/memset_chk.c. */
IFUNC_IMPL (i, name, __memset_chk,
IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
- __memset_chk_sse2)
+ __memset_chk_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+ __memset_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+ __memset_chk_sse2_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memset_chk,
HAS_ARCH_FEATURE (AVX2_Usable),
- __memset_chk_avx2)
-#ifdef HAVE_AVX512_ASM_SUPPORT
+ __memset_chk_avx2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __memset_chk_avx2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __memset_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
__memset_chk_avx512_no_vzeroupper)
-#endif
)
+#endif
- /* Support sysdeps/x86_64/multiarch/memset.S. */
+ /* Support sysdeps/x86_64/multiarch/memset.c. */
IFUNC_IMPL (i, name, memset,
- IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
+ IFUNC_IMPL_ADD (array, i, memset, 1,
+ __memset_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, memset, 1,
+ __memset_sse2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __memset_avx2_unaligned)
IFUNC_IMPL_ADD (array, i, memset,
HAS_ARCH_FEATURE (AVX2_Usable),
- __memset_avx2)
-#ifdef HAVE_AVX512_ASM_SUPPORT
+ __memset_avx2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, memset,
HAS_ARCH_FEATURE (AVX512F_Usable),
__memset_avx512_no_vzeroupper)
-#endif
)
- /* Support sysdeps/x86_64/multiarch/stpncpy.S. */
+ /* Support sysdeps/x86_64/multiarch/rawmemchr.c. */
+ IFUNC_IMPL (i, name, rawmemchr,
+ IFUNC_IMPL_ADD (array, i, rawmemchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __rawmemchr_avx2)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strlen.c. */
+ IFUNC_IMPL (i, name, strlen,
+ IFUNC_IMPL_ADD (array, i, strlen,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strlen_avx2)
+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strnlen.c. */
+ IFUNC_IMPL (i, name, strnlen,
+ IFUNC_IMPL_ADD (array, i, strnlen,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strnlen_avx2)
+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/stpncpy.c. */
IFUNC_IMPL (i, name, stpncpy,
IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3),
__stpncpy_ssse3)
@@ -116,14 +203,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__stpncpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
- /* Support sysdeps/x86_64/multiarch/stpcpy.S. */
+ /* Support sysdeps/x86_64/multiarch/stpcpy.c. */
IFUNC_IMPL (i, name, stpcpy,
IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3),
__stpcpy_ssse3)
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2))
- /* Support sysdeps/x86_64/multiarch/strcasecmp_l.S. */
+ /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
IFUNC_IMPL (i, name, strcasecmp,
IFUNC_IMPL_ADD (array, i, strcasecmp,
HAS_ARCH_FEATURE (AVX_Usable),
@@ -136,7 +223,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strcasecmp_ssse3)
IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2))
- /* Support sysdeps/x86_64/multiarch/strcasecmp_l.S. */
+ /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
IFUNC_IMPL (i, name, strcasecmp_l,
IFUNC_IMPL_ADD (array, i, strcasecmp_l,
HAS_ARCH_FEATURE (AVX_Usable),
@@ -150,20 +237,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
__strcasecmp_l_sse2))
- /* Support sysdeps/x86_64/multiarch/strcat.S. */
+ /* Support sysdeps/x86_64/multiarch/strcat.c. */
IFUNC_IMPL (i, name, strcat,
IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
__strcat_ssse3)
IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2))
- /* Support sysdeps/x86_64/multiarch/strchr.S. */
+ /* Support sysdeps/x86_64/multiarch/strchr.c. */
IFUNC_IMPL (i, name, strchr,
+ IFUNC_IMPL_ADD (array, i, strchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strchr_avx2)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
- /* Support sysdeps/x86_64/multiarch/strcmp.S. */
+ /* Support sysdeps/x86_64/multiarch/strchrnul.c. */
+ IFUNC_IMPL (i, name, strchrnul,
+ IFUNC_IMPL_ADD (array, i, strchrnul,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strchrnul_avx2)
+ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strrchr.c. */
+ IFUNC_IMPL (i, name, strrchr,
+ IFUNC_IMPL_ADD (array, i, strrchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strrchr_avx2)
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strcmp.c. */
IFUNC_IMPL (i, name, strcmp,
+ IFUNC_IMPL_ADD (array, i, strcmp,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strcmp_avx2)
IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
__strcmp_sse42)
IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
@@ -171,20 +278,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2))
- /* Support sysdeps/x86_64/multiarch/strcpy.S. */
+ /* Support sysdeps/x86_64/multiarch/strcpy.c. */
IFUNC_IMPL (i, name, strcpy,
IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
__strcpy_ssse3)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2))
- /* Support sysdeps/x86_64/multiarch/strcspn.S. */
+ /* Support sysdeps/x86_64/multiarch/strcspn.c. */
IFUNC_IMPL (i, name, strcspn,
IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2),
__strcspn_sse42)
IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
- /* Support sysdeps/x86_64/multiarch/strncase_l.S. */
+ /* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp,
IFUNC_IMPL_ADD (array, i, strncasecmp,
HAS_ARCH_FEATURE (AVX_Usable),
@@ -198,7 +305,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
__strncasecmp_sse2))
- /* Support sysdeps/x86_64/multiarch/strncase_l.S. */
+ /* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp_l,
IFUNC_IMPL_ADD (array, i, strncasecmp_l,
HAS_ARCH_FEATURE (AVX_Usable),
@@ -212,7 +319,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
__strncasecmp_l_sse2))
- /* Support sysdeps/x86_64/multiarch/strncat.S. */
+ /* Support sysdeps/x86_64/multiarch/strncat.c. */
IFUNC_IMPL (i, name, strncat,
IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
__strncat_ssse3)
@@ -220,7 +327,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strncat_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
- /* Support sysdeps/x86_64/multiarch/strncpy.S. */
+ /* Support sysdeps/x86_64/multiarch/strncpy.c. */
IFUNC_IMPL (i, name, strncpy,
IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
__strncpy_ssse3)
@@ -228,14 +335,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strncpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
- /* Support sysdeps/x86_64/multiarch/strpbrk.S. */
+ /* Support sysdeps/x86_64/multiarch/strpbrk.c. */
IFUNC_IMPL (i, name, strpbrk,
IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2),
__strpbrk_sse42)
IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
- /* Support sysdeps/x86_64/multiarch/strspn.S. */
+ /* Support sysdeps/x86_64/multiarch/strspn.c. */
IFUNC_IMPL (i, name, strspn,
IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2),
__strspn_sse42)
@@ -246,99 +353,226 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
- /* Support sysdeps/x86_64/multiarch/wcscpy.S. */
+ /* Support sysdeps/x86_64/multiarch/wcschr.c. */
+ IFUNC_IMPL (i, name, wcschr,
+ IFUNC_IMPL_ADD (array, i, wcschr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wcschr_avx2)
+ IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsrchr.c. */
+ IFUNC_IMPL (i, name, wcsrchr,
+ IFUNC_IMPL_ADD (array, i, wcsrchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wcsrchr_avx2)
+ IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcscmp.c. */
+ IFUNC_IMPL (i, name, wcscmp,
+ IFUNC_IMPL_ADD (array, i, wcscmp,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wcscmp_avx2)
+ IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsncmp.c. */
+ IFUNC_IMPL (i, name, wcsncmp,
+ IFUNC_IMPL_ADD (array, i, wcsncmp,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wcsncmp_avx2)
+ IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcscpy.c. */
IFUNC_IMPL (i, name, wcscpy,
IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
__wcscpy_ssse3)
IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
- /* Support sysdeps/x86_64/multiarch/wmemcmp.S. */
+ /* Support sysdeps/x86_64/multiarch/wcslen.c. */
+ IFUNC_IMPL (i, name, wcslen,
+ IFUNC_IMPL_ADD (array, i, wcslen,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wcslen_avx2)
+ IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
+ IFUNC_IMPL (i, name, wcsnlen,
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wcsnlen_avx2)
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
+ HAS_CPU_FEATURE (SSE4_1),
+ __wcsnlen_sse4_1)
+ IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wmemchr.c. */
+ IFUNC_IMPL (i, name, wmemchr,
+ IFUNC_IMPL_ADD (array, i, wmemchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wmemchr_avx2)
+ IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wmemcmp.c. */
IFUNC_IMPL (i, name, wmemcmp,
+ IFUNC_IMPL_ADD (array, i, wmemcmp,
+ (HAS_ARCH_FEATURE (AVX2_Usable)
+ && HAS_CPU_FEATURE (MOVBE)),
+ __wmemcmp_avx2_movbe)
IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1),
__wmemcmp_sse4_1)
IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
__wmemcmp_ssse3)
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
+ /* Support sysdeps/x86_64/multiarch/wmemset.c. */
+ IFUNC_IMPL (i, name, wmemset,
+ IFUNC_IMPL_ADD (array, i, wmemset, 1,
+ __wmemset_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, wmemset,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wmemset_avx2_unaligned)
+ IFUNC_IMPL_ADD (array, i, wmemset,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __wmemset_avx512_unaligned))
+
#ifdef SHARED
- /* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */
+ /* Support sysdeps/x86_64/multiarch/memcpy_chk.c. */
IFUNC_IMPL (i, name, __memcpy_chk,
-#ifdef HAVE_AVX512_ASM_SUPPORT
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
__memcpy_chk_avx512_no_vzeroupper)
-#endif
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_ARCH_FEATURE (AVX_Usable),
__memcpy_chk_avx_unaligned)
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __memcpy_chk_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_CPU_FEATURE (SSSE3),
__memcpy_chk_ssse3_back)
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_CPU_FEATURE (SSSE3),
__memcpy_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_sse2))
+ __memcpy_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+ __memcpy_chk_sse2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+ __memcpy_chk_erms))
+#endif
- /* Support sysdeps/x86_64/multiarch/memcpy.S. */
+ /* Support sysdeps/x86_64/multiarch/memcpy.c. */
IFUNC_IMPL (i, name, memcpy,
IFUNC_IMPL_ADD (array, i, memcpy,
HAS_ARCH_FEATURE (AVX_Usable),
__memcpy_avx_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __memcpy_avx_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
__memcpy_ssse3_back)
IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
__memcpy_ssse3)
-#ifdef HAVE_AVX512_ASM_SUPPORT
IFUNC_IMPL_ADD (array, i, memcpy,
HAS_ARCH_FEATURE (AVX512F_Usable),
__memcpy_avx512_no_vzeroupper)
-#endif
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2))
+ IFUNC_IMPL_ADD (array, i, memcpy, 1,
+ __memcpy_sse2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms))
- /* Support sysdeps/x86_64/multiarch/mempcpy_chk.S. */
+#ifdef SHARED
+ /* Support sysdeps/x86_64/multiarch/mempcpy_chk.c. */
IFUNC_IMPL (i, name, __mempcpy_chk,
-#ifdef HAVE_AVX512_ASM_SUPPORT
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
__mempcpy_chk_avx512_no_vzeroupper)
-#endif
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_ARCH_FEATURE (AVX_Usable),
__mempcpy_chk_avx_unaligned)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __mempcpy_chk_avx_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_CPU_FEATURE (SSSE3),
__mempcpy_chk_ssse3_back)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_CPU_FEATURE (SSSE3),
__mempcpy_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_sse2))
+ __mempcpy_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+ __mempcpy_chk_sse2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+ __mempcpy_chk_erms))
+#endif
- /* Support sysdeps/x86_64/multiarch/mempcpy.S. */
+ /* Support sysdeps/x86_64/multiarch/mempcpy.c. */
IFUNC_IMPL (i, name, mempcpy,
-#ifdef HAVE_AVX512_ASM_SUPPORT
IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX512F_Usable),
__mempcpy_avx512_no_vzeroupper)
-#endif
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX_Usable),
__mempcpy_avx_unaligned)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __mempcpy_avx_unaligned_erms)
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
__mempcpy_ssse3_back)
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
__mempcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
+ IFUNC_IMPL_ADD (array, i, mempcpy, 1,
+ __mempcpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, mempcpy, 1,
+ __mempcpy_sse2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms))
- /* Support sysdeps/x86_64/multiarch/strncmp.S. */
+ /* Support sysdeps/x86_64/multiarch/strncmp.c. */
IFUNC_IMPL (i, name, strncmp,
+ IFUNC_IMPL_ADD (array, i, strncmp,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strncmp_avx2)
IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
__strncmp_sse42)
IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
__strncmp_ssse3)
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
+
+#ifdef SHARED
+ /* Support sysdeps/x86_64/multiarch/wmemset_chk.c. */
+ IFUNC_IMPL (i, name, __wmemset_chk,
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk, 1,
+ __wmemset_chk_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wmemset_chk_avx2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __wmemset_chk_avx512_unaligned))
#endif
return i;
diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
new file mode 100644
index 0000000000..bf5ab8eb7f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
@@ -0,0 +1,45 @@
+/* Common definition for memcmp/wmemcmp ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
+/* Return the memcmp/wmemcmp variant to bind: avx2_movbe, sse4_1 or ssse3 when its checks pass, else sse2.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* avx2_movbe needs AVX2_Usable, MOVBE and AVX_Fast_Unaligned_Load, and is skipped when Prefer_No_VZEROUPPER is set.  */
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_CPU_P (cpu_features, MOVBE)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ return OPTIMIZE (avx2_movbe);
+ /* Otherwise step down through the sse4_1 and ssse3 variants.  */
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+ return OPTIMIZE (sse4_1);
+
+ if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+ return OPTIMIZE (ssse3);
+ /* None of the checks above matched; use the sse2 variant.  */
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
new file mode 100644
index 0000000000..5b1eb1c92c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
@@ -0,0 +1,81 @@
+/* Common definition for memcpy, mempcpy and memmove implementation.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+ attribute_hidden;
+/* Return the memcpy/mempcpy/memmove variant to bind, chosen from the bits in cpu_features.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* Either Prefer_ERMS or Prefer_FSRM short-circuits to the erms variant.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS)
+ || CPU_FEATURES_ARCH_P (cpu_features, Prefer_FSRM))
+ return OPTIMIZE (erms);
+ /* avx512 variants: need AVX512F_Usable and are skipped when Prefer_No_AVX512 is set.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx512_no_vzeroupper);
+ /* The ERMS CPU bit selects the _erms flavour of the same variant.  */
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (avx512_unaligned_erms);
+
+ return OPTIMIZE (avx512_unaligned);
+ }
+ /* avx variants are gated on AVX_Fast_Unaligned_Load.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (avx_unaligned_erms);
+
+ return OPTIMIZE (avx_unaligned);
+ }
+ /* sse2_unaligned variants: taken when SSSE3 is absent or Fast_Unaligned_Copy is set.  */
+ if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3)
+ || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (sse2_unaligned_erms);
+
+ return OPTIMIZE (sse2_unaligned);
+ }
+ /* SSSE3 is known to be set here; Fast_Copy_Backward selects ssse3_back over ssse3.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward))
+ return OPTIMIZE (ssse3_back);
+
+ return OPTIMIZE (ssse3);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
new file mode 100644
index 0000000000..19b5ae676c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -0,0 +1,69 @@
+/* Common definition for memset/memset_chk ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+ attribute_hidden;
+/* Return the memset/memset_chk variant to bind, chosen from the bits in cpu_features.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* Prefer_ERMS short-circuits to the erms variant.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
+ return OPTIMIZE (erms);
+ /* avx512 variants: need AVX512F_Usable and are skipped when Prefer_No_AVX512 is set.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx512_no_vzeroupper);
+ /* The ERMS CPU bit selects the _erms flavour of the same variant.  */
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (avx512_unaligned_erms);
+
+ return OPTIMIZE (avx512_unaligned);
+ }
+ /* avx2 variants are gated on AVX2_Usable only.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (avx2_unaligned_erms);
+ else
+ return OPTIMIZE (avx2_unaligned);
+ }
+ /* Fallback: an sse2_unaligned variant, with or without ERMS.  */
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (sse2_unaligned_erms);
+
+ return OPTIMIZE (sse2_unaligned);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
new file mode 100644
index 0000000000..f2b791cccf
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
@@ -0,0 +1,34 @@
+/* Common definition for ifunc selections optimized with SSE2 and SSE4.2.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+/* Return the sse42 variant when the SSE4_2 CPU bit is set, otherwise the sse2 variant.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2))
+ return OPTIMIZE (sse42);
+
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
new file mode 100644
index 0000000000..1ca170b663
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
@@ -0,0 +1,43 @@
+/* Common definition for strcasecmp family ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
+/* Return the strcasecmp-family variant to bind: avx, sse42 or ssse3 when its checks pass, else sse2.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable))
+ return OPTIMIZE (avx);
+ /* The sse42 variant is skipped when Slow_SSE4_2 is set, even if the CPU reports SSE4_2.  */
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
+ return OPTIMIZE (sse42);
+
+ if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+ return OPTIMIZE (ssse3);
+ /* None of the checks above matched; use the sse2 variant.  */
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-unaligned-ssse3.h b/sysdeps/x86_64/multiarch/ifunc-unaligned-ssse3.h
new file mode 100644
index 0000000000..81805f9832
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-unaligned-ssse3.h
@@ -0,0 +1,40 @@
+/* Common definition for ifunc selections optimized with SSE2, unaligned
+ SSE2 and SSSE3.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+/* Return sse2_unaligned if Fast_Unaligned_Load is set, else ssse3 if SSSE3 is set, else sse2.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* Fast_Unaligned_Load is tested first, so it wins even when SSSE3 is also set.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+ return OPTIMIZE (sse2_unaligned);
+
+ if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+ return OPTIMIZE (ssse3);
+
+ return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
new file mode 100644
index 0000000000..2f1085f5fc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -0,0 +1,42 @@
+/* Common definition for wmemset/wmemset_chk ifunc selections.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+/* Return the wmemset/wmemset_chk variant to bind: avx512_unaligned, avx2_unaligned or sse2_unaligned.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* Both AVX paths need AVX2_Usable and AVX_Fast_Unaligned_Load, and are skipped when Prefer_No_VZEROUPPER is set.  */
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ return OPTIMIZE (avx512_unaligned);
+ else
+ return OPTIMIZE (avx2_unaligned);
+ }
+ /* The outer check failed; use the sse2_unaligned variant.  */
+ return OPTIMIZE (sse2_unaligned);
+}
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
new file mode 100644
index 0000000000..5f5e772554
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
@@ -0,0 +1,340 @@
+/* memchr/wmemchr optimized with AVX2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef MEMCHR
+# define MEMCHR __memchr_avx2
+# endif
+
+# ifdef USE_AS_WMEMCHR
+# define VPCMPEQ vpcmpeqd
+# else
+# define VPCMPEQ vpcmpeqb
+# endif
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+ .section .text.avx,"ax",@progbits
+ENTRY (MEMCHR)
+# ifndef USE_AS_RAWMEMCHR
+ /* Check for zero length. */
+ testq %rdx, %rdx
+ jz L(null)
+# endif
+ movl %edi, %ecx
+ /* Broadcast CHAR to YMM0. */
+ vmovd %esi, %xmm0
+# ifdef USE_AS_WMEMCHR
+ shl $2, %rdx
+ vpbroadcastd %xmm0, %ymm0
+# else
+ vpbroadcastb %xmm0, %ymm0
+# endif
+ /* Check if we may cross page boundary with one vector load. */
+ andl $(2 * VEC_SIZE - 1), %ecx
+ cmpl $VEC_SIZE, %ecx
+ ja L(cros_page_boundary)
+
+ /* Check the first VEC_SIZE bytes. */
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+
+# ifndef USE_AS_RAWMEMCHR
+ jnz L(first_vec_x0_check)
+ /* Adjust length and check the end of data. */
+ subq $VEC_SIZE, %rdx
+ jbe L(zero)
+# else
+ jnz L(first_vec_x0)
+# endif
+
+ /* Align data for aligned loads in the loop. */
+ addq $VEC_SIZE, %rdi
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+
+# ifndef USE_AS_RAWMEMCHR
+ /* Adjust length. */
+ addq %rcx, %rdx
+
+ subq $(VEC_SIZE * 4), %rdx
+ jbe L(last_4x_vec_or_less)
+# endif
+ jmp L(more_4x_vec)
+
+ .p2align 4
+L(cros_page_boundary):
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ /* Remove the leading bytes. */
+ sarl %cl, %eax
+ testl %eax, %eax
+ jz L(aligned_more)
+ tzcntl %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Check the end of data. */
+ cmpq %rax, %rdx
+ jbe L(zero)
+# endif
+ addq %rdi, %rax
+ addq %rcx, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(aligned_more):
+# ifndef USE_AS_RAWMEMCHR
+ /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)"
+ instead of "(rdx + rcx) - VEC_SIZE" to avoid possible addition
+ overflow. */
+ negq %rcx
+ addq $VEC_SIZE, %rcx
+
+ /* Check the end of data. */
+ subq %rcx, %rdx
+ jbe L(zero)
+# endif
+
+ addq $VEC_SIZE, %rdi
+
+# ifndef USE_AS_RAWMEMCHR
+ subq $(VEC_SIZE * 4), %rdx
+ jbe L(last_4x_vec_or_less)
+# endif
+
+L(more_4x_vec):
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ since data is only aligned to VEC_SIZE. */
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+
+ VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x3)
+
+ addq $(VEC_SIZE * 4), %rdi
+
+# ifndef USE_AS_RAWMEMCHR
+ subq $(VEC_SIZE * 4), %rdx
+ jbe L(last_4x_vec_or_less)
+# endif
+
+ /* Align data to 4 * VEC_SIZE. */
+ movq %rdi, %rcx
+ andl $(4 * VEC_SIZE - 1), %ecx
+ andq $-(4 * VEC_SIZE), %rdi
+
+# ifndef USE_AS_RAWMEMCHR
+ /* Adjust length. */
+ addq %rcx, %rdx
+# endif
+
+ .p2align 4
+L(loop_4x_vec):
+ /* Compare 4 * VEC at a time forward. */
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4
+
+ vpor %ymm1, %ymm2, %ymm5
+ vpor %ymm3, %ymm4, %ymm6
+ vpor %ymm5, %ymm6, %ymm5
+
+ vpmovmskb %ymm5, %eax
+ testl %eax, %eax
+ jnz L(4x_vec_end)
+
+ addq $(VEC_SIZE * 4), %rdi
+
+# ifdef USE_AS_RAWMEMCHR
+ jmp L(loop_4x_vec)
+# else
+ subq $(VEC_SIZE * 4), %rdx
+ ja L(loop_4x_vec)
+
+L(last_4x_vec_or_less):
+ /* Less than 4 * VEC and aligned to VEC_SIZE. */
+ addl $(VEC_SIZE * 2), %edx
+ jle L(last_2x_vec)
+
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+
+ VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+
+ jnz L(first_vec_x2_check)
+ subl $VEC_SIZE, %edx
+ jle L(zero)
+
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+
+ jnz L(first_vec_x3_check)
+ xorl %eax, %eax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_2x_vec):
+ addl $(VEC_SIZE * 2), %edx
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+
+ jnz L(first_vec_x0_check)
+ subl $VEC_SIZE, %edx
+ jle L(zero)
+
+ VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1_check)
+ xorl %eax, %eax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x0_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rdx
+ jbe L(zero)
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x1_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rdx
+ jbe L(zero)
+ addq $VEC_SIZE, %rax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x2_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rdx
+ jbe L(zero)
+ addq $(VEC_SIZE * 2), %rax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x3_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rdx
+ jbe L(zero)
+ addq $(VEC_SIZE * 3), %rax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(zero):
+ VZEROUPPER
+L(null):
+ xorl %eax, %eax
+ ret
+# endif
+
+ .p2align 4
+L(first_vec_x0):
+ tzcntl %eax, %eax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x1):
+ tzcntl %eax, %eax
+ addq $VEC_SIZE, %rax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x2):
+ tzcntl %eax, %eax
+ addq $(VEC_SIZE * 2), %rax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(4x_vec_end):
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+ vpmovmskb %ymm3, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+ vpmovmskb %ymm4, %eax
+ testl %eax, %eax
+L(first_vec_x3):
+ tzcntl %eax, %eax
+ addq $(VEC_SIZE * 3), %rax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+END (MEMCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memchr-sse2.S b/sysdeps/x86_64/multiarch/memchr-sse2.S
new file mode 100644
index 0000000000..8a5e7fd1c5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memchr-sse2.S
@@ -0,0 +1,28 @@
+/* memchr optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define memchr __memchr_sse2
+
+# undef strong_alias
+# define strong_alias(memchr, __memchr)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(memchr)
+#endif
+
+#include "../memchr.S"
diff --git a/sysdeps/x86_64/multiarch/wcscpy.S b/sysdeps/x86_64/multiarch/memchr.c
index 8e7270b9c7..016f57846a 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.S
+++ b/sysdeps/x86_64/multiarch/memchr.c
@@ -1,7 +1,6 @@
-/* Multiple versions of wcscpy
+/* Multiple versions of memchr
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,23 +17,19 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
-
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
-
- .text
-ENTRY(wcscpy)
- .type wcscpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- HAS_CPU_FEATURE (SSSE3)
- jnz 2f
- leaq __wcscpy_sse2(%rip), %rax
- ret
-
-2: leaq __wcscpy_ssse3(%rip), %rax
- ret
-
-END(wcscpy)
+# define memchr __redirect_memchr
+# include <string.h>
+# undef memchr
+
+# define SYMBOL_NAME memchr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR ());
+strong_alias (memchr, __memchr)
+# ifdef SHARED
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
+ __attribute__((visibility ("hidden")));
+# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
new file mode 100644
index 0000000000..30f764c393
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -0,0 +1,429 @@
+/* memcmp/wmemcmp optimized with AVX2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+/* memcmp/wmemcmp is implemented as:
+ 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap
+ to avoid branches.
+ 2. Use overlapping compare to avoid branch.
+ 3. Use vector compare when size >= 4 bytes for memcmp or size >= 8
+ bytes for wmemcmp.
+ 4. If size is 8 * VEC_SIZE or less, unroll the loop.
+ 5. Compare 4 * VEC_SIZE at a time with the aligned first memory
+ area.
+ 6. Use 2 vector compares when size is 2 * VEC_SIZE or less.
+ 7. Use 4 vector compares when size is 4 * VEC_SIZE or less.
+ 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. */
+
+# include <sysdep.h>
+
+# ifndef MEMCMP
+# define MEMCMP __memcmp_avx2_movbe
+# endif
+
+# ifdef USE_AS_WMEMCMP
+# define VPCMPEQ vpcmpeqd
+# else
+# define VPCMPEQ vpcmpeqb
+# endif
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+# define VEC_SIZE 32
+# define VEC_MASK ((1 << VEC_SIZE) - 1)
+
+/* Warning!
+ wmemcmp has to use SIGNED comparison for elements.
+ memcmp has to use UNSIGNED comparison for elements.
+*/
+
+ .section .text.avx,"ax",@progbits
+ENTRY (MEMCMP)
+# ifdef USE_AS_WMEMCMP
+ shl $2, %rdx
+# endif
+ cmpq $VEC_SIZE, %rdx
+ jb L(less_vec)
+
+ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
+ vmovdqu (%rsi), %ymm2
+ VPCMPEQ (%rdi), %ymm2, %ymm2
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ cmpq $(VEC_SIZE * 2), %rdx
+ jbe L(last_vec)
+
+ VPCMPEQ %ymm0, %ymm0, %ymm0
+ /* More than 2 * VEC. */
+ cmpq $(VEC_SIZE * 8), %rdx
+ ja L(more_8x_vec)
+ cmpq $(VEC_SIZE * 4), %rdx
+ jb L(last_4x_vec)
+
+ /* From 4 * VEC to 8 * VEC, inclusively. */
+ vmovdqu (%rsi), %ymm1
+ VPCMPEQ (%rdi), %ymm1, %ymm1
+
+ vmovdqu VEC_SIZE(%rsi), %ymm2
+ VPCMPEQ VEC_SIZE(%rdi), %ymm2, %ymm2
+
+ vmovdqu (VEC_SIZE * 2)(%rsi), %ymm3
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm3, %ymm3
+
+ vmovdqu (VEC_SIZE * 3)(%rsi), %ymm4
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm4, %ymm4
+
+ vpand %ymm1, %ymm2, %ymm5
+ vpand %ymm3, %ymm4, %ymm6
+ vpand %ymm5, %ymm6, %ymm5
+
+ vptest %ymm0, %ymm5
+ jnc L(4x_vec_end)
+
+ leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi
+ leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi
+ vmovdqu (%rsi), %ymm1
+ VPCMPEQ (%rdi), %ymm1, %ymm1
+
+ vmovdqu VEC_SIZE(%rsi), %ymm2
+ VPCMPEQ VEC_SIZE(%rdi), %ymm2, %ymm2
+ vpand %ymm2, %ymm1, %ymm5
+
+ vmovdqu (VEC_SIZE * 2)(%rsi), %ymm3
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm3, %ymm3
+ vpand %ymm3, %ymm5, %ymm5
+
+ vmovdqu (VEC_SIZE * 3)(%rsi), %ymm4
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm4, %ymm4
+ vpand %ymm4, %ymm5, %ymm5
+
+ vptest %ymm0, %ymm5
+ jnc L(4x_vec_end)
+ xorl %eax, %eax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_2x_vec):
+ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
+ vmovdqu (%rsi), %ymm2
+ VPCMPEQ (%rdi), %ymm2, %ymm2
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+L(last_vec):
+ /* Use overlapping loads to avoid branches. */
+ leaq -VEC_SIZE(%rdi, %rdx), %rdi
+ leaq -VEC_SIZE(%rsi, %rdx), %rsi
+ vmovdqu (%rsi), %ymm2
+ VPCMPEQ (%rdi), %ymm2, %ymm2
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec):
+ /* A byte or int32 is different within 16 or 32 bytes. */
+ tzcntl %eax, %ecx
+# ifdef USE_AS_WMEMCMP
+ xorl %eax, %eax
+ movl (%rdi, %rcx), %edx
+ cmpl (%rsi, %rcx), %edx
+L(wmemcmp_return):
+ setl %al
+ negl %eax
+ orl $1, %eax
+# else
+ movzbl (%rdi, %rcx), %eax
+ movzbl (%rsi, %rcx), %edx
+ sub %edx, %eax
+# endif
+ VZEROUPPER
+ ret
+
+# ifdef USE_AS_WMEMCMP
+ .p2align 4
+L(4):
+ xorl %eax, %eax
+ movl (%rdi), %edx
+ cmpl (%rsi), %edx
+ jne L(wmemcmp_return)
+ ret
+# else
+ .p2align 4
+L(between_4_7):
+ /* Load as big endian with overlapping movbe to avoid branches. */
+ movbe (%rdi), %eax
+ movbe (%rsi), %ecx
+ shlq $32, %rax
+ shlq $32, %rcx
+ movbe -4(%rdi, %rdx), %edi
+ movbe -4(%rsi, %rdx), %esi
+ orq %rdi, %rax
+ orq %rsi, %rcx
+ subq %rcx, %rax
+ je L(exit)
+ sbbl %eax, %eax
+ orl $1, %eax
+ ret
+
+ .p2align 4
+L(exit):
+ ret
+
+ .p2align 4
+L(between_2_3):
+ /* Load as big endian to avoid branches. */
+ movzwl (%rdi), %eax
+ movzwl (%rsi), %ecx
+ shll $8, %eax
+ shll $8, %ecx
+ bswap %eax
+ bswap %ecx
+ movb -1(%rdi, %rdx), %al
+ movb -1(%rsi, %rdx), %cl
+ /* Subtraction is okay because the upper 8 bits are zero. */
+ subl %ecx, %eax
+ ret
+
+ .p2align 4
+L(1):
+ movzbl (%rdi), %eax
+ movzbl (%rsi), %ecx
+ subl %ecx, %eax
+ ret
+# endif
+
+ .p2align 4
+L(zero):
+ xorl %eax, %eax
+ ret
+
+ .p2align 4
+L(less_vec):
+# ifdef USE_AS_WMEMCMP
+ /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes. */
+ cmpb $4, %dl
+ je L(4)
+ jb L(zero)
+# else
+ cmpb $1, %dl
+ je L(1)
+ jb L(zero)
+ cmpb $4, %dl
+ jb L(between_2_3)
+ cmpb $8, %dl
+ jb L(between_4_7)
+# endif
+ cmpb $16, %dl
+ jae L(between_16_31)
+ /* It is between 8 and 15 bytes. */
+ vmovq (%rdi), %xmm1
+ vmovq (%rsi), %xmm2
+ VPCMPEQ %xmm1, %xmm2, %xmm2
+ vpmovmskb %xmm2, %eax
+ subl $0xffff, %eax
+ jnz L(first_vec)
+ /* Use overlapping loads to avoid branches. */
+ leaq -8(%rdi, %rdx), %rdi
+ leaq -8(%rsi, %rdx), %rsi
+ vmovq (%rdi), %xmm1
+ vmovq (%rsi), %xmm2
+ VPCMPEQ %xmm1, %xmm2, %xmm2
+ vpmovmskb %xmm2, %eax
+ subl $0xffff, %eax
+ jnz L(first_vec)
+ ret
+
+ .p2align 4
+L(between_16_31):
+ /* From 16 to 31 bytes. No branch when size == 16. */
+ vmovdqu (%rsi), %xmm2
+ VPCMPEQ (%rdi), %xmm2, %xmm2
+ vpmovmskb %xmm2, %eax
+ subl $0xffff, %eax
+ jnz L(first_vec)
+
+ /* Use overlapping loads to avoid branches. */
+ leaq -16(%rdi, %rdx), %rdi
+ leaq -16(%rsi, %rdx), %rsi
+ vmovdqu (%rsi), %xmm2
+ VPCMPEQ (%rdi), %xmm2, %xmm2
+ vpmovmskb %xmm2, %eax
+ subl $0xffff, %eax
+ jnz L(first_vec)
+ ret
+
+ .p2align 4
+L(more_8x_vec):
+ /* More than 8 * VEC. Check the first VEC. */
+ vmovdqu (%rsi), %ymm2
+ VPCMPEQ (%rdi), %ymm2, %ymm2
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ /* Align the first memory area for aligned loads in the loop.
+ Compute how much the first memory area is misaligned. */
+ movq %rdi, %rcx
+ andl $(VEC_SIZE - 1), %ecx
+ /* Get the negative of offset for alignment. */
+ subq $VEC_SIZE, %rcx
+ /* Adjust the second memory area. */
+ subq %rcx, %rsi
+ /* Adjust the first memory area which should be aligned now. */
+ subq %rcx, %rdi
+ /* Adjust length. */
+ addq %rcx, %rdx
+
+L(loop_4x_vec):
+ /* Compare 4 * VEC at a time forward. */
+ vmovdqu (%rsi), %ymm1
+ VPCMPEQ (%rdi), %ymm1, %ymm1
+
+ vmovdqu VEC_SIZE(%rsi), %ymm2
+ VPCMPEQ VEC_SIZE(%rdi), %ymm2, %ymm2
+ vpand %ymm2, %ymm1, %ymm5
+
+ vmovdqu (VEC_SIZE * 2)(%rsi), %ymm3
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm3, %ymm3
+ vpand %ymm3, %ymm5, %ymm5
+
+ vmovdqu (VEC_SIZE * 3)(%rsi), %ymm4
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm4, %ymm4
+ vpand %ymm4, %ymm5, %ymm5
+
+ vptest %ymm0, %ymm5
+ jnc L(4x_vec_end)
+
+ addq $(VEC_SIZE * 4), %rdi
+ addq $(VEC_SIZE * 4), %rsi
+
+ subq $(VEC_SIZE * 4), %rdx
+ cmpq $(VEC_SIZE * 4), %rdx
+ jae L(loop_4x_vec)
+
+ /* Less than 4 * VEC. */
+ cmpq $VEC_SIZE, %rdx
+ jbe L(last_vec)
+ cmpq $(VEC_SIZE * 2), %rdx
+ jbe L(last_2x_vec)
+
+L(last_4x_vec):
+ /* From 2 * VEC to 4 * VEC. */
+ vmovdqu (%rsi), %ymm2
+ VPCMPEQ (%rdi), %ymm2, %ymm2
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ addq $VEC_SIZE, %rdi
+ addq $VEC_SIZE, %rsi
+ vmovdqu (%rsi), %ymm2
+ VPCMPEQ (%rdi), %ymm2, %ymm2
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ /* Use overlapping loads to avoid branches. */
+ leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi
+ leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi
+ vmovdqu (%rsi), %ymm2
+ VPCMPEQ (%rdi), %ymm2, %ymm2
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+
+ addq $VEC_SIZE, %rdi
+ addq $VEC_SIZE, %rsi
+ vmovdqu (%rsi), %ymm2
+ VPCMPEQ (%rdi), %ymm2, %ymm2
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(4x_vec_end):
+ vpmovmskb %ymm1, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec_x1)
+ vpmovmskb %ymm3, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec_x2)
+ vpmovmskb %ymm4, %eax
+ subl $VEC_MASK, %eax
+ tzcntl %eax, %ecx
+# ifdef USE_AS_WMEMCMP
+ xorl %eax, %eax
+ movl (VEC_SIZE * 3)(%rdi, %rcx), %edx
+ cmpl (VEC_SIZE * 3)(%rsi, %rcx), %edx
+ jmp L(wmemcmp_return)
+# else
+ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx
+ sub %edx, %eax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x1):
+ tzcntl %eax, %ecx
+# ifdef USE_AS_WMEMCMP
+ xorl %eax, %eax
+ movl VEC_SIZE(%rdi, %rcx), %edx
+ cmpl VEC_SIZE(%rsi, %rcx), %edx
+ jmp L(wmemcmp_return)
+# else
+ movzbl VEC_SIZE(%rdi, %rcx), %eax
+ movzbl VEC_SIZE(%rsi, %rcx), %edx
+ sub %edx, %eax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x2):
+ tzcntl %eax, %ecx
+# ifdef USE_AS_WMEMCMP
+ xorl %eax, %eax
+ movl (VEC_SIZE * 2)(%rdi, %rcx), %edx
+ cmpl (VEC_SIZE * 2)(%rsi, %rcx), %edx
+ jmp L(wmemcmp_return)
+# else
+ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx
+ sub %edx, %eax
+# endif
+ VZEROUPPER
+ ret
+END (MEMCMP)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse2.S b/sysdeps/x86_64/multiarch/memcmp-sse2.S
new file mode 100644
index 0000000000..6058aa751e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcmp-sse2.S
@@ -0,0 +1,31 @@
+/* memcmp with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc) /* The overrides below apply only to the definition in libc.  */
+# define memcmp __memcmp_sse2 /* Every memcmp token in the file included below is spelled __memcmp_sse2.  */
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) /* Any use in the included file now expands to nothing.  */
+# endif
+
+# undef weak_alias
+# define weak_alias(ignored1, ignored2) /* Any weak_alias use in the included file now expands to nothing.  */
+#endif
+
+#include <sysdeps/x86_64/memcmp.S> /* Assemble the sysdeps/x86_64 memcmp with the macros set up above.  */
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 786f87282c..8e164f2cb6 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -1,5 +1,5 @@
/* memcmp with SSE4.1, wmemcmp with SSE4.1
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
+ Copyright (C) 2010-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -31,7 +31,7 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), %rcx; \
add %r11, %rcx; \
- jmp *%rcx; \
+ _CET_NOTRACK jmp *%rcx; \
ud2
/* Warning!
diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
index a22f399e02..6f76c64123 100644
--- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
@@ -1,5 +1,5 @@
/* memcmp with SSSE3, wmemcmp with SSSE3
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S
deleted file mode 100644
index b5a1cc202e..0000000000
--- a/sysdeps/x86_64/multiarch/memcmp.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Multiple versions of memcmp
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
- .text
-ENTRY(memcmp)
- .type memcmp, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- HAS_CPU_FEATURE (SSSE3)
- jnz 2f
- leaq __memcmp_sse2(%rip), %rax
- ret
-
-2: HAS_CPU_FEATURE (SSE4_1)
- jz 3f
- leaq __memcmp_sse4_1(%rip), %rax
- ret
-
-3: leaq __memcmp_ssse3(%rip), %rax
- ret
-
-END(memcmp)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __memcmp_sse2, @function; \
- .p2align 4; \
- .globl __memcmp_sse2; \
- .hidden __memcmp_sse2; \
- __memcmp_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __memcmp_sse2, .-__memcmp_sse2
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memcmp; __GI_memcmp = __memcmp_sse2
-# endif
-#endif
-
-#include "../memcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strspn.S b/sysdeps/x86_64/multiarch/memcmp.c
index 4942826b24..6f3ca43128 100644
--- a/sysdeps/x86_64/multiarch/strspn.S
+++ b/sysdeps/x86_64/multiarch/memcmp.c
@@ -1,7 +1,6 @@
-/* Multiple versions of strspn
+/* Multiple versions of memcmp.
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,33 +17,21 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <config.h>
-#include <sysdep.h>
-#include <init-arch.h>
-
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
- .text
-ENTRY(strspn)
- .type strspn, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __strspn_sse2(%rip), %rax
- HAS_CPU_FEATURE (SSE4_2)
- jz 2f
- leaq __strspn_sse42(%rip), %rax
-2: ret
-END(strspn)
+# define memcmp __redirect_memcmp /* While <string.h> is read, its memcmp declaration takes the name __redirect_memcmp.  */
+# include <string.h>
+# undef memcmp /* From here on memcmp is the plain identifier again.  */
-# undef ENTRY
-# define ENTRY(name) \
- .type __strspn_sse2, @function; \
- .globl __strspn_sse2; \
- .align 16; \
- __strspn_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strspn_sse2, .-__strspn_sse2
-#endif
+# define SYMBOL_NAME memcmp /* Consumed by ifunc-memcmp.h; presumably names the candidate implementations -- confirm there.  */
+# include "ifunc-memcmp.h"
-#include "../strspn.S"
+libc_ifunc_redirected (__redirect_memcmp, memcmp, IFUNC_SELECTOR ()); /* Presumably defines memcmp as an ifunc resolved through IFUNC_SELECTOR () -- confirm against the macro.  */
+# undef bcmp
+weak_alias (memcmp, bcmp) /* bcmp is provided as a weak alias of memcmp.  */
+
+# ifdef SHARED /* In the shared build, also declare __GI_memcmp with hidden visibility.  */
+__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
deleted file mode 100644
index 74fed186e9..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
+++ /dev/null
@@ -1,376 +0,0 @@
-/* memcpy with AVX
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#if IS_IN (libc) \
- && (defined SHARED \
- || defined USE_AS_MEMMOVE \
- || !defined USE_MULTIARCH)
-
-#include "asm-syntax.h"
-#ifndef MEMCPY
-# define MEMCPY __memcpy_avx_unaligned
-# define MEMCPY_CHK __memcpy_chk_avx_unaligned
-#endif
-
- .section .text.avx,"ax",@progbits
-#if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMCPY_CHK)
-#endif
-
-ENTRY (MEMCPY)
- mov %rdi, %rax
-#ifdef USE_AS_MEMPCPY
- add %rdx, %rax
-#endif
- cmp $256, %rdx
- jae L(256bytesormore)
- cmp $16, %dl
- jb L(less_16bytes)
- cmp $128, %dl
- jb L(less_128bytes)
- vmovdqu (%rsi), %xmm0
- lea (%rsi, %rdx), %rcx
- vmovdqu 0x10(%rsi), %xmm1
- vmovdqu 0x20(%rsi), %xmm2
- vmovdqu 0x30(%rsi), %xmm3
- vmovdqu 0x40(%rsi), %xmm4
- vmovdqu 0x50(%rsi), %xmm5
- vmovdqu 0x60(%rsi), %xmm6
- vmovdqu 0x70(%rsi), %xmm7
- vmovdqu -0x80(%rcx), %xmm8
- vmovdqu -0x70(%rcx), %xmm9
- vmovdqu -0x60(%rcx), %xmm10
- vmovdqu -0x50(%rcx), %xmm11
- vmovdqu -0x40(%rcx), %xmm12
- vmovdqu -0x30(%rcx), %xmm13
- vmovdqu -0x20(%rcx), %xmm14
- vmovdqu -0x10(%rcx), %xmm15
- lea (%rdi, %rdx), %rdx
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm1, 0x10(%rdi)
- vmovdqu %xmm2, 0x20(%rdi)
- vmovdqu %xmm3, 0x30(%rdi)
- vmovdqu %xmm4, 0x40(%rdi)
- vmovdqu %xmm5, 0x50(%rdi)
- vmovdqu %xmm6, 0x60(%rdi)
- vmovdqu %xmm7, 0x70(%rdi)
- vmovdqu %xmm8, -0x80(%rdx)
- vmovdqu %xmm9, -0x70(%rdx)
- vmovdqu %xmm10, -0x60(%rdx)
- vmovdqu %xmm11, -0x50(%rdx)
- vmovdqu %xmm12, -0x40(%rdx)
- vmovdqu %xmm13, -0x30(%rdx)
- vmovdqu %xmm14, -0x20(%rdx)
- vmovdqu %xmm15, -0x10(%rdx)
- ret
- .p2align 4
-L(less_128bytes):
- cmp $64, %dl
- jb L(less_64bytes)
- vmovdqu (%rsi), %xmm0
- lea (%rsi, %rdx), %rcx
- vmovdqu 0x10(%rsi), %xmm1
- vmovdqu 0x20(%rsi), %xmm2
- lea (%rdi, %rdx), %rdx
- vmovdqu 0x30(%rsi), %xmm3
- vmovdqu -0x40(%rcx), %xmm4
- vmovdqu -0x30(%rcx), %xmm5
- vmovdqu -0x20(%rcx), %xmm6
- vmovdqu -0x10(%rcx), %xmm7
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm1, 0x10(%rdi)
- vmovdqu %xmm2, 0x20(%rdi)
- vmovdqu %xmm3, 0x30(%rdi)
- vmovdqu %xmm4, -0x40(%rdx)
- vmovdqu %xmm5, -0x30(%rdx)
- vmovdqu %xmm6, -0x20(%rdx)
- vmovdqu %xmm7, -0x10(%rdx)
- ret
-
- .p2align 4
-L(less_64bytes):
- cmp $32, %dl
- jb L(less_32bytes)
- vmovdqu (%rsi), %xmm0
- vmovdqu 0x10(%rsi), %xmm1
- vmovdqu -0x20(%rsi, %rdx), %xmm6
- vmovdqu -0x10(%rsi, %rdx), %xmm7
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm1, 0x10(%rdi)
- vmovdqu %xmm6, -0x20(%rdi, %rdx)
- vmovdqu %xmm7, -0x10(%rdi, %rdx)
- ret
-
- .p2align 4
-L(less_32bytes):
- vmovdqu (%rsi), %xmm0
- vmovdqu -0x10(%rsi, %rdx), %xmm7
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm7, -0x10(%rdi, %rdx)
- ret
-
- .p2align 4
-L(less_16bytes):
- cmp $8, %dl
- jb L(less_8bytes)
- movq -0x08(%rsi, %rdx), %rcx
- movq (%rsi), %rsi
- movq %rsi, (%rdi)
- movq %rcx, -0x08(%rdi, %rdx)
- ret
-
- .p2align 4
-L(less_8bytes):
- cmp $4, %dl
- jb L(less_4bytes)
- mov -0x04(%rsi, %rdx), %ecx
- mov (%rsi), %esi
- mov %esi, (%rdi)
- mov %ecx, -0x04(%rdi, %rdx)
- ret
-
-L(less_4bytes):
- cmp $1, %dl
- jbe L(less_2bytes)
- mov -0x02(%rsi, %rdx), %cx
- mov (%rsi), %si
- mov %si, (%rdi)
- mov %cx, -0x02(%rdi, %rdx)
- ret
-
-L(less_2bytes):
- jb L(less_0bytes)
- mov (%rsi), %cl
- mov %cl, (%rdi)
-L(less_0bytes):
- ret
-
- .p2align 4
-L(256bytesormore):
-#ifdef USE_AS_MEMMOVE
- mov %rdi, %rcx
- sub %rsi, %rcx
- cmp %rdx, %rcx
- jc L(copy_backward)
-#endif
- cmp $2048, %rdx
- jae L(gobble_data_movsb)
- mov %rax, %r8
- lea (%rsi, %rdx), %rcx
- mov %rdi, %r10
- vmovdqu -0x80(%rcx), %xmm5
- vmovdqu -0x70(%rcx), %xmm6
- mov $0x80, %rax
- and $-32, %rdi
- add $32, %rdi
- vmovdqu -0x60(%rcx), %xmm7
- vmovdqu -0x50(%rcx), %xmm8
- mov %rdi, %r11
- sub %r10, %r11
- vmovdqu -0x40(%rcx), %xmm9
- vmovdqu -0x30(%rcx), %xmm10
- sub %r11, %rdx
- vmovdqu -0x20(%rcx), %xmm11
- vmovdqu -0x10(%rcx), %xmm12
- vmovdqu (%rsi), %ymm4
- add %r11, %rsi
- sub %eax, %edx
-L(goble_128_loop):
- vmovdqu (%rsi), %ymm0
- vmovdqu 0x20(%rsi), %ymm1
- vmovdqu 0x40(%rsi), %ymm2
- vmovdqu 0x60(%rsi), %ymm3
- add %rax, %rsi
- vmovdqa %ymm0, (%rdi)
- vmovdqa %ymm1, 0x20(%rdi)
- vmovdqa %ymm2, 0x40(%rdi)
- vmovdqa %ymm3, 0x60(%rdi)
- add %rax, %rdi
- sub %eax, %edx
- jae L(goble_128_loop)
- add %eax, %edx
- add %rdi, %rdx
- vmovdqu %ymm4, (%r10)
- vzeroupper
- vmovdqu %xmm5, -0x80(%rdx)
- vmovdqu %xmm6, -0x70(%rdx)
- vmovdqu %xmm7, -0x60(%rdx)
- vmovdqu %xmm8, -0x50(%rdx)
- vmovdqu %xmm9, -0x40(%rdx)
- vmovdqu %xmm10, -0x30(%rdx)
- vmovdqu %xmm11, -0x20(%rdx)
- vmovdqu %xmm12, -0x10(%rdx)
- mov %r8, %rax
- ret
-
- .p2align 4
-L(gobble_data_movsb):
-#ifdef SHARED_CACHE_SIZE_HALF
- mov $SHARED_CACHE_SIZE_HALF, %rcx
-#else
- mov __x86_shared_cache_size_half(%rip), %rcx
-#endif
- shl $3, %rcx
- cmp %rcx, %rdx
- jae L(gobble_big_data_fwd)
- mov %rdx, %rcx
- mov %rdx, %rcx
- rep movsb
- ret
-
- .p2align 4
-L(gobble_big_data_fwd):
- lea (%rsi, %rdx), %rcx
- vmovdqu (%rsi), %ymm4
- vmovdqu -0x80(%rsi,%rdx), %xmm5
- vmovdqu -0x70(%rcx), %xmm6
- vmovdqu -0x60(%rcx), %xmm7
- vmovdqu -0x50(%rcx), %xmm8
- vmovdqu -0x40(%rcx), %xmm9
- vmovdqu -0x30(%rcx), %xmm10
- vmovdqu -0x20(%rcx), %xmm11
- vmovdqu -0x10(%rcx), %xmm12
- mov %rdi, %r8
- and $-32, %rdi
- add $32, %rdi
- mov %rdi, %r10
- sub %r8, %r10
- sub %r10, %rdx
- add %r10, %rsi
- lea (%rdi, %rdx), %rcx
- add $-0x80, %rdx
-L(gobble_mem_fwd_loop):
- prefetchnta 0x1c0(%rsi)
- prefetchnta 0x280(%rsi)
- vmovdqu (%rsi), %ymm0
- vmovdqu 0x20(%rsi), %ymm1
- vmovdqu 0x40(%rsi), %ymm2
- vmovdqu 0x60(%rsi), %ymm3
- sub $-0x80, %rsi
- vmovntdq %ymm0, (%rdi)
- vmovntdq %ymm1, 0x20(%rdi)
- vmovntdq %ymm2, 0x40(%rdi)
- vmovntdq %ymm3, 0x60(%rdi)
- sub $-0x80, %rdi
- add $-0x80, %rdx
- jb L(gobble_mem_fwd_loop)
- sfence
- vmovdqu %ymm4, (%r8)
- vzeroupper
- vmovdqu %xmm5, -0x80(%rcx)
- vmovdqu %xmm6, -0x70(%rcx)
- vmovdqu %xmm7, -0x60(%rcx)
- vmovdqu %xmm8, -0x50(%rcx)
- vmovdqu %xmm9, -0x40(%rcx)
- vmovdqu %xmm10, -0x30(%rcx)
- vmovdqu %xmm11, -0x20(%rcx)
- vmovdqu %xmm12, -0x10(%rcx)
- ret
-
-#ifdef USE_AS_MEMMOVE
- .p2align 4
-L(copy_backward):
-#ifdef SHARED_CACHE_SIZE_HALF
- mov $SHARED_CACHE_SIZE_HALF, %rcx
-#else
- mov __x86_shared_cache_size_half(%rip), %rcx
-#endif
- shl $3, %rcx
- vmovdqu (%rsi), %xmm5
- vmovdqu 0x10(%rsi), %xmm6
- add %rdx, %rdi
- vmovdqu 0x20(%rsi), %xmm7
- vmovdqu 0x30(%rsi), %xmm8
- lea -0x20(%rdi), %r10
- mov %rdi, %r11
- vmovdqu 0x40(%rsi), %xmm9
- vmovdqu 0x50(%rsi), %xmm10
- and $0x1f, %r11
- vmovdqu 0x60(%rsi), %xmm11
- vmovdqu 0x70(%rsi), %xmm12
- xor %r11, %rdi
- add %rdx, %rsi
- vmovdqu -0x20(%rsi), %ymm4
- sub %r11, %rsi
- sub %r11, %rdx
- cmp %rcx, %rdx
- ja L(gobble_big_data_bwd)
- add $-0x80, %rdx
-L(gobble_mem_bwd_llc):
- vmovdqu -0x20(%rsi), %ymm0
- vmovdqu -0x40(%rsi), %ymm1
- vmovdqu -0x60(%rsi), %ymm2
- vmovdqu -0x80(%rsi), %ymm3
- lea -0x80(%rsi), %rsi
- vmovdqa %ymm0, -0x20(%rdi)
- vmovdqa %ymm1, -0x40(%rdi)
- vmovdqa %ymm2, -0x60(%rdi)
- vmovdqa %ymm3, -0x80(%rdi)
- lea -0x80(%rdi), %rdi
- add $-0x80, %rdx
- jb L(gobble_mem_bwd_llc)
- vmovdqu %ymm4, (%r10)
- vzeroupper
- vmovdqu %xmm5, (%rax)
- vmovdqu %xmm6, 0x10(%rax)
- vmovdqu %xmm7, 0x20(%rax)
- vmovdqu %xmm8, 0x30(%rax)
- vmovdqu %xmm9, 0x40(%rax)
- vmovdqu %xmm10, 0x50(%rax)
- vmovdqu %xmm11, 0x60(%rax)
- vmovdqu %xmm12, 0x70(%rax)
- ret
-
- .p2align 4
-L(gobble_big_data_bwd):
- add $-0x80, %rdx
-L(gobble_mem_bwd_loop):
- prefetchnta -0x1c0(%rsi)
- prefetchnta -0x280(%rsi)
- vmovdqu -0x20(%rsi), %ymm0
- vmovdqu -0x40(%rsi), %ymm1
- vmovdqu -0x60(%rsi), %ymm2
- vmovdqu -0x80(%rsi), %ymm3
- lea -0x80(%rsi), %rsi
- vmovntdq %ymm0, -0x20(%rdi)
- vmovntdq %ymm1, -0x40(%rdi)
- vmovntdq %ymm2, -0x60(%rdi)
- vmovntdq %ymm3, -0x80(%rdi)
- lea -0x80(%rdi), %rdi
- add $-0x80, %rdx
- jb L(gobble_mem_bwd_loop)
- sfence
- vmovdqu %ymm4, (%r10)
- vzeroupper
- vmovdqu %xmm5, (%rax)
- vmovdqu %xmm6, 0x10(%rax)
- vmovdqu %xmm7, 0x20(%rax)
- vmovdqu %xmm8, 0x30(%rax)
- vmovdqu %xmm9, 0x40(%rax)
- vmovdqu %xmm10, 0x50(%rax)
- vmovdqu %xmm11, 0x60(%rax)
- vmovdqu %xmm12, 0x70(%rax)
- ret
-#endif
-END (MEMCPY)
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
deleted file mode 100644
index 1bb12e81b0..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
+++ /dev/null
@@ -1,408 +0,0 @@
-/* memcpy optimized with AVX512 for KNL hardware.
- Copyright (C) 2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc) \
- && (defined SHARED \
- || defined USE_AS_MEMMOVE \
- || !defined USE_MULTIARCH)
-
-#include "asm-syntax.h"
-#ifndef MEMCPY
-# define MEMCPY __memcpy_avx512_no_vzeroupper
-# define MEMCPY_CHK __memcpy_chk_avx512_no_vzeroupper
-#endif
-
- .section .text,"ax",@progbits
-#if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMCPY_CHK)
-#endif
-
-ENTRY (MEMCPY)
- mov %rdi, %rax
-#ifdef USE_AS_MEMPCPY
- add %rdx, %rax
-#endif
- lea (%rsi, %rdx), %rcx
- lea (%rdi, %rdx), %r9
- cmp $512, %rdx
- ja L(512bytesormore)
-
-L(check):
- cmp $16, %rdx
- jbe L(less_16bytes)
- cmp $256, %rdx
- jb L(less_256bytes)
- vmovups (%rsi), %zmm0
- vmovups 0x40(%rsi), %zmm1
- vmovups 0x80(%rsi), %zmm2
- vmovups 0xC0(%rsi), %zmm3
- vmovups -0x100(%rcx), %zmm4
- vmovups -0xC0(%rcx), %zmm5
- vmovups -0x80(%rcx), %zmm6
- vmovups -0x40(%rcx), %zmm7
- vmovups %zmm0, (%rdi)
- vmovups %zmm1, 0x40(%rdi)
- vmovups %zmm2, 0x80(%rdi)
- vmovups %zmm3, 0xC0(%rdi)
- vmovups %zmm4, -0x100(%r9)
- vmovups %zmm5, -0xC0(%r9)
- vmovups %zmm6, -0x80(%r9)
- vmovups %zmm7, -0x40(%r9)
- ret
-
-L(less_256bytes):
- cmp $128, %dl
- jb L(less_128bytes)
- vmovups (%rsi), %zmm0
- vmovups 0x40(%rsi), %zmm1
- vmovups -0x80(%rcx), %zmm2
- vmovups -0x40(%rcx), %zmm3
- vmovups %zmm0, (%rdi)
- vmovups %zmm1, 0x40(%rdi)
- vmovups %zmm2, -0x80(%r9)
- vmovups %zmm3, -0x40(%r9)
- ret
-
-L(less_128bytes):
- cmp $64, %dl
- jb L(less_64bytes)
- vmovdqu (%rsi), %ymm0
- vmovdqu 0x20(%rsi), %ymm1
- vmovdqu -0x40(%rcx), %ymm2
- vmovdqu -0x20(%rcx), %ymm3
- vmovdqu %ymm0, (%rdi)
- vmovdqu %ymm1, 0x20(%rdi)
- vmovdqu %ymm2, -0x40(%r9)
- vmovdqu %ymm3, -0x20(%r9)
- ret
-
-L(less_64bytes):
- cmp $32, %dl
- jb L(less_32bytes)
- vmovdqu (%rsi), %ymm0
- vmovdqu -0x20(%rcx), %ymm1
- vmovdqu %ymm0, (%rdi)
- vmovdqu %ymm1, -0x20(%r9)
- ret
-
-L(less_32bytes):
- vmovdqu (%rsi), %xmm0
- vmovdqu -0x10(%rcx), %xmm1
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm1, -0x10(%r9)
- ret
-
-L(less_16bytes):
- cmp $8, %dl
- jb L(less_8bytes)
- movq (%rsi), %rsi
- movq -0x8(%rcx), %rcx
- movq %rsi, (%rdi)
- movq %rcx, -0x8(%r9)
- ret
-
-L(less_8bytes):
- cmp $4, %dl
- jb L(less_4bytes)
- mov (%rsi), %esi
- mov -0x4(%rcx), %ecx
- mov %esi, (%rdi)
- mov %ecx, -0x4(%r9)
- ret
-
-L(less_4bytes):
- cmp $2, %dl
- jb L(less_2bytes)
- mov (%rsi), %si
- mov -0x2(%rcx), %cx
- mov %si, (%rdi)
- mov %cx, -0x2(%r9)
- ret
-
-L(less_2bytes):
- cmp $1, %dl
- jb L(less_1bytes)
- mov (%rsi), %cl
- mov %cl, (%rdi)
-L(less_1bytes):
- ret
-
-L(512bytesormore):
-#ifdef SHARED_CACHE_SIZE_HALF
- mov $SHARED_CACHE_SIZE_HALF, %r8
-#else
- mov __x86_shared_cache_size_half(%rip), %r8
-#endif
- cmp %r8, %rdx
- jae L(preloop_large)
- cmp $1024, %rdx
- ja L(1024bytesormore)
- prefetcht1 (%rsi)
- prefetcht1 0x40(%rsi)
- prefetcht1 0x80(%rsi)
- prefetcht1 0xC0(%rsi)
- prefetcht1 0x100(%rsi)
- prefetcht1 0x140(%rsi)
- prefetcht1 0x180(%rsi)
- prefetcht1 0x1C0(%rsi)
- prefetcht1 -0x200(%rcx)
- prefetcht1 -0x1C0(%rcx)
- prefetcht1 -0x180(%rcx)
- prefetcht1 -0x140(%rcx)
- prefetcht1 -0x100(%rcx)
- prefetcht1 -0xC0(%rcx)
- prefetcht1 -0x80(%rcx)
- prefetcht1 -0x40(%rcx)
- vmovups (%rsi), %zmm0
- vmovups 0x40(%rsi), %zmm1
- vmovups 0x80(%rsi), %zmm2
- vmovups 0xC0(%rsi), %zmm3
- vmovups 0x100(%rsi), %zmm4
- vmovups 0x140(%rsi), %zmm5
- vmovups 0x180(%rsi), %zmm6
- vmovups 0x1C0(%rsi), %zmm7
- vmovups -0x200(%rcx), %zmm8
- vmovups -0x1C0(%rcx), %zmm9
- vmovups -0x180(%rcx), %zmm10
- vmovups -0x140(%rcx), %zmm11
- vmovups -0x100(%rcx), %zmm12
- vmovups -0xC0(%rcx), %zmm13
- vmovups -0x80(%rcx), %zmm14
- vmovups -0x40(%rcx), %zmm15
- vmovups %zmm0, (%rdi)
- vmovups %zmm1, 0x40(%rdi)
- vmovups %zmm2, 0x80(%rdi)
- vmovups %zmm3, 0xC0(%rdi)
- vmovups %zmm4, 0x100(%rdi)
- vmovups %zmm5, 0x140(%rdi)
- vmovups %zmm6, 0x180(%rdi)
- vmovups %zmm7, 0x1C0(%rdi)
- vmovups %zmm8, -0x200(%r9)
- vmovups %zmm9, -0x1C0(%r9)
- vmovups %zmm10, -0x180(%r9)
- vmovups %zmm11, -0x140(%r9)
- vmovups %zmm12, -0x100(%r9)
- vmovups %zmm13, -0xC0(%r9)
- vmovups %zmm14, -0x80(%r9)
- vmovups %zmm15, -0x40(%r9)
- ret
-
-L(1024bytesormore):
- cmp %rsi, %rdi
- ja L(1024bytesormore_bkw)
- sub $512, %r9
- vmovups -0x200(%rcx), %zmm8
- vmovups -0x1C0(%rcx), %zmm9
- vmovups -0x180(%rcx), %zmm10
- vmovups -0x140(%rcx), %zmm11
- vmovups -0x100(%rcx), %zmm12
- vmovups -0xC0(%rcx), %zmm13
- vmovups -0x80(%rcx), %zmm14
- vmovups -0x40(%rcx), %zmm15
- prefetcht1 (%rsi)
- prefetcht1 0x40(%rsi)
- prefetcht1 0x80(%rsi)
- prefetcht1 0xC0(%rsi)
- prefetcht1 0x100(%rsi)
- prefetcht1 0x140(%rsi)
- prefetcht1 0x180(%rsi)
- prefetcht1 0x1C0(%rsi)
-
-/* Loop with unaligned memory access. */
-L(gobble_512bytes_loop):
- vmovups (%rsi), %zmm0
- vmovups 0x40(%rsi), %zmm1
- vmovups 0x80(%rsi), %zmm2
- vmovups 0xC0(%rsi), %zmm3
- vmovups 0x100(%rsi), %zmm4
- vmovups 0x140(%rsi), %zmm5
- vmovups 0x180(%rsi), %zmm6
- vmovups 0x1C0(%rsi), %zmm7
- add $512, %rsi
- prefetcht1 (%rsi)
- prefetcht1 0x40(%rsi)
- prefetcht1 0x80(%rsi)
- prefetcht1 0xC0(%rsi)
- prefetcht1 0x100(%rsi)
- prefetcht1 0x140(%rsi)
- prefetcht1 0x180(%rsi)
- prefetcht1 0x1C0(%rsi)
- vmovups %zmm0, (%rdi)
- vmovups %zmm1, 0x40(%rdi)
- vmovups %zmm2, 0x80(%rdi)
- vmovups %zmm3, 0xC0(%rdi)
- vmovups %zmm4, 0x100(%rdi)
- vmovups %zmm5, 0x140(%rdi)
- vmovups %zmm6, 0x180(%rdi)
- vmovups %zmm7, 0x1C0(%rdi)
- add $512, %rdi
- cmp %r9, %rdi
- jb L(gobble_512bytes_loop)
- vmovups %zmm8, (%r9)
- vmovups %zmm9, 0x40(%r9)
- vmovups %zmm10, 0x80(%r9)
- vmovups %zmm11, 0xC0(%r9)
- vmovups %zmm12, 0x100(%r9)
- vmovups %zmm13, 0x140(%r9)
- vmovups %zmm14, 0x180(%r9)
- vmovups %zmm15, 0x1C0(%r9)
- ret
-
-L(1024bytesormore_bkw):
- add $512, %rdi
- vmovups 0x1C0(%rsi), %zmm8
- vmovups 0x180(%rsi), %zmm9
- vmovups 0x140(%rsi), %zmm10
- vmovups 0x100(%rsi), %zmm11
- vmovups 0xC0(%rsi), %zmm12
- vmovups 0x80(%rsi), %zmm13
- vmovups 0x40(%rsi), %zmm14
- vmovups (%rsi), %zmm15
- prefetcht1 -0x40(%rcx)
- prefetcht1 -0x80(%rcx)
- prefetcht1 -0xC0(%rcx)
- prefetcht1 -0x100(%rcx)
- prefetcht1 -0x140(%rcx)
- prefetcht1 -0x180(%rcx)
- prefetcht1 -0x1C0(%rcx)
- prefetcht1 -0x200(%rcx)
-
-/* Backward loop with unaligned memory access. */
-L(gobble_512bytes_loop_bkw):
- vmovups -0x40(%rcx), %zmm0
- vmovups -0x80(%rcx), %zmm1
- vmovups -0xC0(%rcx), %zmm2
- vmovups -0x100(%rcx), %zmm3
- vmovups -0x140(%rcx), %zmm4
- vmovups -0x180(%rcx), %zmm5
- vmovups -0x1C0(%rcx), %zmm6
- vmovups -0x200(%rcx), %zmm7
- sub $512, %rcx
- prefetcht1 -0x40(%rcx)
- prefetcht1 -0x80(%rcx)
- prefetcht1 -0xC0(%rcx)
- prefetcht1 -0x100(%rcx)
- prefetcht1 -0x140(%rcx)
- prefetcht1 -0x180(%rcx)
- prefetcht1 -0x1C0(%rcx)
- prefetcht1 -0x200(%rcx)
- vmovups %zmm0, -0x40(%r9)
- vmovups %zmm1, -0x80(%r9)
- vmovups %zmm2, -0xC0(%r9)
- vmovups %zmm3, -0x100(%r9)
- vmovups %zmm4, -0x140(%r9)
- vmovups %zmm5, -0x180(%r9)
- vmovups %zmm6, -0x1C0(%r9)
- vmovups %zmm7, -0x200(%r9)
- sub $512, %r9
- cmp %rdi, %r9
- ja L(gobble_512bytes_loop_bkw)
- vmovups %zmm8, -0x40(%rdi)
- vmovups %zmm9, -0x80(%rdi)
- vmovups %zmm10, -0xC0(%rdi)
- vmovups %zmm11, -0x100(%rdi)
- vmovups %zmm12, -0x140(%rdi)
- vmovups %zmm13, -0x180(%rdi)
- vmovups %zmm14, -0x1C0(%rdi)
- vmovups %zmm15, -0x200(%rdi)
- ret
-
-L(preloop_large):
- cmp %rsi, %rdi
- ja L(preloop_large_bkw)
- vmovups (%rsi), %zmm4
- vmovups 0x40(%rsi), %zmm5
-
-/* Align destination for access with non-temporal stores in the loop. */
- mov %rdi, %r8
- and $-0x80, %rdi
- add $0x80, %rdi
- sub %rdi, %r8
- sub %r8, %rsi
- add %r8, %rdx
-L(gobble_256bytes_nt_loop):
- prefetcht1 0x200(%rsi)
- prefetcht1 0x240(%rsi)
- prefetcht1 0x280(%rsi)
- prefetcht1 0x2C0(%rsi)
- prefetcht1 0x300(%rsi)
- prefetcht1 0x340(%rsi)
- prefetcht1 0x380(%rsi)
- prefetcht1 0x3C0(%rsi)
- vmovdqu64 (%rsi), %zmm0
- vmovdqu64 0x40(%rsi), %zmm1
- vmovdqu64 0x80(%rsi), %zmm2
- vmovdqu64 0xC0(%rsi), %zmm3
- vmovntdq %zmm0, (%rdi)
- vmovntdq %zmm1, 0x40(%rdi)
- vmovntdq %zmm2, 0x80(%rdi)
- vmovntdq %zmm3, 0xC0(%rdi)
- sub $256, %rdx
- add $256, %rsi
- add $256, %rdi
- cmp $256, %rdx
- ja L(gobble_256bytes_nt_loop)
- sfence
- vmovups %zmm4, (%rax)
- vmovups %zmm5, 0x40(%rax)
- jmp L(check)
-
-L(preloop_large_bkw):
- vmovups -0x80(%rcx), %zmm4
- vmovups -0x40(%rcx), %zmm5
-
-/* Align end of destination for access with non-temporal stores. */
- mov %r9, %r8
- and $-0x80, %r9
- sub %r9, %r8
- sub %r8, %rcx
- sub %r8, %rdx
- add %r9, %r8
-L(gobble_256bytes_nt_loop_bkw):
- prefetcht1 -0x400(%rcx)
- prefetcht1 -0x3C0(%rcx)
- prefetcht1 -0x380(%rcx)
- prefetcht1 -0x340(%rcx)
- prefetcht1 -0x300(%rcx)
- prefetcht1 -0x2C0(%rcx)
- prefetcht1 -0x280(%rcx)
- prefetcht1 -0x240(%rcx)
- vmovdqu64 -0x100(%rcx), %zmm0
- vmovdqu64 -0xC0(%rcx), %zmm1
- vmovdqu64 -0x80(%rcx), %zmm2
- vmovdqu64 -0x40(%rcx), %zmm3
- vmovntdq %zmm0, -0x100(%r9)
- vmovntdq %zmm1, -0xC0(%r9)
- vmovntdq %zmm2, -0x80(%r9)
- vmovntdq %zmm3, -0x40(%r9)
- sub $256, %rdx
- sub $256, %rcx
- sub $256, %r9
- cmp $256, %rdx
- ja L(gobble_256bytes_nt_loop_bkw)
- sfence
- vmovups %zmm4, -0x80(%r8)
- vmovups %zmm5, -0x40(%r8)
- jmp L(check)
-END (MEMCPY)
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
deleted file mode 100644
index c4509831fa..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ /dev/null
@@ -1,175 +0,0 @@
-/* memcpy with unaliged loads
- Copyright (C) 2013-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-
-#include "asm-syntax.h"
-
-
-ENTRY(__memcpy_sse2_unaligned)
- movq %rsi, %rax
- leaq (%rdx,%rdx), %rcx
- subq %rdi, %rax
- subq %rdx, %rax
- cmpq %rcx, %rax
- jb L(overlapping)
- cmpq $16, %rdx
- jbe L(less_16)
- movdqu (%rsi), %xmm8
- cmpq $32, %rdx
- movdqu %xmm8, (%rdi)
- movdqu -16(%rsi,%rdx), %xmm8
- movdqu %xmm8, -16(%rdi,%rdx)
- ja .L31
-L(return):
- movq %rdi, %rax
- ret
- .p2align 4,,10
- .p2align 4
-.L31:
- movdqu 16(%rsi), %xmm8
- cmpq $64, %rdx
- movdqu %xmm8, 16(%rdi)
- movdqu -32(%rsi,%rdx), %xmm8
- movdqu %xmm8, -32(%rdi,%rdx)
- jbe L(return)
- movdqu 32(%rsi), %xmm8
- cmpq $128, %rdx
- movdqu %xmm8, 32(%rdi)
- movdqu -48(%rsi,%rdx), %xmm8
- movdqu %xmm8, -48(%rdi,%rdx)
- movdqu 48(%rsi), %xmm8
- movdqu %xmm8, 48(%rdi)
- movdqu -64(%rsi,%rdx), %xmm8
- movdqu %xmm8, -64(%rdi,%rdx)
- jbe L(return)
- leaq 64(%rdi), %rcx
- addq %rdi, %rdx
- andq $-64, %rdx
- andq $-64, %rcx
- movq %rcx, %rax
- subq %rdi, %rax
- addq %rax, %rsi
- cmpq %rdx, %rcx
- je L(return)
- movq %rsi, %r10
- subq %rcx, %r10
- leaq 16(%r10), %r9
- leaq 32(%r10), %r8
- leaq 48(%r10), %rax
- .p2align 4,,10
- .p2align 4
-L(loop):
- movdqu (%rcx,%r10), %xmm8
- movdqa %xmm8, (%rcx)
- movdqu (%rcx,%r9), %xmm8
- movdqa %xmm8, 16(%rcx)
- movdqu (%rcx,%r8), %xmm8
- movdqa %xmm8, 32(%rcx)
- movdqu (%rcx,%rax), %xmm8
- movdqa %xmm8, 48(%rcx)
- addq $64, %rcx
- cmpq %rcx, %rdx
- jne L(loop)
- jmp L(return)
-L(overlapping):
- cmpq %rsi, %rdi
- jae .L3
- testq %rdx, %rdx
- .p2align 4,,5
- je L(return)
- movq %rdx, %r9
- leaq 16(%rsi), %rcx
- leaq 16(%rdi), %r8
- shrq $4, %r9
- movq %r9, %rax
- salq $4, %rax
- cmpq %rcx, %rdi
- setae %cl
- cmpq %r8, %rsi
- setae %r8b
- orl %r8d, %ecx
- cmpq $15, %rdx
- seta %r8b
- testb %r8b, %cl
- je .L16
- testq %rax, %rax
- je .L16
- xorl %ecx, %ecx
- xorl %r8d, %r8d
-.L7:
- movdqu (%rsi,%rcx), %xmm8
- addq $1, %r8
- movdqu %xmm8, (%rdi,%rcx)
- addq $16, %rcx
- cmpq %r8, %r9
- ja .L7
- cmpq %rax, %rdx
- je L(return)
-.L21:
- movzbl (%rsi,%rax), %ecx
- movb %cl, (%rdi,%rax)
- addq $1, %rax
- cmpq %rax, %rdx
- ja .L21
- jmp L(return)
-L(less_16):
- testb $24, %dl
- jne L(between_9_16)
- testb $4, %dl
- .p2align 4,,5
- jne L(between_5_8)
- testq %rdx, %rdx
- .p2align 4,,2
- je L(return)
- movzbl (%rsi), %eax
- testb $2, %dl
- movb %al, (%rdi)
- je L(return)
- movzwl -2(%rsi,%rdx), %eax
- movw %ax, -2(%rdi,%rdx)
- jmp L(return)
-.L3:
- leaq -1(%rdx), %rax
- .p2align 4,,10
- .p2align 4
-.L11:
- movzbl (%rsi,%rax), %edx
- movb %dl, (%rdi,%rax)
- subq $1, %rax
- jmp .L11
-L(between_9_16):
- movq (%rsi), %rax
- movq %rax, (%rdi)
- movq -8(%rsi,%rdx), %rax
- movq %rax, -8(%rdi,%rdx)
- jmp L(return)
-.L16:
- xorl %eax, %eax
- jmp .L21
-L(between_5_8):
- movl (%rsi), %eax
- movl %eax, (%rdi)
- movl -4(%rsi,%rdx), %eax
- movl %eax, -4(%rdi,%rdx)
- jmp L(return)
-END(__memcpy_sse2_unaligned)
-
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 08b41e9e5a..3cd1123326 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -1,5 +1,5 @@
/* memcpy with SSSE3 and REP string
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
+ Copyright (C) 2010-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -19,16 +19,15 @@
#include <sysdep.h>
-#if IS_IN (libc) \
- && (defined SHARED \
- || defined USE_AS_MEMMOVE \
- || !defined USE_MULTIARCH)
+#if IS_IN (libc)
#include "asm-syntax.h"
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3_back
# define MEMCPY_CHK __memcpy_chk_ssse3_back
+# define MEMPCPY __mempcpy_ssse3_back
+# define MEMPCPY_CHK __mempcpy_chk_ssse3_back
#endif
#define JMPTBL(I, B) I - B
@@ -40,10 +39,23 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), INDEX; \
lea (%r11, INDEX), INDEX; \
- jmp *INDEX; \
+ _CET_NOTRACK jmp *INDEX; \
ud2
.section .text.ssse3,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE /* Extra mempcpy entry points, emitted only in the plain memcpy build of this file.  */
+ENTRY (MEMPCPY_CHK) /* Checked entry: compares %rcx with %rdx before any copying.  */
+ cmpq %rdx, %rcx /* Presumably destination object size (%rcx) vs. copy length (%rdx) -- confirm against the caller contract.  */
+ jb HIDDEN_JUMPTARGET (__chk_fail) /* Unsigned %rcx < %rdx: branch to __chk_fail.  */
+END (MEMPCPY_CHK) /* No ret or jmp above: when the check passes, execution continues into the code that follows.  */
+
+ENTRY (MEMPCPY)
+ movq %rdi, %rax /* With the next instruction: %rax = %rdi + %rdx (presumably destination + length, the mempcpy result).  */
+ addq %rdx, %rax
+ jmp L(start) /* Join the copy code at L(start) with %rax already set.  */
+END (MEMPCPY)
+#endif
+
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@@ -66,6 +78,7 @@ ENTRY (MEMCPY)
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
L(copy_forward):
#endif
+L(start):
cmp $144, %rdx
jae L(144bytesormore)
@@ -112,7 +125,7 @@ L(144bytesormore):
sub $0x80, %rdx
movslq (%r11, %r9, 4), %r9
add %r11, %r9
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
.p2align 4
@@ -142,7 +155,7 @@ L(copy_backward):
sub $0x80, %rdx
movslq (%r11, %r9, 4), %r9
add %r11, %r9
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
.p2align 4
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 95de9695f9..0240bfa309 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -1,5 +1,5 @@
/* memcpy with SSSE3
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
+ Copyright (C) 2010-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -19,16 +19,15 @@
#include <sysdep.h>
-#if IS_IN (libc) \
- && (defined SHARED \
- || defined USE_AS_MEMMOVE \
- || !defined USE_MULTIARCH)
+#if IS_IN (libc)
#include "asm-syntax.h"
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3
# define MEMCPY_CHK __memcpy_chk_ssse3
+# define MEMPCPY __mempcpy_ssse3
+# define MEMPCPY_CHK __mempcpy_chk_ssse3
#endif
#define JMPTBL(I, B) I - B
@@ -40,10 +39,23 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), INDEX; \
lea (%r11, INDEX), INDEX; \
- jmp *INDEX; \
+ _CET_NOTRACK jmp *INDEX; \
ud2
.section .text.ssse3,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+ENTRY (MEMPCPY_CHK)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMPCPY_CHK)
+
+ENTRY (MEMPCPY)
+ movq %rdi, %rax
+ addq %rdx, %rax
+ jmp L(start)
+END (MEMPCPY)
+#endif
+
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@@ -66,6 +78,7 @@ ENTRY (MEMCPY)
jmp L(copy_backward)
L(copy_forward):
#endif
+L(start):
cmp $79, %rdx
lea L(table_less_80bytes)(%rip), %r11
ja L(80bytesormore)
@@ -73,7 +86,7 @@ L(copy_forward):
add %rdx, %rsi
add %rdx, %rdi
add %r11, %r9
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
.p2align 4
@@ -428,7 +441,7 @@ L(shl_1):
lea (L(shl_1_loop_L2)-L(shl_1_loop_L1))(%r9), %r9
L(L1_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -451,7 +464,7 @@ L(shl_1_loop_L1):
jb L(shl_1_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_end):
movaps %xmm4, -0x20(%rdi)
@@ -471,7 +484,7 @@ L(shl_1_bwd):
lea (L(shl_1_bwd_loop_L2)-L(shl_1_bwd_loop_L1))(%r9), %r9
L(L1_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -496,7 +509,7 @@ L(shl_1_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_1_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_bwd_end):
movaps %xmm4, (%rdi)
@@ -513,7 +526,7 @@ L(shl_2):
lea (L(shl_2_loop_L2)-L(shl_2_loop_L1))(%r9), %r9
L(L2_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -536,7 +549,7 @@ L(shl_2_loop_L1):
jb L(shl_2_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_end):
movaps %xmm4, -0x20(%rdi)
@@ -556,7 +569,7 @@ L(shl_2_bwd):
lea (L(shl_2_bwd_loop_L2)-L(shl_2_bwd_loop_L1))(%r9), %r9
L(L2_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -581,7 +594,7 @@ L(shl_2_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_2_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_bwd_end):
movaps %xmm4, (%rdi)
@@ -598,7 +611,7 @@ L(shl_3):
lea (L(shl_3_loop_L2)-L(shl_3_loop_L1))(%r9), %r9
L(L3_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -621,7 +634,7 @@ L(shl_3_loop_L1):
jb L(shl_3_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_end):
movaps %xmm4, -0x20(%rdi)
@@ -641,7 +654,7 @@ L(shl_3_bwd):
lea (L(shl_3_bwd_loop_L2)-L(shl_3_bwd_loop_L1))(%r9), %r9
L(L3_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -666,7 +679,7 @@ L(shl_3_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_3_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_bwd_end):
movaps %xmm4, (%rdi)
@@ -683,7 +696,7 @@ L(shl_4):
lea (L(shl_4_loop_L2)-L(shl_4_loop_L1))(%r9), %r9
L(L4_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -706,7 +719,7 @@ L(shl_4_loop_L1):
jb L(shl_4_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_end):
movaps %xmm4, -0x20(%rdi)
@@ -726,7 +739,7 @@ L(shl_4_bwd):
lea (L(shl_4_bwd_loop_L2)-L(shl_4_bwd_loop_L1))(%r9), %r9
L(L4_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -751,7 +764,7 @@ L(shl_4_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_4_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_bwd_end):
movaps %xmm4, (%rdi)
@@ -768,7 +781,7 @@ L(shl_5):
lea (L(shl_5_loop_L2)-L(shl_5_loop_L1))(%r9), %r9
L(L5_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -791,7 +804,7 @@ L(shl_5_loop_L1):
jb L(shl_5_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_end):
movaps %xmm4, -0x20(%rdi)
@@ -811,7 +824,7 @@ L(shl_5_bwd):
lea (L(shl_5_bwd_loop_L2)-L(shl_5_bwd_loop_L1))(%r9), %r9
L(L5_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -836,7 +849,7 @@ L(shl_5_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_5_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_bwd_end):
movaps %xmm4, (%rdi)
@@ -853,7 +866,7 @@ L(shl_6):
lea (L(shl_6_loop_L2)-L(shl_6_loop_L1))(%r9), %r9
L(L6_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -876,7 +889,7 @@ L(shl_6_loop_L1):
jb L(shl_6_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_end):
movaps %xmm4, -0x20(%rdi)
@@ -896,7 +909,7 @@ L(shl_6_bwd):
lea (L(shl_6_bwd_loop_L2)-L(shl_6_bwd_loop_L1))(%r9), %r9
L(L6_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -921,7 +934,7 @@ L(shl_6_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_6_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_bwd_end):
movaps %xmm4, (%rdi)
@@ -938,7 +951,7 @@ L(shl_7):
lea (L(shl_7_loop_L2)-L(shl_7_loop_L1))(%r9), %r9
L(L7_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -961,7 +974,7 @@ L(shl_7_loop_L1):
jb L(shl_7_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_end):
movaps %xmm4, -0x20(%rdi)
@@ -981,7 +994,7 @@ L(shl_7_bwd):
lea (L(shl_7_bwd_loop_L2)-L(shl_7_bwd_loop_L1))(%r9), %r9
L(L7_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1006,7 +1019,7 @@ L(shl_7_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_7_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_bwd_end):
movaps %xmm4, (%rdi)
@@ -1023,7 +1036,7 @@ L(shl_8):
lea (L(shl_8_loop_L2)-L(shl_8_loop_L1))(%r9), %r9
L(L8_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
L(shl_8_loop_L2):
prefetchnta 0x1c0(%rsi)
L(shl_8_loop_L1):
@@ -1045,7 +1058,7 @@ L(shl_8_loop_L1):
jb L(shl_8_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
.p2align 4
L(shl_8_end):
@@ -1066,7 +1079,7 @@ L(shl_8_bwd):
lea (L(shl_8_bwd_loop_L2)-L(shl_8_bwd_loop_L1))(%r9), %r9
L(L8_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_8_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1091,7 +1104,7 @@ L(shl_8_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_8_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_8_bwd_end):
movaps %xmm4, (%rdi)
@@ -1108,7 +1121,7 @@ L(shl_9):
lea (L(shl_9_loop_L2)-L(shl_9_loop_L1))(%r9), %r9
L(L9_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1131,7 +1144,7 @@ L(shl_9_loop_L1):
jb L(shl_9_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_end):
movaps %xmm4, -0x20(%rdi)
@@ -1151,7 +1164,7 @@ L(shl_9_bwd):
lea (L(shl_9_bwd_loop_L2)-L(shl_9_bwd_loop_L1))(%r9), %r9
L(L9_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1176,7 +1189,7 @@ L(shl_9_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_9_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_bwd_end):
movaps %xmm4, (%rdi)
@@ -1193,7 +1206,7 @@ L(shl_10):
lea (L(shl_10_loop_L2)-L(shl_10_loop_L1))(%r9), %r9
L(L10_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1216,7 +1229,7 @@ L(shl_10_loop_L1):
jb L(shl_10_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_end):
movaps %xmm4, -0x20(%rdi)
@@ -1236,7 +1249,7 @@ L(shl_10_bwd):
lea (L(shl_10_bwd_loop_L2)-L(shl_10_bwd_loop_L1))(%r9), %r9
L(L10_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1261,7 +1274,7 @@ L(shl_10_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_10_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_bwd_end):
movaps %xmm4, (%rdi)
@@ -1278,7 +1291,7 @@ L(shl_11):
lea (L(shl_11_loop_L2)-L(shl_11_loop_L1))(%r9), %r9
L(L11_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1301,7 +1314,7 @@ L(shl_11_loop_L1):
jb L(shl_11_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_end):
movaps %xmm4, -0x20(%rdi)
@@ -1321,7 +1334,7 @@ L(shl_11_bwd):
lea (L(shl_11_bwd_loop_L2)-L(shl_11_bwd_loop_L1))(%r9), %r9
L(L11_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1346,7 +1359,7 @@ L(shl_11_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_11_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_bwd_end):
movaps %xmm4, (%rdi)
@@ -1363,7 +1376,7 @@ L(shl_12):
lea (L(shl_12_loop_L2)-L(shl_12_loop_L1))(%r9), %r9
L(L12_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1386,7 +1399,7 @@ L(shl_12_loop_L1):
jb L(shl_12_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_end):
movaps %xmm4, -0x20(%rdi)
@@ -1406,7 +1419,7 @@ L(shl_12_bwd):
lea (L(shl_12_bwd_loop_L2)-L(shl_12_bwd_loop_L1))(%r9), %r9
L(L12_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1431,7 +1444,7 @@ L(shl_12_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_12_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_bwd_end):
movaps %xmm4, (%rdi)
@@ -1448,7 +1461,7 @@ L(shl_13):
lea (L(shl_13_loop_L2)-L(shl_13_loop_L1))(%r9), %r9
L(L13_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1471,7 +1484,7 @@ L(shl_13_loop_L1):
jb L(shl_13_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_end):
movaps %xmm4, -0x20(%rdi)
@@ -1491,7 +1504,7 @@ L(shl_13_bwd):
lea (L(shl_13_bwd_loop_L2)-L(shl_13_bwd_loop_L1))(%r9), %r9
L(L13_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1516,7 +1529,7 @@ L(shl_13_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_13_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_bwd_end):
movaps %xmm4, (%rdi)
@@ -1533,7 +1546,7 @@ L(shl_14):
lea (L(shl_14_loop_L2)-L(shl_14_loop_L1))(%r9), %r9
L(L14_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1556,7 +1569,7 @@ L(shl_14_loop_L1):
jb L(shl_14_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_end):
movaps %xmm4, -0x20(%rdi)
@@ -1576,7 +1589,7 @@ L(shl_14_bwd):
lea (L(shl_14_bwd_loop_L2)-L(shl_14_bwd_loop_L1))(%r9), %r9
L(L14_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1601,7 +1614,7 @@ L(shl_14_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_14_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_bwd_end):
movaps %xmm4, (%rdi)
@@ -1618,7 +1631,7 @@ L(shl_15):
lea (L(shl_15_loop_L2)-L(shl_15_loop_L1))(%r9), %r9
L(L15_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1641,7 +1654,7 @@ L(shl_15_loop_L1):
jb L(shl_15_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_end):
movaps %xmm4, -0x20(%rdi)
@@ -1661,7 +1674,7 @@ L(shl_15_bwd):
lea (L(shl_15_bwd_loop_L2)-L(shl_15_bwd_loop_L1))(%r9), %r9
L(L15_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1686,7 +1699,7 @@ L(shl_15_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_15_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_bwd_end):
movaps %xmm4, (%rdi)
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
deleted file mode 100644
index 64a1bcd137..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/* Multiple versions of memcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need memcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(__new_memcpy)
- .type __new_memcpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef HAVE_AVX512_ASM_SUPPORT
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jz 1f
- leaq __memcpy_avx512_no_vzeroupper(%rip), %rax
- ret
-#endif
-1: leaq __memcpy_avx_unaligned(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz 2f
- ret
-2: leaq __memcpy_sse2(%rip), %rax
- HAS_ARCH_FEATURE (Slow_BSF)
- jnz 3f
- leaq __memcpy_sse2_unaligned(%rip), %rax
- ret
-3: HAS_CPU_FEATURE (SSSE3)
- jz 4f
- leaq __memcpy_ssse3(%rip), %rax
-4: ret
-END(__new_memcpy)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __memcpy_sse2, @function; \
- .globl __memcpy_sse2; \
- .hidden __memcpy_sse2; \
- .p2align 4; \
- __memcpy_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
- .type __memcpy_chk_sse2, @function; \
- .globl __memcpy_chk_sse2; \
- .p2align 4; \
- __memcpy_chk_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
- cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
-
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
- The speedup we get from using SSSE3 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
-
-versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
-
-#include "../memcpy.S"
diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
new file mode 100644
index 0000000000..419f76aefc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -0,0 +1,39 @@
+/* Multiple versions of memcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memcpy __redirect_memcpy
+# include <string.h>
+# undef memcpy
+
+# define SYMBOL_NAME memcpy
+# include "ifunc-memmove.h"
+
+libc_ifunc_redirected (__redirect_memcpy, __new_memcpy,
+ IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (__new_memcpy, __GI_memcpy, __redirect_memcpy)
+ __attribute__ ((visibility ("hidden")));
+# endif
+
+# include <shlib-compat.h>
+versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-nonshared.S b/sysdeps/x86_64/multiarch/memcpy_chk-nonshared.S
new file mode 100644
index 0000000000..84c8842ce7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of memcpy_chk for x86-64.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/memcpy_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
deleted file mode 100644
index 648217e971..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of __memcpy_chk
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. There are no multiarch memcpy functions for static binaries.
- */
-#if IS_IN (libc)
-# ifdef SHARED
- .text
-ENTRY(__memcpy_chk)
- .type __memcpy_chk, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef HAVE_AVX512_ASM_SUPPORT
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jz 1f
- leaq __memcpy_chk_avx512_no_vzeroupper(%rip), %rax
- ret
-#endif
-1: leaq __memcpy_chk_sse2(%rip), %rax
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- leaq __memcpy_chk_ssse3(%rip), %rax
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jz 2f
- leaq __memcpy_chk_ssse3_back(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz 2f
- leaq __memcpy_chk_avx_unaligned(%rip), %rax
-2: ret
-END(__memcpy_chk)
-# else
-# include "../memcpy_chk.S"
-# endif
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.c b/sysdeps/x86_64/multiarch/memcpy_chk.c
new file mode 100644
index 0000000000..c9b901a6dd
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __memcpy_chk
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc.so. */
+#if IS_IN (libc) && defined SHARED
+# define __memcpy_chk __redirect_memcpy_chk
+# include <string.h>
+# undef __memcpy_chk
+
+# define SYMBOL_NAME memcpy_chk
+# include "ifunc-memmove.h"
+
+libc_ifunc_redirected (__redirect_memcpy_chk, __memcpy_chk,
+ IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
new file mode 100644
index 0000000000..e195e93f15
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
@@ -0,0 +1,12 @@
+#if IS_IN (libc)
+# define VEC_SIZE 32
+# define VEC(i) ymm##i
+# define VMOVNT vmovntdq
+# define VMOVU vmovdqu
+# define VMOVA vmovdqa
+
+# define SECTION(p) p##.avx
+# define MEMMOVE_SYMBOL(p,s) p##_avx_##s
+
+# include "memmove-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
index 518d1fec35..effc3ac2de 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -1,5 +1,5 @@
-/* memmove optimized with AVX512 for KNL hardware.
- Copyright (C) 2016 Free Software Foundation, Inc.
+/* memmove/memcpy/mempcpy optimized with AVX512 for KNL hardware.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,7 +16,400 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#define USE_AS_MEMMOVE
-#define MEMCPY __memmove_avx512_no_vzeroupper
-#define MEMCPY_CHK __memmove_chk_avx512_no_vzeroupper
-#include "memcpy-avx512-no-vzeroupper.S"
+#include <sysdep.h>
+
+#if IS_IN (libc)
+
+# include "asm-syntax.h"
+
+ .section .text.avx512,"ax",@progbits
+ENTRY (__mempcpy_chk_avx512_no_vzeroupper)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__mempcpy_chk_avx512_no_vzeroupper)
+
+ENTRY (__mempcpy_avx512_no_vzeroupper)
+ movq %rdi, %rax
+ addq %rdx, %rax
+ jmp L(start)
+END (__mempcpy_avx512_no_vzeroupper)
+
+ENTRY (__memmove_chk_avx512_no_vzeroupper)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memmove_chk_avx512_no_vzeroupper)
+
+ENTRY (__memmove_avx512_no_vzeroupper)
+ mov %rdi, %rax
+# ifdef USE_AS_MEMPCPY
+ add %rdx, %rax
+# endif
+L(start):
+ lea (%rsi, %rdx), %rcx
+ lea (%rdi, %rdx), %r9
+ cmp $512, %rdx
+ ja L(512bytesormore)
+
+L(check):
+ cmp $16, %rdx
+ jbe L(less_16bytes)
+ cmp $256, %rdx
+ jb L(less_256bytes)
+ vmovups (%rsi), %zmm0
+ vmovups 0x40(%rsi), %zmm1
+ vmovups 0x80(%rsi), %zmm2
+ vmovups 0xC0(%rsi), %zmm3
+ vmovups -0x100(%rcx), %zmm4
+ vmovups -0xC0(%rcx), %zmm5
+ vmovups -0x80(%rcx), %zmm6
+ vmovups -0x40(%rcx), %zmm7
+ vmovups %zmm0, (%rdi)
+ vmovups %zmm1, 0x40(%rdi)
+ vmovups %zmm2, 0x80(%rdi)
+ vmovups %zmm3, 0xC0(%rdi)
+ vmovups %zmm4, -0x100(%r9)
+ vmovups %zmm5, -0xC0(%r9)
+ vmovups %zmm6, -0x80(%r9)
+ vmovups %zmm7, -0x40(%r9)
+ ret
+
+L(less_256bytes):
+ cmp $128, %dl
+ jb L(less_128bytes)
+ vmovups (%rsi), %zmm0
+ vmovups 0x40(%rsi), %zmm1
+ vmovups -0x80(%rcx), %zmm2
+ vmovups -0x40(%rcx), %zmm3
+ vmovups %zmm0, (%rdi)
+ vmovups %zmm1, 0x40(%rdi)
+ vmovups %zmm2, -0x80(%r9)
+ vmovups %zmm3, -0x40(%r9)
+ ret
+
+L(less_128bytes):
+ cmp $64, %dl
+ jb L(less_64bytes)
+ vmovdqu (%rsi), %ymm0
+ vmovdqu 0x20(%rsi), %ymm1
+ vmovdqu -0x40(%rcx), %ymm2
+ vmovdqu -0x20(%rcx), %ymm3
+ vmovdqu %ymm0, (%rdi)
+ vmovdqu %ymm1, 0x20(%rdi)
+ vmovdqu %ymm2, -0x40(%r9)
+ vmovdqu %ymm3, -0x20(%r9)
+ ret
+
+L(less_64bytes):
+ cmp $32, %dl
+ jb L(less_32bytes)
+ vmovdqu (%rsi), %ymm0
+ vmovdqu -0x20(%rcx), %ymm1
+ vmovdqu %ymm0, (%rdi)
+ vmovdqu %ymm1, -0x20(%r9)
+ ret
+
+L(less_32bytes):
+ vmovdqu (%rsi), %xmm0
+ vmovdqu -0x10(%rcx), %xmm1
+ vmovdqu %xmm0, (%rdi)
+ vmovdqu %xmm1, -0x10(%r9)
+ ret
+
+L(less_16bytes):
+ cmp $8, %dl
+ jb L(less_8bytes)
+ movq (%rsi), %rsi
+ movq -0x8(%rcx), %rcx
+ movq %rsi, (%rdi)
+ movq %rcx, -0x8(%r9)
+ ret
+
+L(less_8bytes):
+ cmp $4, %dl
+ jb L(less_4bytes)
+ mov (%rsi), %esi
+ mov -0x4(%rcx), %ecx
+ mov %esi, (%rdi)
+ mov %ecx, -0x4(%r9)
+ ret
+
+L(less_4bytes):
+ cmp $2, %dl
+ jb L(less_2bytes)
+ mov (%rsi), %si
+ mov -0x2(%rcx), %cx
+ mov %si, (%rdi)
+ mov %cx, -0x2(%r9)
+ ret
+
+L(less_2bytes):
+ cmp $1, %dl
+ jb L(less_1bytes)
+ mov (%rsi), %cl
+ mov %cl, (%rdi)
+L(less_1bytes):
+ ret
+
+L(512bytesormore):
+# ifdef SHARED_CACHE_SIZE_HALF
+ mov $SHARED_CACHE_SIZE_HALF, %r8
+# else
+ mov __x86_shared_cache_size_half(%rip), %r8
+# endif
+ cmp %r8, %rdx
+ jae L(preloop_large)
+ cmp $1024, %rdx
+ ja L(1024bytesormore)
+ prefetcht1 (%rsi)
+ prefetcht1 0x40(%rsi)
+ prefetcht1 0x80(%rsi)
+ prefetcht1 0xC0(%rsi)
+ prefetcht1 0x100(%rsi)
+ prefetcht1 0x140(%rsi)
+ prefetcht1 0x180(%rsi)
+ prefetcht1 0x1C0(%rsi)
+ prefetcht1 -0x200(%rcx)
+ prefetcht1 -0x1C0(%rcx)
+ prefetcht1 -0x180(%rcx)
+ prefetcht1 -0x140(%rcx)
+ prefetcht1 -0x100(%rcx)
+ prefetcht1 -0xC0(%rcx)
+ prefetcht1 -0x80(%rcx)
+ prefetcht1 -0x40(%rcx)
+ vmovups (%rsi), %zmm0
+ vmovups 0x40(%rsi), %zmm1
+ vmovups 0x80(%rsi), %zmm2
+ vmovups 0xC0(%rsi), %zmm3
+ vmovups 0x100(%rsi), %zmm4
+ vmovups 0x140(%rsi), %zmm5
+ vmovups 0x180(%rsi), %zmm6
+ vmovups 0x1C0(%rsi), %zmm7
+ vmovups -0x200(%rcx), %zmm8
+ vmovups -0x1C0(%rcx), %zmm9
+ vmovups -0x180(%rcx), %zmm10
+ vmovups -0x140(%rcx), %zmm11
+ vmovups -0x100(%rcx), %zmm12
+ vmovups -0xC0(%rcx), %zmm13
+ vmovups -0x80(%rcx), %zmm14
+ vmovups -0x40(%rcx), %zmm15
+ vmovups %zmm0, (%rdi)
+ vmovups %zmm1, 0x40(%rdi)
+ vmovups %zmm2, 0x80(%rdi)
+ vmovups %zmm3, 0xC0(%rdi)
+ vmovups %zmm4, 0x100(%rdi)
+ vmovups %zmm5, 0x140(%rdi)
+ vmovups %zmm6, 0x180(%rdi)
+ vmovups %zmm7, 0x1C0(%rdi)
+ vmovups %zmm8, -0x200(%r9)
+ vmovups %zmm9, -0x1C0(%r9)
+ vmovups %zmm10, -0x180(%r9)
+ vmovups %zmm11, -0x140(%r9)
+ vmovups %zmm12, -0x100(%r9)
+ vmovups %zmm13, -0xC0(%r9)
+ vmovups %zmm14, -0x80(%r9)
+ vmovups %zmm15, -0x40(%r9)
+ ret
+
+L(1024bytesormore):
+ cmp %rsi, %rdi
+ ja L(1024bytesormore_bkw)
+ sub $512, %r9
+ vmovups -0x200(%rcx), %zmm8
+ vmovups -0x1C0(%rcx), %zmm9
+ vmovups -0x180(%rcx), %zmm10
+ vmovups -0x140(%rcx), %zmm11
+ vmovups -0x100(%rcx), %zmm12
+ vmovups -0xC0(%rcx), %zmm13
+ vmovups -0x80(%rcx), %zmm14
+ vmovups -0x40(%rcx), %zmm15
+ prefetcht1 (%rsi)
+ prefetcht1 0x40(%rsi)
+ prefetcht1 0x80(%rsi)
+ prefetcht1 0xC0(%rsi)
+ prefetcht1 0x100(%rsi)
+ prefetcht1 0x140(%rsi)
+ prefetcht1 0x180(%rsi)
+ prefetcht1 0x1C0(%rsi)
+
+/* Loop with unaligned memory access. */
+L(gobble_512bytes_loop):
+ vmovups (%rsi), %zmm0
+ vmovups 0x40(%rsi), %zmm1
+ vmovups 0x80(%rsi), %zmm2
+ vmovups 0xC0(%rsi), %zmm3
+ vmovups 0x100(%rsi), %zmm4
+ vmovups 0x140(%rsi), %zmm5
+ vmovups 0x180(%rsi), %zmm6
+ vmovups 0x1C0(%rsi), %zmm7
+ add $512, %rsi
+ prefetcht1 (%rsi)
+ prefetcht1 0x40(%rsi)
+ prefetcht1 0x80(%rsi)
+ prefetcht1 0xC0(%rsi)
+ prefetcht1 0x100(%rsi)
+ prefetcht1 0x140(%rsi)
+ prefetcht1 0x180(%rsi)
+ prefetcht1 0x1C0(%rsi)
+ vmovups %zmm0, (%rdi)
+ vmovups %zmm1, 0x40(%rdi)
+ vmovups %zmm2, 0x80(%rdi)
+ vmovups %zmm3, 0xC0(%rdi)
+ vmovups %zmm4, 0x100(%rdi)
+ vmovups %zmm5, 0x140(%rdi)
+ vmovups %zmm6, 0x180(%rdi)
+ vmovups %zmm7, 0x1C0(%rdi)
+ add $512, %rdi
+ cmp %r9, %rdi
+ jb L(gobble_512bytes_loop)
+ vmovups %zmm8, (%r9)
+ vmovups %zmm9, 0x40(%r9)
+ vmovups %zmm10, 0x80(%r9)
+ vmovups %zmm11, 0xC0(%r9)
+ vmovups %zmm12, 0x100(%r9)
+ vmovups %zmm13, 0x140(%r9)
+ vmovups %zmm14, 0x180(%r9)
+ vmovups %zmm15, 0x1C0(%r9)
+ ret
+
+L(1024bytesormore_bkw):
+ add $512, %rdi
+ vmovups 0x1C0(%rsi), %zmm8
+ vmovups 0x180(%rsi), %zmm9
+ vmovups 0x140(%rsi), %zmm10
+ vmovups 0x100(%rsi), %zmm11
+ vmovups 0xC0(%rsi), %zmm12
+ vmovups 0x80(%rsi), %zmm13
+ vmovups 0x40(%rsi), %zmm14
+ vmovups (%rsi), %zmm15
+ prefetcht1 -0x40(%rcx)
+ prefetcht1 -0x80(%rcx)
+ prefetcht1 -0xC0(%rcx)
+ prefetcht1 -0x100(%rcx)
+ prefetcht1 -0x140(%rcx)
+ prefetcht1 -0x180(%rcx)
+ prefetcht1 -0x1C0(%rcx)
+ prefetcht1 -0x200(%rcx)
+
+/* Backward loop with unaligned memory access. */
+L(gobble_512bytes_loop_bkw):
+ vmovups -0x40(%rcx), %zmm0
+ vmovups -0x80(%rcx), %zmm1
+ vmovups -0xC0(%rcx), %zmm2
+ vmovups -0x100(%rcx), %zmm3
+ vmovups -0x140(%rcx), %zmm4
+ vmovups -0x180(%rcx), %zmm5
+ vmovups -0x1C0(%rcx), %zmm6
+ vmovups -0x200(%rcx), %zmm7
+ sub $512, %rcx
+ prefetcht1 -0x40(%rcx)
+ prefetcht1 -0x80(%rcx)
+ prefetcht1 -0xC0(%rcx)
+ prefetcht1 -0x100(%rcx)
+ prefetcht1 -0x140(%rcx)
+ prefetcht1 -0x180(%rcx)
+ prefetcht1 -0x1C0(%rcx)
+ prefetcht1 -0x200(%rcx)
+ vmovups %zmm0, -0x40(%r9)
+ vmovups %zmm1, -0x80(%r9)
+ vmovups %zmm2, -0xC0(%r9)
+ vmovups %zmm3, -0x100(%r9)
+ vmovups %zmm4, -0x140(%r9)
+ vmovups %zmm5, -0x180(%r9)
+ vmovups %zmm6, -0x1C0(%r9)
+ vmovups %zmm7, -0x200(%r9)
+ sub $512, %r9
+ cmp %rdi, %r9
+ ja L(gobble_512bytes_loop_bkw)
+ vmovups %zmm8, -0x40(%rdi)
+ vmovups %zmm9, -0x80(%rdi)
+ vmovups %zmm10, -0xC0(%rdi)
+ vmovups %zmm11, -0x100(%rdi)
+ vmovups %zmm12, -0x140(%rdi)
+ vmovups %zmm13, -0x180(%rdi)
+ vmovups %zmm14, -0x1C0(%rdi)
+ vmovups %zmm15, -0x200(%rdi)
+ ret
+
+L(preloop_large):
+ cmp %rsi, %rdi
+ ja L(preloop_large_bkw)
+ vmovups (%rsi), %zmm4
+ vmovups 0x40(%rsi), %zmm5
+
+ mov %rdi, %r11
+/* Align destination for access with non-temporal stores in the loop. */
+ mov %rdi, %r8
+ and $-0x80, %rdi
+ add $0x80, %rdi
+ sub %rdi, %r8
+ sub %r8, %rsi
+ add %r8, %rdx
+L(gobble_256bytes_nt_loop):
+ prefetcht1 0x200(%rsi)
+ prefetcht1 0x240(%rsi)
+ prefetcht1 0x280(%rsi)
+ prefetcht1 0x2C0(%rsi)
+ prefetcht1 0x300(%rsi)
+ prefetcht1 0x340(%rsi)
+ prefetcht1 0x380(%rsi)
+ prefetcht1 0x3C0(%rsi)
+ vmovdqu64 (%rsi), %zmm0
+ vmovdqu64 0x40(%rsi), %zmm1
+ vmovdqu64 0x80(%rsi), %zmm2
+ vmovdqu64 0xC0(%rsi), %zmm3
+ vmovntdq %zmm0, (%rdi)
+ vmovntdq %zmm1, 0x40(%rdi)
+ vmovntdq %zmm2, 0x80(%rdi)
+ vmovntdq %zmm3, 0xC0(%rdi)
+ sub $256, %rdx
+ add $256, %rsi
+ add $256, %rdi
+ cmp $256, %rdx
+ ja L(gobble_256bytes_nt_loop)
+ sfence
+ vmovups %zmm4, (%r11)
+ vmovups %zmm5, 0x40(%r11)
+ jmp L(check)
+
+L(preloop_large_bkw):
+ vmovups -0x80(%rcx), %zmm4
+ vmovups -0x40(%rcx), %zmm5
+
+/* Align end of destination for access with non-temporal stores. */
+ mov %r9, %r8
+ and $-0x80, %r9
+ sub %r9, %r8
+ sub %r8, %rcx
+ sub %r8, %rdx
+ add %r9, %r8
+L(gobble_256bytes_nt_loop_bkw):
+ prefetcht1 -0x400(%rcx)
+ prefetcht1 -0x3C0(%rcx)
+ prefetcht1 -0x380(%rcx)
+ prefetcht1 -0x340(%rcx)
+ prefetcht1 -0x300(%rcx)
+ prefetcht1 -0x2C0(%rcx)
+ prefetcht1 -0x280(%rcx)
+ prefetcht1 -0x240(%rcx)
+ vmovdqu64 -0x100(%rcx), %zmm0
+ vmovdqu64 -0xC0(%rcx), %zmm1
+ vmovdqu64 -0x80(%rcx), %zmm2
+ vmovdqu64 -0x40(%rcx), %zmm3
+ vmovntdq %zmm0, -0x100(%r9)
+ vmovntdq %zmm1, -0xC0(%r9)
+ vmovntdq %zmm2, -0x80(%r9)
+ vmovntdq %zmm3, -0x40(%r9)
+ sub $256, %rdx
+ sub $256, %rcx
+ sub $256, %r9
+ cmp $256, %rdx
+ ja L(gobble_256bytes_nt_loop_bkw)
+ sfence
+ vmovups %zmm4, -0x80(%r8)
+ vmovups %zmm5, -0x40(%r8)
+ jmp L(check)
+END (__memmove_avx512_no_vzeroupper)
+
+strong_alias (__memmove_avx512_no_vzeroupper, __memcpy_avx512_no_vzeroupper)
+strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
new file mode 100644
index 0000000000..aac1515cf6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
@@ -0,0 +1,12 @@
+#if IS_IN (libc)
+# define VEC_SIZE 64
+# define VEC(i) zmm##i
+# define VMOVNT vmovntdq
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
+
+# define SECTION(p) p##.avx512
+# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
+
+# include "memmove-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.S b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
index 74a149a950..7c6163ddcb 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.S
+++ b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
@@ -1,6 +1,6 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* memmove with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -16,23 +16,18 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <machine/asm.h>
-#include <init-arch.h>
-
-
-ENTRY(__floorf)
- .type __floorf, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __floorf_sse41(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jnz 2f
- leaq __floorf_c(%rip), %rax
-2: ret
-END(__floorf)
-weak_alias (__floorf, floorf)
-
-
-ENTRY(__floorf_sse41)
- roundss $1, %xmm0, %xmm0
- ret
-END(__floorf_sse41)
+#if IS_IN (libc)
+# define MEMMOVE_SYMBOL(p,s) p##_sse2_##s
+#else
+weak_alias (__mempcpy, mempcpy)
+#endif
+
+#include <sysdeps/x86_64/memmove.S>
+
+#if defined SHARED && IS_IN (libc)
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+/* Use __memmove_sse2_unaligned to support overlapping addresses. */
+compat_symbol (libc, __memmove_sse2_unaligned, memcpy, GLIBC_2_2_5);
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
new file mode 100644
index 0000000000..e2ede45e9f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -0,0 +1,565 @@
+/* memmove/memcpy/mempcpy with unaligned load/store and rep movsb
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* memmove/memcpy/mempcpy is implemented as:
+ 1. Use overlapping load and store to avoid branch.
+ 2. Load all sources into registers and store them together to avoid
+ possible address overlap between source and destination.
+ 3. If size is 8 * VEC_SIZE or less, load all sources into registers
+ and store them together.
+ 4. If address of destination > address of source, backward copy
+ 4 * VEC_SIZE at a time with unaligned load and aligned store.
+ Load the first 4 * VEC and last VEC before the loop and store
+ them after the loop to support overlapping addresses.
+ 5. Otherwise, forward copy 4 * VEC_SIZE at a time with unaligned
+ load and aligned store. Load the last 4 * VEC and first VEC
+ before the loop and store them after the loop to support
+ overlapping addresses.
+ 6. If size >= __x86_shared_non_temporal_threshold and there is no
+ overlap between destination and source, use non-temporal store
+ instead of aligned store. */
+
+#include <sysdep.h>
+
+#ifndef MEMCPY_SYMBOL
+# define MEMCPY_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef MEMPCPY_SYMBOL
+# define MEMPCPY_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef MEMMOVE_CHK_SYMBOL
+# define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef VZEROUPPER
+# if VEC_SIZE > 16
+# define VZEROUPPER vzeroupper
+# else
+# define VZEROUPPER
+# endif
+#endif
+
+/* Threshold to use Enhanced REP MOVSB. Since there is overhead to set
+ up REP MOVSB operation, REP MOVSB isn't faster on short data. The
+ memcpy micro benchmark in glibc shows that 2KB is the approximate
+ value above which REP MOVSB becomes faster than SSE2 optimization
+ on processors with Enhanced REP MOVSB. Since larger register size
+ can move more data with a single load and store, the threshold is
+ higher with larger register size. */
+#ifndef REP_MOVSB_THRESHOLD
+# define REP_MOVSB_THRESHOLD (2048 * (VEC_SIZE / 16))
+#endif
+
+#ifndef PREFETCH
+# define PREFETCH(addr) prefetcht0 addr
+#endif
+
+/* Assume 64-byte prefetch size. */
+#ifndef PREFETCH_SIZE
+# define PREFETCH_SIZE 64
+#endif
+
+#define PREFETCHED_LOAD_SIZE (VEC_SIZE * 4)
+
+#if PREFETCH_SIZE == 64
+# if PREFETCHED_LOAD_SIZE == PREFETCH_SIZE
+# define PREFETCH_ONE_SET(dir, base, offset) \
+ PREFETCH ((offset)base)
+# elif PREFETCHED_LOAD_SIZE == 2 * PREFETCH_SIZE
+# define PREFETCH_ONE_SET(dir, base, offset) \
+ PREFETCH ((offset)base); \
+ PREFETCH ((offset + dir * PREFETCH_SIZE)base)
+# elif PREFETCHED_LOAD_SIZE == 4 * PREFETCH_SIZE
+# define PREFETCH_ONE_SET(dir, base, offset) \
+ PREFETCH ((offset)base); \
+ PREFETCH ((offset + dir * PREFETCH_SIZE)base); \
+ PREFETCH ((offset + dir * PREFETCH_SIZE * 2)base); \
+ PREFETCH ((offset + dir * PREFETCH_SIZE * 3)base)
+# else
+# error Unsupported PREFETCHED_LOAD_SIZE!
+# endif
+#else
+# error Unsupported PREFETCH_SIZE!
+#endif
+
+#ifndef SECTION
+# error SECTION is not defined!
+#endif
+
+ .section SECTION(.text),"ax",@progbits
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
+#endif
+
+ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned))
+ movq %rdi, %rax
+ addq %rdx, %rax
+ jmp L(start)
+END (MEMPCPY_SYMBOL (__mempcpy, unaligned))
+
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
+#endif
+
+ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned))
+ movq %rdi, %rax
+L(start):
+ cmpq $VEC_SIZE, %rdx
+ jb L(less_vec)
+ cmpq $(VEC_SIZE * 2), %rdx
+ ja L(more_2x_vec)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(last_2x_vec):
+#endif
+ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU (%rsi), %VEC(0)
+ VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
+ VZEROUPPER
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(nop):
+#endif
+ ret
+#if defined USE_MULTIARCH && IS_IN (libc)
+END (MEMMOVE_SYMBOL (__memmove, unaligned))
+
+# if VEC_SIZE == 16
+ENTRY (__mempcpy_chk_erms)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__mempcpy_chk_erms)
+
+/* Only used to measure performance of REP MOVSB. */
+ENTRY (__mempcpy_erms)
+ movq %rdi, %rax
+ /* Skip zero length. */
+ testq %rdx, %rdx
+ jz 2f
+ addq %rdx, %rax
+ jmp L(start_movsb)
+END (__mempcpy_erms)
+
+ENTRY (__memmove_chk_erms)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memmove_chk_erms)
+
+ENTRY (__memmove_erms)
+ movq %rdi, %rax
+ /* Skip zero length. */
+ testq %rdx, %rdx
+ jz 2f
+L(start_movsb):
+ movq %rdx, %rcx
+ cmpq %rsi, %rdi
+ jb 1f
+ /* Source == destination is less common. */
+ je 2f
+ leaq (%rsi,%rcx), %rdx
+ cmpq %rdx, %rdi
+ jb L(movsb_backward)
+1:
+ rep movsb
+2:
+ ret
+L(movsb_backward):
+ leaq -1(%rdi,%rcx), %rdi
+ leaq -1(%rsi,%rcx), %rsi
+ std
+ rep movsb
+ cld
+ ret
+END (__memmove_erms)
+strong_alias (__memmove_erms, __memcpy_erms)
+strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+# endif
+
+# ifdef SHARED
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+ movq %rdi, %rax
+ addq %rdx, %rax
+ jmp L(start_erms)
+END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+
+# ifdef SHARED
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
+ movq %rdi, %rax
+L(start_erms):
+ cmpq $VEC_SIZE, %rdx
+ jb L(less_vec)
+ cmpq $(VEC_SIZE * 2), %rdx
+ ja L(movsb_more_2x_vec)
+L(last_2x_vec):
+ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU (%rsi), %VEC(0)
+ VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
+L(return):
+ VZEROUPPER
+ ret
+
+L(movsb):
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ jae L(more_8x_vec)
+ cmpq %rsi, %rdi
+ jb 1f
+ /* Source == destination is less common. */
+ je L(nop)
+ leaq (%rsi,%rdx), %r9
+ cmpq %r9, %rdi
+ /* Avoid slow backward REP MOVSB. */
+# if REP_MOVSB_THRESHOLD <= (VEC_SIZE * 8)
+# error Unsupported REP_MOVSB_THRESHOLD and VEC_SIZE!
+# endif
+ jb L(more_8x_vec_backward)
+1:
+ movq %rdx, %rcx
+ rep movsb
+L(nop):
+ ret
+#endif
+
+L(less_vec):
+ /* Less than 1 VEC. */
+#if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+# error Unsupported VEC_SIZE!
+#endif
+#if VEC_SIZE > 32
+ cmpb $32, %dl
+ jae L(between_32_63)
+#endif
+#if VEC_SIZE > 16
+ cmpb $16, %dl
+ jae L(between_16_31)
+#endif
+ cmpb $8, %dl
+ jae L(between_8_15)
+ cmpb $4, %dl
+ jae L(between_4_7)
+ cmpb $1, %dl
+ ja L(between_2_3)
+ jb 1f
+ movzbl (%rsi), %ecx
+ movb %cl, (%rdi)
+1:
+ ret
+#if VEC_SIZE > 32
+L(between_32_63):
+ /* From 32 to 63. No branch when size == 32. */
+ vmovdqu (%rsi), %ymm0
+ vmovdqu -32(%rsi,%rdx), %ymm1
+ vmovdqu %ymm0, (%rdi)
+ vmovdqu %ymm1, -32(%rdi,%rdx)
+ VZEROUPPER
+ ret
+#endif
+#if VEC_SIZE > 16
+ /* From 16 to 31. No branch when size == 16. */
+L(between_16_31):
+ vmovdqu (%rsi), %xmm0
+ vmovdqu -16(%rsi,%rdx), %xmm1
+ vmovdqu %xmm0, (%rdi)
+ vmovdqu %xmm1, -16(%rdi,%rdx)
+ ret
+#endif
+L(between_8_15):
+ /* From 8 to 15. No branch when size == 8. */
+ movq -8(%rsi,%rdx), %rcx
+ movq (%rsi), %rsi
+ movq %rcx, -8(%rdi,%rdx)
+ movq %rsi, (%rdi)
+ ret
+L(between_4_7):
+ /* From 4 to 7. No branch when size == 4. */
+ movl -4(%rsi,%rdx), %ecx
+ movl (%rsi), %esi
+ movl %ecx, -4(%rdi,%rdx)
+ movl %esi, (%rdi)
+ ret
+L(between_2_3):
+ /* From 2 to 3. No branch when size == 2. */
+ movzwl -2(%rsi,%rdx), %ecx
+ movzwl (%rsi), %esi
+ movw %cx, -2(%rdi,%rdx)
+ movw %si, (%rdi)
+ ret
+
+#if defined USE_MULTIARCH && IS_IN (libc)
+L(movsb_more_2x_vec):
+ cmpq $REP_MOVSB_THRESHOLD, %rdx
+ ja L(movsb)
+#endif
+L(more_2x_vec):
+ /* More than 2 * VEC and there may be overlap between destination
+ and source. */
+ cmpq $(VEC_SIZE * 8), %rdx
+ ja L(more_8x_vec)
+ cmpq $(VEC_SIZE * 4), %rdx
+ jb L(last_4x_vec)
+ /* Copy from 4 * VEC to 8 * VEC, inclusively. */
+ VMOVU (%rsi), %VEC(0)
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
+ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
+ VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(4)
+ VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(5)
+ VMOVU -(VEC_SIZE * 3)(%rsi,%rdx), %VEC(6)
+ VMOVU -(VEC_SIZE * 4)(%rsi,%rdx), %VEC(7)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), VEC_SIZE(%rdi)
+ VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi)
+ VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi)
+ VMOVU %VEC(4), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx)
+ VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx)
+ VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx)
+ VZEROUPPER
+ ret
+L(last_4x_vec):
+ /* Copy from 2 * VEC to 4 * VEC. */
+ VMOVU (%rsi), %VEC(0)
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+ VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(2)
+ VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), VEC_SIZE(%rdi)
+ VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
+ VZEROUPPER
+ ret
+
+L(more_8x_vec):
+ cmpq %rsi, %rdi
+ ja L(more_8x_vec_backward)
+ /* Source == destination is less common. */
+ je L(nop)
+ /* Load the first VEC and last 4 * VEC to support overlapping
+ addresses. */
+ VMOVU (%rsi), %VEC(4)
+ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(5)
+ VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6)
+ VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7)
+ VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8)
+ /* Save start and stop of the destination buffer. */
+ movq %rdi, %r11
+ leaq -VEC_SIZE(%rdi, %rdx), %rcx
+ /* Align destination for aligned stores in the loop. Compute
+ how much destination is misaligned. */
+ movq %rdi, %r8
+ andq $(VEC_SIZE - 1), %r8
+ /* Get the negative of offset for alignment. */
+ subq $VEC_SIZE, %r8
+ /* Adjust source. */
+ subq %r8, %rsi
+ /* Adjust destination which should be aligned now. */
+ subq %r8, %rdi
+ /* Adjust length. */
+ addq %r8, %rdx
+#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+ /* Check non-temporal store threshold. */
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ ja L(large_forward)
+#endif
+L(loop_4x_vec_forward):
+ /* Copy 4 * VEC at a time forward. */
+ VMOVU (%rsi), %VEC(0)
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
+ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
+ addq $(VEC_SIZE * 4), %rsi
+ subq $(VEC_SIZE * 4), %rdx
+ VMOVA %VEC(0), (%rdi)
+ VMOVA %VEC(1), VEC_SIZE(%rdi)
+ VMOVA %VEC(2), (VEC_SIZE * 2)(%rdi)
+ VMOVA %VEC(3), (VEC_SIZE * 3)(%rdi)
+ addq $(VEC_SIZE * 4), %rdi
+ cmpq $(VEC_SIZE * 4), %rdx
+ ja L(loop_4x_vec_forward)
+ /* Store the last 4 * VEC. */
+ VMOVU %VEC(5), (%rcx)
+ VMOVU %VEC(6), -VEC_SIZE(%rcx)
+ VMOVU %VEC(7), -(VEC_SIZE * 2)(%rcx)
+ VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
+ /* Store the first VEC. */
+ VMOVU %VEC(4), (%r11)
+ VZEROUPPER
+ ret
+
+L(more_8x_vec_backward):
+ /* Load the first 4 * VEC and last VEC to support overlapping
+ addresses. */
+ VMOVU (%rsi), %VEC(4)
+ VMOVU VEC_SIZE(%rsi), %VEC(5)
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(6)
+ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(7)
+ VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(8)
+ /* Save stop of the destination buffer. */
+ leaq -VEC_SIZE(%rdi, %rdx), %r11
+ /* Align destination end for aligned stores in the loop. Compute
+ how much destination end is misaligned. */
+ leaq -VEC_SIZE(%rsi, %rdx), %rcx
+ movq %r11, %r9
+ movq %r11, %r8
+ andq $(VEC_SIZE - 1), %r8
+ /* Adjust source. */
+ subq %r8, %rcx
+ /* Adjust the end of destination which should be aligned now. */
+ subq %r8, %r9
+ /* Adjust length. */
+ subq %r8, %rdx
+#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+ /* Check non-temporal store threshold. */
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ ja L(large_backward)
+#endif
+L(loop_4x_vec_backward):
+ /* Copy 4 * VEC at a time backward. */
+ VMOVU (%rcx), %VEC(0)
+ VMOVU -VEC_SIZE(%rcx), %VEC(1)
+ VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
+ VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3)
+ subq $(VEC_SIZE * 4), %rcx
+ subq $(VEC_SIZE * 4), %rdx
+ VMOVA %VEC(0), (%r9)
+ VMOVA %VEC(1), -VEC_SIZE(%r9)
+ VMOVA %VEC(2), -(VEC_SIZE * 2)(%r9)
+ VMOVA %VEC(3), -(VEC_SIZE * 3)(%r9)
+ subq $(VEC_SIZE * 4), %r9
+ cmpq $(VEC_SIZE * 4), %rdx
+ ja L(loop_4x_vec_backward)
+ /* Store the first 4 * VEC. */
+ VMOVU %VEC(4), (%rdi)
+ VMOVU %VEC(5), VEC_SIZE(%rdi)
+ VMOVU %VEC(6), (VEC_SIZE * 2)(%rdi)
+ VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
+ /* Store the last VEC. */
+ VMOVU %VEC(8), (%r11)
+ VZEROUPPER
+ ret
+
+#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+L(large_forward):
+ /* Don't use non-temporal store if there is overlap between
+ destination and source since destination may be in cache
+ when source is loaded. */
+ leaq (%rdi, %rdx), %r10
+ cmpq %r10, %rsi
+ jb L(loop_4x_vec_forward)
+L(loop_large_forward):
+ /* Copy 4 * VEC at a time forward with non-temporal stores. */
+ PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE * 2)
+ PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE * 3)
+ VMOVU (%rsi), %VEC(0)
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
+ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
+ addq $PREFETCHED_LOAD_SIZE, %rsi
+ subq $PREFETCHED_LOAD_SIZE, %rdx
+ VMOVNT %VEC(0), (%rdi)
+ VMOVNT %VEC(1), VEC_SIZE(%rdi)
+ VMOVNT %VEC(2), (VEC_SIZE * 2)(%rdi)
+ VMOVNT %VEC(3), (VEC_SIZE * 3)(%rdi)
+ addq $PREFETCHED_LOAD_SIZE, %rdi
+ cmpq $PREFETCHED_LOAD_SIZE, %rdx
+ ja L(loop_large_forward)
+ sfence
+ /* Store the last 4 * VEC. */
+ VMOVU %VEC(5), (%rcx)
+ VMOVU %VEC(6), -VEC_SIZE(%rcx)
+ VMOVU %VEC(7), -(VEC_SIZE * 2)(%rcx)
+ VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
+ /* Store the first VEC. */
+ VMOVU %VEC(4), (%r11)
+ VZEROUPPER
+ ret
+
+L(large_backward):
+ /* Don't use non-temporal store if there is overlap between
+ destination and source since destination may be in cache
+ when source is loaded. */
+ leaq (%rcx, %rdx), %r10
+ cmpq %r10, %r9
+ jb L(loop_4x_vec_backward)
+L(loop_large_backward):
+ /* Copy 4 * VEC at a time backward with non-temporal stores. */
+ PREFETCH_ONE_SET (-1, (%rcx), -PREFETCHED_LOAD_SIZE * 2)
+ PREFETCH_ONE_SET (-1, (%rcx), -PREFETCHED_LOAD_SIZE * 3)
+ VMOVU (%rcx), %VEC(0)
+ VMOVU -VEC_SIZE(%rcx), %VEC(1)
+ VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
+ VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3)
+ subq $PREFETCHED_LOAD_SIZE, %rcx
+ subq $PREFETCHED_LOAD_SIZE, %rdx
+ VMOVNT %VEC(0), (%r9)
+ VMOVNT %VEC(1), -VEC_SIZE(%r9)
+ VMOVNT %VEC(2), -(VEC_SIZE * 2)(%r9)
+ VMOVNT %VEC(3), -(VEC_SIZE * 3)(%r9)
+ subq $PREFETCHED_LOAD_SIZE, %r9
+ cmpq $PREFETCHED_LOAD_SIZE, %rdx
+ ja L(loop_large_backward)
+ sfence
+ /* Store the first 4 * VEC. */
+ VMOVU %VEC(4), (%rdi)
+ VMOVU %VEC(5), VEC_SIZE(%rdi)
+ VMOVU %VEC(6), (VEC_SIZE * 2)(%rdi)
+ VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
+ /* Store the last VEC. */
+ VMOVU %VEC(8), (%r11)
+ VZEROUPPER
+ ret
+#endif
+END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
+
+#if IS_IN (libc)
+# ifdef USE_MULTIARCH
+strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
+ MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
+# ifdef SHARED
+strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
+ MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
+# endif
+# endif
+# ifdef SHARED
+strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned),
+ MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned))
+# endif
+#endif
+strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned),
+ MEMCPY_SYMBOL (__memcpy, unaligned))
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 8da5640bb0..d512228eae 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -1,6 +1,6 @@
-/* Multiple versions of memmove.
+/* Multiple versions of memmove.
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,57 +17,21 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
-# define MEMMOVE __memmove_sse2
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
- __hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2);
-# endif
-
-/* Redefine memmove so that the compiler won't complain about the type
- mismatch with the IFUNC selector in strong_alias, below. */
-# undef memmove
# define memmove __redirect_memmove
# include <string.h>
# undef memmove
-extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
-extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
-extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
-extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden;
-# ifdef HAVE_AVX512_ASM_SUPPORT
- extern __typeof (__redirect_memmove) __memmove_avx512_no_vzeroupper attribute_hidden;
-# endif
-
-#endif
+# define SYMBOL_NAME memmove
+# include "ifunc-memmove.h"
-#include "string/memmove.c"
+libc_ifunc_redirected (__redirect_memmove, __libc_memmove,
+ IFUNC_SELECTOR ());
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
-
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
- ifunc symbol properly. */
-extern __typeof (__redirect_memmove) __libc_memmove;
-libc_ifunc (__libc_memmove,
-#ifdef HAVE_AVX512_ASM_SUPPORT
- HAS_ARCH_FEATURE (AVX512F_Usable)
- && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- ? __memmove_avx512_no_vzeroupper
- :
-#endif
- (HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- ? __memmove_avx_unaligned
- : (HAS_CPU_FEATURE (SSSE3)
- ? (HAS_ARCH_FEATURE (Fast_Copy_Backward)
- ? __memmove_ssse3_back : __memmove_ssse3)
- : __memmove_sse2)));
-
-strong_alias (__libc_memmove, memmove)
-
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
-compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+strong_alias (__libc_memmove, memmove);
+# ifdef SHARED
+__hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove)
+ __attribute__ ((visibility ("hidden")));
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk-nonshared.S b/sysdeps/x86_64/multiarch/memmove_chk-nonshared.S
new file mode 100644
index 0000000000..c362a3324d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of memmove_chk for x86-64.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/memmove_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index f64da63180..0e9dc7e07f 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -1,6 +1,6 @@
-/* Multiple versions of __memmove_chk.
+/* Multiple versions of __memmove_chk.
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,30 +17,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <string.h>
-#include "init-arch.h"
+/* Define multiple versions only for the definition in libc.so. */
+#if IS_IN (libc) && defined SHARED
+# define __memmove_chk __redirect_memmove_chk
+# include <string.h>
+# undef __memmove_chk
-#define MEMMOVE_CHK __memmove_chk_sse2
+# define SYMBOL_NAME memmove_chk
+# include "ifunc-memmove.h"
-extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
-extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
-extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
-extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden;
-# ifdef HAVE_AVX512_ASM_SUPPORT
- extern __typeof (__memmove_chk) __memmove_chk_avx512_no_vzeroupper attribute_hidden;
-# endif
-
-#include "debug/memmove_chk.c"
-
-libc_ifunc (__memmove_chk,
-#ifdef HAVE_AVX512_ASM_SUPPORT
- HAS_ARCH_FEATURE (AVX512F_Usable)
- && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- ? __memmove_chk_avx512_no_vzeroupper
- :
+libc_ifunc_redirected (__redirect_memmove_chk, __memmove_chk,
+ IFUNC_SELECTOR ());
#endif
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) ? __memmove_chk_avx_unaligned :
- (HAS_CPU_FEATURE (SSSE3)
- ? (HAS_ARCH_FEATURE (Fast_Copy_Backward)
- ? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
- : __memmove_chk_sse2));
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
deleted file mode 100644
index 82ffacb8fb..0000000000
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY
-#define MEMCPY __mempcpy_ssse3_back
-#define MEMCPY_CHK __mempcpy_chk_ssse3_back
-#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3.S
deleted file mode 100644
index 822d98e954..0000000000
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY
-#define MEMCPY __mempcpy_ssse3
-#define MEMCPY_CHK __mempcpy_chk_ssse3
-#include "memcpy-ssse3.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
deleted file mode 100644
index ed78623565..0000000000
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ /dev/null
@@ -1,86 +0,0 @@
-/* Multiple versions of mempcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need mempcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
-ENTRY(__mempcpy)
- .type __mempcpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef HAVE_AVX512_ASM_SUPPORT
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jz 1f
- leaq __mempcpy_avx512_no_vzeroupper(%rip), %rax
- ret
-#endif
-1: leaq __mempcpy_sse2(%rip), %rax
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- leaq __mempcpy_ssse3(%rip), %rax
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jz 2f
- leaq __mempcpy_ssse3_back(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz 2f
- leaq __mempcpy_avx_unaligned(%rip), %rax
-2: ret
-END(__mempcpy)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __mempcpy_sse2, @function; \
- .p2align 4; \
- .globl __mempcpy_sse2; \
- .hidden __mempcpy_sse2; \
- __mempcpy_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
- .type __mempcpy_chk_sse2, @function; \
- .globl __mempcpy_chk_sse2; \
- .p2align 4; \
- __mempcpy_chk_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
- cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
-
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
- The speedup we get from using SSSE3 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_def(name) \
- .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
-# define libc_hidden_builtin_def(name) \
- .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
-#endif
-
-#include "../mempcpy.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
new file mode 100644
index 0000000000..9fe41dda82
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy.c
@@ -0,0 +1,42 @@
+/* Multiple versions of mempcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define mempcpy __redirect_mempcpy
+# define __mempcpy __redirect___mempcpy
+# define NO_MEMPCPY_STPCPY_REDIRECT
+# define __NO_STRING_INLINES
+# include <string.h>
+# undef mempcpy
+# undef __mempcpy
+
+# define SYMBOL_NAME mempcpy
+# include "ifunc-memmove.h"
+
+libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR ());
+
+weak_alias (__mempcpy, mempcpy)
+# ifdef SHARED
+__hidden_ver1 (__mempcpy, __GI___mempcpy, __redirect___mempcpy)
+ __attribute__ ((visibility ("hidden")));
+__hidden_ver1 (mempcpy, __GI_mempcpy, __redirect_mempcpy)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-nonshared.S b/sysdeps/x86_64/multiarch/mempcpy_chk-nonshared.S
new file mode 100644
index 0000000000..7133246a1d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of mempcpy_chk for x86-64.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/mempcpy_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
deleted file mode 100644
index 6e8a89d38c..0000000000
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of __mempcpy_chk
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. There are no multiarch mempcpy functions for static binaries.
- */
-#if IS_IN (libc)
-# ifdef SHARED
- .text
-ENTRY(__mempcpy_chk)
- .type __mempcpy_chk, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef HAVE_AVX512_ASM_SUPPORT
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jz 1f
- leaq __mempcpy_chk_avx512_no_vzeroupper(%rip), %rax
- ret
-#endif
-1: leaq __mempcpy_chk_sse2(%rip), %rax
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- leaq __mempcpy_chk_ssse3(%rip), %rax
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jz 2f
- leaq __mempcpy_chk_ssse3_back(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz 2f
- leaq __mempcpy_chk_avx_unaligned(%rip), %rax
-2: ret
-END(__mempcpy_chk)
-# else
-# include "../mempcpy_chk.S"
-# endif
-#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.c b/sysdeps/x86_64/multiarch/mempcpy_chk.c
new file mode 100644
index 0000000000..956918b3a1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __mempcpy_chk
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc.so. */
+#if IS_IN (libc) && defined SHARED
+# define __mempcpy_chk __redirect_mempcpy_chk
+# include <string.h>
+# undef __mempcpy_chk
+
+# define SYMBOL_NAME mempcpy_chk
+# include "ifunc-memmove.h"
+
+libc_ifunc_redirected (__redirect_mempcpy_chk, __mempcpy_chk,
+ IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
new file mode 100644
index 0000000000..b41a58bcba
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
@@ -0,0 +1,359 @@
+/* memrchr optimized with AVX2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+ .section .text.avx,"ax",@progbits
+ENTRY (__memrchr_avx2)
+ /* Broadcast CHAR to YMM0. Register roles below assume the SysV
+ AMD64 calling convention for memrchr (s, c, n): rdi = s, esi = c,
+ rdx = n. */
+ vmovd %esi, %xmm0
+ vpbroadcastb %xmm0, %ymm0
+
+ subq $VEC_SIZE, %rdx /* rdx = n - VEC_SIZE. */
+ jbe L(last_vec_or_less) /* n <= VEC_SIZE: take the short path. */
+
+ addq %rdx, %rdi /* rdi = s + n - VEC_SIZE, start of the last VEC. */
+
+ /* Check the last VEC_SIZE bytes. */
+ vpcmpeqb (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x0)
+
+ subq $(VEC_SIZE * 4), %rdi /* Step back over the next 4 vectors. */
+ movl %edi, %ecx
+ andl $(VEC_SIZE - 1), %ecx /* ecx = misalignment of rdi within a VEC. */
+ jz L(aligned_more)
+
+ /* Align data for aligned loads in the loop. rdi is rounded up to
+ the next VEC_SIZE boundary and the remaining length in rdx is
+ adjusted by the same amount (VEC_SIZE - misalignment). */
+ addq $VEC_SIZE, %rdi
+ addq $VEC_SIZE, %rdx
+ andq $-VEC_SIZE, %rdi
+ subq %rcx, %rdx
+
+ .p2align 4
+L(aligned_more):
+ subq $(VEC_SIZE * 4), %rdx
+ jbe L(last_4x_vec_or_less)
+
+ /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ since data is only aligned to VEC_SIZE. */
+ vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x3)
+
+ vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm2
+ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x2)
+
+ vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm3
+ vpmovmskb %ymm3, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x1)
+
+ vpcmpeqb (%rdi), %ymm0, %ymm4
+ vpmovmskb %ymm4, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x0)
+
+ /* Align data to 4 * VEC_SIZE for loop with fewer branches.
+ There are some overlaps with above if data isn't aligned
+ to 4 * VEC_SIZE. */
+ movl %edi, %ecx
+ andl $(VEC_SIZE * 4 - 1), %ecx
+ jz L(loop_4x_vec)
+
+ addq $(VEC_SIZE * 4), %rdi
+ addq $(VEC_SIZE * 4), %rdx
+ andq $-(VEC_SIZE * 4), %rdi
+ subq %rcx, %rdx
+
+ .p2align 4
+L(loop_4x_vec):
+ /* Compare 4 * VEC at a time, moving backward: rdi is decremented
+ by 4 * VEC_SIZE on every iteration, and rdx tracks the bytes that
+ are still left to examine. */
+ subq $(VEC_SIZE * 4), %rdi
+ subq $(VEC_SIZE * 4), %rdx
+ jbe L(last_4x_vec_or_less) /* At most 4 * VEC_SIZE bytes remain. */
+
+ vmovdqa (%rdi), %ymm1
+ vmovdqa VEC_SIZE(%rdi), %ymm2
+ vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3
+ vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4
+
+ vpcmpeqb %ymm1, %ymm0, %ymm1
+ vpcmpeqb %ymm2, %ymm0, %ymm2
+ vpcmpeqb %ymm3, %ymm0, %ymm3
+ vpcmpeqb %ymm4, %ymm0, %ymm4
+
+ vpor %ymm1, %ymm2, %ymm5
+ vpor %ymm3, %ymm4, %ymm6
+ vpor %ymm5, %ymm6, %ymm5 /* ymm5 = OR of all four compare results. */
+
+ vpmovmskb %ymm5, %eax
+ testl %eax, %eax
+ jz L(loop_4x_vec) /* No match in any of the 4 vectors. */
+
+ /* There is a match. Test the vectors from the highest address
+ down so that the last occurrence is the one reported. */
+ vpmovmskb %ymm4, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x3)
+
+ vpmovmskb %ymm3, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x2)
+
+ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x1)
+
+ vpmovmskb %ymm1, %eax /* Match must be in the lowest vector. */
+ bsrl %eax, %eax /* Index of the highest matching byte. */
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_4x_vec_or_less):
+ addl $(VEC_SIZE * 4), %edx
+ cmpl $(VEC_SIZE * 2), %edx
+ jbe L(last_2x_vec)
+
+ vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x3)
+
+ vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm2
+ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x2)
+
+ vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm3
+ vpmovmskb %ymm3, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x1_check)
+ cmpl $(VEC_SIZE * 3), %edx
+ jbe L(zero)
+
+ vpcmpeqb (%rdi), %ymm0, %ymm4
+ vpmovmskb %ymm4, %eax
+ testl %eax, %eax
+ jz L(zero)
+ bsrl %eax, %eax
+ subq $(VEC_SIZE * 4), %rdx
+ addq %rax, %rdx
+ jl L(zero)
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_2x_vec):
+ vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(last_vec_x3_check)
+ cmpl $VEC_SIZE, %edx
+ jbe L(zero)
+
+ vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jz L(zero)
+ bsrl %eax, %eax
+ subq $(VEC_SIZE * 2), %rdx
+ addq %rax, %rdx
+ jl L(zero)
+ addl $(VEC_SIZE * 2), %eax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_vec_x0):
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_vec_x1):
+ bsrl %eax, %eax
+ addl $VEC_SIZE, %eax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_vec_x2):
+ bsrl %eax, %eax
+ addl $(VEC_SIZE * 2), %eax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_vec_x3):
+ /* Match found in the vector at offset VEC_SIZE * 3 from rdi.
+ bsrl selects the highest set mask bit, i.e. the last matching
+ byte; the result is rdi + VEC_SIZE * 3 + that index. */
+ bsrl %eax, %eax
+ addl $(VEC_SIZE * 3), %eax
+ addq %rdi, %rax
+ /* Every path reaching this label has written YMM registers, so
+ clear the upper halves before returning, exactly as the
+ last_vec_x0/x1/x2 exits do, to avoid AVX-SSE transition
+ penalties in the caller. */
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_vec_x1_check):
+ bsrl %eax, %eax
+ subq $(VEC_SIZE * 3), %rdx
+ addq %rax, %rdx
+ jl L(zero)
+ addl $VEC_SIZE, %eax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_vec_x3_check):
+ bsrl %eax, %eax
+ subq $VEC_SIZE, %rdx
+ addq %rax, %rdx
+ jl L(zero)
+ addl $(VEC_SIZE * 3), %eax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(zero):
+ VZEROUPPER
+L(null):
+ xorl %eax, %eax
+ ret
+
+ .p2align 4
+L(last_vec_or_less_aligned):
+ movl %edx, %ecx
+
+ vpcmpeqb (%rdi), %ymm0, %ymm1
+
+ movl $1, %edx
+ /* Support rdx << 32. */
+ salq %cl, %rdx
+ subq $1, %rdx
+
+ vpmovmskb %ymm1, %eax
+
+ /* Remove the trailing bytes. */
+ andl %edx, %eax
+ testl %eax, %eax
+ jz L(zero)
+
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_vec_or_less):
+ addl $VEC_SIZE, %edx
+
+ /* Check for zero length. */
+ testl %edx, %edx
+ jz L(null)
+
+ movl %edi, %ecx
+ andl $(VEC_SIZE - 1), %ecx
+ jz L(last_vec_or_less_aligned)
+
+ movl %ecx, %esi
+ movl %ecx, %r8d
+ addl %edx, %esi
+ andq $-VEC_SIZE, %rdi
+
+ subl $VEC_SIZE, %esi
+ ja L(last_vec_2x_aligned)
+
+ /* Check the last VEC. */
+ vpcmpeqb (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+
+ /* Remove the leading and trailing bytes. */
+ sarl %cl, %eax
+ movl %edx, %ecx
+
+ movl $1, %edx
+ sall %cl, %edx
+ subl $1, %edx
+
+ andl %edx, %eax
+ testl %eax, %eax
+ jz L(zero)
+
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ addq %r8, %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_vec_2x_aligned):
+ movl %esi, %ecx
+
+ /* Check the last VEC. */
+ vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm1
+
+ movl $1, %edx
+ sall %cl, %edx
+ subl $1, %edx
+
+ vpmovmskb %ymm1, %eax
+
+ /* Remove the trailing bytes. */
+ andl %edx, %eax
+
+ testl %eax, %eax
+ jnz L(last_vec_x1)
+
+ /* Check the second last VEC. */
+ vpcmpeqb (%rdi), %ymm0, %ymm1
+
+ movl %r8d, %ecx
+
+ vpmovmskb %ymm1, %eax
+
+ /* Remove the leading bytes. Must use unsigned right shift for
+ bsrl below. */
+ shrl %cl, %eax
+ testl %eax, %eax
+ jz L(zero)
+
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ addq %r8, %rax
+ VZEROUPPER
+ ret
+END (__memrchr_avx2)
+#endif
diff --git a/sysdeps/x86_64/memmove.c b/sysdeps/x86_64/multiarch/memrchr-sse2.S
index 07f81852d6..12281663ec 100644
--- a/sysdeps/x86_64/memmove.c
+++ b/sysdeps/x86_64/multiarch/memrchr-sse2.S
@@ -1,4 +1,5 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* memrchr optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -15,12 +16,11 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "string/memmove.c"
+#if IS_IN (libc)
+# define __memrchr __memrchr_sse2
-#if !defined memmove && IS_IN (libc)
-#include <shlib-compat.h>
-
-#if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
-compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
-#endif
+# undef weak_alias
+# define weak_alias(__memrchr, memrchr)
#endif
+
+#include "../memrchr.S"
diff --git a/sysdeps/x86_64/multiarch/memrchr.c b/sysdeps/x86_64/multiarch/memrchr.c
new file mode 100644
index 0000000000..d227fe7819
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memrchr.c
@@ -0,0 +1,31 @@
+/* Multiple versions of memrchr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define memrchr __redirect_memrchr
+# include <string.h>
+# undef memrchr
+
+# define SYMBOL_NAME memrchr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ());
+weak_alias (__memrchr, memrchr)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
new file mode 100644
index 0000000000..7ab3d89849
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
@@ -0,0 +1,22 @@
+#if IS_IN (libc)
+# define VEC_SIZE 32
+# define VEC(i) ymm##i
+# define VMOVU vmovdqu
+# define VMOVA vmovdqa
+
+# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ vmovd d, %xmm0; \
+ movq r, %rax; \
+ vpbroadcastb %xmm0, %ymm0
+
+# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ vmovd d, %xmm0; \
+ movq r, %rax; \
+ vpbroadcastd %xmm0, %ymm0
+
+# define SECTION(p) p##.avx
+# define MEMSET_SYMBOL(p,s) p##_avx2_##s
+# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
+
+# include "memset-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset-avx2.S b/sysdeps/x86_64/multiarch/memset-avx2.S
deleted file mode 100644
index df634728d4..0000000000
--- a/sysdeps/x86_64/multiarch/memset-avx2.S
+++ /dev/null
@@ -1,168 +0,0 @@
-/* memset with AVX2
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#if IS_IN (libc)
-
-#include "asm-syntax.h"
-#ifndef MEMSET
-# define MEMSET __memset_avx2
-# define MEMSET_CHK __memset_chk_avx2
-#endif
-
- .section .text.avx2,"ax",@progbits
-#if defined PIC
-ENTRY (MEMSET_CHK)
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMSET_CHK)
-#endif
-
-ENTRY (MEMSET)
- vpxor %xmm0, %xmm0, %xmm0
- vmovd %esi, %xmm1
- lea (%rdi, %rdx), %rsi
- mov %rdi, %rax
- vpshufb %xmm0, %xmm1, %xmm0
- cmp $16, %rdx
- jb L(less_16bytes)
- cmp $256, %rdx
- jae L(256bytesormore)
- cmp $128, %dl
- jb L(less_128bytes)
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm0, 0x10(%rdi)
- vmovdqu %xmm0, 0x20(%rdi)
- vmovdqu %xmm0, 0x30(%rdi)
- vmovdqu %xmm0, 0x40(%rdi)
- vmovdqu %xmm0, 0x50(%rdi)
- vmovdqu %xmm0, 0x60(%rdi)
- vmovdqu %xmm0, 0x70(%rdi)
- vmovdqu %xmm0, -0x80(%rsi)
- vmovdqu %xmm0, -0x70(%rsi)
- vmovdqu %xmm0, -0x60(%rsi)
- vmovdqu %xmm0, -0x50(%rsi)
- vmovdqu %xmm0, -0x40(%rsi)
- vmovdqu %xmm0, -0x30(%rsi)
- vmovdqu %xmm0, -0x20(%rsi)
- vmovdqu %xmm0, -0x10(%rsi)
- ret
-
- .p2align 4
-L(less_128bytes):
- cmp $64, %dl
- jb L(less_64bytes)
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm0, 0x10(%rdi)
- vmovdqu %xmm0, 0x20(%rdi)
- vmovdqu %xmm0, 0x30(%rdi)
- vmovdqu %xmm0, -0x40(%rsi)
- vmovdqu %xmm0, -0x30(%rsi)
- vmovdqu %xmm0, -0x20(%rsi)
- vmovdqu %xmm0, -0x10(%rsi)
- ret
-
- .p2align 4
-L(less_64bytes):
- cmp $32, %dl
- jb L(less_32bytes)
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm0, 0x10(%rdi)
- vmovdqu %xmm0, -0x20(%rsi)
- vmovdqu %xmm0, -0x10(%rsi)
- ret
-
- .p2align 4
-L(less_32bytes):
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm0, -0x10(%rsi)
- ret
-
- .p2align 4
-L(less_16bytes):
- cmp $8, %dl
- jb L(less_8bytes)
- vmovq %xmm0, (%rdi)
- vmovq %xmm0, -0x08(%rsi)
- ret
-
- .p2align 4
-L(less_8bytes):
- vmovd %xmm0, %ecx
- cmp $4, %dl
- jb L(less_4bytes)
- mov %ecx, (%rdi)
- mov %ecx, -0x04(%rsi)
- ret
-
- .p2align 4
-L(less_4bytes):
- cmp $2, %dl
- jb L(less_2bytes)
- mov %cx, (%rdi)
- mov %cx, -0x02(%rsi)
- ret
-
- .p2align 4
-L(less_2bytes):
- cmp $1, %dl
- jb L(less_1bytes)
- mov %cl, (%rdi)
-L(less_1bytes):
- ret
-
- .p2align 4
-L(256bytesormore):
- vinserti128 $1, %xmm0, %ymm0, %ymm0
- and $-0x20, %rdi
- add $0x20, %rdi
- vmovdqu %ymm0, (%rax)
- sub %rdi, %rax
- lea -0x80(%rax, %rdx), %rcx
- cmp $4096, %rcx
- ja L(gobble_data)
-L(gobble_128_loop):
- vmovdqa %ymm0, (%rdi)
- vmovdqa %ymm0, 0x20(%rdi)
- vmovdqa %ymm0, 0x40(%rdi)
- vmovdqa %ymm0, 0x60(%rdi)
- sub $-0x80, %rdi
- add $-0x80, %ecx
- jb L(gobble_128_loop)
- mov %rsi, %rax
- vmovdqu %ymm0, -0x80(%rsi)
- vmovdqu %ymm0, -0x60(%rsi)
- vmovdqu %ymm0, -0x40(%rsi)
- vmovdqu %ymm0, -0x20(%rsi)
- sub %rdx, %rax
- vzeroupper
- ret
-
- .p2align 4
-L(gobble_data):
- sub $-0x80, %rcx
- vmovd %xmm0, %eax
- rep stosb
- mov %rsi, %rax
- sub %rdx, %rax
- vzeroupper
- ret
-
-END (MEMSET)
-#endif
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
index 1e638d7ac2..689cc1199c 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
@@ -1,5 +1,5 @@
/* memset optimized with AVX512 for KNL hardware.
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,7 +18,7 @@
#include <sysdep.h>
-#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc)
+#if IS_IN (libc)
#include "asm-syntax.h"
#ifndef MEMSET
@@ -26,7 +26,7 @@
# define MEMSET_CHK __memset_chk_avx512_no_vzeroupper
#endif
- .section .text,"ax",@progbits
+ .section .text.avx512,"ax",@progbits
#if defined PIC
ENTRY (MEMSET_CHK)
cmpq %rdx, %rcx
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
new file mode 100644
index 0000000000..0783979ca5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
@@ -0,0 +1,24 @@
+#if IS_IN (libc)
+# define VEC_SIZE 64
+# define VEC(i) zmm##i
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
+
+# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ vmovd d, %xmm0; \
+ movq r, %rax; \
+ vpbroadcastb %xmm0, %xmm0; \
+ vpbroadcastq %xmm0, %zmm0
+
+# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ vmovd d, %xmm0; \
+ movq r, %rax; \
+ vpbroadcastd %xmm0, %xmm0; \
+ vpbroadcastq %xmm0, %zmm0
+
+# define SECTION(p) p##.avx512
+# define MEMSET_SYMBOL(p,s) p##_avx512_##s
+# define WMEMSET_SYMBOL(p,s) p##_avx512_##s
+
+# include "memset-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemcmp.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
index b510f756e2..be6671759b 100644
--- a/sysdeps/x86_64/multiarch/wmemcmp.S
+++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
@@ -1,7 +1,6 @@
-/* Multiple versions of wmemcmp
+/* memset with SSE2.
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,26 +18,24 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <shlib-compat.h>
#include <init-arch.h>
-/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
- .text
-ENTRY(wmemcmp)
- .type wmemcmp, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- HAS_CPU_FEATURE (SSSE3)
- jnz 2f
- leaq __wmemcmp_sse2(%rip), %rax
- ret
-
-2: HAS_CPU_FEATURE (SSE4_1)
- jz 3f
- leaq __wmemcmp_sse4_1(%rip), %rax
- ret
-
-3: leaq __wmemcmp_ssse3(%rip), %rax
- ret
-
-END(wmemcmp)
+# define MEMSET_SYMBOL(p,s) p##_sse2_##s
+# define WMEMSET_SYMBOL(p,s) p##_sse2_##s
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name)
+# endif
+
+# undef weak_alias
+# define weak_alias(original, alias) \
+ .weak bzero; bzero = __bzero
+
+# undef strong_alias
+# define strong_alias(ignored1, ignored2)
#endif
+
+#include <sysdeps/x86_64/memset.S>
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
new file mode 100644
index 0000000000..dc9cb88b37
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -0,0 +1,274 @@
+/* memset/bzero with unaligned store and rep stosb
+ Copyright (C) 2016-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* memset is implemented as:
+ 1. Use overlapping store to avoid branch.
+ 2. If size is less than VEC_SIZE, use integer register stores.
+ 3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
+ 4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
+ 5. If size is more than 4 * VEC_SIZE, align to 4 * VEC_SIZE with
+ 4 VEC stores and store 4 * VEC at a time until done. */
+
+#include <sysdep.h>
+
+#ifndef MEMSET_CHK_SYMBOL
+# define MEMSET_CHK_SYMBOL(p,s) MEMSET_SYMBOL(p, s)
+#endif
+
+#ifndef WMEMSET_CHK_SYMBOL
+# define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s)
+#endif
+
+#ifndef VZEROUPPER
+# if VEC_SIZE > 16
+# define VZEROUPPER vzeroupper
+# else
+# define VZEROUPPER
+# endif
+#endif
+
+#ifndef VZEROUPPER_SHORT_RETURN
+# if VEC_SIZE > 16
+# define VZEROUPPER_SHORT_RETURN vzeroupper
+# else
+# define VZEROUPPER_SHORT_RETURN rep
+# endif
+#endif
+
+#ifndef MOVQ
+# if VEC_SIZE > 16
+# define MOVQ vmovq
+# else
+# define MOVQ movq
+# endif
+#endif
+
+/* Threshold to use Enhanced REP STOSB. Since there is overhead to set
+ up REP STOSB operation, REP STOSB isn't faster on short data. The
+ memset micro benchmark in glibc shows that 2KB is the approximate
+ value above which REP STOSB becomes faster on processors with
+ Enhanced REP STOSB. Since the stored value is fixed, larger register
+ size has minimal impact on threshold. */
+#ifndef REP_STOSB_THRESHOLD
+# define REP_STOSB_THRESHOLD 2048
+#endif
+
+#ifndef SECTION
+# error SECTION is not defined!
+#endif
+
+ .section SECTION(.text),"ax",@progbits
+#if VEC_SIZE == 16 && IS_IN (libc)
+ENTRY (__bzero)
+ movq %rdi, %rax /* Set return value. */
+ movq %rsi, %rdx /* Set n. */
+ pxor %xmm0, %xmm0
+ jmp L(entry_from_bzero)
+END (__bzero)
+weak_alias (__bzero, bzero)
+#endif
+
+#if IS_IN (libc)
+# if defined SHARED
+ENTRY_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
+# endif
+
+ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned))
+ shlq $2, %rdx
+ WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
+ jmp L(entry_from_bzero)
+END (WMEMSET_SYMBOL (__wmemset, unaligned))
+#endif
+
+#if defined SHARED && IS_IN (libc)
+ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
+#endif
+
+ENTRY (MEMSET_SYMBOL (__memset, unaligned))
+ MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
+L(entry_from_bzero):
+ cmpq $VEC_SIZE, %rdx
+ jb L(less_vec)
+ cmpq $(VEC_SIZE * 2), %rdx
+ ja L(more_2x_vec)
+ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), (%rdi)
+ VZEROUPPER
+ ret
+#if defined USE_MULTIARCH && IS_IN (libc)
+END (MEMSET_SYMBOL (__memset, unaligned))
+
+# if VEC_SIZE == 16
+ENTRY (__memset_chk_erms)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_erms)
+
+/* Only used to measure performance of REP STOSB. */
+ENTRY (__memset_erms)
+ /* Skip zero length. */
+ testq %rdx, %rdx
+ jnz L(stosb)
+ movq %rdi, %rax
+ ret
+# else
+/* Provide a hidden symbol to debugger. */
+ .hidden MEMSET_SYMBOL (__memset, erms)
+ENTRY (MEMSET_SYMBOL (__memset, erms))
+# endif
+L(stosb):
+ /* Issue vzeroupper before rep stosb. */
+ VZEROUPPER
+ movq %rdx, %rcx
+ movzbl %sil, %eax
+ movq %rdi, %rdx
+ rep stosb
+ movq %rdx, %rax
+ ret
+# if VEC_SIZE == 16
+END (__memset_erms)
+# else
+END (MEMSET_SYMBOL (__memset, erms))
+# endif
+
+# if defined SHARED && IS_IN (libc)
+ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
+ MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
+ cmpq $VEC_SIZE, %rdx
+ jb L(less_vec)
+ cmpq $(VEC_SIZE * 2), %rdx
+ ja L(stosb_more_2x_vec)
+ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), (%rdi)
+ VZEROUPPER
+ ret
+
+L(stosb_more_2x_vec):
+ cmpq $REP_STOSB_THRESHOLD, %rdx
+ ja L(stosb)
+#endif
+L(more_2x_vec):
+ cmpq $(VEC_SIZE * 4), %rdx
+ ja L(loop_start)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(0), VEC_SIZE(%rdi)
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
+L(return):
+ VZEROUPPER
+ ret
+
+L(loop_start):
+ leaq (VEC_SIZE * 4)(%rdi), %rcx
+ VMOVU %VEC(0), (%rdi)
+ andq $-(VEC_SIZE * 4), %rcx
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), VEC_SIZE(%rdi)
+ VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
+ VMOVU %VEC(0), (VEC_SIZE * 2)(%rdi)
+ VMOVU %VEC(0), -(VEC_SIZE * 3)(%rdi,%rdx)
+ VMOVU %VEC(0), (VEC_SIZE * 3)(%rdi)
+ VMOVU %VEC(0), -(VEC_SIZE * 4)(%rdi,%rdx)
+ addq %rdi, %rdx
+ andq $-(VEC_SIZE * 4), %rdx
+ cmpq %rdx, %rcx
+ je L(return)
+L(loop):
+ VMOVA %VEC(0), (%rcx)
+ VMOVA %VEC(0), VEC_SIZE(%rcx)
+ VMOVA %VEC(0), (VEC_SIZE * 2)(%rcx)
+ VMOVA %VEC(0), (VEC_SIZE * 3)(%rcx)
+ addq $(VEC_SIZE * 4), %rcx
+ cmpq %rcx, %rdx
+ jne L(loop)
+ VZEROUPPER_SHORT_RETURN
+ ret
+L(less_vec):
+ /* Less than 1 VEC. */
+# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+# error Unsupported VEC_SIZE!
+# endif
+# if VEC_SIZE > 32
+ cmpb $32, %dl
+ jae L(between_32_63)
+# endif
+# if VEC_SIZE > 16
+ cmpb $16, %dl
+ jae L(between_16_31)
+# endif
+ MOVQ %xmm0, %rcx
+ cmpb $8, %dl
+ jae L(between_8_15)
+ cmpb $4, %dl
+ jae L(between_4_7)
+ cmpb $1, %dl
+ ja L(between_2_3)
+ jb 1f
+ movb %cl, (%rdi)
+1:
+ VZEROUPPER
+ ret
+# if VEC_SIZE > 32
+ /* From 32 to 63. No branch when size == 32. */
+L(between_32_63):
+ vmovdqu %ymm0, -32(%rdi,%rdx)
+ vmovdqu %ymm0, (%rdi)
+ VZEROUPPER
+ ret
+# endif
+# if VEC_SIZE > 16
+ /* From 16 to 31. No branch when size == 16. */
+L(between_16_31):
+ vmovdqu %xmm0, -16(%rdi,%rdx)
+ vmovdqu %xmm0, (%rdi)
+ VZEROUPPER
+ ret
+# endif
+ /* From 8 to 15. No branch when size == 8. */
+L(between_8_15):
+ movq %rcx, -8(%rdi,%rdx)
+ movq %rcx, (%rdi)
+ VZEROUPPER
+ ret
+L(between_4_7):
+ /* From 4 to 7. No branch when size == 4. */
+ movl %ecx, -4(%rdi,%rdx)
+ movl %ecx, (%rdi)
+ VZEROUPPER
+ ret
+L(between_2_3):
+ /* From 2 to 3. No branch when size == 2. */
+ movw %cx, -2(%rdi,%rdx)
+ movw %cx, (%rdi)
+ VZEROUPPER
+ ret
+END (MEMSET_SYMBOL (__memset, unaligned_erms))
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
deleted file mode 100644
index 8e3b9b9764..0000000000
--- a/sysdeps/x86_64/multiarch/memset.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Multiple versions of memset
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib. */
-#if IS_IN (libc)
-ENTRY(memset)
- .type memset, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __memset_sse2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- leaq __memset_avx2(%rip), %rax
-#ifdef HAVE_AVX512_ASM_SUPPORT
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 2f
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jz 2f
- leaq __memset_avx512_no_vzeroupper(%rip), %rax
-#endif
-2: ret
-END(memset)
-#endif
-
-#if IS_IN (libc)
-# undef memset
-# define memset __memset_sse2
-
-# undef __memset_chk
-# define __memset_chk __memset_chk_sse2
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal memset calls through a PLT.
- The speedup we get from using GPR instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memset; __GI_memset = __memset_sse2
-# endif
-
-# undef strong_alias
-# define strong_alias(original, alias)
-#endif
-
-#include "../memset.S"
diff --git a/sysdeps/x86_64/multiarch/memset.c b/sysdeps/x86_64/multiarch/memset.c
new file mode 100644
index 0000000000..064841d5fc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset.c
@@ -0,0 +1,35 @@
+/* Multiple versions of memset.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc.
+ memset is defined here through libc_ifunc_redirected, with
+ IFUNC_SELECTOR () supplying the implementation to use.
+ NOTE(review): IFUNC_SELECTOR is presumably provided by
+ ifunc-memset.h, keyed on SYMBOL_NAME -- confirm there. */
+#if IS_IN (libc)
+# define memset __redirect_memset /* Rename while <string.h> is read. */
+# include <string.h> /* Its memset declaration now names __redirect_memset. */
+# undef memset /* Free the plain name for the definition below. */
+
+# define SYMBOL_NAME memset
+# include "ifunc-memset.h"
+
+libc_ifunc_redirected (__redirect_memset, memset, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (memset, __GI_memset, __redirect_memset) /* NOTE(review): looks like the hidden __GI_memset alias for libc-internal calls -- confirm against libc-symbols.h. */
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset_chk-nonshared.S b/sysdeps/x86_64/multiarch/memset_chk-nonshared.S
new file mode 100644
index 0000000000..dcc2384a27
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of memset_chk for x86-64.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/memset_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
deleted file mode 100644
index 9a7b270274..0000000000
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ /dev/null
@@ -1,49 +0,0 @@
-/* Multiple versions of memset_chk
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib. */
-#if IS_IN (libc)
-# ifdef SHARED
-ENTRY(__memset_chk)
- .type __memset_chk, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __memset_chk_sse2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- leaq __memset_chk_avx2(%rip), %rax
-#ifdef HAVE_AVX512_ASM_SUPPORT
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 2f
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jz 2f
- leaq __memset_chk_avx512_no_vzeroupper(%rip), %rax
-#endif
-2: ret
-END(__memset_chk)
-
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
- .section .gnu.warning.__memset_zero_constant_len_parameter
- .string "memset used with constant zero length parameter; this could be due to transposed parameters"
-# else
-# include "../memset_chk.S"
-# endif
-#endif
diff --git a/sysdeps/x86_64/multiarch/memset_chk.c b/sysdeps/x86_64/multiarch/memset_chk.c
new file mode 100644
index 0000000000..f9c05b364e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset_chk.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __memset_chk
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc.so. */
+#if IS_IN (libc) && defined SHARED
+# define __memset_chk __redirect_memset_chk
+# include <string.h>
+# undef __memset_chk
+
+# define SYMBOL_NAME memset_chk
+# include "ifunc-memset.h"
+
+libc_ifunc_redirected (__redirect_memset_chk, __memset_chk,
+ IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2.S
new file mode 100644
index 0000000000..128f9ea637
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2.S
@@ -0,0 +1,4 @@
+#define MEMCHR __rawmemchr_avx2
+#define USE_AS_RAWMEMCHR 1
+
+#include "memchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-sse2.S b/sysdeps/x86_64/multiarch/rawmemchr-sse2.S
new file mode 100644
index 0000000000..c681d84037
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rawmemchr-sse2.S
@@ -0,0 +1,29 @@
+/* rawmemchr optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define __rawmemchr __rawmemchr_sse2
+
+# undef weak_alias
+# define weak_alias(__rawmemchr, rawmemchr)
+# undef libc_hidden_def
+# define libc_hidden_def(__rawmemchr)
+#endif
+
+#include "../rawmemchr.S"
diff --git a/sysdeps/x86_64/multiarch/rawmemchr.c b/sysdeps/x86_64/multiarch/rawmemchr.c
new file mode 100644
index 0000000000..8a0bc3137e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rawmemchr.c
@@ -0,0 +1,38 @@
+/* Multiple versions of rawmemchr
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define rawmemchr __redirect_rawmemchr
+# define __rawmemchr __redirect___rawmemchr
+# include <string.h>
+# undef rawmemchr
+# undef __rawmemchr
+
+# define SYMBOL_NAME rawmemchr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr,
+ IFUNC_SELECTOR ());
+weak_alias (__rawmemchr, rawmemchr)
+# ifdef SHARED
+__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
index b75aeb79b2..d10d74ae21 100644
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -1,6 +1,6 @@
/* Count bits in CPU set. x86-64 multi-arch version.
This file is part of the GNU C Library.
- Copyright (C) 2008-2016 Free Software Foundation, Inc.
+ Copyright (C) 2008-2018 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@redhat.com>.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/stpcpy-sse2.S b/sysdeps/x86_64/multiarch/stpcpy-sse2.S
new file mode 100644
index 0000000000..b91a988399
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/stpcpy-sse2.S
@@ -0,0 +1,33 @@
+/* stpcpy optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define __stpcpy __stpcpy_sse2
+
+# undef weak_alias
+# define weak_alias(ignored1, ignored2)
+# undef libc_hidden_def
+# define libc_hidden_def(__stpcpy)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(stpcpy)
+#endif
+
+#define USE_AS_STPCPY
+#include <sysdeps/x86_64/stpcpy.S>
diff --git a/sysdeps/x86_64/multiarch/stpcpy.S b/sysdeps/x86_64/multiarch/stpcpy.S
deleted file mode 100644
index ee81ab6ae3..0000000000
--- a/sysdeps/x86_64/multiarch/stpcpy.S
+++ /dev/null
@@ -1,9 +0,0 @@
-/* Multiple versions of stpcpy
- All versions must be listed in ifunc-impl-list.c. */
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy
-#include "strcpy.S"
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/x86_64/multiarch/stpcpy.c b/sysdeps/x86_64/multiarch/stpcpy.c
new file mode 100644
index 0000000000..1e340fca99
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/stpcpy.c
@@ -0,0 +1,42 @@
+/* Multiple versions of stpcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define stpcpy __redirect_stpcpy
+# define __stpcpy __redirect___stpcpy
+# define NO_MEMPCPY_STPCPY_REDIRECT
+# define __NO_STRING_INLINES
+# include <string.h>
+# undef stpcpy
+# undef __stpcpy
+
+# define SYMBOL_NAME stpcpy
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ());
+
+weak_alias (__stpcpy, stpcpy)
+# ifdef SHARED
+__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy)
+ __attribute__ ((visibility ("hidden")));
+__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
index 2fde77dcab..b016e487e1 100644
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
@@ -1,8 +1,7 @@
#define STPNCPY __stpncpy_sse2
-#ifdef SHARED
+#undef weak_alias
+#define weak_alias(ignored1, ignored2)
#undef libc_hidden_def
-#define libc_hidden_def(name) \
- __hidden_ver1 (__stpncpy_sse2, __GI___stpncpy, __stpncpy_sse2);
-#endif
+#define libc_hidden_def(stpncpy)
-#include "stpncpy.c"
+#include <string/stpncpy.c>
diff --git a/sysdeps/x86_64/multiarch/stpncpy.S b/sysdeps/x86_64/multiarch/stpncpy.S
deleted file mode 100644
index 2698ca6a8c..0000000000
--- a/sysdeps/x86_64/multiarch/stpncpy.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of stpncpy
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCPY __stpncpy
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#include "strcpy.S"
-
-weak_alias (__stpncpy, stpncpy)
diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
new file mode 100644
index 0000000000..28842ece2b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/stpncpy.c
@@ -0,0 +1,38 @@
+/* Multiple versions of stpncpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define stpncpy __redirect_stpncpy
+# define __stpncpy __redirect___stpncpy
+# include <string.h>
+# undef stpncpy
+# undef __stpncpy
+
+# define SYMBOL_NAME stpncpy
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_stpncpy, __stpncpy, IFUNC_SELECTOR ());
+
+weak_alias (__stpncpy, stpncpy)
+# ifdef SHARED
+__hidden_ver1 (__stpncpy, __GI___stpncpy, __redirect___stpncpy)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcasecmp.c b/sysdeps/x86_64/multiarch/strcasecmp.c
new file mode 100644
index 0000000000..8676a621c6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strcasecmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strcasecmp __redirect_strcasecmp
+# define __strcasecmp __redirect___strcasecmp
+# include <string.h>
+# undef strcasecmp
+# undef __strcasecmp
+
+# define SYMBOL_NAME strcasecmp
+# include "ifunc-strcasecmp.h"
+
+libc_ifunc_redirected (__redirect_strcasecmp, __strcasecmp,
+ IFUNC_SELECTOR ());
+
+weak_alias (__strcasecmp, strcasecmp)
+# ifdef SHARED
+__hidden_ver1 (__strcasecmp, __GI___strcasecmp, __redirect___strcasecmp)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
new file mode 100644
index 0000000000..56a03547eb
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
@@ -0,0 +1,22 @@
+/* strcasecmp_l optimized with AVX.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define STRCMP_SSE42 __strcasecmp_l_avx
+#define USE_AVX 1
+#define USE_AS_STRCASECMP_L
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S
new file mode 100644
index 0000000000..2984640405
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S
@@ -0,0 +1,23 @@
+/* strcasecmp_l optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define STRCMP __strcasecmp_l_sse2
+#define USE_AS_STRCASECMP_L
+#define NO_NOLOCALE_ALIAS
+#define __strcasecmp __strcasecmp_sse2
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-sse4_2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-sse4_2.S
new file mode 100644
index 0000000000..31e2f9075d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp_l-sse4_2.S
@@ -0,0 +1,21 @@
+/* strcasecmp_l optimized with SSE4.2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define STRCMP_SSE42 __strcasecmp_l_sse42
+#define USE_AS_STRCASECMP_L
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l.S b/sysdeps/x86_64/multiarch/strcasecmp_l.S
deleted file mode 100644
index 49f5b9fd95..0000000000
--- a/sysdeps/x86_64/multiarch/strcasecmp_l.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of strcasecmp and strcasecmp_l
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCMP __strcasecmp_l
-#define USE_AS_STRCASECMP_L
-#include "strcmp.S"
-
-weak_alias (__strcasecmp_l, strcasecmp_l)
-libc_hidden_def (strcasecmp_l)
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l.c b/sysdeps/x86_64/multiarch/strcasecmp_l.c
new file mode 100644
index 0000000000..dc674510df
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp_l.c
@@ -0,0 +1,40 @@
+/* Multiple versions of strcasecmp_l.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strcasecmp_l __redirect_strcasecmp_l
+# define __strcasecmp_l __redirect___strcasecmp_l
+# include <string.h>
+# undef strcasecmp_l
+# undef __strcasecmp_l
+
+# define SYMBOL_NAME strcasecmp_l
+# include "ifunc-strcasecmp.h"
+
+libc_ifunc_redirected (__redirect_strcasecmp_l, __strcasecmp_l,
+ IFUNC_SELECTOR ());
+
+weak_alias (__strcasecmp_l, strcasecmp_l)
+# ifdef SHARED
+__hidden_ver1 (__strcasecmp_l, __GI___strcasecmp_l,
+ __redirect___strcasecmp_l)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 3a694d45c2..852f179bf4 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* strcat with SSE2
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2.S b/sysdeps/x86_64/multiarch/strcat-sse2.S
new file mode 100644
index 0000000000..8eb64e104c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcat-sse2.S
@@ -0,0 +1,28 @@
+/* strcat optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define strcat __strcat_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcat)
+#endif
+
+#include <sysdeps/x86_64/strcat.S>
diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
index 96184d0f0f..2d4fd78f99 100644
--- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcat-ssse3.S
@@ -1,5 +1,5 @@
/* strcat with SSSE3
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
deleted file mode 100644
index 7bb38e68ad..0000000000
--- a/sysdeps/x86_64/multiarch/strcat.S
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Multiple versions of strcat
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-# define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3 __strncat_ssse3
-# define STRCAT_SSE2 __strncat_sse2
-# define STRCAT_SSE2_UNALIGNED __strncat_sse2_unaligned
-# define __GI_STRCAT __GI_strncat
-# define __GI___STRCAT __GI___strncat
-#else
-# define STRCAT_SSSE3 __strcat_ssse3
-# define STRCAT_SSE2 __strcat_sse2
-# define STRCAT_SSE2_UNALIGNED __strcat_sse2_unaligned
-# define __GI_STRCAT __GI_strcat
-# define __GI___STRCAT __GI___strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
- .text
-ENTRY(STRCAT)
- .type STRCAT, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq STRCAT_SSE2_UNALIGNED(%rip), %rax
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f
- leaq STRCAT_SSE2(%rip), %rax
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- leaq STRCAT_SSSE3(%rip), %rax
-2: ret
-END(STRCAT)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCAT_SSE2, @function; \
- .align 16; \
- .globl STRCAT_SSE2; \
- .hidden STRCAT_SSE2; \
- STRCAT_SSE2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
- The speedup we get from using SSSE3 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- .globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../strcat.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcat.c b/sysdeps/x86_64/multiarch/strcat.c
new file mode 100644
index 0000000000..1f7f6263f3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcat.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strcat.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strcat __redirect_strcat
+# include <string.h>
+# undef strcat
+
+# define SYMBOL_NAME strcat
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_strcat, strcat, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcat, __GI_strcat, __redirect_strcat)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
new file mode 100644
index 0000000000..47bc3c9949
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
@@ -0,0 +1,254 @@
+/* strchr/strchrnul optimized with AVX2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRCHR
+# define STRCHR __strchr_avx2
+# endif
+
+# ifdef USE_AS_WCSCHR
+# define VPBROADCAST vpbroadcastd
+# define VPCMPEQ vpcmpeqd
+# define CHAR_REG esi
+# else
+# define VPBROADCAST vpbroadcastb
+# define VPCMPEQ vpcmpeqb
+# define CHAR_REG sil
+# endif
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+ .section .text.avx,"ax",@progbits
+ENTRY (STRCHR)
+ movl %edi, %ecx
+ /* Broadcast CHAR to YMM0. */
+ vmovd %esi, %xmm0
+ vpxor %xmm9, %xmm9, %xmm9
+ VPBROADCAST %xmm0, %ymm0
+ /* Check if we may cross page boundary with one vector load. */
+ andl $(2 * VEC_SIZE - 1), %ecx
+ cmpl $VEC_SIZE, %ecx
+ ja L(cros_page_boundary)
+
+ /* Check the first VEC_SIZE bytes. Search for both CHAR and the
+ null byte. */
+ vmovdqu (%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+
+ /* Align data for aligned loads in the loop. */
+ addq $VEC_SIZE, %rdi
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+
+ jmp L(more_4x_vec)
+
+ .p2align 4
+L(cros_page_boundary):
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+ vmovdqu (%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ /* Remove the leading bytes. */
+ sarl %cl, %eax
+ testl %eax, %eax
+ jz L(aligned_more)
+ /* Found CHAR or the null byte. */
+ tzcntl %eax, %eax
+ addq %rcx, %rax
+# ifdef USE_AS_STRCHRNUL
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq (%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(aligned_more):
+ addq $VEC_SIZE, %rdi
+
+L(more_4x_vec):
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ since data is only aligned to VEC_SIZE. */
+ vmovdqa (%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+
+ vmovdqa VEC_SIZE(%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+ vmovdqa (VEC_SIZE * 2)(%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+
+ vmovdqa (VEC_SIZE * 3)(%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x3)
+
+ addq $(VEC_SIZE * 4), %rdi
+
+ /* Align data to 4 * VEC_SIZE. */
+ movq %rdi, %rcx
+ andl $(4 * VEC_SIZE - 1), %ecx
+ andq $-(4 * VEC_SIZE), %rdi
+
+ .p2align 4
+L(loop_4x_vec):
+ /* Compare 4 * VEC at a time forward. */
+ vmovdqa (%rdi), %ymm5
+ vmovdqa VEC_SIZE(%rdi), %ymm6
+ vmovdqa (VEC_SIZE * 2)(%rdi), %ymm7
+ vmovdqa (VEC_SIZE * 3)(%rdi), %ymm8
+
+ VPCMPEQ %ymm5, %ymm0, %ymm1
+ VPCMPEQ %ymm6, %ymm0, %ymm2
+ VPCMPEQ %ymm7, %ymm0, %ymm3
+ VPCMPEQ %ymm8, %ymm0, %ymm4
+
+ VPCMPEQ %ymm5, %ymm9, %ymm5
+ VPCMPEQ %ymm6, %ymm9, %ymm6
+ VPCMPEQ %ymm7, %ymm9, %ymm7
+ VPCMPEQ %ymm8, %ymm9, %ymm8
+
+ vpor %ymm1, %ymm5, %ymm1
+ vpor %ymm2, %ymm6, %ymm2
+ vpor %ymm3, %ymm7, %ymm3
+ vpor %ymm4, %ymm8, %ymm4
+
+ vpor %ymm1, %ymm2, %ymm5
+ vpor %ymm3, %ymm4, %ymm6
+
+ vpor %ymm5, %ymm6, %ymm5
+
+ vpmovmskb %ymm5, %eax
+ testl %eax, %eax
+ jnz L(4x_vec_end)
+
+ addq $(VEC_SIZE * 4), %rdi
+
+ jmp L(loop_4x_vec)
+
+ .p2align 4
+L(first_vec_x0):
+ /* Found CHAR or the null byte. */
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRCHRNUL
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq (%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x1):
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRCHRNUL
+ addq $VEC_SIZE, %rax
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq VEC_SIZE(%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x2):
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRCHRNUL
+ addq $(VEC_SIZE * 2), %rax
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(4x_vec_end):
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+ vpmovmskb %ymm3, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+ vpmovmskb %ymm4, %eax
+ testl %eax, %eax
+L(first_vec_x3):
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRCHRNUL
+ addq $(VEC_SIZE * 3), %rax
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq (VEC_SIZE * 3)(%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+END (STRCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
index 979d112b28..93fb661da2 100644
--- a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
+++ b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
@@ -1,5 +1,5 @@
/* strchr with SSE2 without bsf
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S
new file mode 100644
index 0000000000..8a6e77195c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-sse2.S
@@ -0,0 +1,28 @@
+/* strchr optimized with SSE2.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define strchr __strchr_sse2
+
+# undef weak_alias
+# define weak_alias(strchr, index)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strchr)
+#endif
+
+#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
deleted file mode 100644
index 40683ad32b..0000000000
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strchr
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
- .text
-ENTRY(strchr)
- .type strchr, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __strchr_sse2(%rip), %rax
-2: HAS_ARCH_FEATURE (Slow_BSF)
- jz 3f
- leaq __strchr_sse2_no_bsf(%rip), %rax
-3: ret
-END(strchr)
-
-
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strchr_sse2, @function; \
- .align 16; \
- .globl __strchr_sse2; \
- .hidden __strchr_sse2; \
- __strchr_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strchr_sse2, .-__strchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strchr calls through a PLT.
- The speedup we get from using SSE4.2 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strchr; __GI_strchr = __strchr_sse2
-#endif
-
-#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
new file mode 100644
index 0000000000..76d64fb378
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr.c
@@ -0,0 +1,55 @@
+/* Multiple versions of strchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strchr __redirect_strchr
+# include <string.h>
+# undef strchr
+
+# define SYMBOL_NAME strchr
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ /* Unless AVX2_Usable and AVX_Fast_Unaligned_Load are set and
+ Prefer_No_VZEROUPPER is clear, fall back to an SSE2 variant. */
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ || !CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ || !CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ return (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)
+ ? OPTIMIZE (sse2_no_bsf) : OPTIMIZE (sse2));
+
+ return OPTIMIZE (avx2);
+}
+
+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
+weak_alias (strchr, index)
+# ifdef SHARED
+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2.S b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
new file mode 100644
index 0000000000..fa0cc09760
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __strchrnul_avx2
+#define USE_AS_STRCHRNUL 1
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul-sse2.S b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
new file mode 100644
index 0000000000..d4a2be118e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
@@ -0,0 +1,26 @@
+/* strchrnul optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define __strchrnul __strchrnul_sse2
+
+# undef weak_alias
+# define weak_alias(__strchrnul, strchrnul)
+#endif
+
+#include "../strchrnul.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul.c b/sysdeps/x86_64/multiarch/strchrnul.c
new file mode 100644
index 0000000000..7514999341
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strchrnul.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strchrnul __redirect_strchrnul
+# define __strchrnul __redirect___strchrnul
+# include <string.h>
+# undef __strchrnul
+# undef strchrnul
+
+# define SYMBOL_NAME strchrnul
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
+ IFUNC_SELECTOR ());
+weak_alias (__strchrnul, strchrnul)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
new file mode 100644
index 0000000000..e8397f3b05
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -0,0 +1,847 @@
+/* strcmp/wcscmp/strncmp/wcsncmp optimized with AVX2.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRCMP
+# define STRCMP __strcmp_avx2
+# endif
+
+# define PAGE_SIZE 4096
+
+/* VEC_SIZE = Number of bytes in a ymm register */
+# define VEC_SIZE 32
+
+/* Shift for dividing by (VEC_SIZE * 4). */
+# define DIVIDE_BY_VEC_4_SHIFT 7
+# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
+# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
+# endif
+
+# ifdef USE_AS_WCSCMP
+/* Compare packed dwords. */
+# define VPCMPEQ vpcmpeqd
+/* Compare packed dwords and store minimum. */
+# define VPMINU vpminud
+/* 1 dword char == 4 bytes. */
+# define SIZE_OF_CHAR 4
+# else
+/* Compare packed bytes. */
+# define VPCMPEQ vpcmpeqb
+/* Compare packed bytes and store minimum. */
+# define VPMINU vpminub
+/* 1 byte char == 1 byte. */
+# define SIZE_OF_CHAR 1
+# endif
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+/* Warning!
+ wcscmp/wcsncmp have to use SIGNED comparison for elements.
+ strcmp/strncmp have to use UNSIGNED comparison for elements.
+*/
+
+/* The main idea of the string comparison (byte or dword) using AVX2
+ consists of comparing (VPCMPEQ) two ymm vectors. The comparison is on
+ either packed bytes or dwords depending on USE_AS_WCSCMP. In order
+ to check the null char, the algorithm keeps the matched bytes/dwords,
+ requiring two more AVX2 instructions (VPMINU and VPCMPEQ). In general,
+ the cost of comparing VEC_SIZE bytes (32 bytes) is two VPCMPEQ and
+ one VPMINU instruction, together with vmovdqu and testl instructions.
+ The main loop (away from the page boundary) compares 4 vectors at a
+ time, effectively comparing 4 x VEC_SIZE bytes (128 bytes) per pass.
+
+ The logic of strncmp/wcsncmp (enabled by defining USE_AS_STRNCMP)
+ is the same as strcmp, except that a maximum offset is tracked. If
+ the maximum offset is reached before a difference is found, zero is
+ returned. */
+
+ .section .text.avx,"ax",@progbits
+ENTRY (STRCMP)
+# ifdef USE_AS_STRNCMP
+ /* Check for simple cases (0 or 1) in offset. */
+ cmp $1, %rdx
+ je L(char0)
+ jb L(zero)
+# ifdef USE_AS_WCSCMP
+ /* Convert units: from wide to byte char. */
+ shl $2, %rdx
+# endif
+ /* Register %r11 tracks the maximum offset. */
+ movq %rdx, %r11
+# endif
+ movl %edi, %eax
+ xorl %edx, %edx
+ /* Make %ymm7 all zeros in this function. */
+ vpxor %ymm7, %ymm7, %ymm7
+ orl %esi, %eax
+ andl $(PAGE_SIZE - 1), %eax
+ cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax
+ jg L(cross_page)
+ /* Start comparing 4 vectors. */
+ vmovdqu (%rdi), %ymm1
+ VPCMPEQ (%rsi), %ymm1, %ymm0
+ VPMINU %ymm1, %ymm0, %ymm0
+ VPCMPEQ %ymm7, %ymm0, %ymm0
+ vpmovmskb %ymm0, %ecx
+ testl %ecx, %ecx
+ je L(next_3_vectors)
+ tzcntl %ecx, %edx
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the mismatched index (%rdx) is after the maximum
+ offset (%r11). */
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi, %rdx), %ecx
+ cmpl (%rsi, %rdx), %ecx
+ je L(return)
+L(wcscmp_return):
+ setl %al
+ negl %eax
+ orl $1, %eax
+L(return):
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(return_vec_size):
+ tzcntl %ecx, %edx
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after
+ the maximum offset (%r11). */
+ addq $VEC_SIZE, %rdx
+ cmpq %r11, %rdx
+ jae L(zero)
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi, %rdx), %ecx
+ cmpl (%rsi, %rdx), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl VEC_SIZE(%rdi, %rdx), %ecx
+ cmpl VEC_SIZE(%rsi, %rdx), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl VEC_SIZE(%rdi, %rdx), %eax
+ movzbl VEC_SIZE(%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(return_2_vec_size):
+ tzcntl %ecx, %edx
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is
+ after the maximum offset (%r11). */
+ addq $(VEC_SIZE * 2), %rdx
+ cmpq %r11, %rdx
+ jae L(zero)
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi, %rdx), %ecx
+ cmpl (%rsi, %rdx), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx
+ cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax
+ movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(return_3_vec_size):
+ tzcntl %ecx, %edx
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is
+ after the maximum offset (%r11). */
+ addq $(VEC_SIZE * 3), %rdx
+ cmpq %r11, %rdx
+ jae L(zero)
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi, %rdx), %ecx
+ cmpl (%rsi, %rdx), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx
+ cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax
+ movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(next_3_vectors):
+ vmovdqu VEC_SIZE(%rdi), %ymm6
+ VPCMPEQ VEC_SIZE(%rsi), %ymm6, %ymm3
+ VPMINU %ymm6, %ymm3, %ymm3
+ VPCMPEQ %ymm7, %ymm3, %ymm3
+ vpmovmskb %ymm3, %ecx
+ testl %ecx, %ecx
+ jne L(return_vec_size)
+ vmovdqu (VEC_SIZE * 2)(%rdi), %ymm5
+ vmovdqu (VEC_SIZE * 3)(%rdi), %ymm4
+ vmovdqu (VEC_SIZE * 3)(%rsi), %ymm0
+ VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm5, %ymm2
+ VPMINU %ymm5, %ymm2, %ymm2
+ VPCMPEQ %ymm4, %ymm0, %ymm0
+ VPCMPEQ %ymm7, %ymm2, %ymm2
+ vpmovmskb %ymm2, %ecx
+ testl %ecx, %ecx
+ jne L(return_2_vec_size)
+ VPMINU %ymm4, %ymm0, %ymm0
+ VPCMPEQ %ymm7, %ymm0, %ymm0
+ vpmovmskb %ymm0, %ecx
+ testl %ecx, %ecx
+ jne L(return_3_vec_size)
+L(main_loop_header):
+ leaq (VEC_SIZE * 4)(%rdi), %rdx
+ movl $PAGE_SIZE, %ecx
+ /* RDX = bytes from RDI up to the next (VEC_SIZE * 4)-aligned
+ address; RAX = RDI advanced by RDX, used for the aligned loads. */
+ andq $-(VEC_SIZE * 4), %rdx
+ subq %rdi, %rdx
+ leaq (%rdi, %rdx), %rax
+# ifdef USE_AS_STRNCMP
+ /* From here on, the maximum offset (%r11) DECREASES by the same
+ amount whenever the base pointers are moved forward. Return 0 when:
+ 1) On match: offset <= the matched vector index.
+ 2) On mismatch: offset <= the mismatched index. */
+ subq %rdx, %r11
+ jbe L(zero)
+# endif
+ /* RDX = RSI + the same advance; ESI = its offset within the page. */
+ addq %rsi, %rdx
+ movq %rdx, %rsi
+ andl $(PAGE_SIZE - 1), %esi
+ /* Number of bytes before page crossing. */
+ subq %rsi, %rcx
+ /* Number of VEC_SIZE * 4 blocks before page crossing. */
+ shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx
+ /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */
+ movl %ecx, %esi
+ jmp L(loop_start)
+
+ .p2align 4
+L(loop):
+# ifdef USE_AS_STRNCMP
+ /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease
+ the maximum offset (%r11) by the same amount. */
+ subq $(VEC_SIZE * 4), %r11
+ jbe L(zero)
+# endif
+ addq $(VEC_SIZE * 4), %rax
+ addq $(VEC_SIZE * 4), %rdx
+L(loop_start):
+ testl %esi, %esi
+ leal -1(%esi), %esi
+ je L(loop_cross_page)
+L(back_to_loop):
+ /* Main loop, comparing 4 vectors at a time. */
+ vmovdqa (%rax), %ymm0
+ vmovdqa VEC_SIZE(%rax), %ymm3
+ VPCMPEQ (%rdx), %ymm0, %ymm4
+ VPCMPEQ VEC_SIZE(%rdx), %ymm3, %ymm1
+ VPMINU %ymm0, %ymm4, %ymm4
+ VPMINU %ymm3, %ymm1, %ymm1
+ vmovdqa (VEC_SIZE * 2)(%rax), %ymm2
+ VPMINU %ymm1, %ymm4, %ymm0
+ vmovdqa (VEC_SIZE * 3)(%rax), %ymm3
+ VPCMPEQ (VEC_SIZE * 2)(%rdx), %ymm2, %ymm5
+ VPCMPEQ (VEC_SIZE * 3)(%rdx), %ymm3, %ymm6
+ VPMINU %ymm2, %ymm5, %ymm5
+ VPMINU %ymm3, %ymm6, %ymm6
+ VPMINU %ymm5, %ymm0, %ymm0
+ VPMINU %ymm6, %ymm0, %ymm0
+ VPCMPEQ %ymm7, %ymm0, %ymm0
+
+ /* Test each mask (32 bits) individually because for VEC_SIZE
+ == 32 it is not possible to OR the four masks and keep all bits
+ in a 64-bit integer register, differing from SSE2 strcmp
+ where ORing is possible. */
+ vpmovmskb %ymm0, %ecx
+ testl %ecx, %ecx
+ je L(loop)
+ VPCMPEQ %ymm7, %ymm4, %ymm0
+ vpmovmskb %ymm0, %edi
+ testl %edi, %edi
+ je L(test_vec)
+ tzcntl %edi, %ecx
+# ifdef USE_AS_STRNCMP
+ cmpq %rcx, %r11
+ jbe L(zero)
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %edi
+ cmpl (%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %edi
+ cmpl (%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(test_vec):
+# ifdef USE_AS_STRNCMP
+ /* The first vector matched. Return 0 if the maximum offset
+ (%r11) <= VEC_SIZE. */
+ cmpq $VEC_SIZE, %r11
+ jbe L(zero)
+# endif
+ VPCMPEQ %ymm7, %ymm1, %ymm1
+ vpmovmskb %ymm1, %ecx
+ testl %ecx, %ecx
+ je L(test_2_vec)
+ tzcntl %ecx, %edi
+# ifdef USE_AS_STRNCMP
+ addq $VEC_SIZE, %rdi
+ cmpq %rdi, %r11
+ jbe L(zero)
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rdi), %ecx
+ cmpl (%rdx, %rdi), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rdi), %eax
+ movzbl (%rdx, %rdi), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl VEC_SIZE(%rsi, %rdi), %ecx
+ cmpl VEC_SIZE(%rdx, %rdi), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl VEC_SIZE(%rax, %rdi), %eax
+ movzbl VEC_SIZE(%rdx, %rdi), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(test_2_vec):
+# ifdef USE_AS_STRNCMP
+ /* The first 2 vectors matched. Return 0 if the maximum offset
+ (%r11) <= 2 * VEC_SIZE. */
+ cmpq $(VEC_SIZE * 2), %r11
+ jbe L(zero)
+# endif
+ VPCMPEQ %ymm7, %ymm5, %ymm5
+ vpmovmskb %ymm5, %ecx
+ testl %ecx, %ecx
+ je L(test_3_vec)
+ tzcntl %ecx, %edi
+# ifdef USE_AS_STRNCMP
+ addq $(VEC_SIZE * 2), %rdi
+ cmpq %rdi, %r11
+ jbe L(zero)
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rdi), %ecx
+ cmpl (%rdx, %rdi), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rdi), %eax
+ movzbl (%rdx, %rdi), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx
+ cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax
+ movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(test_3_vec):
+# ifdef USE_AS_STRNCMP
+ /* The first 3 vectors matched. Return 0 if the maximum offset
+ (%r11) <= 3 * VEC_SIZE. */
+ cmpq $(VEC_SIZE * 3), %r11
+ jbe L(zero)
+# endif
+ VPCMPEQ %ymm7, %ymm6, %ymm6
+ vpmovmskb %ymm6, %esi
+ tzcntl %esi, %ecx
+# ifdef USE_AS_STRNCMP
+ addq $(VEC_SIZE * 3), %rcx
+ cmpq %rcx, %r11
+ jbe L(zero)
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %esi
+ cmpl (%rdx, %rcx), %esi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (VEC_SIZE * 3)(%rsi, %rcx), %esi
+ cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi
+ jne L(wcscmp_return)
+# else
+ movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax
+ movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(loop_cross_page):
+ xorl %r10d, %r10d
+ movq %rdx, %rcx
+ /* Align load via RDX. We load the extra ECX bytes which should
+ be ignored. */
+ andl $((VEC_SIZE * 4) - 1), %ecx
+ /* R10 is -RCX. */
+ subq %rcx, %r10
+
+ /* This works only if VEC_SIZE * 2 == 64. */
+# if (VEC_SIZE * 2) != 64
+# error (VEC_SIZE * 2) != 64
+# endif
+
+ /* Check if the first VEC_SIZE * 2 bytes should be ignored. */
+ cmpl $(VEC_SIZE * 2), %ecx
+ jge L(loop_cross_page_2_vec)
+
+ vmovdqu (%rax, %r10), %ymm2
+ vmovdqu VEC_SIZE(%rax, %r10), %ymm3
+ VPCMPEQ (%rdx, %r10), %ymm2, %ymm0
+ VPCMPEQ VEC_SIZE(%rdx, %r10), %ymm3, %ymm1
+ VPMINU %ymm2, %ymm0, %ymm0
+ VPMINU %ymm3, %ymm1, %ymm1
+ VPCMPEQ %ymm7, %ymm0, %ymm0
+ VPCMPEQ %ymm7, %ymm1, %ymm1
+
+ vpmovmskb %ymm0, %edi
+ vpmovmskb %ymm1, %esi
+
+ salq $32, %rsi
+ xorq %rsi, %rdi
+
+ /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */
+ shrq %cl, %rdi
+
+ testq %rdi, %rdi
+ je L(loop_cross_page_2_vec)
+ tzcntq %rdi, %rcx
+# ifdef USE_AS_STRNCMP
+ cmpq %rcx, %r11
+ jbe L(zero)
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %edi
+ cmpl (%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %edi
+ cmpl (%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(loop_cross_page_2_vec):
+ /* The first VEC_SIZE * 2 bytes match or are ignored. */
+ vmovdqu (VEC_SIZE * 2)(%rax, %r10), %ymm2
+ vmovdqu (VEC_SIZE * 3)(%rax, %r10), %ymm3
+ VPCMPEQ (VEC_SIZE * 2)(%rdx, %r10), %ymm2, %ymm5
+ VPMINU %ymm2, %ymm5, %ymm5
+ VPCMPEQ (VEC_SIZE * 3)(%rdx, %r10), %ymm3, %ymm6
+ VPCMPEQ %ymm7, %ymm5, %ymm5
+ VPMINU %ymm3, %ymm6, %ymm6
+ VPCMPEQ %ymm7, %ymm6, %ymm6
+
+ vpmovmskb %ymm5, %edi
+ vpmovmskb %ymm6, %esi
+
+ salq $32, %rsi
+ xorq %rsi, %rdi
+
+ xorl %r8d, %r8d
+ /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */
+ subl $(VEC_SIZE * 2), %ecx
+ jle 1f
+ /* Skip ECX bytes. */
+ shrq %cl, %rdi
+ /* R8 has number of bytes skipped. */
+ movl %ecx, %r8d
+1:
+ /* Before jumping back to the loop, set ESI to the number of
+ VEC_SIZE * 4 blocks before page crossing. */
+ movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
+
+ testq %rdi, %rdi
+ je L(back_to_loop)
+ tzcntq %rdi, %rcx
+ addq %r10, %rcx
+ /* Adjust for number of bytes skipped. */
+ addq %r8, %rcx
+# ifdef USE_AS_STRNCMP
+ addq $(VEC_SIZE * 2), %rcx
+ subq %rcx, %r11
+ jbe L(zero)
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %edi
+ cmpl (%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (VEC_SIZE * 2)(%rsi, %rcx), %edi
+ cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax
+ movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(cross_page_loop):
+ /* Check one byte/dword at a time. */
+# ifdef USE_AS_WCSCMP
+ cmpl %ecx, %eax
+# else
+ subl %ecx, %eax
+# endif
+ jne L(different)
+ addl $SIZE_OF_CHAR, %edx
+ cmpl $(VEC_SIZE * 4), %edx
+ je L(main_loop_header)
+# ifdef USE_AS_STRNCMP
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+ movl (%rdi, %rdx), %eax
+ movl (%rsi, %rdx), %ecx
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %ecx
+# endif
+ /* Check null char. */
+ testl %eax, %eax
+ jne L(cross_page_loop)
+ /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED
+ comparisons. */
+ subl %ecx, %eax
+# ifndef USE_AS_WCSCMP
+L(different):
+# endif
+ VZEROUPPER
+ ret
+
+# ifdef USE_AS_WCSCMP
+ .p2align 4
+L(different):
+ /* Use movl to avoid modifying EFLAGS. */
+ movl $0, %eax
+ setl %al
+ negl %eax
+ orl $1, %eax
+ VZEROUPPER
+ ret
+# endif
+
+# ifdef USE_AS_STRNCMP
+ .p2align 4
+L(zero):
+ xorl %eax, %eax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(char0):
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi), %ecx
+ cmpl (%rsi), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rsi), %ecx
+ movzbl (%rdi), %eax
+ subl %ecx, %eax
+# endif
+ VZEROUPPER
+ ret
+# endif
+
+ .p2align 4
+L(last_vector):
+ addq %rdx, %rdi
+ addq %rdx, %rsi
+# ifdef USE_AS_STRNCMP
+ subq %rdx, %r11
+# endif
+ tzcntl %ecx, %edx
+# ifdef USE_AS_STRNCMP
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi, %rdx), %ecx
+ cmpl (%rsi, %rdx), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+ VZEROUPPER
+ ret
+
+ /* Comparing in the page boundary region requires special treatment:
+ it must be done one vector at a time, starting with the wider
+ ymm vector if possible and, if not, with xmm. If fetching 16 bytes
+ (xmm) would still cross the boundary, narrower comparisons are used
+ (8 and 4 bytes for strcmp/strncmp, then one byte/dword at a time). */
+ .p2align 4
+L(cross_page):
+ /* Try one ymm vector at a time. */
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ jg L(cross_page_1_vector)
+L(loop_1_vector):
+ vmovdqu (%rdi, %rdx), %ymm1
+ VPCMPEQ (%rsi, %rdx), %ymm1, %ymm0
+ VPMINU %ymm1, %ymm0, %ymm0
+ VPCMPEQ %ymm7, %ymm0, %ymm0
+ vpmovmskb %ymm0, %ecx
+ testl %ecx, %ecx
+ jne L(last_vector)
+
+ addl $VEC_SIZE, %edx
+
+ addl $VEC_SIZE, %eax
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the current offset (%rdx) >= the maximum offset
+ (%r11). */
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ jle L(loop_1_vector)
+L(cross_page_1_vector):
+ /* Less than 32 bytes to check, try one xmm vector. */
+ cmpl $(PAGE_SIZE - 16), %eax
+ jg L(cross_page_1_xmm)
+ vmovdqu (%rdi, %rdx), %xmm1
+ VPCMPEQ (%rsi, %rdx), %xmm1, %xmm0
+ VPMINU %xmm1, %xmm0, %xmm0
+ VPCMPEQ %xmm7, %xmm0, %xmm0
+ vpmovmskb %xmm0, %ecx
+ testl %ecx, %ecx
+ jne L(last_vector)
+
+ addl $16, %edx
+# ifndef USE_AS_WCSCMP
+ addl $16, %eax
+# endif
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the current offset (%rdx) >= the maximum offset
+ (%r11). */
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+
+L(cross_page_1_xmm):
+# ifndef USE_AS_WCSCMP
+ /* Less than 16 bytes to check, try 8 byte vector. NB: No need
+ for wcscmp nor wcsncmp since wide char is 4 bytes. */
+ cmpl $(PAGE_SIZE - 8), %eax
+ jg L(cross_page_8bytes)
+ vmovq (%rdi, %rdx), %xmm1
+ vmovq (%rsi, %rdx), %xmm0
+ VPCMPEQ %xmm0, %xmm1, %xmm0
+ VPMINU %xmm1, %xmm0, %xmm0
+ VPCMPEQ %xmm7, %xmm0, %xmm0
+ vpmovmskb %xmm0, %ecx
+ /* Only the low 8 bits are valid. */
+ andl $0xff, %ecx
+ testl %ecx, %ecx
+ jne L(last_vector)
+
+ addl $8, %edx
+ addl $8, %eax
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the current offset (%rdx) >= the maximum offset
+ (%r11). */
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+
+L(cross_page_8bytes):
+ /* Less than 8 bytes to check, try 4 byte vector. */
+ cmpl $(PAGE_SIZE - 4), %eax
+ jg L(cross_page_4bytes)
+ vmovd (%rdi, %rdx), %xmm1
+ vmovd (%rsi, %rdx), %xmm0
+ VPCMPEQ %xmm0, %xmm1, %xmm0
+ VPMINU %xmm1, %xmm0, %xmm0
+ VPCMPEQ %xmm7, %xmm0, %xmm0
+ vpmovmskb %xmm0, %ecx
+ /* Only the low 4 bits are valid. */
+ andl $0xf, %ecx
+ testl %ecx, %ecx
+ jne L(last_vector)
+
+ addl $4, %edx
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the current offset (%rdx) >= the maximum offset
+ (%r11). */
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+
+L(cross_page_4bytes):
+# endif
+ /* Less than 4 bytes to check, try one byte/dword at a time. */
+# ifdef USE_AS_STRNCMP
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+ movl (%rdi, %rdx), %eax
+ movl (%rsi, %rdx), %ecx
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %ecx
+# endif
+ testl %eax, %eax
+ jne L(cross_page_loop)
+ subl %ecx, %eax
+ VZEROUPPER
+ ret
+END (STRCMP)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
index bf555b4066..a9b6267d15 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* strcmp with unaligned loads
- Copyright (C) 2013-2016 Free Software Foundation, Inc.
+ Copyright (C) 2013-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S
new file mode 100644
index 0000000000..d173ded8c0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S
@@ -0,0 +1,28 @@
+/* strcmp optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+
+# define STRCMP __strcmp_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcmp)
+#endif
+
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index 70df84ae32..d3c07bd292 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -1,5 +1,5 @@
/* strcmp with SSE4.2
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -17,6 +17,40 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <sysdep.h>
+
+#ifndef STRCMP_SSE42
+# define STRCMP_SSE42 __strcmp_sse42
+#endif
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+#endif
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
+ if the new counter > the old one or is 0. */
+# define UPDATE_STRNCMP_COUNTER \
+ /* calculate left number to compare */ \
+ lea -16(%rcx, %r11), %r9; \
+ cmp %r9, %r11; \
+ jb LABEL(strcmp_exitz); \
+ test %r9, %r9; \
+ je LABEL(strcmp_exitz); \
+ mov %r9, %r11
+#else
+# define UPDATE_STRNCMP_COUNTER
+#endif
+
+#ifdef USE_AVX
+# define SECTION avx
+# define GLABEL(l) l##_avx
+#else
+# define SECTION sse4.2
+# define GLABEL(l) l##_sse42
+#endif
+
+#define LABEL(l) .L##l
/* We use 0x1a:
_SIDD_SBYTE_OPS
@@ -92,6 +126,7 @@ END (GLABEL(__strncasecmp))
STRCMP_SSE42:
cfi_startproc
+ _CET_ENDBR
CALL_MCOUNT
/*
@@ -240,7 +275,7 @@ LABEL(bigger):
movslq (%r10, %r9,4), %r9
pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
lea (%r10, %r9), %r10
- jmp *%r10 /* jump to corresponding case */
+ _CET_NOTRACK jmp *%r10 /* jump to corresponding case */
/*
* The following cases will be handled by ashr_0
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
new file mode 100644
index 0000000000..776e5e060f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
@@ -0,0 +1,21 @@
+/* strcmp optimized with SSE4.2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include "strcmp-sse42.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
deleted file mode 100644
index 0e4a113f61..0000000000
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ /dev/null
@@ -1,209 +0,0 @@
-/* Multiple versions of strcmp
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifdef USE_AS_STRNCMP
-/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
- if the new counter > the old one or is 0. */
-# define UPDATE_STRNCMP_COUNTER \
- /* calculate left number to compare */ \
- lea -16(%rcx, %r11), %r9; \
- cmp %r9, %r11; \
- jb LABEL(strcmp_exitz); \
- test %r9, %r9; \
- je LABEL(strcmp_exitz); \
- mov %r9, %r11
-
-# define STRCMP_SSE42 __strncmp_sse42
-# define STRCMP_SSSE3 __strncmp_ssse3
-# define STRCMP_SSE2 __strncmp_sse2
-# define __GI_STRCMP __GI_strncmp
-#elif defined USE_AS_STRCASECMP_L
-# include "locale-defines.h"
-
-# define UPDATE_STRNCMP_COUNTER
-
-# define STRCMP_AVX __strcasecmp_l_avx
-# define STRCMP_SSE42 __strcasecmp_l_sse42
-# define STRCMP_SSSE3 __strcasecmp_l_ssse3
-# define STRCMP_SSE2 __strcasecmp_l_sse2
-# define __GI_STRCMP __GI___strcasecmp_l
-#elif defined USE_AS_STRNCASECMP_L
-# include "locale-defines.h"
-
-/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
- if the new counter > the old one or is 0. */
-# define UPDATE_STRNCMP_COUNTER \
- /* calculate left number to compare */ \
- lea -16(%rcx, %r11), %r9; \
- cmp %r9, %r11; \
- jb LABEL(strcmp_exitz); \
- test %r9, %r9; \
- je LABEL(strcmp_exitz); \
- mov %r9, %r11
-
-# define STRCMP_AVX __strncasecmp_l_avx
-# define STRCMP_SSE42 __strncasecmp_l_sse42
-# define STRCMP_SSSE3 __strncasecmp_l_ssse3
-# define STRCMP_SSE2 __strncasecmp_l_sse2
-# define __GI_STRCMP __GI___strncasecmp_l
-#else
-# define USE_AS_STRCMP
-# define UPDATE_STRNCMP_COUNTER
-# ifndef STRCMP
-# define STRCMP strcmp
-# define STRCMP_SSE42 __strcmp_sse42
-# define STRCMP_SSSE3 __strcmp_ssse3
-# define STRCMP_SSE2 __strcmp_sse2
-# define __GI_STRCMP __GI_strcmp
-# endif
-#endif
-
-/* Define multiple versions only for the definition in libc. Don't
- define multiple versions for strncmp in static library since we
- need strncmp before the initialization happened. */
-#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
- .text
-ENTRY(STRCMP)
- .type STRCMP, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef USE_AS_STRCMP
- leaq __strcmp_sse2_unaligned(%rip), %rax
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 3f
-#else
- HAS_ARCH_FEATURE (Slow_SSE4_2)
- jnz 2f
- leaq STRCMP_SSE42(%rip), %rax
- HAS_CPU_FEATURE (SSE4_2)
- jnz 3f
-#endif
-2: leaq STRCMP_SSSE3(%rip), %rax
- HAS_CPU_FEATURE (SSSE3)
- jnz 3f
- leaq STRCMP_SSE2(%rip), %rax
-3: ret
-END(STRCMP)
-
-# ifdef USE_AS_STRCASECMP_L
-ENTRY(__strcasecmp)
- .type __strcasecmp, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __strcasecmp_avx(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Usable)
- jnz 3f
- HAS_ARCH_FEATURE (Slow_SSE4_2)
- jnz 2f
- leaq __strcasecmp_sse42(%rip), %rax
- HAS_CPU_FEATURE (SSE4_2)
- jnz 3f
-2: leaq __strcasecmp_ssse3(%rip), %rax
- HAS_CPU_FEATURE (SSSE3)
- jnz 3f
- leaq __strcasecmp_sse2(%rip), %rax
-3: ret
-END(__strcasecmp)
-weak_alias (__strcasecmp, strcasecmp)
-# endif
-# ifdef USE_AS_STRNCASECMP_L
-ENTRY(__strncasecmp)
- .type __strncasecmp, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __strncasecmp_avx(%rip), %rax
- HAS_ARCH_FEATURE (AVX_Usable)
- jnz 3f
- HAS_ARCH_FEATURE (Slow_SSE4_2)
- jnz 2f
- leaq __strncasecmp_sse42(%rip), %rax
- HAS_CPU_FEATURE (SSE4_2)
- jnz 3f
-2: leaq __strncasecmp_ssse3(%rip), %rax
- HAS_CPU_FEATURE (SSSE3)
- jnz 3f
- leaq __strncasecmp_sse2(%rip), %rax
-3: ret
-END(__strncasecmp)
-weak_alias (__strncasecmp, strncasecmp)
-# endif
-
-# undef LABEL
-# define LABEL(l) .L##l##_sse42
-# define GLABEL(l) l##_sse42
-# define SECTION sse4.2
-# include "strcmp-sse42.S"
-
-
-# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# define LABEL(l) .L##l##_avx
-# define GLABEL(l) l##_avx
-# define USE_AVX 1
-# undef STRCMP_SSE42
-# define STRCMP_SSE42 STRCMP_AVX
-# define SECTION avx
-# include "strcmp-sse42.S"
-# endif
-
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCMP_SSE2, @function; \
- .align 16; \
- .globl STRCMP_SSE2; \
- .hidden STRCMP_SSE2; \
- STRCMP_SSE2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCMP_SSE2, .-STRCMP_SSE2
-
-# ifdef USE_AS_STRCASECMP_L
-# define ENTRY2(name) \
- .type __strcasecmp_sse2, @function; \
- .align 16; \
- .globl __strcasecmp_sse2; \
- .hidden __strcasecmp_sse2; \
- __strcasecmp_sse2: cfi_startproc; \
- CALL_MCOUNT
-# define END2(name) \
- cfi_endproc; .size __strcasecmp_sse2, .-__strcasecmp_sse2
-# endif
-
-# ifdef USE_AS_STRNCASECMP_L
-# define ENTRY2(name) \
- .type __strncasecmp_sse2, @function; \
- .align 16; \
- .globl __strncasecmp_sse2; \
- .hidden __strncasecmp_sse2; \
- __strncasecmp_sse2: cfi_startproc; \
- CALL_MCOUNT
-# define END2(name) \
- cfi_endproc; .size __strncasecmp_sse2, .-__strncasecmp_sse2
-# endif
-
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcmp calls through a PLT.
- The speedup we get from using SSE4.2 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCMP; __GI_STRCMP = STRCMP_SSE2
-#endif
-
-#include "../strcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
new file mode 100644
index 0000000000..b903e418df
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -0,0 +1,59 @@
+/* Multiple versions of strcmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strcmp __redirect_strcmp
+# include <string.h>
+# undef strcmp
+
+# define SYMBOL_NAME strcmp
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* Checks run in priority order; AVX2 needs all three conditions below. */
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ return OPTIMIZE (avx2);
+ /* Next choice: the unaligned SSE2 version if Fast_Unaligned_Load is set. */
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+ return OPTIMIZE (sse2_unaligned);
+ /* Next choice: the SSSE3 version if the CPU reports SSSE3. */
+ if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+ return OPTIMIZE (ssse3);
+ /* None of the above matched: fall back to the SSE2 version. */
+ return OPTIMIZE (sse2);
+}
+
+libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index caa74be2c2..72bf7e8586 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* strcpy with SSE2 and unaligned load
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -33,7 +33,7 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), %rcx; \
lea (%r11, %rcx), %rcx; \
- jmp *%rcx
+ _CET_NOTRACK jmp *%rcx
# ifndef USE_AS_STRCAT
@@ -99,6 +99,8 @@ L(Unalign16Both):
sub %rcx, %rdi
# ifdef USE_AS_STRNCPY
add %rcx, %r8
+ sbb %rcx, %rcx
+ or %rcx, %r8
# endif
mov $16, %rcx
movdqa (%rsi, %rcx), %xmm1
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2.S b/sysdeps/x86_64/multiarch/strcpy-sse2.S
new file mode 100644
index 0000000000..70136017fa
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2.S
@@ -0,0 +1,28 @@
+/* strcpy optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define strcpy __strcpy_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcpy)
+#endif
+
+#include <sysdeps/x86_64/strcpy.S>
diff --git a/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
index 5bdb7671cf..9858d0c4d5 100644
--- a/sysdeps/x86_64/multiarch/strcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
@@ -1,5 +1,5 @@
/* strcpy with SSSE3
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S
deleted file mode 100644
index 024f6ef899..0000000000
--- a/sysdeps/x86_64/multiarch/strcpy.S
+++ /dev/null
@@ -1,99 +0,0 @@
-/* Multiple versions of strcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY)
-# ifndef STRCPY
-# define STRCPY strcpy
-# endif
-#endif
-
-#ifdef USE_AS_STPCPY
-# ifdef USE_AS_STRNCPY
-# define STRCPY_SSSE3 __stpncpy_ssse3
-# define STRCPY_SSE2 __stpncpy_sse2
-# define STRCPY_SSE2_UNALIGNED __stpncpy_sse2_unaligned
-# define __GI_STRCPY __GI_stpncpy
-# define __GI___STRCPY __GI___stpncpy
-# else
-# define STRCPY_SSSE3 __stpcpy_ssse3
-# define STRCPY_SSE2 __stpcpy_sse2
-# define STRCPY_SSE2_UNALIGNED __stpcpy_sse2_unaligned
-# define __GI_STRCPY __GI_stpcpy
-# define __GI___STRCPY __GI___stpcpy
-# endif
-#else
-# ifdef USE_AS_STRNCPY
-# define STRCPY_SSSE3 __strncpy_ssse3
-# define STRCPY_SSE2 __strncpy_sse2
-# define STRCPY_SSE2_UNALIGNED __strncpy_sse2_unaligned
-# define __GI_STRCPY __GI_strncpy
-# else
-# define STRCPY_SSSE3 __strcpy_ssse3
-# define STRCPY_SSE2 __strcpy_sse2
-# define STRCPY_SSE2_UNALIGNED __strcpy_sse2_unaligned
-# define __GI_STRCPY __GI_strcpy
-# endif
-#endif
-
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
- .text
-ENTRY(STRCPY)
- .type STRCPY, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq STRCPY_SSE2_UNALIGNED(%rip), %rax
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f
- leaq STRCPY_SSE2(%rip), %rax
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- leaq STRCPY_SSSE3(%rip), %rax
-2: ret
-END(STRCPY)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCPY_SSE2, @function; \
- .align 16; \
- .globl STRCPY_SSE2; \
- .hidden STRCPY_SSE2; \
- STRCPY_SSE2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCPY_SSE2, .-STRCPY_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcpy calls through a PLT.
- The speedup we get from using SSSE3 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCPY; __GI_STRCPY = STRCPY_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- .globl __GI___STRCPY; __GI___STRCPY = STRCPY_SSE2
-#endif
-
-#ifndef USE_AS_STRNCPY
-#include "../strcpy.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcpy.c b/sysdeps/x86_64/multiarch/strcpy.c
new file mode 100644
index 0000000000..12e0e3ffe2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcpy.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strcpy __redirect_strcpy
+# include <string.h>
+# undef strcpy
+
+# define SYMBOL_NAME strcpy
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index 91b804ddd6..857af10486 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -1,5 +1,5 @@
/* strcspn with SSE4.2 intrinsics
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -70,7 +70,7 @@ char *
#else
size_t
#endif
-STRCSPN_SSE2 (const char *, const char *);
+STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
#ifdef USE_AS_STRPBRK
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.S
new file mode 100644
index 0000000000..8a0c69d7f5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn-sse2.S
@@ -0,0 +1,28 @@
+/* strcspn optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define strcspn __strcspn_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcspn)
+#endif
+
+#include <sysdeps/x86_64/strcspn.S>
diff --git a/sysdeps/x86_64/multiarch/strcspn.S b/sysdeps/x86_64/multiarch/strcspn.S
deleted file mode 100644
index 8e7ff1c663..0000000000
--- a/sysdeps/x86_64/multiarch/strcspn.S
+++ /dev/null
@@ -1,69 +0,0 @@
-/* Multiple versions of strcspn
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifdef USE_AS_STRPBRK
-#define STRCSPN_SSE42 __strpbrk_sse42
-#define STRCSPN_SSE2 __strpbrk_sse2
-#define __GI_STRCSPN __GI_strpbrk
-#else
-#ifndef STRCSPN
-#define STRCSPN strcspn
-#define STRCSPN_SSE42 __strcspn_sse42
-#define STRCSPN_SSE2 __strcspn_sse2
-#define __GI_STRCSPN __GI_strcspn
-#endif
-#endif
-
-/* Define multiple versions only for the definition in libc. Don't
- define multiple versions for strpbrk in static library since we
- need strpbrk before the initialization happened. */
-#if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc)
- .text
-ENTRY(STRCSPN)
- .type STRCSPN, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq STRCSPN_SSE2(%rip), %rax
- HAS_CPU_FEATURE (SSE4_2)
- jz 2f
- leaq STRCSPN_SSE42(%rip), %rax
-2: ret
-END(STRCSPN)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCSPN_SSE2, @function; \
- .globl STRCSPN_SSE2; \
- .align 16; \
- STRCSPN_SSE2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCSPN_SSE2, .-STRCSPN_SSE2
-#endif
-
-#ifdef USE_AS_STRPBRK
-#include "../strpbrk.S"
-#else
-#include "../strcspn.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c
new file mode 100644
index 0000000000..9712e8410c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strcspn.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strcspn __redirect_strcspn
+# include <string.h>
+# undef strcspn
+
+# define SYMBOL_NAME strcspn
+# include "ifunc-sse4_2.h"
+
+libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcspn, __GI_strcspn, __redirect_strcspn)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
new file mode 100644
index 0000000000..fb2418cddc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
@@ -0,0 +1,393 @@
+/* strlen/strnlen/wcslen/wcsnlen optimized with AVX2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRLEN
+# define STRLEN __strlen_avx2
+# endif
+
+# ifdef USE_AS_WCSLEN
+# define VPCMPEQ vpcmpeqd
+# define VPMINU vpminud
+# else
+# define VPCMPEQ vpcmpeqb
+# define VPMINU vpminub
+# endif
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+ .section .text.avx,"ax",@progbits
+ENTRY (STRLEN)
+# ifdef USE_AS_STRNLEN
+ /* Check for zero length. */
+ testq %rsi, %rsi
+ jz L(zero)
+# ifdef USE_AS_WCSLEN
+ shl $2, %rsi
+# endif
+ movq %rsi, %r8
+# endif
+ movl %edi, %ecx
+ movq %rdi, %rdx
+ vpxor %xmm0, %xmm0, %xmm0
+
+ /* Check if we may cross page boundary with one vector load. */
+ andl $(2 * VEC_SIZE - 1), %ecx
+ cmpl $VEC_SIZE, %ecx
+ ja L(cros_page_boundary)
+
+ /* Check the first VEC_SIZE bytes. */
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+
+# ifdef USE_AS_STRNLEN
+ jnz L(first_vec_x0_check)
+ /* Adjust length and check the end of data. */
+ subq $VEC_SIZE, %rsi
+ jbe L(max)
+# else
+ jnz L(first_vec_x0)
+# endif
+
+ /* Align data for aligned loads in the loop. */
+ addq $VEC_SIZE, %rdi
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+
+# ifdef USE_AS_STRNLEN
+ /* Adjust length. */
+ addq %rcx, %rsi
+
+ subq $(VEC_SIZE * 4), %rsi
+ jbe L(last_4x_vec_or_less)
+# endif
+ jmp L(more_4x_vec)
+
+ .p2align 4
+L(cros_page_boundary):
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ /* Remove the leading bytes. */
+ sarl %cl, %eax
+ testl %eax, %eax
+ jz L(aligned_more)
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRNLEN
+ /* Check the end of data. */
+ cmpq %rax, %rsi
+ jbe L(max)
+# endif
+ addq %rdi, %rax
+ addq %rcx, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(aligned_more):
+# ifdef USE_AS_STRNLEN
+ /* "rcx" is less than VEC_SIZE. Calculate "rsi + rcx - VEC_SIZE"
+ with "rsi - (VEC_SIZE - rcx)" instead of "(rsi + rcx) - VEC_SIZE"
+ to avoid possible addition overflow. */
+ negq %rcx
+ addq $VEC_SIZE, %rcx
+
+ /* Check the end of data. */
+ subq %rcx, %rsi
+ jbe L(max)
+# endif
+
+ addq $VEC_SIZE, %rdi
+
+# ifdef USE_AS_STRNLEN
+ subq $(VEC_SIZE * 4), %rsi
+ jbe L(last_4x_vec_or_less)
+# endif
+
+L(more_4x_vec):
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ since data is only aligned to VEC_SIZE. */
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+
+ VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x3)
+
+ addq $(VEC_SIZE * 4), %rdi
+
+# ifdef USE_AS_STRNLEN
+ subq $(VEC_SIZE * 4), %rsi
+ jbe L(last_4x_vec_or_less)
+# endif
+
+ /* Align data to 4 * VEC_SIZE. */
+ movq %rdi, %rcx
+ andl $(4 * VEC_SIZE - 1), %ecx
+ andq $-(4 * VEC_SIZE), %rdi
+
+# ifdef USE_AS_STRNLEN
+ /* Adjust length. */
+ addq %rcx, %rsi
+# endif
+
+ .p2align 4
+L(loop_4x_vec):
+ /* Compare 4 * VEC at a time forward. */
+ vmovdqa (%rdi), %ymm1
+ vmovdqa VEC_SIZE(%rdi), %ymm2
+ vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3
+ vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4
+ VPMINU %ymm1, %ymm2, %ymm5
+ VPMINU %ymm3, %ymm4, %ymm6
+ VPMINU %ymm5, %ymm6, %ymm5
+
+ VPCMPEQ %ymm5, %ymm0, %ymm5
+ vpmovmskb %ymm5, %eax
+ testl %eax, %eax
+ jnz L(4x_vec_end)
+
+ addq $(VEC_SIZE * 4), %rdi
+
+# ifndef USE_AS_STRNLEN
+ jmp L(loop_4x_vec)
+# else
+ subq $(VEC_SIZE * 4), %rsi
+ ja L(loop_4x_vec)
+
+L(last_4x_vec_or_less):
+ /* Less than 4 * VEC and aligned to VEC_SIZE. */
+ addl $(VEC_SIZE * 2), %esi
+ jle L(last_2x_vec)
+
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+
+ VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+
+ jnz L(first_vec_x2_check)
+ subl $VEC_SIZE, %esi
+ jle L(max)
+
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+
+ jnz L(first_vec_x3_check)
+ movq %r8, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(last_2x_vec):
+ addl $(VEC_SIZE * 2), %esi
+ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+
+ jnz L(first_vec_x0_check)
+ subl $VEC_SIZE, %esi
+ jle L(max)
+
+ VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1_check)
+ movq %r8, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x0_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rsi
+ jbe L(max)
+ addq %rdi, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x1_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rsi
+ jbe L(max)
+ addq $VEC_SIZE, %rax
+ addq %rdi, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x2_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rsi
+ jbe L(max)
+ addq $(VEC_SIZE * 2), %rax
+ addq %rdi, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x3_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+ cmpq %rax, %rsi
+ jbe L(max)
+ addq $(VEC_SIZE * 3), %rax
+ addq %rdi, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(max):
+ movq %r8, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(zero):
+ xorl %eax, %eax
+ ret
+# endif
+
+ .p2align 4
+L(first_vec_x0):
+ tzcntl %eax, %eax
+ addq %rdi, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x1):
+ tzcntl %eax, %eax
+ addq $VEC_SIZE, %rax
+ addq %rdi, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x2):
+ tzcntl %eax, %eax
+ addq $(VEC_SIZE * 2), %rax
+ addq %rdi, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(4x_vec_end):
+ VPCMPEQ %ymm1, %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+ VPCMPEQ %ymm2, %ymm0, %ymm2
+ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+ VPCMPEQ %ymm3, %ymm0, %ymm3
+ vpmovmskb %ymm3, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+ VPCMPEQ %ymm4, %ymm0, %ymm4
+ vpmovmskb %ymm4, %eax
+L(first_vec_x3):
+ tzcntl %eax, %eax
+ addq $(VEC_SIZE * 3), %rax
+ addq %rdi, %rax
+ subq %rdx, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ VZEROUPPER
+ ret
+
+END (STRLEN)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S
new file mode 100644
index 0000000000..7bc57b8d0f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-sse2.S
@@ -0,0 +1,23 @@
+/* strlen optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define strlen __strlen_sse2
+#endif
+
+#include "../strlen.S"
diff --git a/sysdeps/x86_64/multiarch/strlen.c b/sysdeps/x86_64/multiarch/strlen.c
new file mode 100644
index 0000000000..1758d22b8f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strlen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strlen __redirect_strlen
+# include <string.h>
+# undef strlen
+
+# define SYMBOL_NAME strlen
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ());
+# ifdef SHARED
+__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncase.c b/sysdeps/x86_64/multiarch/strncase.c
new file mode 100644
index 0000000000..798966cf3e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strncasecmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strncasecmp __redirect_strncasecmp
+# define __strncasecmp __redirect___strncasecmp
+# include <string.h>
+# undef strncasecmp
+# undef __strncasecmp
+
+# define SYMBOL_NAME strncasecmp
+# include "ifunc-strcasecmp.h"
+
+libc_ifunc_redirected (__redirect_strncasecmp, __strncasecmp,
+ IFUNC_SELECTOR ());
+
+weak_alias (__strncasecmp, strncasecmp)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S
new file mode 100644
index 0000000000..0c4e525bd4
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l-avx.S
@@ -0,0 +1,22 @@
+/* strncasecmp_l optimized with AVX.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define STRCMP_SSE42 __strncasecmp_l_avx
+#define USE_AVX 1
+#define USE_AS_STRNCASECMP_L
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strncase_l-sse2.S b/sysdeps/x86_64/multiarch/strncase_l-sse2.S
new file mode 100644
index 0000000000..e7841334b7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l-sse2.S
@@ -0,0 +1,23 @@
+/* strncasecmp_l optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define STRCMP __strncasecmp_l_sse2
+#define NO_NOLOCALE_ALIAS
+#define USE_AS_STRNCASECMP_L
+#define __strncasecmp __strncasecmp_sse2
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strncase_l-sse4_2.S b/sysdeps/x86_64/multiarch/strncase_l-sse4_2.S
new file mode 100644
index 0000000000..d2ea88c4ce
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l-sse4_2.S
@@ -0,0 +1,21 @@
+/* strncasecmp_l optimized with SSE4.2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define STRCMP_SSE42 __strncasecmp_l_sse42
+#define USE_AS_STRNCASECMP_L
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strncase_l.S b/sysdeps/x86_64/multiarch/strncase_l.S
deleted file mode 100644
index 9c0149788e..0000000000
--- a/sysdeps/x86_64/multiarch/strncase_l.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of strncasecmp and strncasecmp_l
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCMP __strncasecmp_l
-#define USE_AS_STRNCASECMP_L
-#include "strcmp.S"
-
-weak_alias (__strncasecmp_l, strncasecmp_l)
-libc_hidden_def (strncasecmp_l)
diff --git a/sysdeps/x86_64/multiarch/strncase_l.c b/sysdeps/x86_64/multiarch/strncase_l.c
new file mode 100644
index 0000000000..97631cf401
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l.c
@@ -0,0 +1,40 @@
+/* Multiple versions of strncasecmp_l.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strncasecmp_l __redirect_strncasecmp_l
+# define __strncasecmp_l __redirect___strncasecmp_l
+# include <string.h>
+# undef strncasecmp_l
+# undef __strncasecmp_l
+
+# define SYMBOL_NAME strncasecmp_l
+# include "ifunc-strcasecmp.h"
+
+libc_ifunc_redirected (__redirect_strncasecmp_l, __strncasecmp_l,
+ IFUNC_SELECTOR ());
+
+weak_alias (__strncasecmp_l, strncasecmp_l)
+# ifdef SHARED
+__hidden_ver1 (__strncasecmp_l, __GI___strncasecmp_l,
+ __redirect___strncasecmp_l)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
index a3cdbff689..93a7fab7ea 100644
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ b/sysdeps/x86_64/multiarch/strncat-c.c
@@ -1,8 +1,2 @@
#define STRNCAT __strncat_sse2
-#ifdef SHARED
-#undef libc_hidden_def
-#define libc_hidden_def(name) \
- __hidden_ver1 (__strncat_sse2, __GI___strncat, __strncat_sse2);
-#endif
-
-#include "string/strncat.c"
+#include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat.S b/sysdeps/x86_64/multiarch/strncat.S
deleted file mode 100644
index 5c1bf41453..0000000000
--- a/sysdeps/x86_64/multiarch/strncat.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncat
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
new file mode 100644
index 0000000000..841c165565
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncat.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strncat.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strncat __redirect_strncat
+# include <string.h>
+# undef strncat
+
+# define SYMBOL_NAME strncat
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
+strong_alias (strncat, __strncat);
+# ifdef SHARED
+__hidden_ver1 (strncat, __GI___strncat, __redirect_strncat)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S
new file mode 100644
index 0000000000..1678bcc235
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S
@@ -0,0 +1,3 @@
+#define STRCMP __strncmp_avx2
+#define USE_AS_STRNCMP 1
+#include "strcmp-avx2.S"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2.c b/sysdeps/x86_64/multiarch/strncmp-sse2.S
index c7a3dff747..a5ecb82b13 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2.c
+++ b/sysdeps/x86_64/multiarch/strncmp-sse2.S
@@ -1,5 +1,5 @@
-/* Tests for SSE ISA versions of vector math functions.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* strncmp optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "test-double-vlen2.h"
+#include <sysdep.h>
-#define TEST_VECTOR_cos 1
-#define TEST_VECTOR_sin 1
-#define TEST_VECTOR_sincos 1
-#define TEST_VECTOR_log 1
-#define TEST_VECTOR_exp 1
-#define TEST_VECTOR_pow 1
+#if IS_IN (libc)
+# define STRCMP __strncmp_sse2
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcmp)
+#else
+# define STRCMP strncmp
+#endif
-#include "libm-test.c"
+#define USE_AS_STRNCMP
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strncmp-sse4_2.S b/sysdeps/x86_64/multiarch/strncmp-sse4_2.S
new file mode 100644
index 0000000000..b859c1eb74
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncmp-sse4_2.S
@@ -0,0 +1,21 @@
+/* strncmp optimized with SSE4.2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define STRCMP_SSE42 __strncmp_sse42
+#define USE_AS_STRNCMP
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strncmp-ssse3.S b/sysdeps/x86_64/multiarch/strncmp-ssse3.S
index 96380a46be..fa43484b54 100644
--- a/sysdeps/x86_64/multiarch/strncmp-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strncmp-ssse3.S
@@ -1,6 +1,28 @@
-#ifdef SHARED
-# define USE_SSSE3 1
-# define STRCMP __strncmp_ssse3
-# define USE_AS_STRNCMP
-# include "../strcmp.S"
-#endif
+/* strncmp optimized with SSSE3.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define STRCMP __strncmp_ssse3
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(strcmp)
+
+#define USE_SSSE3 1
+#define USE_AS_STRNCMP
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strncmp.S b/sysdeps/x86_64/multiarch/strncmp.S
deleted file mode 100644
index fd5eb1397c..0000000000
--- a/sysdeps/x86_64/multiarch/strncmp.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncmp
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCMP strncmp
-#define USE_AS_STRNCMP
-#include "strcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
new file mode 100644
index 0000000000..02b6d0b6f5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncmp.c
@@ -0,0 +1,60 @@
+/* Multiple versions of strncmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strncmp __redirect_strncmp
+# include <string.h>
+# undef strncmp
+
+# define SYMBOL_NAME strncmp
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+/* Select the strncmp implementation for the running CPU and return a
+ pointer to it. The feature checks below are tried in order (avx2,
+ sse42, ssse3) and the first one that holds wins; sse2 is returned
+ when none of them holds. */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* avx2: needs AVX2_Usable and AVX_Fast_Unaligned_Load set and Prefer_No_VZEROUPPER clear. */
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ return OPTIMIZE (avx2);
+ /* sse42: needs the SSE4_2 CPU bit set and Slow_SSE4_2 clear. */
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
+ return OPTIMIZE (sse42);
+ /* ssse3: needs only the SSSE3 CPU bit. */
+ if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+ return OPTIMIZE (ssse3);
+ /* Fallback when none of the checks above matched. */
+ return OPTIMIZE (sse2);
+}
+
+libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
index 296c32cb5d..57c45ac7ab 100644
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ b/sysdeps/x86_64/multiarch/strncpy-c.c
@@ -1,8 +1,5 @@
#define STRNCPY __strncpy_sse2
-#ifdef SHARED
#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name) \
- __hidden_ver1 (__strncpy_sse2, __GI_strncpy, __strncpy_sse2);
-#endif
+#define libc_hidden_builtin_def(strncpy)
-#include "strncpy.c"
+#include <string/strncpy.c>
diff --git a/sysdeps/x86_64/multiarch/strncpy.S b/sysdeps/x86_64/multiarch/strncpy.S
deleted file mode 100644
index 6d87a0ba35..0000000000
--- a/sysdeps/x86_64/multiarch/strncpy.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncpy
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCPY strncpy
-#define USE_AS_STRNCPY
-#include "strcpy.S"
diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
new file mode 100644
index 0000000000..3c3de8b18e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncpy.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strncpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strncpy __redirect_strncpy
+# include <string.h>
+# undef strncpy
+
+# define SYMBOL_NAME strncpy
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strncpy, __GI_strncpy, __redirect_strncpy)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2.S b/sysdeps/x86_64/multiarch/strnlen-avx2.S
new file mode 100644
index 0000000000..c4062b22f7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strnlen-avx2.S
@@ -0,0 +1,4 @@
+#define STRLEN __strnlen_avx2
+#define USE_AS_STRNLEN 1
+
+#include "strlen-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen-sse2.S b/sysdeps/x86_64/multiarch/strnlen-sse2.S
new file mode 100644
index 0000000000..41f33f6f6f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strnlen-sse2.S
@@ -0,0 +1,28 @@
+/* strnlen optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define __strnlen __strnlen_sse2
+
+# undef weak_alias
+# define weak_alias(__strnlen, strnlen)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strnlen)
+#endif
+
+#include "../strnlen.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen.c b/sysdeps/x86_64/multiarch/strnlen.c
new file mode 100644
index 0000000000..3ab94ce230
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strnlen.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strnlen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strnlen __redirect_strnlen
+# define __strnlen __redirect___strnlen
+# include <string.h>
+# undef __strnlen
+# undef strnlen
+
+# define SYMBOL_NAME strnlen
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ());
+weak_alias (__strnlen, strnlen);
+# ifdef SHARED
+__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen)
+ __attribute__((visibility ("hidden")));
+__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen)
+ __attribute__((weak, visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
index bbf5c49d89..c58dcb5605 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
@@ -1,8 +1,4 @@
-/* Don't define multiple versions for strpbrk in static library since we
- need strpbrk before the initialization happened. */
-#ifdef SHARED
-# define USE_AS_STRPBRK
-# define STRCSPN_SSE2 __strpbrk_sse2
-# define STRCSPN_SSE42 __strpbrk_sse42
-# include "strcspn-c.c"
-#endif
+#define USE_AS_STRPBRK
+#define STRCSPN_SSE2 __strpbrk_sse2
+#define STRCSPN_SSE42 __strpbrk_sse42
+#include "strcspn-c.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.S b/sysdeps/x86_64/multiarch/strpbrk-sse2.S
new file mode 100644
index 0000000000..3c6a74db29
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strpbrk-sse2.S
@@ -0,0 +1,29 @@
+/* strpbrk optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define strcspn __strpbrk_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strpbrk)
+#endif
+
+#define USE_AS_STRPBRK
+#include <sysdeps/x86_64/strcspn.S>
diff --git a/sysdeps/x86_64/multiarch/strpbrk.S b/sysdeps/x86_64/multiarch/strpbrk.S
deleted file mode 100644
index 7201d6376f..0000000000
--- a/sysdeps/x86_64/multiarch/strpbrk.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strpbrk
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCSPN strpbrk
-#define USE_AS_STRPBRK
-#include "strcspn.S"
diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c
new file mode 100644
index 0000000000..a0d435a504
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strpbrk.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strpbrk.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strpbrk __redirect_strpbrk
+# include <string.h>
+# undef strpbrk
+
+# define SYMBOL_NAME strpbrk
+# include "ifunc-sse4_2.h"
+
+libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strpbrk, __GI_strpbrk, __redirect_strpbrk)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S
new file mode 100644
index 0000000000..4381e6ab3e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S
@@ -0,0 +1,235 @@
+/* strrchr/wcsrchr optimized with AVX2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRRCHR
+# define STRRCHR __strrchr_avx2
+# endif
+
+# ifdef USE_AS_WCSRCHR
+# define VPBROADCAST vpbroadcastd
+# define VPCMPEQ vpcmpeqd
+# else
+# define VPBROADCAST vpbroadcastb
+# define VPCMPEQ vpcmpeqb
+# endif
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+# define VEC_SIZE 32
+/* Register roles in this function:
+ %rdi - scan position; advanced by VEC_SIZE after each vector load.
+ %ymm0 - all zero, compared against to locate the nul CHAR.
+ %ymm4 - CHAR (taken from %esi) broadcast to every element.
+ %edx - in the main loop, mask of the last CHAR match remembered so
+ far, or 0 when nothing has been remembered yet.
+ %rsi - address saved together with %edx; L(return_value) computes
+ the result from it. */
+ .section .text.avx,"ax",@progbits
+ENTRY (STRRCHR)
+ movd %esi, %xmm4
+ movl %edi, %ecx
+ /* Broadcast CHAR to YMM4. */
+ VPBROADCAST %xmm4, %ymm4
+ vpxor %ymm0, %ymm0, %ymm0
+
+ /* Check if one vector load may cross a page boundary. The test is
+ conservative: the slow path is taken whenever %rdi is more than
+ VEC_SIZE bytes into its 2 * VEC_SIZE-aligned block. */
+ andl $(2 * VEC_SIZE - 1), %ecx
+ cmpl $VEC_SIZE, %ecx
+ ja L(cros_page_boundary)
+ /* Unaligned load of the first vector: %ecx = nul CHAR mask, %eax = CHAR mask. */
+ vmovdqu (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ addq $VEC_SIZE, %rdi
+ /* A CHAR in the first vector is handled at L(first_vec). */
+ testl %eax, %eax
+ jnz L(first_vec)
+ /* No CHAR here; if the string also ends here there is no match at all. */
+ testl %ecx, %ecx
+ jnz L(return_null)
+ /* Neither found: align %rdi down, clear the remembered match, enter the loop. */
+ andq $-VEC_SIZE, %rdi
+ xorl %edx, %edx
+ jmp L(aligned_loop)
+
+ .p2align 4
+L(first_vec):
+ /* Check if there is a nul CHAR. */
+ testl %ecx, %ecx
+ jnz L(char_and_nul_in_first_vec)
+
+ /* Remember the match and keep searching. */
+ movl %eax, %edx
+ movq %rdi, %rsi
+ andq $-VEC_SIZE, %rdi
+ jmp L(aligned_loop)
+
+ .p2align 4
+L(cros_page_boundary):
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %edx
+ vpmovmskb %ymm3, %eax
+ shrl %cl, %edx
+ shrl %cl, %eax
+ addq $VEC_SIZE, %rdi
+
+ /* Check if there is a CHAR. */
+ testl %eax, %eax
+ jnz L(found_char)
+
+ testl %edx, %edx
+ jnz L(return_null)
+
+ jmp L(aligned_loop)
+
+ .p2align 4
+L(found_char):
+ testl %edx, %edx
+ jnz L(char_and_nul)
+
+ /* Remember the match and keep searching. */
+ movl %eax, %edx
+ leaq (%rdi, %rcx), %rsi
+
+ .p2align 4
+L(aligned_loop):
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ addq $VEC_SIZE, %rdi
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ orl %eax, %ecx
+ jnz L(char_nor_null)
+
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ add $VEC_SIZE, %rdi
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ orl %eax, %ecx
+ jnz L(char_nor_null)
+
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ addq $VEC_SIZE, %rdi
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ orl %eax, %ecx
+ jnz L(char_nor_null)
+
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ addq $VEC_SIZE, %rdi
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ orl %eax, %ecx
+ jz L(aligned_loop)
+
+ .p2align 4
+L(char_nor_null):
+ /* The loop stopped on a vector containing a CHAR or a nul CHAR. */
+ testl %eax, %eax
+ jnz L(match)
+L(return_value):
+ testl %edx, %edx
+ jz L(return_null)
+ movl %edx, %eax
+ movq %rsi, %rdi
+
+# ifdef USE_AS_WCSRCHR
+ /* vpmovmskb yields 4 mask bits per matching wide CHAR; keep only the lowest of each group so bsr gives the offset of the CHAR's first byte. */
+ andl $0x11111111, %eax
+# endif
+ bsrl %eax, %eax
+ leaq -VEC_SIZE(%rdi, %rax), %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(match):
+ /* This vector has a CHAR. Check whether it also has a nul CHAR. */
+ vpmovmskb %ymm2, %ecx
+ testl %ecx, %ecx
+ jnz L(find_nul)
+
+ /* Remember the match and keep searching. */
+ movl %eax, %edx
+ movq %rdi, %rsi
+ jmp L(aligned_loop)
+
+ .p2align 4
+L(find_nul):
+# ifdef USE_AS_WCSRCHR
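+ /* vpmovmskb yields 4 mask bits per matching wide CHAR; keep only the lowest of each group in both masks so bsr gives the offset of the CHAR's first byte. */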
+ andl $0x11111111, %ecx
+ andl $0x11111111, %eax
+# endif
+ /* Mask out any matching bits after the nul CHAR. */
+ movl %ecx, %r8d
+ subl $1, %r8d
+ xorl %ecx, %r8d
+ andl %r8d, %eax
+ testl %eax, %eax
+ /* If there is no CHAR here, return the remembered one. */
+ jz L(return_value)
+ bsrl %eax, %eax
+ leaq -VEC_SIZE(%rdi, %rax), %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(char_and_nul):
+ /* The first vector has both a CHAR and a nul CHAR. */
+ addq %rcx, %rdi
+ movl %edx, %ecx
+L(char_and_nul_in_first_vec):
+# ifdef USE_AS_WCSRCHR
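+ /* vpmovmskb yields 4 mask bits per matching wide CHAR; keep only the lowest of each group in both masks so bsr gives the offset of the CHAR's first byte. */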
+ andl $0x11111111, %ecx
+ andl $0x11111111, %eax
+# endif
+ /* Mask out any matching bits after the nul CHAR. */
+ movl %ecx, %r8d
+ subl $1, %r8d
+ xorl %ecx, %r8d
+ andl %r8d, %eax
+ testl %eax, %eax
+ /* Return null pointer if the nul CHAR comes first. */
+ jz L(return_null)
+ bsrl %eax, %eax
+ leaq -VEC_SIZE(%rdi, %rax), %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(return_null):
+ xorl %eax, %eax
+ VZEROUPPER
+ ret
+
+END (STRRCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S
new file mode 100644
index 0000000000..0ec76fe9cc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S
@@ -0,0 +1,28 @@
+/* strrchr optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define strrchr __strrchr_sse2
+/* Make weak_alias and libc_hidden_builtin_def expand to nothing.  */
+# undef weak_alias
+# define weak_alias(strrchr, rindex)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strrchr)
+#endif
+/* Reuse ../strrchr.S; in libc its strrchr becomes __strrchr_sse2.  */
+#include "../strrchr.S"
diff --git a/sysdeps/x86_64/multiarch/strrchr.c b/sysdeps/x86_64/multiarch/strrchr.c
new file mode 100644
index 0000000000..a719edde10
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strrchr.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strrchr __redirect_strrchr
+# include <string.h>
+# undef strrchr
+/* <string.h> above was read with strrchr renamed to __redirect_strrchr.  */
+# define SYMBOL_NAME strrchr
+# include "ifunc-avx2.h"
+/* IFUNC_SELECTOR is presumably provided by ifunc-avx2.h -- confirm.  */
+libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ());
+weak_alias (strrchr, rindex);
+# ifdef SHARED
+__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index 9675f9360e..4554cff0c2 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -1,5 +1,5 @@
/* strspn with SSE4.2 intrinsics
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -52,7 +52,7 @@
We exit from the loop for case 1. */
-extern size_t __strspn_sse2 (const char *, const char *);
+extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
size_t
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.S b/sysdeps/x86_64/multiarch/strspn-sse2.S
new file mode 100644
index 0000000000..4686cdd55d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn-sse2.S
@@ -0,0 +1,28 @@
+/* strspn optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+/* In libc, rename strspn to __strspn_sse2 for the file included below.  */
+# include <sysdep.h>
+# define strspn __strspn_sse2
+/* Make libc_hidden_builtin_def expand to nothing.  */
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strspn)
+#endif
+/* Reuse sysdeps/x86_64/strspn.S with the definitions above in effect.  */
+#include <sysdeps/x86_64/strspn.S>
diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c
new file mode 100644
index 0000000000..56ab4d9558
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strspn.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strspn __redirect_strspn
+# include <string.h>
+# undef strspn
+/* <string.h> above was read with strspn renamed to __redirect_strspn.  */
+# define SYMBOL_NAME strspn
+# include "ifunc-sse4_2.h"
+/* IFUNC_SELECTOR is presumably provided by ifunc-sse4_2.h -- confirm.  */
+libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ());
+/* The hidden __GI_strspn version is declared only for SHARED builds.  */
+# ifdef SHARED
+__hidden_ver1 (strspn, __GI_strspn, __redirect_strspn)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
index 4ead1dfaf5..8188b8f643 100644
--- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* strstr with unaligned loads
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index eecba2243e..30ce597a16 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -1,6 +1,6 @@
/* Multiple versions of strstr.
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c
index 4eb0c16cd8..aa872f27db 100644
--- a/sysdeps/x86_64/multiarch/test-multiarch.c
+++ b/sysdeps/x86_64/multiarch/test-multiarch.c
@@ -1,6 +1,6 @@
/* Test CPU feature data.
This file is part of the GNU C Library.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -16,7 +16,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <init-arch.h>
+#include <cpu-features.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
index 7921be5b57..2838736544 100644
--- a/sysdeps/x86_64/multiarch/varshift.c
+++ b/sysdeps/x86_64/multiarch/varshift.c
@@ -1,5 +1,5 @@
/* Helper for variable shifts of SSE registers.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
+ Copyright (C) 2010-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
index 7b27d0e9dd..76f2759874 100644
--- a/sysdeps/x86_64/multiarch/varshift.h
+++ b/sysdeps/x86_64/multiarch/varshift.h
@@ -1,5 +1,5 @@
/* Helper for variable shifts of SSE registers.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
+ Copyright (C) 2010-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2.S b/sysdeps/x86_64/multiarch/wcschr-avx2.S
new file mode 100644
index 0000000000..67726b6837
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __wcschr_avx2
+#define USE_AS_WCSCHR 1
+#include "strchr-avx2.S" /* Reused with STRCHR and USE_AS_WCSCHR set above.  */
diff --git a/sysdeps/x86_64/multiarch/wcschr-sse2.S b/sysdeps/x86_64/multiarch/wcschr-sse2.S
new file mode 100644
index 0000000000..67e4742ef1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-sse2.S
@@ -0,0 +1,30 @@
+/* wcschr optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define __wcschr __wcschr_sse2
+/* Make the alias and hidden-definition macros expand to nothing.  */
+# undef weak_alias
+# define weak_alias(__wcschr, wcschr)
+# undef libc_hidden_def
+# define libc_hidden_def(__wcschr)
+# undef libc_hidden_weak
+# define libc_hidden_weak(wcschr)
+#endif
+/* Reuse ../wcschr.S; in libc its __wcschr becomes __wcschr_sse2.  */
+#include "../wcschr.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr.c b/sysdeps/x86_64/multiarch/wcschr.c
new file mode 100644
index 0000000000..20a03833b9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr.c
@@ -0,0 +1,39 @@
+/* Multiple versions of wcschr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wcschr __redirect_wcschr
+# define __wcschr __redirect___wcschr
+# include <wchar.h>
+# undef wcschr
+# undef __wcschr
+/* <wchar.h> above was read with both names given a __redirect_ prefix.  */
+# define SYMBOL_NAME wcschr
+# include "ifunc-avx2.h"
+/* IFUNC_SELECTOR is presumably provided by ifunc-avx2.h -- confirm.  */
+libc_ifunc_redirected (__redirect_wcschr, __wcschr, IFUNC_SELECTOR ());
+weak_alias (__wcschr, wcschr);
+# ifdef SHARED
+__hidden_ver1 (__wcschr, __GI___wcschr, __redirect___wcschr)
+ __attribute__((visibility ("hidden")));
+__hidden_ver1 (wcschr, __GI_wcschr, __redirect_wcschr)
+ __attribute__((weak, visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2.S b/sysdeps/x86_64/multiarch/wcscmp-avx2.S
new file mode 100644
index 0000000000..e5da4da689
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscmp-avx2.S
@@ -0,0 +1,4 @@
+#define STRCMP __wcscmp_avx2
+#define USE_AS_WCSCMP 1
+/* Reuse strcmp-avx2.S with STRCMP and USE_AS_WCSCMP defined as above.  */
+#include "strcmp-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcscmp-sse2.S b/sysdeps/x86_64/multiarch/wcscmp-sse2.S
new file mode 100644
index 0000000000..b129d1c073
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscmp-sse2.S
@@ -0,0 +1,23 @@
+/* wcscmp optimized with SSE2.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define __wcscmp __wcscmp_sse2
+#endif
+/* Reuse ../wcscmp.S; in libc its __wcscmp becomes __wcscmp_sse2.  */
+#include "../wcscmp.S"
diff --git a/sysdeps/x86_64/multiarch/wcscmp.c b/sysdeps/x86_64/multiarch/wcscmp.c
new file mode 100644
index 0000000000..74d92cf0f9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscmp.c
@@ -0,0 +1,37 @@
+/* Multiple versions of wcscmp.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wcscmp __redirect_wcscmp
+# define __wcscmp __redirect___wcscmp
+# include <wchar.h>
+# undef wcscmp
+# undef __wcscmp
+/* <wchar.h> above was read with both names given a __redirect_ prefix.  */
+# define SYMBOL_NAME wcscmp
+# include "ifunc-avx2.h"
+/* IFUNC_SELECTOR is presumably provided by ifunc-avx2.h -- confirm.  */
+libc_ifunc_redirected (__redirect_wcscmp, __wcscmp, IFUNC_SELECTOR ());
+weak_alias (__wcscmp, wcscmp)
+/* NOTE(review): passes __redirect_wcscmp, not __redirect___wcscmp -- confirm.  */
+# ifdef SHARED
+__hidden_ver1 (__wcscmp, __GI___wcscmp, __redirect_wcscmp)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index 341e57a5ca..ea1589052b 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -1,5 +1,5 @@
/* wcscpy with SSSE3
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
new file mode 100644
index 0000000000..f23b1fd853
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscpy.c
@@ -0,0 +1,44 @@
+/* Multiple versions of wcscpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wcscpy __redirect_wcscpy
+# include <wchar.h>
+# undef wcscpy
+/* <wchar.h> above was read with wcscpy renamed to __redirect_wcscpy.  */
+# define SYMBOL_NAME wcscpy
+# include <init-arch.h>
+/* REDIRECT_NAME and OPTIMIZE presumably come from init-arch.h -- confirm.  */
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+/* Return the wcscpy implementation chosen from the CPU features.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* Use the SSSE3 candidate when the CPU reports SSSE3.  */
+ if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+ return OPTIMIZE (ssse3);
+ /* Otherwise use the SSE2 candidate.  */
+ return OPTIMIZE (sse2);
+}
+/* Define wcscpy via libc_ifunc_redirected, using the selector above.  */
+libc_ifunc_redirected (__redirect_wcscpy, wcscpy, IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2.S b/sysdeps/x86_64/multiarch/wcslen-avx2.S
new file mode 100644
index 0000000000..c9224f1bc5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcslen-avx2.S
@@ -0,0 +1,4 @@
+#define STRLEN __wcslen_avx2
+#define USE_AS_WCSLEN 1
+/* Reuse strlen-avx2.S with STRLEN and USE_AS_WCSLEN defined as above.  */
+#include "strlen-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcslen-sse2.S b/sysdeps/x86_64/multiarch/wcslen-sse2.S
new file mode 100644
index 0000000000..6031978363
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcslen-sse2.S
@@ -0,0 +1,26 @@
+/* wcslen optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define __wcslen __wcslen_sse2
+/* Make weak_alias expand to nothing.  */
+# undef weak_alias
+# define weak_alias(__wcslen, wcslen)
+#endif
+/* Reuse ../wcslen.S; in libc its __wcslen becomes __wcslen_sse2.  */
+#include "../wcslen.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/multiarch/wcslen.c
index 57a0eee5ba..6d06e47cbd 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor.S
+++ b/sysdeps/x86_64/multiarch/wcslen.c
@@ -1,6 +1,7 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Multiple versions of wcslen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -16,23 +17,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <machine/asm.h>
-#include <init-arch.h>
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define __wcslen __redirect_wcslen
+# include <wchar.h>
+# undef __wcslen
+# define SYMBOL_NAME wcslen
+# include "ifunc-avx2.h"
-ENTRY(__floor)
- .type __floor, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __floor_sse41(%rip), %rax
- HAS_CPU_FEATURE (SSE4_1)
- jnz 2f
- leaq __floor_c(%rip), %rax
-2: ret
-END(__floor)
-weak_alias (__floor, floor)
-
-
-ENTRY(__floor_sse41)
- roundsd $1, %xmm0, %xmm0
- ret
-END(__floor_sse41)
+libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ());
+weak_alias (__wcslen, wcslen);
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
new file mode 100644
index 0000000000..4fa1de4d3f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
@@ -0,0 +1,5 @@
+#define STRCMP __wcsncmp_avx2
+#define USE_AS_STRNCMP 1
+#define USE_AS_WCSCMP 1
+/* Reuse strcmp-avx2.S with the three macros above defined.  */
+#include "strcmp-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c b/sysdeps/x86_64/multiarch/wcsncmp-sse2.c
new file mode 100644
index 0000000000..2bc7b4f693
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsncmp-sse2.c
@@ -0,0 +1,20 @@
+/* wcsncmp optimized with SSE2.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+/* Build wcsmbs/wcsncmp.c with WCSNCMP defined as __wcsncmp_sse2.  */
+#define WCSNCMP __wcsncmp_sse2
+#include <wcsmbs/wcsncmp.c>
diff --git a/sysdeps/x86_64/multiarch/wcsncmp.c b/sysdeps/x86_64/multiarch/wcsncmp.c
new file mode 100644
index 0000000000..90e9a352d9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsncmp.c
@@ -0,0 +1,31 @@
+/* Multiple versions of wcsncmp.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wcsncmp __redirect_wcsncmp
+# define __wcsncmp __redirect___wcsncmp
+# include <wchar.h>
+# undef wcsncmp
+# undef __wcsncmp
+/* <wchar.h> above was read with both names given a __redirect_ prefix.  */
+# define SYMBOL_NAME wcsncmp
+# include "ifunc-avx2.h"
+/* IFUNC_SELECTOR is presumably provided by ifunc-avx2.h -- confirm.  */
+libc_ifunc_redirected (__redirect_wcsncmp, wcsncmp, IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S
new file mode 100644
index 0000000000..fac83546b5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S
@@ -0,0 +1,5 @@
+#define STRLEN __wcsnlen_avx2
+#define USE_AS_WCSLEN 1
+#define USE_AS_STRNLEN 1
+/* Reuse strlen-avx2.S with the three macros above defined.  */
+#include "strlen-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
new file mode 100644
index 0000000000..e1ec7cfbb5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
@@ -0,0 +1,9 @@
+#if IS_IN (libc)
+# include <wchar.h>
+/* In libc, WCSNLEN is defined as __wcsnlen_sse2 for the file below.  */
+# define WCSNLEN __wcsnlen_sse2
+/* Declare __wcsnlen_sse2 with the same type as wcsnlen.  */
+extern __typeof (wcsnlen) __wcsnlen_sse2;
+#endif
+/* Presumably wcsmbs/wcsnlen.c names its function WCSNLEN -- confirm.  */
+#include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
new file mode 100644
index 0000000000..a8cab0cb00
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
@@ -0,0 +1,5 @@
+#define AS_WCSLEN
+#define AS_STRNLEN
+#define strlen __wcsnlen_sse4_1
+/* Reuse ../strlen.S with strlen renamed and both AS_* macros defined.  */
+#include "../strlen.S"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
new file mode 100644
index 0000000000..bd376057e3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -0,0 +1,51 @@
+/* Multiple versions of wcsnlen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define __wcsnlen __redirect_wcsnlen
+# include <wchar.h>
+# undef __wcsnlen
+/* <wchar.h> above was read with __wcsnlen renamed to __redirect_wcsnlen.  */
+# define SYMBOL_NAME wcsnlen
+# include <init-arch.h>
+/* REDIRECT_NAME and OPTIMIZE presumably come from init-arch.h -- confirm.  */
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+/* Return the wcsnlen implementation chosen from the CPU features.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* AVX2 candidate: all three arch-feature tests below must pass.  */
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ return OPTIMIZE (avx2);
+ /* Next choice: the SSE4.1 candidate when the CPU reports SSE4_1.  */
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+ return OPTIMIZE (sse4_1);
+ /* Otherwise use the SSE2 candidate.  */
+ return OPTIMIZE (sse2);
+}
+/* Define __wcsnlen via the selector above; wcsnlen is its weak alias.  */
+libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
+weak_alias (__wcsnlen, wcsnlen);
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S
new file mode 100644
index 0000000000..cf8a239ab2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S
@@ -0,0 +1,3 @@
+#define STRRCHR __wcsrchr_avx2
+#define USE_AS_WCSRCHR 1
+#include "strrchr-avx2.S" /* Reused with STRRCHR and USE_AS_WCSRCHR set above.  */
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
new file mode 100644
index 0000000000..d015e95317
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
@@ -0,0 +1,23 @@
+/* wcsrchr optimized with SSE2.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define wcsrchr __wcsrchr_sse2
+#endif
+/* Reuse ../wcsrchr.S; in libc its wcsrchr becomes __wcsrchr_sse2.  */
+#include "../wcsrchr.S"
diff --git a/sysdeps/x86_64/multiarch/wcsrchr.c b/sysdeps/x86_64/multiarch/wcsrchr.c
new file mode 100644
index 0000000000..219fc828a6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr.c
@@ -0,0 +1,29 @@
+/* Multiple versions of wcsrchr.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wcsrchr __redirect_wcsrchr
+# include <wchar.h>
+# undef wcsrchr
+/* <wchar.h> above was read with wcsrchr renamed to __redirect_wcsrchr.  */
+# define SYMBOL_NAME wcsrchr
+# include "ifunc-avx2.h"
+/* IFUNC_SELECTOR is presumably provided by ifunc-avx2.h -- confirm.  */
+libc_ifunc_redirected (__redirect_wcsrchr, wcsrchr, IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2.S b/sysdeps/x86_64/multiarch/wmemchr-avx2.S
new file mode 100644
index 0000000000..282854f1a1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemchr-avx2.S
@@ -0,0 +1,4 @@
+#define MEMCHR __wmemchr_avx2
+#define USE_AS_WMEMCHR 1
+/* Reuse memchr-avx2.S with MEMCHR and USE_AS_WMEMCHR defined as above.  */
+#include "memchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wmemchr-sse2.S b/sysdeps/x86_64/multiarch/wmemchr-sse2.S
new file mode 100644
index 0000000000..70a965d552
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemchr-sse2.S
@@ -0,0 +1,4 @@
+#define USE_AS_WMEMCHR 1
+#define wmemchr __wmemchr_sse2
+/* Reuse ../memchr.S with USE_AS_WMEMCHR set and wmemchr renamed.  */
+#include "../memchr.S"
diff --git a/sysdeps/x86_64/multiarch/wmemchr.c b/sysdeps/x86_64/multiarch/wmemchr.c
new file mode 100644
index 0000000000..6d833702c6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemchr.c
@@ -0,0 +1,39 @@
+/* Multiple versions of wmemchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wmemchr __redirect_wmemchr
+# define __wmemchr __redirect___wmemchr
+# include <wchar.h>
+# undef wmemchr
+# undef __wmemchr
+/* <wchar.h> above was read with both names given a __redirect_ prefix.  */
+# define SYMBOL_NAME wmemchr
+# include "ifunc-avx2.h"
+/* IFUNC_SELECTOR is presumably provided by ifunc-avx2.h -- confirm.  */
+libc_ifunc_redirected (__redirect_wmemchr, __wmemchr, IFUNC_SELECTOR ());
+weak_alias (__wmemchr, wmemchr)
+# ifdef SHARED
+__hidden_ver1 (__wmemchr, __GI___wmemchr, __redirect___wmemchr)
+ __attribute__((visibility ("hidden")));
+__hidden_ver1 (wmemchr, __GI_wmemchr, __redirect_wmemchr)
+ __attribute__((weak, visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe.S
new file mode 100644
index 0000000000..bfa1a16a35
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe.S
@@ -0,0 +1,4 @@
+#define MEMCMP __wmemcmp_avx2_movbe
+#define USE_AS_WMEMCMP 1
+/* Reuse memcmp-avx2-movbe.S with MEMCMP and USE_AS_WMEMCMP defined.  */
+#include "memcmp-avx2-movbe.S"
diff --git a/sysdeps/x86_64/multiarch/wmemcmp.c b/sysdeps/x86_64/multiarch/wmemcmp.c
new file mode 100644
index 0000000000..3f4a7422f3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemcmp.c
@@ -0,0 +1,30 @@
+/* Multiple versions of wmemcmp.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wmemcmp __redirect_wmemcmp
+# include <wchar.h>
+# undef wmemcmp
+
+# define SYMBOL_NAME wmemcmp
+# include "ifunc-memcmp.h"
+
+libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemset.c b/sysdeps/x86_64/multiarch/wmemset.c
new file mode 100644
index 0000000000..9fee77ea81
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemset.c
@@ -0,0 +1,40 @@
+/* Multiple versions of wmemset.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wmemset __redirect_wmemset
+# define __wmemset __redirect___wmemset
+# include <wchar.h>
+# undef wmemset
+# undef __wmemset
+
+# define SYMBOL_NAME wmemset
+# include "ifunc-wmemset.h"
+
+libc_ifunc_redirected (__redirect_wmemset, __wmemset, IFUNC_SELECTOR ());
+weak_alias (__wmemset, wmemset)
+
+# ifdef SHARED
+__hidden_ver1 (__wmemset, __GI___wmemset, __redirect___wmemset)
+ __attribute__ ((visibility ("hidden")));
+__hidden_ver1 (wmemset, __GI_wmemset, __redirect_wmemset)
+ __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S b/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S
new file mode 100644
index 0000000000..140c93d6f0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of wmemset_chk for x86-64.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/wmemset_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemset_chk.c b/sysdeps/x86_64/multiarch/wmemset_chk.c
new file mode 100644
index 0000000000..88280192c5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemset_chk.c
@@ -0,0 +1,31 @@
+/* Multiple versions of wmemset_chk.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc.so. */
+#if IS_IN (libc) && defined SHARED
+# define __wmemset_chk __redirect_wmemset_chk
+# include <wchar.h>
+# undef __wmemset_chk
+
+# define SYMBOL_NAME wmemset_chk
+# include "ifunc-wmemset.h"
+
+libc_ifunc_redirected (__redirect_wmemset_chk, __wmemset_chk,
+ IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/nptl/Makefile b/sysdeps/x86_64/nptl/Makefile
index 9b64b533ee..73024033ee 100644
--- a/sysdeps/x86_64/nptl/Makefile
+++ b/sysdeps/x86_64/nptl/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 2002-2016 Free Software Foundation, Inc.
+# Copyright (C) 2002-2018 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/nptl/pthread-offsets.h b/sysdeps/x86_64/nptl/pthread-offsets.h
new file mode 100644
index 0000000000..16c6b0d9fd
--- /dev/null
+++ b/sysdeps/x86_64/nptl/pthread-offsets.h
@@ -0,0 +1,5 @@
+#define __PTHREAD_MUTEX_NUSERS_OFFSET 12
+#define __PTHREAD_MUTEX_KIND_OFFSET 16
+#define __PTHREAD_MUTEX_SPINS_OFFSET 20
+#define __PTHREAD_MUTEX_ELISION_OFFSET 22
+#define __PTHREAD_MUTEX_LIST_OFFSET 24
diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S
index b871241617..730fd65034 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_lock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_lock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
index c9c53171fe..a8f1b72d60 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_trylock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
diff --git a/sysdeps/x86_64/nptl/pthread_spin_unlock.S b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
index 188de2e8cb..afd114e855 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_unlock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
diff --git a/sysdeps/x86_64/nptl/pthreaddef.h b/sysdeps/x86_64/nptl/pthreaddef.h
index 9397efc631..036deb5772 100644
--- a/sysdeps/x86_64/nptl/pthreaddef.h
+++ b/sysdeps/x86_64/nptl/pthreaddef.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym
index aeb752673a..ae8034743b 100644
--- a/sysdeps/x86_64/nptl/tcb-offsets.sym
+++ b/sysdeps/x86_64/nptl/tcb-offsets.sym
@@ -4,7 +4,6 @@
RESULT offsetof (struct pthread, result)
TID offsetof (struct pthread, tid)
-PID offsetof (struct pthread, pid)
CANCELHANDLING offsetof (struct pthread, cancelhandling)
CLEANUP_JMP_BUF offsetof (struct pthread, cleanup_jmp_buf)
CLEANUP offsetof (struct pthread, cleanup)
@@ -13,9 +12,8 @@ MUTEX_FUTEX offsetof (pthread_mutex_t, __data.__lock)
MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
VGETCPU_CACHE_OFFSET offsetof (tcbhead_t, vgetcpu_cache)
-#ifndef __ASSUME_PRIVATE_FUTEX
-PRIVATE_FUTEX offsetof (tcbhead_t, private_futex)
-#endif
+FEATURE_1_OFFSET offsetof (tcbhead_t, feature_1)
+SSP_BASE_OFFSET offsetof (tcbhead_t, ssp_base)
-- Not strictly offsets, but these values are also used in the TCB.
TCB_CANCELSTATE_BITMASK CANCELSTATE_BITMASK
diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h
index 2b061a07c6..e88561c934 100644
--- a/sysdeps/x86_64/nptl/tls.h
+++ b/sysdeps/x86_64/nptl/tls.h
@@ -1,5 +1,5 @@
/* Definition for thread-local data handling. nptl/x86_64 version.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -26,8 +26,9 @@
# include <stdint.h>
# include <stdlib.h>
# include <sysdep.h>
-# include <libc-internal.h>
+# include <libc-pointer-arith.h> /* For cast_to_integer. */
# include <kernel-features.h>
+# include <dl-dtv.h>
/* Replacement type for __m128 since this file is included by ld.so,
which is compiled with -mno-sse. It must not change the alignment
@@ -38,18 +39,6 @@ typedef struct
} __128bits;
-/* Type for the dtv. */
-typedef union dtv
-{
- size_t counter;
- struct
- {
- void *val;
- bool is_static;
- } pointer;
-} dtv_t;
-
-
typedef struct
{
void *tcb; /* Pointer to the TCB. Not necessarily the
@@ -62,17 +51,17 @@ typedef struct
uintptr_t stack_guard;
uintptr_t pointer_guard;
unsigned long int vgetcpu_cache[2];
-# ifndef __ASSUME_PRIVATE_FUTEX
- int private_futex;
-# else
- int __glibc_reserved1;
-# endif
+ /* Bit 0: X86_FEATURE_1_IBT.
+ Bit 1: X86_FEATURE_1_SHSTK.
+ */
+ unsigned int feature_1;
int __glibc_unused1;
/* Reservation of some values for the TM ABI. */
void *__private_tm[4];
/* GCC split stack support. */
void *__private_ss;
- long int __glibc_reserved2;
+ /* The lowest address of shadow stack. */
+ unsigned long long int ssp_base;
/* Must be kept even if it is no longer used by glibc since programs,
like AddressSanitizer, depend on the size of tcbhead_t. */
__128bits __glibc_unused2[8][4] __attribute__ ((aligned (32)));
@@ -80,6 +69,23 @@ typedef struct
void *__padding[8];
} tcbhead_t;
+# ifdef __ILP32__
+/* morestack.S in libgcc uses offset 0x40 to access __private_ss. */
+_Static_assert (offsetof (tcbhead_t, __private_ss) == 0x40,
+ "offset of __private_ss != 0x40");
+/* NB: ssp_base used to be "long int __glibc_reserved2", which was
+ changed from 32 bits to 64 bits. Make sure that the offset of the
+ next field, __glibc_unused2, is unchanged. */
+_Static_assert (offsetof (tcbhead_t, __glibc_unused2) == 0x60,
+ "offset of __glibc_unused2 != 0x60");
+# else
+/* morestack.S in libgcc uses offset 0x70 to access __private_ss. */
+_Static_assert (offsetof (tcbhead_t, __private_ss) == 0x70,
+ "offset of __private_ss != 0x70");
+_Static_assert (offsetof (tcbhead_t, __glibc_unused2) == 0x80,
+ "offset of __glibc_unused2 != 0x80");
+# endif
+
#else /* __ASSEMBLER__ */
# include <tcb-offsets.h>
#endif
@@ -337,18 +343,6 @@ typedef struct
abort (); })
-# define CALL_THREAD_FCT(descr) \
- ({ void *__res; \
- asm volatile ("movq %%fs:%P2, %%rdi\n\t" \
- "callq *%%fs:%P1" \
- : "=a" (__res) \
- : "i" (offsetof (struct pthread, start_routine)), \
- "i" (offsetof (struct pthread, arg)) \
- : "di", "si", "cx", "dx", "r8", "r9", "r10", "r11", \
- "memory", "cc"); \
- __res; })
-
-
/* Set the stack guard field in TCB head. */
# define THREAD_SET_STACK_GUARD(value) \
THREAD_SETMEM (THREAD_SELF, header.stack_guard, value)
@@ -366,6 +360,7 @@ typedef struct
/* Get and set the global scope generation counter in the TCB head. */
+# define THREAD_GSCOPE_IN_TCB 1
# define THREAD_GSCOPE_FLAG_UNUSED 0
# define THREAD_GSCOPE_FLAG_USED 1
# define THREAD_GSCOPE_FLAG_WAIT 2
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index f90b7921a1..cf972768ec 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -1,6 +1,6 @@
/* fast SSE2 memchr with 64 byte loop and pmaxub instruction using
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -196,11 +196,6 @@ L(matches32):
lea 32(%rax, %rdi), %rax
ret
- .p2align 4
-L(return_null):
- xor %rax, %rax
- ret
-
END (__rawmemchr)
weak_alias (__rawmemchr, rawmemchr)
diff --git a/sysdeps/x86_64/rshift.S b/sysdeps/x86_64/rshift.S
index c88c6d82bb..1d018b857b 100644
--- a/sysdeps/x86_64/rshift.S
+++ b/sysdeps/x86_64/rshift.S
@@ -1,5 +1,5 @@
/* x86-64 __mpn_rshift --
- Copyright (C) 2007-2016 Free Software Foundation, Inc.
+ Copyright (C) 2007-2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/rtld-offsets.sym b/sysdeps/x86_64/rtld-offsets.sym
new file mode 100644
index 0000000000..fd41b51521
--- /dev/null
+++ b/sysdeps/x86_64/rtld-offsets.sym
@@ -0,0 +1,6 @@
+#define SHARED
+#include <ldsodefs.h>
+
+--
+
+GL_TLS_GENERATION_OFFSET offsetof (struct rtld_global, _dl_tls_generation)
diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c
index 0834e711b3..af5bbcc044 100644
--- a/sysdeps/x86_64/sched_cpucount.c
+++ b/sysdeps/x86_64/sched_cpucount.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2007-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/setjmp.S b/sysdeps/x86_64/setjmp.S
index 3e93967c2f..78a8bf4644 100644
--- a/sysdeps/x86_64/setjmp.S
+++ b/sysdeps/x86_64/setjmp.S
@@ -1,5 +1,5 @@
/* setjmp for x86-64.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,9 +18,15 @@
#include <sysdep.h>
#include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
#include <asm-syntax.h>
#include <stap-probe.h>
+/* Don't save shadow stack register if shadow stack isn't enabled. */
+#if !SHSTK_ENABLED
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
ENTRY (__sigsetjmp)
/* Save registers. */
movq %rbx, (JB_RBX*8)(%rdi)
@@ -54,17 +60,28 @@ ENTRY (__sigsetjmp)
#endif
movq %rax, (JB_PC*8)(%rdi)
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled. */
+ testl $X86_FEATURE_1_SHSTK, %fs:FEATURE_1_OFFSET
+ jz L(skip_ssp)
+# else
+ xorl %eax, %eax
+# endif
+ /* Get the current Shadow-Stack-Pointer and save it. */
+ rdsspq %rax
+ movq %rax, SHADOW_STACK_POINTER_OFFSET(%rdi)
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+L(skip_ssp):
+# endif
+#endif
#if IS_IN (rtld)
/* In ld.so we never save the signal mask. */
xorl %eax, %eax
retq
#else
/* Make a tail call to __sigjmp_save; it takes the same args. */
-# ifdef PIC
- jmp C_SYMBOL_NAME (__sigjmp_save)@PLT
-# else
jmp __sigjmp_save
-# endif
#endif
END (__sigsetjmp)
hidden_def (__sigsetjmp)
diff --git a/sysdeps/x86_64/stackinfo.h b/sysdeps/x86_64/stackinfo.h
index 848aa7754c..f7a5672f27 100644
--- a/sysdeps/x86_64/stackinfo.h
+++ b/sysdeps/x86_64/stackinfo.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/start.S b/sysdeps/x86_64/start.S
index 1374974307..354d2e6ec7 100644
--- a/sysdeps/x86_64/start.S
+++ b/sysdeps/x86_64/start.S
@@ -1,5 +1,5 @@
/* Startup code compliant to the ELF x86-64 ABI.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
@@ -96,27 +96,28 @@ ENTRY (_start)
which grow downwards). */
pushq %rsp
-#ifdef SHARED
+#ifdef PIC
/* Pass address of our own entry points to .fini and .init. */
mov __libc_csu_fini@GOTPCREL(%rip), %R8_LP
mov __libc_csu_init@GOTPCREL(%rip), %RCX_LP
mov main@GOTPCREL(%rip), %RDI_LP
-
- /* Call the user's main function, and exit with its value.
- But let the libc call main. */
- call __libc_start_main@PLT
#else
/* Pass address of our own entry points to .fini and .init. */
mov $__libc_csu_fini, %R8_LP
mov $__libc_csu_init, %RCX_LP
mov $main, %RDI_LP
+#endif
/* Call the user's main function, and exit with its value.
- But let the libc call main. */
- call __libc_start_main
-#endif
+ But let the libc call main. Since __libc_start_main in
+ libc.so is called very early, lazy binding isn't relevant
+ here. Use an indirect branch via the GOT to avoid an extra
+ branch to the PLT slot. For a static executable, ld in
+ binutils 2.26 or above can convert the indirect branch into
+ a direct branch. */
+ call *__libc_start_main@GOTPCREL(%rip)
hlt /* Crash if somehow `exit' does return. */
END (_start)
diff --git a/sysdeps/x86_64/strcasecmp_l-nonascii.c b/sysdeps/x86_64/strcasecmp_l-nonascii.c
index 30e8969603..9ba9bc808c 100644
--- a/sysdeps/x86_64/strcasecmp_l-nonascii.c
+++ b/sysdeps/x86_64/strcasecmp_l-nonascii.c
@@ -1,7 +1,7 @@
#include <string.h>
extern int __strcasecmp_l_nonascii (const char *__s1, const char *__s2,
- __locale_t __loc);
+ locale_t __loc);
#define __strcasecmp_l __strcasecmp_l_nonascii
#define USE_IN_EXTENDED_LOCALE_MODEL 1
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
index dadf4c76b2..9a4a4e6feb 100644
--- a/sysdeps/x86_64/strcat.S
+++ b/sysdeps/x86_64/strcat.S
@@ -1,6 +1,6 @@
/* strcat(dest, src) -- Append SRC on the end of DEST.
Optimized for x86-64.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index 4431fee648..1d5112746f 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -1,6 +1,6 @@
/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
For AMD x86-64.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S
index 7b52d699ee..149f3a9ced 100644
--- a/sysdeps/x86_64/strchrnul.S
+++ b/sysdeps/x86_64/strchrnul.S
@@ -1,7 +1,7 @@
/* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR
or terminating NUL byte.
For AMD x86-64.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index c5c44d4e27..e16945b961 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -1,5 +1,5 @@
/* Highly optimized version for x86-64.
- Copyright (C) 1999-2016 Free Software Foundation, Inc.
+ Copyright (C) 1999-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Based on i686 version contributed by Ulrich Drepper
<drepper@cygnus.com>, 1999.
@@ -233,7 +233,7 @@ LABEL(bigger):
lea LABEL(unaligned_table)(%rip), %r10
movslq (%r10, %r9,4), %r9
lea (%r10, %r9), %r10
- jmp *%r10 /* jump to corresponding case */
+ _CET_NOTRACK jmp *%r10 /* jump to corresponding case */
/*
* The following cases will be handled by ashr_0
diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S
index 3f90c0020a..66128a7cb5 100644
--- a/sysdeps/x86_64/strcpy.S
+++ b/sysdeps/x86_64/strcpy.S
@@ -1,5 +1,5 @@
/* strcpy/stpcpy implementation for x86-64.
- Copyright (C) 2002-2016 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
index de526c8fdd..7f9202d656 100644
--- a/sysdeps/x86_64/strcspn.S
+++ b/sysdeps/x86_64/strcspn.S
@@ -1,7 +1,7 @@
/* strcspn (str, ss) -- Return the length of the initial segment of STR
which contains no characters from SS.
For AMD x86-64.
- Copyright (C) 1994-2016 Free Software Foundation, Inc.
+ Copyright (C) 1994-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
@@ -24,9 +24,6 @@
#include <sysdep.h>
#include "asm-syntax.h"
-/* BEWARE: `#ifdef strcspn' means that strcspn is redefined as `strpbrk' */
-#define STRPBRK_P (defined strcspn)
-
.text
ENTRY (strcspn)
@@ -111,7 +108,7 @@ L(5): incq %rax
L(4): addq $256, %rsp /* remove skipset */
cfi_adjust_cfa_offset(-256)
-#if STRPBRK_P
+#ifdef USE_AS_STRPBRK
xorl %edx,%edx
orb %cl, %cl /* was last character NUL? */
cmovzq %rdx, %rax /* Yes: return NULL */
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 12f63ad1bb..01cb5fa846 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,5 +1,5 @@
-/* SSE2 version of strlen.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* SSE2 version of strlen/wcslen.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,16 @@
#include <sysdep.h>
+#ifdef AS_WCSLEN
+# define PMINU pminud
+# define PCMPEQ pcmpeqd
+# define SHIFT_RETURN shrq $2, %rax
+#else
+# define PMINU pminub
+# define PCMPEQ pcmpeqb
+# define SHIFT_RETURN
+#endif
+
/* Long lived register in strlen(s), strnlen(s, n) are:
%xmm3 - zero
@@ -32,10 +42,10 @@ ENTRY(strlen)
/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
#define FIND_ZERO \
- pcmpeqb (%rax), %xmm0; \
- pcmpeqb 16(%rax), %xmm1; \
- pcmpeqb 32(%rax), %xmm2; \
- pcmpeqb 48(%rax), %xmm3; \
+ PCMPEQ (%rax), %xmm0; \
+ PCMPEQ 16(%rax), %xmm1; \
+ PCMPEQ 32(%rax), %xmm2; \
+ PCMPEQ 48(%rax), %xmm3; \
pmovmskb %xmm0, %esi; \
pmovmskb %xmm1, %edx; \
pmovmskb %xmm2, %r8d; \
@@ -54,6 +64,9 @@ ENTRY(strlen)
xor %rax, %rax
ret
L(n_nonzero):
+# ifdef AS_WCSLEN
+ shlq $2, %rsi
+# endif
/* Initialize long lived registers. */
@@ -96,6 +109,7 @@ L(n_nonzero):
test %rdx, %rdx; \
je L(lab); \
bsfq %rdx, %rax; \
+ SHIFT_RETURN; \
ret
#ifdef AS_STRNLEN
@@ -104,19 +118,20 @@ L(n_nonzero):
#else
/* Test first 16 bytes unaligned. */
movdqu (%rax), %xmm4
- pcmpeqb %xmm0, %xmm4
+ PCMPEQ %xmm0, %xmm4
pmovmskb %xmm4, %edx
test %edx, %edx
je L(next48_bytes)
bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
+ SHIFT_RETURN
ret
L(next48_bytes):
/* Same as FIND_ZERO except we do not check first 16 bytes. */
andq $-16, %rax
- pcmpeqb 16(%rax), %xmm1
- pcmpeqb 32(%rax), %xmm2
- pcmpeqb 48(%rax), %xmm3
+ PCMPEQ 16(%rax), %xmm1
+ PCMPEQ 32(%rax), %xmm2
+ PCMPEQ 48(%rax), %xmm3
pmovmskb %xmm1, %edx
pmovmskb %xmm2, %r8d
pmovmskb %xmm3, %ecx
@@ -145,6 +160,7 @@ L(strnlen_ret):
test %rdx, %rdx
je L(loop_init)
bsfq %rdx, %rax
+ SHIFT_RETURN
ret
#endif
.p2align 4
@@ -161,10 +177,10 @@ L(loop):
je L(exit_end)
movdqa (%rax), %xmm0
- pminub 16(%rax), %xmm0
- pminub 32(%rax), %xmm0
- pminub 48(%rax), %xmm0
- pcmpeqb %xmm3, %xmm0
+ PMINU 16(%rax), %xmm0
+ PMINU 32(%rax), %xmm0
+ PMINU 48(%rax), %xmm0
+ PCMPEQ %xmm3, %xmm0
pmovmskb %xmm0, %edx
testl %edx, %edx
jne L(exit)
@@ -182,6 +198,7 @@ L(first):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+ SHIFT_RETURN
ret
.p2align 4
@@ -192,6 +209,7 @@ L(exit):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+ SHIFT_RETURN
ret
#else
@@ -201,10 +219,10 @@ L(exit):
L(loop):
movdqa 64(%rax), %xmm0
- pminub 80(%rax), %xmm0
- pminub 96(%rax), %xmm0
- pminub 112(%rax), %xmm0
- pcmpeqb %xmm3, %xmm0
+ PMINU 80(%rax), %xmm0
+ PMINU 96(%rax), %xmm0
+ PMINU 112(%rax), %xmm0
+ PCMPEQ %xmm3, %xmm0
pmovmskb %xmm0, %edx
testl %edx, %edx
jne L(exit64)
@@ -212,10 +230,10 @@ L(loop):
subq $-128, %rax
movdqa (%rax), %xmm0
- pminub 16(%rax), %xmm0
- pminub 32(%rax), %xmm0
- pminub 48(%rax), %xmm0
- pcmpeqb %xmm3, %xmm0
+ PMINU 16(%rax), %xmm0
+ PMINU 32(%rax), %xmm0
+ PMINU 48(%rax), %xmm0
+ PCMPEQ %xmm3, %xmm0
pmovmskb %xmm0, %edx
testl %edx, %edx
jne L(exit0)
@@ -231,6 +249,7 @@ L(exit0):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+ SHIFT_RETURN
ret
#endif
diff --git a/sysdeps/x86_64/strncase_l-nonascii.c b/sysdeps/x86_64/strncase_l-nonascii.c
index 8664863778..e3d83a06cd 100644
--- a/sysdeps/x86_64/strncase_l-nonascii.c
+++ b/sysdeps/x86_64/strncase_l-nonascii.c
@@ -1,7 +1,7 @@
#include <string.h>
extern int __strncasecmp_l_nonascii (const char *__s1, const char *__s2,
- size_t __n, __locale_t __loc);
+ size_t __n, locale_t __loc);
#define __strncasecmp_l __strncasecmp_l_nonascii
#define USE_IN_EXTENDED_LOCALE_MODEL 1
diff --git a/sysdeps/x86_64/strpbrk.S b/sysdeps/x86_64/strpbrk.S
index 9b97ada84e..21888a5b92 100644
--- a/sysdeps/x86_64/strpbrk.S
+++ b/sysdeps/x86_64/strpbrk.S
@@ -1,2 +1,3 @@
#define strcspn strpbrk
+#define USE_AS_STRPBRK
#include <sysdeps/x86_64/strcspn.S>
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index de0be762ed..aca98e7eaa 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -1,5 +1,5 @@
/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
- Copyright (C) 2013-2016 Free Software Foundation, Inc.
+ Copyright (C) 2013-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
index 49dd4ba9f5..635f1bc6ce 100644
--- a/sysdeps/x86_64/strspn.S
+++ b/sysdeps/x86_64/strspn.S
@@ -1,7 +1,7 @@
/* strspn (str, ss) -- Return the length of the initial segment of STR
which contains only characters from SS.
For AMD x86-64.
- Copyright (C) 1994-2016 Free Software Foundation, Inc.
+ Copyright (C) 1994-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
deleted file mode 100644
index bd5b103d50..0000000000
--- a/sysdeps/x86_64/strtok.S
+++ /dev/null
@@ -1,208 +0,0 @@
-/* strtok (str, delim) -- Return next DELIM separated token from STR.
- For AMD x86-64.
- Copyright (C) 1998-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Based on i686 version contributed by Ulrich Drepper
- <drepper@cygnus.com>, 1998.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-/* This file can be used for the strtok and strtok_r functions:
-
- strtok:
- INPUT PARAMETER:
- str %rdi
- delim %rsi
-
- strtok_r:
- INPUT PARAMETER:
- str %rdi
- delim %rsi
- save_ptr %rdx
-
- We do a common implementation here. */
-
-#ifdef USE_AS_STRTOK_R
-# define SAVE_PTR (%r9)
-#else
- .bss
- .local save_ptr
- .type save_ptr, @object
- .size save_ptr, LP_SIZE
-save_ptr:
- .space LP_SIZE
-
-# ifdef PIC
-# define SAVE_PTR save_ptr(%rip)
-# else
-# define SAVE_PTR save_ptr
-# endif
-
-# define FUNCTION strtok
-#endif
-
- .text
-ENTRY (FUNCTION)
- /* First we create a table with flags for all possible characters.
- For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
- supported by the C string functions we have 256 characters.
- Before inserting marks for the stop characters we clear the whole
- table. */
- movq %rdi, %r8 /* Save value. */
- subq $256, %rsp /* Make space for 256 bytes. */
- cfi_adjust_cfa_offset(256)
- movl $32, %ecx /* 32*8 bytes = 256 bytes. */
- movq %rsp, %rdi
- xorl %eax, %eax /* We store 0s. */
- cld
- rep
- stosq
-
- /* Note: %rcx = 0 !!! */
-
-#ifdef USE_AS_STRTOK_R
- /* The value is stored in the third argument. */
- mov %RDX_LP, %R9_LP /* Save value - see def. of SAVE_PTR. */
- mov (%rdx), %RAX_LP
-#else
- /* The value is in the local variable defined above. But
- we have to take care for PIC code. */
- mov SAVE_PTR, %RAX_LP
-#endif
- movq %r8, %rdx /* Get start of string. */
-
- /* If the pointer is NULL we have to use the stored value of
- the last run. */
- cmpq $0, %rdx
- cmove %rax, %rdx
- testq %rdx, %rdx
- jz L(returnNULL)
- movq %rsi, %rax /* Get start of delimiter set. */
-
-/* For understanding the following code remember that %rcx == 0 now.
- Although all the following instruction only modify %cl we always
- have a correct zero-extended 64-bit value in %rcx. */
-
-L(2): movb (%rax), %cl /* get byte from stopset */
- testb %cl, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
-
- movb 1(%rax), %cl /* get byte from stopset */
- testb $0xff, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
-
- movb 2(%rax), %cl /* get byte from stopset */
- testb $0xff, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
-
- movb 3(%rax), %cl /* get byte from stopset */
- addq $4, %rax /* increment stopset pointer */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
- testb $0xff, %cl /* is NUL char? */
- jnz L(2) /* no => process next dword from stopset */
-
-L(1):
-
- leaq -4(%rdx), %rax /* prepare loop */
-
- /* We use a neat trick for the following loop. Normally we would
- have to test for two termination conditions
- 1. a character in the stopset was found
- and
- 2. the end of the string was found
- As a sign that the character is in the stopset we store its
- value in the table. The value of NUL is NUL so the loop
- terminates for NUL in every case. */
-
-L(3): addq $4, %rax /* adjust pointer for full loop round */
-
- movb (%rax), %cl /* get byte from string */
- testb %cl, (%rsp,%rcx) /* is it contained in stopset? */
- jz L(4) /* no => start of token */
-
- movb 1(%rax), %cl /* get byte from string */
- testb %cl, (%rsp,%rcx) /* is it contained in stopset? */
- jz L(5) /* no => start of token */
-
- movb 2(%rax), %cl /* get byte from string */
- testb %cl, (%rsp,%rcx) /* is it contained in stopset? */
- jz L(6) /* no => start of token */
-
- movb 3(%rax), %cl /* get byte from string */
- testb %cl, (%rsp,%rcx) /* is it contained in stopset? */
- jnz L(3) /* yes => start of loop */
-
- incq %rax /* adjust pointer */
-L(6): incq %rax
-L(5): incq %rax
-
- /* Now we have to terminate the string. */
-
-L(4): leaq -4(%rax), %rdx /* We use %rDX for the next run. */
-
-L(7): addq $4, %rdx /* adjust pointer for full loop round */
-
- movb (%rdx), %cl /* get byte from string */
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- je L(8) /* yes => return */
-
- movb 1(%rdx), %cl /* get byte from string */
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- je L(9) /* yes => return */
-
- movb 2(%rdx), %cl /* get byte from string */
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- je L(10) /* yes => return */
-
- movb 3(%rdx), %cl /* get byte from string */
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- jne L(7) /* no => start loop again */
-
- incq %rdx /* adjust pointer */
-L(10): incq %rdx
-L(9): incq %rdx
-
-L(8): cmpq %rax, %rdx
- je L(returnNULL) /* There was no token anymore. */
-
- movb $0, (%rdx) /* Terminate string. */
-
- /* Are we at end of string? */
- cmpb $0, %cl
- leaq 1(%rdx), %rcx
- cmovne %rcx, %rdx
-
- /* Store the pointer to the next character. */
- mov %RDX_LP, SAVE_PTR
-
-L(epilogue):
- /* Remove the stopset table. */
- addq $256, %rsp
- cfi_adjust_cfa_offset(-256)
- retq
-
-L(returnNULL):
- xorl %eax, %eax
- /* Store the pointer to the next character. */
- mov %RDX_LP, SAVE_PTR
- jmp L(epilogue)
-
-END (FUNCTION)
diff --git a/sysdeps/x86_64/strtok_r.S b/sysdeps/x86_64/strtok_r.S
deleted file mode 100644
index f0db78c67a..0000000000
--- a/sysdeps/x86_64/strtok_r.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#define FUNCTION __strtok_r
-#define USE_AS_STRTOK_R 1
-#include <sysdeps/x86_64/strtok.S>
-weak_alias (__strtok_r, strtok_r)
-strong_alias (__strtok_r, __GI___strtok_r)
diff --git a/sysdeps/x86_64/sub_n.S b/sysdeps/x86_64/sub_n.S
index cc9bc48b01..e70d48ba47 100644
--- a/sysdeps/x86_64/sub_n.S
+++ b/sysdeps/x86_64/sub_n.S
@@ -1,6 +1,6 @@
/* x86-64 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
- Copyright (C) 2006-2016 Free Software Foundation, Inc.
+ Copyright (C) 2006-2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/submul_1.S b/sysdeps/x86_64/submul_1.S
index 3037cb9c45..ba1bf92bc5 100644
--- a/sysdeps/x86_64/submul_1.S
+++ b/sysdeps/x86_64/submul_1.S
@@ -1,6 +1,6 @@
/* x86-64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
the result from a second limb vector.
- Copyright (C) 2003-2016 Free Software Foundation, Inc.
+ Copyright (C) 2003-2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
index fbe3560588..1738d7f955 100644
--- a/sysdeps/x86_64/sysdep.h
+++ b/sysdeps/x86_64/sysdep.h
@@ -1,5 +1,5 @@
/* Assembler macros for x86-64.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,7 +19,7 @@
#ifndef _X86_64_SYSDEP_H
#define _X86_64_SYSDEP_H 1
-#include <sysdeps/generic/sysdep.h>
+#include <sysdeps/x86/sysdep.h>
#ifdef __ASSEMBLER__
@@ -32,28 +32,6 @@
#define cfi_offset_rel_rsp(regn, off) .cfi_escape 0x10, regn, 0x4, 0x13, \
0x77, off & 0x7F | 0x80, off >> 7
-/* ELF uses byte-counts for .align, most others use log2 of count of bytes. */
-#define ALIGNARG(log2) 1<<log2
-#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
-
-
-/* Define an entry point visible from C. */
-#define ENTRY(name) \
- .globl C_SYMBOL_NAME(name); \
- .type C_SYMBOL_NAME(name),@function; \
- .align ALIGNARG(4); \
- C_LABEL(name) \
- cfi_startproc; \
- CALL_MCOUNT
-
-#undef END
-#define END(name) \
- cfi_endproc; \
- ASM_SIZE_DIRECTIVE(name)
-
-#define ENTRY_CHK(name) ENTRY (name)
-#define END_CHK(name) END (name)
-
/* If compiled for profiling, call `mcount' at the start of each function. */
#ifdef PROF
/* The mcount code relies on a normal frame pointer being on the stack
@@ -70,12 +48,6 @@
#define CALL_MCOUNT /* Do nothing. */
#endif
-/* Since C identifiers are not normally prefixed with an underscore
- on this system, the asm identifier `syscall_error' intrudes on the
- C name space. Make sure we use an innocuous name. */
-#define syscall_error __syscall_error
-#define mcount _mcount
-
#define PSEUDO(name, syscall_name, args) \
lose: \
jmp JUMPTARGET(syscall_error) \
@@ -84,25 +56,18 @@ lose: \
DO_CALL (syscall_name, args); \
jb lose
-#undef PSEUDO_END
-#define PSEUDO_END(name) \
- END (name)
-
#undef JUMPTARGET
-#ifdef PIC
-#define JUMPTARGET(name) name##@PLT
+#ifdef SHARED
+# ifdef BIND_NOW
+# define JUMPTARGET(name) *name##@GOTPCREL(%rip)
+# else
+# define JUMPTARGET(name) name##@PLT
+# endif
#else
-#define JUMPTARGET(name) name
+/* For static archives, branch to target directly. */
+# define JUMPTARGET(name) name
#endif
-/* Local label name for asm code. */
-#ifndef L
-/* ELF-like local names start with `.L'. */
-# define L(name) .L##name
-#endif
-
-#define atom_text_section .section ".text.atom", "ax"
-
/* Long and pointer size in bytes. */
#define LP_SIZE 8
diff --git a/sysdeps/x86_64/tls_get_addr.S b/sysdeps/x86_64/tls_get_addr.S
new file mode 100644
index 0000000000..cf8c6d101b
--- /dev/null
+++ b/sysdeps/x86_64/tls_get_addr.S
@@ -0,0 +1,61 @@
+/* Stack-aligning implementation of __tls_get_addr. x86-64 version.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef SHARED
+
+# include <sysdep.h>
+# include "tlsdesc.h"
+# include "rtld-offsets.h"
+
+/* See __tls_get_addr and __tls_get_addr_slow in dl-tls.c. This function
+ calls __tls_get_addr_slow on both slow paths. It realigns the stack
+ before the call to work around GCC PR58066. */
+
+ENTRY (__tls_get_addr)
+ mov %fs:DTV_OFFSET, %RDX_LP /* dtv pointer, indexed as dtv[] below */
+ mov GL_TLS_GENERATION_OFFSET+_rtld_local(%rip), %RAX_LP
+ /* GL(dl_tls_generation) == dtv[0].counter */
+ cmp %RAX_LP, (%rdx)
+ jne 1f /* generation mismatch => slow path */
+ mov TI_MODULE_OFFSET(%rdi), %RAX_LP /* ti->ti_module; ti is passed in %rdi */
+ /* dtv[ti->ti_module] */
+# ifdef __LP64__
+ salq $4, %rax /* byte offset = module index * 16 */
+ movq (%rdx,%rax), %rax
+# else
+ movl (%rdx,%rax, 8), %eax /* byte offset = module index * 8 */
+# endif
+ cmp $-1, %RAX_LP
+ je 1f /* dtv entry equals -1 => slow path */
+ add TI_OFFSET_OFFSET(%rdi), %RAX_LP /* fast path: dtv entry + ti->ti_offset */
+ ret
+1:
+ /* On the slow path, align the stack. */
+ pushq %rbp
+ cfi_def_cfa_offset (16)
+ cfi_offset (%rbp, -16)
+ mov %RSP_LP, %RBP_LP /* remember the unaligned stack pointer */
+ cfi_def_cfa_register (%rbp)
+ and $-16, %RSP_LP /* round the stack pointer down to a multiple of 16 */
+ call __tls_get_addr_slow
+ mov %RBP_LP, %RSP_LP /* undo the realignment */
+ popq %rbp
+ cfi_def_cfa (%rsp, 8)
+ ret
+END (__tls_get_addr)
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/tlsdesc.c b/sysdeps/x86_64/tlsdesc.c
index aff8b67941..302d097dbb 100644
--- a/sysdeps/x86_64/tlsdesc.c
+++ b/sysdeps/x86_64/tlsdesc.c
@@ -1,5 +1,5 @@
/* Manage TLS descriptors. x86_64 version.
- Copyright (C) 2005-2016 Free Software Foundation, Inc.
+ Copyright (C) 2005-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -134,7 +134,6 @@ _dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
if there is one. */
void
-internal_function
_dl_unmap (struct link_map *map)
{
_dl_unmap_segments (map);
diff --git a/sysdeps/x86_64/tlsdesc.sym b/sysdeps/x86_64/tlsdesc.sym
index 33854975d0..fc897ab4b5 100644
--- a/sysdeps/x86_64/tlsdesc.sym
+++ b/sysdeps/x86_64/tlsdesc.sym
@@ -15,3 +15,6 @@ TLSDESC_ARG offsetof(struct tlsdesc, arg)
TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+
+TI_MODULE_OFFSET offsetof(tls_index, ti_module)
+TI_OFFSET_OFFSET offsetof(tls_index, ti_offset)
diff --git a/sysdeps/x86_64/tst-audit.h b/sysdeps/x86_64/tst-audit.h
index 94e9dd5282..623ef8920c 100644
--- a/sysdeps/x86_64/tst-audit.h
+++ b/sysdeps/x86_64/tst-audit.h
@@ -1,6 +1,6 @@
/* Definitions for testing PLT entry/exit auditing. x86_64 version.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S b/sysdeps/x86_64/tst-audit10-aux.c
index fb9f989adc..e1b2d92f75 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S
+++ b/sysdeps/x86_64/tst-audit10-aux.c
@@ -1,5 +1,5 @@
-/* Multiple versions of vectorized pow.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* Test case for preserved AVX512 registers in dynamic linker, -mavx512f part.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,21 +16,26 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY (_ZGVdN4vv_pow)
- .type _ZGVdN4vv_pow, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq _ZGVdN4vv_pow_avx2(%rip), %rax
- HAS_ARCH_FEATURE (AVX2_Usable)
- jz 2f
- ret
-2: leaq _ZGVdN4vv_pow_sse_wrapper(%rip), %rax
- ret
-END (_ZGVdN4vv_pow)
-libmvec_hidden_def (_ZGVdN4vv_pow)
-
-#define _ZGVdN4vv_pow _ZGVdN4vv_pow_sse_wrapper
-#include "../svml_d_pow4_core.S"
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+tst_audit10_aux (void)
+{
+#ifdef __AVX512F__
+ extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i,
+ __m512i, __m512i, __m512i, __m512i);
+
+ __m512i zmm = _mm512_setzero_si512 ();
+ __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm); /* pass eight all-zero vectors */
+
+ zmm = _mm512_set1_epi64 (0x12349876); /* pattern the returned vector is compared against */
+
+ if (memcmp (&zmm, &ret, sizeof (ret)))
+ abort (); /* returned vector does not match the pattern */
+ return 0;
+#else /* __AVX512F__ */
+ return 77; /* built without __AVX512F__; 77 is presumably the harness's "unsupported" status -- confirm against test-skeleton.c */
+#endif /* __AVX512F__ */
+}
diff --git a/sysdeps/x86_64/tst-audit10.c b/sysdeps/x86_64/tst-audit10.c
index d104341be8..568011cb96 100644
--- a/sysdeps/x86_64/tst-audit10.c
+++ b/sysdeps/x86_64/tst-audit10.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Test case for preserved AVX512 registers in dynamic linker.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -15,17 +16,14 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* Test case for x86-64 preserved registers in dynamic linker. */
-
-#ifdef __AVX512F__
-#include <stdlib.h>
-#include <string.h>
#include <cpuid.h>
-#include <immintrin.h>
+
+int tst_audit10_aux (void);
static int
avx512_enabled (void)
{
+#ifdef bit_AVX512F
unsigned int eax, ebx, ecx, edx;
if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
@@ -40,34 +38,20 @@ avx512_enabled (void)
/* Verify that ZMM, YMM and XMM states are enabled. */
return (eax & 0xe6) == 0xe6;
+#else
+ return 0;
+#endif
}
-
-extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i,
- __m512i, __m512i, __m512i, __m512i);
static int
do_test (void)
{
/* Run AVX512 test only if AVX512 is supported. */
if (avx512_enabled ())
- {
- __m512i zmm = _mm512_setzero_si512 ();
- __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm);
-
- zmm = _mm512_set1_epi64 (0x12349876);
-
- if (memcmp (&zmm, &ret, sizeof (ret)))
- abort ();
- }
- return 0;
-}
-#else
-static int
-do_test (void)
-{
- return 0;
+ return tst_audit10_aux ();
+ else
+ return 77;
}
-#endif
#define TEST_FUNCTION do_test ()
#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-audit4-aux.c b/sysdeps/x86_64/tst-audit4-aux.c
new file mode 100644
index 0000000000..2770be5b5e
--- /dev/null
+++ b/sysdeps/x86_64/tst-audit4-aux.c
@@ -0,0 +1,39 @@
+/* Test case for preserved AVX registers in dynamic linker, -mavx part.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
+ __m256i, __m256i, __m256i, __m256i);
+
+int
+tst_audit4_aux (void)
+{
+#ifdef __AVX__
+ __m256i ymm = _mm256_setzero_si256 ();
+ __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm); /* pass eight all-zero vectors */
+ ymm = _mm256_set1_epi32 (0x12349876); /* pattern the returned vector is compared against */
+ if (memcmp (&ymm, &ret, sizeof (ret)))
+ abort (); /* returned vector does not match the pattern */
+ return 0;
+#else /* __AVX__ */
+ return 77; /* built without __AVX__; 77 is presumably the harness's "unsupported" status -- confirm against test-skeleton.c */
+#endif /* __AVX__ */
+}
diff --git a/sysdeps/x86_64/tst-audit4.c b/sysdeps/x86_64/tst-audit4.c
index 44d51231e3..d7ca24ac2d 100644
--- a/sysdeps/x86_64/tst-audit4.c
+++ b/sysdeps/x86_64/tst-audit4.c
@@ -1,11 +1,24 @@
-/* Test case for x86-64 preserved registers in dynamic linker. */
+/* Test case for preserved AVX registers in dynamic linker.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
-#ifdef __AVX__
-#include <stdlib.h>
-#include <string.h>
#include <cpuid.h>
-#include <immintrin.h>
+int tst_audit4_aux (void);
static int
avx_enabled (void)
@@ -22,31 +35,15 @@ avx_enabled (void)
return (eax & 6) == 6;
}
-
-extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
- __m256i, __m256i, __m256i, __m256i);
static int
do_test (void)
{
/* Run AVX test only if AVX is supported. */
if (avx_enabled ())
- {
- __m256i ymm = _mm256_setzero_si256 ();
- __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
-
- ymm = _mm256_set1_epi32 (0x12349876);
- if (memcmp (&ymm, &ret, sizeof (ret)))
- abort ();
- }
- return 0;
-}
-#else
-static int
-do_test (void)
-{
- return 0;
+ return tst_audit4_aux ();
+ else
+ return 77;
}
-#endif
#define TEST_FUNCTION do_test ()
#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-auditmod10a.c b/sysdeps/x86_64/tst-auditmod10a.c
index e94dbaf7fe..ff6021a79a 100644
--- a/sysdeps/x86_64/tst-auditmod10a.c
+++ b/sysdeps/x86_64/tst-auditmod10a.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-auditmod10b.c b/sysdeps/x86_64/tst-auditmod10b.c
index ad6fcafdda..de1bbbb7fb 100644
--- a/sysdeps/x86_64/tst-auditmod10b.c
+++ b/sysdeps/x86_64/tst-auditmod10b.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,6 +19,8 @@
function parameter passing/return. */
#include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod3b.c b/sysdeps/x86_64/tst-auditmod3b.c
index 1a41ca80c0..7aad92382e 100644
--- a/sysdeps/x86_64/tst-auditmod3b.c
+++ b/sysdeps/x86_64/tst-auditmod3b.c
@@ -2,6 +2,8 @@
function parameter passing/return. */
#include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod4b.c b/sysdeps/x86_64/tst-auditmod4b.c
index 2b0d827e88..1153ea442c 100644
--- a/sysdeps/x86_64/tst-auditmod4b.c
+++ b/sysdeps/x86_64/tst-auditmod4b.c
@@ -2,6 +2,8 @@
function parameter passing/return. */
#include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod5b.c b/sysdeps/x86_64/tst-auditmod5b.c
index a74d261f03..6a280fd61b 100644
--- a/sysdeps/x86_64/tst-auditmod5b.c
+++ b/sysdeps/x86_64/tst-auditmod5b.c
@@ -2,6 +2,8 @@
function parameter passing/return. */
#include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod6b.c b/sysdeps/x86_64/tst-auditmod6b.c
index 886fc33e9b..3533602c07 100644
--- a/sysdeps/x86_64/tst-auditmod6b.c
+++ b/sysdeps/x86_64/tst-auditmod6b.c
@@ -2,6 +2,8 @@
function parameter passing/return. */
#include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod6c.c b/sysdeps/x86_64/tst-auditmod6c.c
index b2ee24d8bf..8000e89224 100644
--- a/sysdeps/x86_64/tst-auditmod6c.c
+++ b/sysdeps/x86_64/tst-auditmod6c.c
@@ -2,6 +2,8 @@
function parameter passing/return. */
#include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod7b.c b/sysdeps/x86_64/tst-auditmod7b.c
index f27076d3bb..5abe6d1bc9 100644
--- a/sysdeps/x86_64/tst-auditmod7b.c
+++ b/sysdeps/x86_64/tst-auditmod7b.c
@@ -2,6 +2,8 @@
function parameter passing/return. */
#include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-avx-aux.c b/sysdeps/x86_64/tst-avx-aux.c
new file mode 100644
index 0000000000..e6ae368fd8
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx-aux.c
@@ -0,0 +1,47 @@
+/* Test case for preserved AVX registers in dynamic linker, -mavx part.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+tst_avx_aux (void)
+{
+#ifdef __AVX__
+ extern __m256i avx_test (__m256i, __m256i, __m256i, __m256i,
+ __m256i, __m256i, __m256i, __m256i);
+
+ __m256i ymm0 = _mm256_set1_epi32 (0); /* argument N is filled with the value N */
+ __m256i ymm1 = _mm256_set1_epi32 (1);
+ __m256i ymm2 = _mm256_set1_epi32 (2);
+ __m256i ymm3 = _mm256_set1_epi32 (3);
+ __m256i ymm4 = _mm256_set1_epi32 (4);
+ __m256i ymm5 = _mm256_set1_epi32 (5);
+ __m256i ymm6 = _mm256_set1_epi32 (6);
+ __m256i ymm7 = _mm256_set1_epi32 (7);
+ __m256i ret = avx_test (ymm0, ymm1, ymm2, ymm3,
+ ymm4, ymm5, ymm6, ymm7);
+ ymm0 = _mm256_set1_epi32 (0x12349876); /* ymm0 reused to hold the expected return pattern */
+ if (memcmp (&ymm0, &ret, sizeof (ret)))
+ abort (); /* returned vector does not match the pattern */
+ return 0;
+#else /* __AVX__ */
+ return 77; /* built without __AVX__; 77 is presumably the harness's "unsupported" status -- confirm against test-skeleton.c */
+#endif /* __AVX__ */
+}
diff --git a/sysdeps/x86_64/tst-avx.c b/sysdeps/x86_64/tst-avx.c
new file mode 100644
index 0000000000..9c52fc264a
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx.c
@@ -0,0 +1,49 @@
+/* Test case for preserved AVX registers in dynamic linker.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <cpuid.h>
+
+int tst_avx_aux (void);
+
+static int
+avx_enabled (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0; /* CPUID leaf 1 query failed, or AVX and OSXSAVE are not both reported */
+
+ /* Check the OS has AVX and SSE saving enabled. */
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); /* ECX = 0 selects XCR0 */
+
+ return (eax & 6) == 6; /* nonzero only when bits 1 and 2 are both set */
+}
+
+static int
+do_test (void)
+{
+ /* Run AVX test only if AVX is supported. */
+ if (avx_enabled ())
+ return tst_avx_aux (); /* helper from tst-avx-aux.c; its result becomes the test result */
+ else
+ return 77; /* presumably the harness's "unsupported" status -- confirm against test-skeleton.c */
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-avx512-aux.c b/sysdeps/x86_64/tst-avx512-aux.c
new file mode 100644
index 0000000000..87c4124398
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512-aux.c
@@ -0,0 +1,48 @@
+/* Test case for preserved AVX512 registers in dynamic linker,
+ -mavx512f part.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+tst_avx512_aux (void)
+{
+#ifdef __AVX512F__
+ extern __m512i avx512_test (__m512i, __m512i, __m512i, __m512i,
+ __m512i, __m512i, __m512i, __m512i);
+
+ __m512i zmm0 = _mm512_set1_epi32 (0); /* argument N is filled with the value N */
+ __m512i zmm1 = _mm512_set1_epi32 (1);
+ __m512i zmm2 = _mm512_set1_epi32 (2);
+ __m512i zmm3 = _mm512_set1_epi32 (3);
+ __m512i zmm4 = _mm512_set1_epi32 (4);
+ __m512i zmm5 = _mm512_set1_epi32 (5);
+ __m512i zmm6 = _mm512_set1_epi32 (6);
+ __m512i zmm7 = _mm512_set1_epi32 (7);
+ __m512i ret = avx512_test (zmm0, zmm1, zmm2, zmm3,
+ zmm4, zmm5, zmm6, zmm7);
+ zmm0 = _mm512_set1_epi32 (0x12349876); /* zmm0 reused to hold the expected return pattern */
+ if (memcmp (&zmm0, &ret, sizeof (ret)))
+ abort (); /* returned vector does not match the pattern */
+ return 0;
+#else /* __AVX512F__ */
+ return 77; /* built without __AVX512F__; 77 is presumably the harness's "unsupported" status -- confirm against test-skeleton.c */
+#endif /* __AVX512F__ */
+}
diff --git a/sysdeps/x86_64/tst-avx512.c b/sysdeps/x86_64/tst-avx512.c
new file mode 100644
index 0000000000..63d8bc9c27
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512.c
@@ -0,0 +1,57 @@
+/* Test case for preserved AVX512 registers in dynamic linker.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <cpuid.h>
+
+int tst_avx512_aux (void);
+
+static int
+avx512_enabled (void)
+{
+#ifdef bit_AVX512F
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0; /* CPUID leaf 1 query failed, or AVX and OSXSAVE are not both reported */
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+ if (!(ebx & bit_AVX512F))
+ return 0; /* CPUID leaf 7, subleaf 0 does not report AVX512F */
+
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0)); /* ECX = 0 selects XCR0 */
+
+ /* Verify that ZMM, YMM and XMM states are enabled. */
+ return (eax & 0xe6) == 0xe6;
+#else
+ return 0; /* bit_AVX512F is not defined (presumably an older <cpuid.h>): report AVX512 as unavailable */
+#endif
+}
+
+static int
+do_test (void)
+{
+ /* Run AVX512 test only if AVX512 is supported. */
+ if (avx512_enabled ())
+ return tst_avx512_aux (); /* helper from tst-avx512-aux.c; its result becomes the test result */
+ else
+ return 77; /* presumably the harness's "unsupported" status -- confirm against test-skeleton.c */
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-avx512mod.c b/sysdeps/x86_64/tst-avx512mod.c
new file mode 100644
index 0000000000..4cfb3a2c3d
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512mod.c
@@ -0,0 +1,48 @@
+/* Test case for x86-64 preserved AVX512 registers in dynamic linker. */
+
+#ifdef __AVX512F__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m512i
+avx512_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3,
+ __m512i x4, __m512i x5, __m512i x6, __m512i x7)
+{
+ __m512i zmm; /* holds the value expected for each argument in turn */
+
+ zmm = _mm512_set1_epi32 (0);
+ if (memcmp (&zmm, &x0, sizeof (zmm)))
+ abort (); /* argument xN must be filled with the value N */
+
+ zmm = _mm512_set1_epi32 (1);
+ if (memcmp (&zmm, &x1, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (2);
+ if (memcmp (&zmm, &x2, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (3);
+ if (memcmp (&zmm, &x3, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (4);
+ if (memcmp (&zmm, &x4, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (5);
+ if (memcmp (&zmm, &x5, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (6);
+ if (memcmp (&zmm, &x6, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (7);
+ if (memcmp (&zmm, &x7, sizeof (zmm)))
+ abort ();
+
+ return _mm512_set1_epi32 (0x12349876); /* same pattern tst_avx512_aux compares the result against */
+}
+#endif
diff --git a/sysdeps/x86_64/tst-avxmod.c b/sysdeps/x86_64/tst-avxmod.c
new file mode 100644
index 0000000000..6e5b154997
--- /dev/null
+++ b/sysdeps/x86_64/tst-avxmod.c
@@ -0,0 +1,48 @@
+/* Test case for x86-64 preserved AVX registers in dynamic linker. */
+
+#ifdef __AVX__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m256i
+avx_test (__m256i x0, __m256i x1, __m256i x2, __m256i x3,
+ __m256i x4, __m256i x5, __m256i x6, __m256i x7)
+{
+ __m256i ymm; /* holds the value expected for each argument in turn */
+
+ ymm = _mm256_set1_epi32 (0);
+ if (memcmp (&ymm, &x0, sizeof (ymm)))
+ abort (); /* argument xN must be filled with the value N */
+
+ ymm = _mm256_set1_epi32 (1);
+ if (memcmp (&ymm, &x1, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (2);
+ if (memcmp (&ymm, &x2, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (3);
+ if (memcmp (&ymm, &x3, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (4);
+ if (memcmp (&ymm, &x4, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (5);
+ if (memcmp (&ymm, &x5, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (6);
+ if (memcmp (&ymm, &x6, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (7);
+ if (memcmp (&ymm, &x7, sizeof (ymm)))
+ abort ();
+
+ return _mm256_set1_epi32 (0x12349876); /* same pattern tst_avx_aux compares the result against */
+}
+#endif
diff --git a/sysdeps/x86_64/tst-mallocalign1.c b/sysdeps/x86_64/tst-mallocalign1.c
index 3897af86c1..0f2e725e3b 100644
--- a/sysdeps/x86_64/tst-mallocalign1.c
+++ b/sysdeps/x86_64/tst-mallocalign1.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-platform-1.c b/sysdeps/x86_64/tst-platform-1.c
new file mode 100644
index 0000000000..91dbbb93db
--- /dev/null
+++ b/sysdeps/x86_64/tst-platform-1.c
@@ -0,0 +1,29 @@
+/* Test PRELOAD with $PLATFORM.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+extern int preload (void);
+
+static int
+do_test (void)
+{
+ return preload () == 0x1234 ? EXIT_SUCCESS : EXIT_FAILURE; /* 0x1234 comes from tst-platformmod-2.c's preload; tst-platformmod-1.c's returns 0 */
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/tst-platformmod-1.c b/sysdeps/x86_64/tst-platformmod-1.c
new file mode 100644
index 0000000000..be0e786e76
--- /dev/null
+++ b/sysdeps/x86_64/tst-platformmod-1.c
@@ -0,0 +1,23 @@
+/* Test PRELOAD with $PLATFORM.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+int
+preload (void)
+{
+ return 0; /* not the 0x1234 that tst-platform-1.c's do_test requires, so that test fails if this definition is the one called */
+}
diff --git a/sysdeps/x86_64/tst-platformmod-2.c b/sysdeps/x86_64/tst-platformmod-2.c
new file mode 100644
index 0000000000..413d0bd94b
--- /dev/null
+++ b/sysdeps/x86_64/tst-platformmod-2.c
@@ -0,0 +1,23 @@
+/* Test PRELOAD with $PLATFORM.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+int
+preload (void)
+{
+ return 0x1234; /* the value tst-platform-1.c's do_test checks for */
+}
diff --git a/sysdeps/x86_64/tst-quad1.c b/sysdeps/x86_64/tst-quad1.c
index 1cb63a748f..089b25d2df 100644
--- a/sysdeps/x86_64/tst-quad1.c
+++ b/sysdeps/x86_64/tst-quad1.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-quadmod1.S b/sysdeps/x86_64/tst-quadmod1.S
index 588c5016b6..c60f9dc89d 100644
--- a/sysdeps/x86_64/tst-quadmod1.S
+++ b/sysdeps/x86_64/tst-quadmod1.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -28,6 +28,9 @@
.type func, @function
func:
.cfi_startproc
+#if defined __CET__ && (__CET__ & 1) != 0
+ endbr64
+#endif
xorl %edi, %edi
jmp exit@PLT
.cfi_endproc
@@ -37,6 +40,9 @@ func:
foo:
.cfi_startproc
.cfi_def_cfa_register 6
+#if defined __CET__ && (__CET__ & 1) != 0
+ endbr64
+#endif
movq .Ljmp(%rip), %rax
subq $BIAS, %rax
jmp *%rax
diff --git a/sysdeps/x86_64/tst-quadmod2.S b/sysdeps/x86_64/tst-quadmod2.S
index 7409a9eaa3..af03444d4f 100644
--- a/sysdeps/x86_64/tst-quadmod2.S
+++ b/sysdeps/x86_64/tst-quadmod2.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -27,6 +27,9 @@
.type func, @function
func:
.cfi_startproc
+#if defined __CET__ && (__CET__ & 1) != 0
+ endbr64
+#endif
xorl %edi, %edi
jmp exit@PLT
.cfi_endproc
@@ -36,6 +39,9 @@ func:
foo:
.cfi_startproc
.cfi_def_cfa_register 6
+#if defined __CET__ && (__CET__ & 1) != 0
+ endbr64
+#endif
movq .Ljmp(%rip), %rax
subq $BIAS, %rax
jmp *%rax
diff --git a/sysdeps/x86_64/tst-sse.c b/sysdeps/x86_64/tst-sse.c
new file mode 100644
index 0000000000..d219889d1f
--- /dev/null
+++ b/sysdeps/x86_64/tst-sse.c
@@ -0,0 +1,46 @@
+/* Test case for preserved SSE registers in dynamic linker.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern __m128i sse_test (__m128i, __m128i, __m128i, __m128i,
+ __m128i, __m128i, __m128i, __m128i);
+
+static int
+do_test (void) /* Passes eight distinct vectors to sse_test and aborts unless the result has 0x12349876 in every 32-bit lane; returns 0 on success. */
+{
+ __m128i xmm0 = _mm_set1_epi32 (0); /* Each xmmN below has all four 32-bit lanes set to N. */
+ __m128i xmm1 = _mm_set1_epi32 (1);
+ __m128i xmm2 = _mm_set1_epi32 (2);
+ __m128i xmm3 = _mm_set1_epi32 (3);
+ __m128i xmm4 = _mm_set1_epi32 (4);
+ __m128i xmm5 = _mm_set1_epi32 (5);
+ __m128i xmm6 = _mm_set1_epi32 (6);
+ __m128i xmm7 = _mm_set1_epi32 (7);
+ __m128i ret = sse_test (xmm0, xmm1, xmm2, xmm3, /* NOTE(review): presumably resolved through the dynamic linker so argument registers must survive symbol binding -- confirm in the Makefile. */
+ xmm4, xmm5, xmm6, xmm7);
+ xmm0 = _mm_set1_epi32 (0x12349876); /* Reuse xmm0 to hold the expected return value. */
+ if (memcmp (&xmm0, &ret, sizeof (ret))) /* Nonzero means the returned bytes differ from the expected pattern. */
+ abort ();
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-ssemod.c b/sysdeps/x86_64/tst-ssemod.c
new file mode 100644
index 0000000000..907a64c69e
--- /dev/null
+++ b/sysdeps/x86_64/tst-ssemod.c
@@ -0,0 +1,46 @@
+/* Test case for x86-64 preserved SSE registers in dynamic linker. */
+
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m128i
+sse_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3, /* Checks that argument xN has every 32-bit lane equal to N, aborting on the first mismatch. */
+ __m128i x4, __m128i x5, __m128i x6, __m128i x7)
+{
+ __m128i xmm; /* Scratch vector rebuilt with the expected pattern before each comparison. */
+
+ xmm = _mm_set1_epi32 (0);
+ if (memcmp (&xmm, &x0, sizeof (xmm))) /* Byte-wise comparison of all 16 bytes; nonzero means x0 arrived corrupted. */
+ abort ();
+
+ xmm = _mm_set1_epi32 (1);
+ if (memcmp (&xmm, &x1, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (2);
+ if (memcmp (&xmm, &x2, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (3);
+ if (memcmp (&xmm, &x3, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (4);
+ if (memcmp (&xmm, &x4, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (5);
+ if (memcmp (&xmm, &x5, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (6);
+ if (memcmp (&xmm, &x6, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (7);
+ if (memcmp (&xmm, &x7, sizeof (xmm)))
+ abort ();
+
+ return _mm_set1_epi32 (0x12349876); /* All eight arguments matched: return 0x12349876 in every lane. */
+}
diff --git a/sysdeps/x86_64/tst-stack-align.h b/sysdeps/x86_64/tst-stack-align.h
index 24e8e61c35..b2ef77f65d 100644
--- a/sysdeps/x86_64/tst-stack-align.h
+++ b/sysdeps/x86_64/tst-stack-align.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-x86_64-1.c b/sysdeps/x86_64/tst-x86_64-1.c
new file mode 100644
index 0000000000..801c866bdd
--- /dev/null
+++ b/sysdeps/x86_64/tst-x86_64-1.c
@@ -0,0 +1,26 @@
+/* Test searching the "x86_64" directory for shared libraries.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern void foo (void);
+
+int
+main (void) /* Calls foo once and exits with status 0. */
+{
+ foo (); /* NOTE(review): presumably provided by a separately built shared object, per the file header -- confirm in the Makefile. */
+ return 0;
+}
diff --git a/sysdeps/x86_64/tst-x86_64mod-1.c b/sysdeps/x86_64/tst-x86_64mod-1.c
new file mode 100644
index 0000000000..57e955d5d9
--- /dev/null
+++ b/sysdeps/x86_64/tst-x86_64mod-1.c
@@ -0,0 +1,22 @@
+/* Test searching the "x86_64" directory for shared libraries.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+void
+foo (void) /* Empty function: takes no arguments, does no work, returns nothing. */
+{
+}
diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S
index 8604289e46..29284662a1 100644
--- a/sysdeps/x86_64/wcschr.S
+++ b/sysdeps/x86_64/wcschr.S
@@ -1,5 +1,5 @@
/* wcschr with SSSE3
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S
index 705a73b10e..0d506c8b5c 100644
--- a/sysdeps/x86_64/wcscmp.S
+++ b/sysdeps/x86_64/wcscmp.S
@@ -1,5 +1,5 @@
/* Optimized wcscmp for x86-64 with SSE2.
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -946,5 +946,7 @@ L(equal):
ret
END (__wcscmp)
+#ifndef __wcscmp
libc_hidden_def (__wcscmp)
weak_alias (__wcscmp, wcscmp)
+#endif
diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S
index 7a9175eefe..9f5f723227 100644
--- a/sysdeps/x86_64/wcslen.S
+++ b/sysdeps/x86_64/wcslen.S
@@ -1,5 +1,5 @@
/* Optimized wcslen for x86-64 with SSE2.
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index fb192f3ecf..2f38853727 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -1,5 +1,5 @@
/* wcsrchr with SSSE3
- Copyright (C) 2011-2016 Free Software Foundation, Inc.
+ Copyright (C) 2011-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/wmemset.S b/sysdeps/x86_64/wmemset.S
new file mode 100644
index 0000000000..f96d567fd8
--- /dev/null
+++ b/sysdeps/x86_64/wmemset.S
@@ -0,0 +1 @@
+/* Implemented in memset.S. */
diff --git a/sysdeps/x86_64/wmemset_chk.S b/sysdeps/x86_64/wmemset_chk.S
new file mode 100644
index 0000000000..9275ebb40d
--- /dev/null
+++ b/sysdeps/x86_64/wmemset_chk.S
@@ -0,0 +1,33 @@
+/* Checking wmemset for x86-64.
+ Copyright (C) 2004-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#ifndef SHARED
+ /* For libc.so this is defined in wmemset.S.
+ For libc.a, this is a separate source to avoid
+ wmemset bringing in __chk_fail and all routines
+ it calls. */
+ .text
+ENTRY (__wmemset_chk) /* NOTE(review): presumably %rdx is the element count and %rcx the destination capacity (3rd/4th SysV integer arguments) -- confirm against the C prototype. */
+ cmpq %rdx, %rcx /* Set flags from %rcx - %rdx. */
+ jb __chk_fail /* Unsigned %rcx < %rdx: jump to __chk_fail. */
+ jmp wmemset /* Otherwise tail-jump to wmemset; no registers were modified above. */
+END (__wmemset_chk)
+#endif
diff --git a/sysdeps/x86_64/wordcopy.c b/sysdeps/x86_64/wordcopy.c
new file mode 100644
index 0000000000..590b6cb16b
--- /dev/null
+++ b/sysdeps/x86_64/wordcopy.c
@@ -0,0 +1 @@
+/* X86-64 doesn't use memory copy functions. */
diff --git a/sysdeps/x86_64/x32/dl-machine.h b/sysdeps/x86_64/x32/dl-machine.h
index 47132fcd96..2a612913ff 100644
--- a/sysdeps/x86_64/x32/dl-machine.h
+++ b/sysdeps/x86_64/x32/dl-machine.h
@@ -1,5 +1,5 @@
/* Machine-dependent ELF dynamic relocation inline functions. x32 version.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/x32/fpu/s_lrint.S b/sysdeps/x86_64/x32/fpu/s_lrint.S
index aa68863553..381684583e 100644
--- a/sysdeps/x86_64/x32/fpu/s_lrint.S
+++ b/sysdeps/x86_64/x32/fpu/s_lrint.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,10 +18,11 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-double.h>
.text
ENTRY(__lrint)
cvtsd2si %xmm0,%eax
ret
END(__lrint)
-weak_alias (__lrint, lrint)
+libm_alias_double (__lrint, lrint)
diff --git a/sysdeps/x86_64/x32/fpu/s_lrintf.S b/sysdeps/x86_64/x32/fpu/s_lrintf.S
index bb5b1665bd..361d34a989 100644
--- a/sysdeps/x86_64/x32/fpu/s_lrintf.S
+++ b/sysdeps/x86_64/x32/fpu/s_lrintf.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,10 +18,11 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-float.h>
.text
ENTRY(__lrintf)
cvtss2si %xmm0,%eax
ret
END(__lrintf)
-weak_alias (__lrintf, lrintf)
+libm_alias_float (__lrint, lrint)
diff --git a/sysdeps/x86_64/x32/fpu/s_lrintl.S b/sysdeps/x86_64/x32/fpu/s_lrintl.S
index 6bc8f6fdb9..b68313e916 100644
--- a/sysdeps/x86_64/x32/fpu/s_lrintl.S
+++ b/sysdeps/x86_64/x32/fpu/s_lrintl.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <libm-alias-ldouble.h>
.text
ENTRY(__lrintl)
@@ -27,4 +28,4 @@ ENTRY(__lrintl)
movl -4(%rsp),%eax
ret
END(__lrintl)
-weak_alias (__lrintl, lrintl)
+libm_alias_ldouble (__lrint, lrint)
diff --git a/sysdeps/x86_64/x32/gmp-mparam.h b/sysdeps/x86_64/x32/gmp-mparam.h
index df37442bfb..331c26d587 100644
--- a/sysdeps/x86_64/x32/gmp-mparam.h
+++ b/sysdeps/x86_64/x32/gmp-mparam.h
@@ -1,6 +1,6 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright (C) 2012-2016 Free Software Foundation, Inc.
+Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
diff --git a/sysdeps/x86_64/x32/nptl/tls.h b/sysdeps/x86_64/x32/nptl/tls.h
deleted file mode 100644
index 245623494b..0000000000
--- a/sysdeps/x86_64/x32/nptl/tls.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Definition for thread-local data handling. nptl/x32 version.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _X32_TLS_H
-#define _X32_TLS_H 1
-
-#include_next <tls.h>
-
-#ifndef __ASSEMBLER__
-
-/* X32 doesn't support 32-bit indirect calls via memory. Instead, we
- load the 32-bit address from memory into the lower 32 bits of the
- return-value register, which will automatically zero-extend the upper
- 32 bits of the return-value register. We then do the indirect call
- via the 64-bit return-value register. */
-# undef CALL_THREAD_FCT
-# define CALL_THREAD_FCT(descr) \
- ({ void *__res; \
- asm volatile ("movl %%fs:%P2, %%edi\n\t" \
- "movl %%fs:%P1, %k0\n\t" \
- "callq *%q0" \
- : "=a" (__res) \
- : "i" (offsetof (struct pthread, start_routine)), \
- "i" (offsetof (struct pthread, arg)) \
- : "di", "si", "cx", "dx", "r8", "r9", "r10", "r11", \
- "memory", "cc"); \
- __res; })
-
-#endif /* __ASSEMBLER__ */
-
-#endif /* x32/tls.h */
diff --git a/sysdeps/x86_64/x32/sysdep.h b/sysdeps/x86_64/x32/sysdep.h
index 17a1446796..b40a438771 100644
--- a/sysdeps/x86_64/x32/sysdep.h
+++ b/sysdeps/x86_64/x32/sysdep.h
@@ -1,5 +1,5 @@
/* Assembler macros for x32.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or