summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
authorSamuel Thibault <samuel.thibault@ens-lyon.org>2016-10-09 19:34:06 +0200
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2016-10-09 19:34:06 +0200
commit6772d640a4f4874166a61f1859e1660a2913a89d (patch)
tree839fea4d5dcefab75577cecb563ccad4234eb953 /sysdeps/x86_64
parentf98906bbb57cb495b4501afc5f18604ef3a94e2a (diff)
parent7bb5f8a836b916d6ebf7b6921b136e99cea2442d (diff)
Merge commit 'refs/top-bases/t/hurdsig-fixes' into t/hurdsig-fixes
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r--sysdeps/x86_64/Makefile26
-rw-r--r--sysdeps/x86_64/__longjmp.S2
-rw-r--r--sysdeps/x86_64/_mcount.S2
-rw-r--r--sysdeps/x86_64/add_n.S2
-rw-r--r--sysdeps/x86_64/addmul_1.S2
-rw-r--r--sysdeps/x86_64/atomic-machine.h (renamed from sysdeps/x86_64/bits/atomic.h)6
-rw-r--r--sysdeps/x86_64/backtrace.c15
-rw-r--r--sysdeps/x86_64/bsd-_setjmp.S2
-rw-r--r--sysdeps/x86_64/bsd-setjmp.S2
-rw-r--r--sysdeps/x86_64/cacheinfo.c139
-rw-r--r--sysdeps/x86_64/configure194
-rw-r--r--sysdeps/x86_64/configure.ac59
-rw-r--r--sysdeps/x86_64/crti.S2
-rw-r--r--sysdeps/x86_64/crtn.S2
-rw-r--r--sysdeps/x86_64/dl-irel.h2
-rw-r--r--sysdeps/x86_64/dl-lookupcfg.h5
-rw-r--r--sysdeps/x86_64/dl-machine.h34
-rw-r--r--sysdeps/x86_64/dl-procinfo.c57
-rw-r--r--sysdeps/x86_64/dl-tls.h2
-rw-r--r--sysdeps/x86_64/dl-tlsdesc.S2
-rw-r--r--sysdeps/x86_64/dl-tlsdesc.h7
-rw-r--r--sysdeps/x86_64/dl-trampoline.S464
-rw-r--r--sysdeps/x86_64/dl-trampoline.h364
-rw-r--r--sysdeps/x86_64/ffs.c2
-rw-r--r--sysdeps/x86_64/ffsll.c2
-rw-r--r--sysdeps/x86_64/fpu/Makefile4
-rw-r--r--sysdeps/x86_64/fpu/e_exp2l.S11
-rw-r--r--sysdeps/x86_64/fpu/e_expf.S2
-rw-r--r--sysdeps/x86_64/fpu/e_expl.S18
-rw-r--r--sysdeps/x86_64/fpu/e_log10l.S8
-rw-r--r--sysdeps/x86_64/fpu/e_log2l.S8
-rw-r--r--sysdeps/x86_64/fpu/e_logl.S8
-rw-r--r--sysdeps/x86_64/fpu/e_powl.S5
-rw-r--r--sysdeps/x86_64/fpu/e_sqrt.c2
-rw-r--r--sysdeps/x86_64/fpu/e_sqrtf.c2
-rw-r--r--sysdeps/x86_64/fpu/fclrexcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/fedisblxcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/feenablxcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/fegetenv.c2
-rw-r--r--sysdeps/x86_64/fpu/fegetexcept.c2
-rw-r--r--sysdeps/x86_64/fpu/fegetround.c2
-rw-r--r--sysdeps/x86_64/fpu/feholdexcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/fesetenv.c42
-rw-r--r--sysdeps/x86_64/fpu/fesetround.c2
-rw-r--r--sysdeps/x86_64/fpu/feupdateenv.c2
-rw-r--r--sysdeps/x86_64/fpu/fgetexcptflg.c2
-rw-r--r--sysdeps/x86_64/fpu/fraiseexcpt.c2
-rw-r--r--sysdeps/x86_64/fpu/fsetexcptflg.c2
-rw-r--r--sysdeps/x86_64/fpu/ftestexcept.c2
-rw-r--r--sysdeps/x86_64/fpu/libm-test-ulps366
-rw-r--r--sysdeps/x86_64/fpu/math-tests-arch.h44
-rw-r--r--sysdeps/x86_64/fpu/multiarch/Makefile6
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_asin.c20
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_atan2.c21
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_exp.c21
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_log.c21
-rw-r--r--sysdeps/x86_64/fpu/multiarch/e_pow.c15
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_atan.c19
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_ceil.S7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_ceilf.S7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_floor.S7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_floorf.S7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_fma.c18
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_fmaf.c18
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_nearbyint.S7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rint.S7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_rintf.S7
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_sin.c29
-rw-r--r--sysdeps/x86_64/fpu/multiarch/s_tan.c19
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S10
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S2
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S8
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S2
-rw-r--r--sysdeps/x86_64/fpu/printf_fphex.c2
-rw-r--r--sysdeps/x86_64/fpu/s_copysign.S2
-rw-r--r--sysdeps/x86_64/fpu/s_copysignf.S2
-rw-r--r--sysdeps/x86_64/fpu/s_cosf.S8
-rw-r--r--sysdeps/x86_64/fpu/s_fabs.c2
-rw-r--r--sysdeps/x86_64/fpu/s_fabsf.c2
-rw-r--r--sysdeps/x86_64/fpu/s_fabsl.S2
-rw-r--r--sysdeps/x86_64/fpu/s_fdiml.S2
-rw-r--r--sysdeps/x86_64/fpu/s_fmax.S2
-rw-r--r--sysdeps/x86_64/fpu/s_fmaxf.S2
-rw-r--r--sysdeps/x86_64/fpu/s_fmaxl.S2
-rw-r--r--sysdeps/x86_64/fpu/s_fmin.S2
-rw-r--r--sysdeps/x86_64/fpu/s_fminf.S2
-rw-r--r--sysdeps/x86_64/fpu/s_fminl.S2
-rw-r--r--sysdeps/x86_64/fpu/s_llrint.S4
-rw-r--r--sysdeps/x86_64/fpu/s_llrintf.S4
-rw-r--r--sysdeps/x86_64/fpu/s_llrintl.S5
-rw-r--r--sysdeps/x86_64/fpu/s_nearbyintl.S14
-rw-r--r--sysdeps/x86_64/fpu/s_scalbnl.S1
-rw-r--r--sysdeps/x86_64/fpu/s_signbit.S2
-rw-r--r--sysdeps/x86_64/fpu/s_signbitf.S2
-rw-r--r--sysdeps/x86_64/fpu/s_sincosf.S8
-rw-r--r--sysdeps/x86_64/fpu/s_sinf.S8
-rw-r--r--sysdeps/x86_64/fpu/s_truncl.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_cos2_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_cos4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_cos8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp2_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log2_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_log_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow2_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_pow_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sin2_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sin4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sin8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos2_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_sincos8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_trig_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_trig_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_d_wrapper_impl.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_finite_alias.S58
-rw-r--r--sysdeps/x86_64/fpu/svml_s_cosf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_cosf4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_cosf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_expf_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_logf_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_powf_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sincosf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sincosf4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sincosf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sinf16_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sinf4_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sinf8_core.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_trig_data.S2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_trig_data.h2
-rw-r--r--sysdeps/x86_64/fpu/svml_s_wrapper_impl.h2
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c2
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen2.c2
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c2
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-avx2.c2
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c2
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4.c2
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c2
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen8.c2
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c2
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen16.c2
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c2
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen4.c2
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c2
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen8-avx2.c2
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c2
-rw-r--r--sysdeps/x86_64/fpu/test-float-vlen8.c2
-rw-r--r--sysdeps/x86_64/fpu/x86_64-math-asm.h74
-rw-r--r--sysdeps/x86_64/hp-timing.h2
-rw-r--r--sysdeps/x86_64/htonl.S2
-rw-r--r--sysdeps/x86_64/ifuncmain8.c32
-rw-r--r--sysdeps/x86_64/ifuncmod8.c36
-rw-r--r--sysdeps/x86_64/jmpbuf-offsets.h2
-rw-r--r--sysdeps/x86_64/jmpbuf-unwind.h2
-rw-r--r--sysdeps/x86_64/ldsodefs.h3
-rw-r--r--sysdeps/x86_64/localplt.data14
-rw-r--r--sysdeps/x86_64/lshift.S2
-rw-r--r--sysdeps/x86_64/machine-gmon.h2
-rw-r--r--sysdeps/x86_64/memchr.S2
-rw-r--r--sysdeps/x86_64/memcmp.S2
-rw-r--r--sysdeps/x86_64/memcpy.S2
-rw-r--r--sysdeps/x86_64/memcpy_chk.S2
-rw-r--r--sysdeps/x86_64/memmove.c2
-rw-r--r--sysdeps/x86_64/mempcpy_chk.S2
-rw-r--r--sysdeps/x86_64/memrchr.S2
-rw-r--r--sysdeps/x86_64/memset.S54
-rw-r--r--sysdeps/x86_64/memset_chk.S2
-rw-r--r--sysdeps/x86_64/memusage.h2
-rw-r--r--sysdeps/x86_64/mul_1.S2
-rw-r--r--sysdeps/x86_64/multiarch/Makefile24
-rw-r--r--sysdeps/x86_64/multiarch/Versions5
-rw-r--r--sysdeps/x86_64/multiarch/cacheinfo.c2
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-defines.sym1
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c194
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.c223
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.h206
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-sse4.S2
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/memcmp.S11
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S408
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S6
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3-back.S2
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/memcpy.S30
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk.S20
-rw-r--r--sysdeps/x86_64/multiarch/memmove-avx-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S22
-rw-r--r--sysdeps/x86_64/multiarch/memmove.c19
-rw-r--r--sysdeps/x86_64/multiarch/memmove_chk.c17
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S22
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy.S20
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk.S20
-rw-r--r--sysdeps/x86_64/multiarch/memset-avx2.S2
-rw-r--r--sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S194
-rw-r--r--sysdeps/x86_64/multiarch/memset.S45
-rw-r--r--sysdeps/x86_64/multiarch/memset_chk.S19
-rw-r--r--sysdeps/x86_64/multiarch/rtld-memcmp.c1
-rw-r--r--sysdeps/x86_64/multiarch/rtld-memset.S1
-rw-r--r--sysdeps/x86_64/multiarch/sched_cpucount.c4
-rw-r--r--sysdeps/x86_64/multiarch/strcasestr.c13
-rw-r--r--sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcat-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcat.S12
-rw-r--r--sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S2
-rw-r--r--sysdeps/x86_64/multiarch/strchr.S10
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S6
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-sse42.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcmp.S66
-rw-r--r--sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcpy-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/strcpy.S12
-rw-r--r--sysdeps/x86_64/multiarch/strcspn-c.c2
-rw-r--r--sysdeps/x86_64/multiarch/strcspn.S14
-rw-r--r--sysdeps/x86_64/multiarch/strspn-c.c2
-rw-r--r--sysdeps/x86_64/multiarch/strspn.S15
-rw-r--r--sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S2
-rw-r--r--sysdeps/x86_64/multiarch/strstr.c7
-rw-r--r--sysdeps/x86_64/multiarch/test-multiarch.c20
-rw-r--r--sysdeps/x86_64/multiarch/varshift.c2
-rw-r--r--sysdeps/x86_64/multiarch/varshift.h2
-rw-r--r--sysdeps/x86_64/multiarch/wcscpy-ssse3.S2
-rw-r--r--sysdeps/x86_64/multiarch/wcscpy.S9
-rw-r--r--sysdeps/x86_64/multiarch/wmemcmp.S11
-rw-r--r--sysdeps/x86_64/nptl/Makefile2
-rw-r--r--sysdeps/x86_64/nptl/pthread_spin_lock.S10
-rw-r--r--sysdeps/x86_64/nptl/pthread_spin_trylock.S10
-rw-r--r--sysdeps/x86_64/nptl/pthread_spin_unlock.S11
-rw-r--r--sysdeps/x86_64/nptl/pthreaddef.h2
-rw-r--r--sysdeps/x86_64/nptl/tcb-offsets.sym1
-rw-r--r--sysdeps/x86_64/nptl/tls.h44
-rw-r--r--sysdeps/x86_64/rawmemchr.S2
-rw-r--r--sysdeps/x86_64/rshift.S2
-rw-r--r--sysdeps/x86_64/rtld-memcmp.c1
-rw-r--r--sysdeps/x86_64/rtld-strchr.S288
-rw-r--r--sysdeps/x86_64/rtld-strlen.S136
-rw-r--r--sysdeps/x86_64/sched_cpucount.c2
-rw-r--r--sysdeps/x86_64/setjmp.S2
-rw-r--r--sysdeps/x86_64/stackinfo.h2
-rw-r--r--sysdeps/x86_64/start.S2
-rw-r--r--sysdeps/x86_64/stpcpy_chk.S3
-rw-r--r--sysdeps/x86_64/strcat.S2
-rw-r--r--sysdeps/x86_64/strchr.S2
-rw-r--r--sysdeps/x86_64/strchrnul.S2
-rw-r--r--sysdeps/x86_64/strcmp.S471
-rw-r--r--sysdeps/x86_64/strcpy.S2
-rw-r--r--sysdeps/x86_64/strcpy_chk.S208
-rw-r--r--sysdeps/x86_64/strcspn.S2
-rw-r--r--sysdeps/x86_64/strlen.S96
-rw-r--r--sysdeps/x86_64/strrchr.S2
-rw-r--r--sysdeps/x86_64/strspn.S2
-rw-r--r--sysdeps/x86_64/strtok.S2
-rw-r--r--sysdeps/x86_64/sub_n.S2
-rw-r--r--sysdeps/x86_64/submul_1.S2
-rw-r--r--sysdeps/x86_64/sysdep.h2
-rw-r--r--sysdeps/x86_64/tlsdesc.c2
-rw-r--r--sysdeps/x86_64/tst-audit.h2
-rw-r--r--sysdeps/x86_64/tst-audit10.c2
-rw-r--r--sysdeps/x86_64/tst-auditmod10a.c2
-rw-r--r--sysdeps/x86_64/tst-auditmod10b.c2
-rw-r--r--sysdeps/x86_64/tst-mallocalign1.c2
-rw-r--r--sysdeps/x86_64/tst-quad1.c2
-rw-r--r--sysdeps/x86_64/tst-quadmod1.S2
-rw-r--r--sysdeps/x86_64/tst-quadmod2.S2
-rw-r--r--sysdeps/x86_64/tst-split-dynreloc.c28
-rw-r--r--sysdeps/x86_64/tst-split-dynreloc.lds5
-rw-r--r--sysdeps/x86_64/tst-stack-align.h2
-rw-r--r--sysdeps/x86_64/wcschr.S2
-rw-r--r--sysdeps/x86_64/wcscmp.S2
-rw-r--r--sysdeps/x86_64/wcslen.S2
-rw-r--r--sysdeps/x86_64/wcsrchr.S2
-rw-r--r--sysdeps/x86_64/x32/dl-machine.h2
-rw-r--r--sysdeps/x86_64/x32/fpu/s_lrint.S (renamed from sysdeps/x86_64/rtld-memset.S)24
-rw-r--r--sysdeps/x86_64/x32/fpu/s_lrintf.S27
-rw-r--r--sysdeps/x86_64/x32/fpu/s_lrintl.S30
-rw-r--r--sysdeps/x86_64/x32/gmp-mparam.h2
-rw-r--r--sysdeps/x86_64/x32/nptl/tls.h2
-rw-r--r--sysdeps/x86_64/x32/sysdep.h6
381 files changed, 2909 insertions, 3519 deletions
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index ef70a50c84..67ed5ba213 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -19,8 +19,17 @@ gen-as-const-headers += locale-defines.sym
endif
ifeq ($(subdir),elf)
+# There is no good reason to use MMX in x86-64 ld.so with GCC.
+CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
+ -mno-mmx)
+
sysdep-dl-routines += tlsdesc dl-tlsdesc
+tests += ifuncmain8
+modules-names += ifuncmod8
+
+$(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so
+
tests += tst-quad1 tst-quad2
modules-names += tst-quadmod1 tst-quadmod2
@@ -34,10 +43,12 @@ tests-pie += $(quad-pie-test)
$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
-tests += tst-audit3 tst-audit4 tst-audit5 tst-audit10
-ifeq (yes,$(config-cflags-avx))
-tests += tst-audit6 tst-audit7
-endif
+tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 tst-audit10
+
+tests += tst-split-dynreloc
+LDFLAGS-tst-split-dynreloc = -Wl,-T,$(..)sysdeps/x86_64/tst-split-dynreloc.lds
+tst-split-dynreloc-ENV = LD_BIND_NOW=1
+
modules-names += tst-auditmod3a tst-auditmod3b \
tst-auditmod4a tst-auditmod4b \
tst-auditmod5a tst-auditmod5b \
@@ -70,18 +81,13 @@ $(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so
$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
-ifeq (yes,$(config-cflags-avx))
-AVX-CFLAGS=-mavx
-ifeq (yes,$(config-cflags-novzeroupper))
-AVX-CFLAGS+=-mno-vzeroupper
-endif
+AVX-CFLAGS=-mavx -mno-vzeroupper
CFLAGS-tst-audit4.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod4a.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod4b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS)
-endif
ifeq (yes,$(config-cflags-avx512))
AVX512-CFLAGS = -mavx512f
CFLAGS-tst-audit10.c += $(AVX512-CFLAGS)
diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S
index a410efb08c..c164626577 100644
--- a/sysdeps/x86_64/__longjmp.S
+++ b/sysdeps/x86_64/__longjmp.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/_mcount.S b/sysdeps/x86_64/_mcount.S
index 01787f95f7..5d7edd2a29 100644
--- a/sysdeps/x86_64/_mcount.S
+++ b/sysdeps/x86_64/_mcount.S
@@ -1,5 +1,5 @@
/* Machine-specific calling sequence for `mcount' profiling function. x86-64 version.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
Contributed by Andreas Jaeger <aj@suse.de>.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/add_n.S b/sysdeps/x86_64/add_n.S
index b8e7c3e067..fc99811476 100644
--- a/sysdeps/x86_64/add_n.S
+++ b/sysdeps/x86_64/add_n.S
@@ -1,6 +1,6 @@
/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
- Copyright (C) 2006-2015 Free Software Foundation, Inc.
+ Copyright (C) 2006-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/addmul_1.S b/sysdeps/x86_64/addmul_1.S
index 829e01eff9..ab7c2fa701 100644
--- a/sysdeps/x86_64/addmul_1.S
+++ b/sysdeps/x86_64/addmul_1.S
@@ -1,6 +1,6 @@
/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
the result to a second limb vector.
- Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Copyright (C) 2003-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/bits/atomic.h b/sysdeps/x86_64/atomic-machine.h
index 337b334db1..a5b86eb3ce 100644
--- a/sysdeps/x86_64/bits/atomic.h
+++ b/sysdeps/x86_64/atomic-machine.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -56,11 +56,7 @@ typedef uintmax_t uatomic_max_t;
#endif
#define __HAVE_64B_ATOMICS 1
-#if __GNUC_PREREQ (4, 7)
#define USE_ATOMIC_COMPILER_BUILTINS 1
-#else
-#define USE_ATOMIC_COMPILER_BUILTINS 0
-#endif
#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
__sync_val_compare_and_swap (mem, oldval, newval)
diff --git a/sysdeps/x86_64/backtrace.c b/sysdeps/x86_64/backtrace.c
index 2a3848d20f..e04407c516 100644
--- a/sysdeps/x86_64/backtrace.c
+++ b/sysdeps/x86_64/backtrace.c
@@ -1,5 +1,5 @@
/* Return backtrace of current program state.
- Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Copyright (C) 2003-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
@@ -17,7 +17,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <bits/libc-lock.h>
+#include <libc-lock.h>
#include <dlfcn.h>
#include <execinfo.h>
#include <stdlib.h>
@@ -92,11 +92,13 @@ backtrace_helper (struct _Unwind_Context *ctx, void *a)
}
int
-__backtrace (array, size)
- void **array;
- int size;
+__backtrace (void **array, int size)
{
struct trace_arg arg = { .array = array, .cfa = 0, .size = size, .cnt = -1 };
+
+ if (size <= 0)
+ return 0;
+
#ifdef SHARED
__libc_once_define (static, once);
@@ -105,8 +107,7 @@ __backtrace (array, size)
return 0;
#endif
- if (size >= 1)
- unwind_backtrace (backtrace_helper, &arg);
+ unwind_backtrace (backtrace_helper, &arg);
/* _Unwind_Backtrace seems to put NULL address above
_start. Fix it up here. */
diff --git a/sysdeps/x86_64/bsd-_setjmp.S b/sysdeps/x86_64/bsd-_setjmp.S
index fed6afd97b..1a2a94f1a6 100644
--- a/sysdeps/x86_64/bsd-_setjmp.S
+++ b/sysdeps/x86_64/bsd-_setjmp.S
@@ -1,5 +1,5 @@
/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. x86-64 version.
- Copyright (C) 1994-2015 Free Software Foundation, Inc.
+ Copyright (C) 1994-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/bsd-setjmp.S b/sysdeps/x86_64/bsd-setjmp.S
index 6a078dd786..11d9d8daa0 100644
--- a/sysdeps/x86_64/bsd-setjmp.S
+++ b/sysdeps/x86_64/bsd-setjmp.S
@@ -1,5 +1,5 @@
/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. x86-64 version.
- Copyright (C) 1994-2015 Free Software Foundation, Inc.
+ Copyright (C) 1994-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index b99fb9a762..96463df064 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -1,5 +1,5 @@
/* x86_64 cache info.
- Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Copyright (C) 2003-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,40 +21,11 @@
#include <stdlib.h>
#include <unistd.h>
#include <cpuid.h>
+#include <init-arch.h>
-#ifndef __cpuid_count
-/* FIXME: Provide __cpuid_count if it isn't defined. Copied from gcc
- 4.4.0. Remove this if gcc 4.4 is the minimum requirement. */
-# if defined(__i386__) && defined(__PIC__)
-/* %ebx may be the PIC register. */
-# define __cpuid_count(level, count, a, b, c, d) \
- __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
- "cpuid\n\t" \
- "xchg{l}\t{%%}ebx, %1\n\t" \
- : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
- : "0" (level), "2" (count))
-# else
-# define __cpuid_count(level, count, a, b, c, d) \
- __asm__ ("cpuid\n\t" \
- : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
- : "0" (level), "2" (count))
-# endif
-#endif
-
-#ifdef USE_MULTIARCH
-# include "multiarch/init-arch.h"
-
-# define is_intel __cpu_features.kind == arch_kind_intel
-# define is_amd __cpu_features.kind == arch_kind_amd
-# define max_cpuid __cpu_features.max_cpuid
-#else
- /* This spells out "GenuineIntel". */
-# define is_intel \
- ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
- /* This spells out "AuthenticAMD". */
-# define is_amd \
- ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
-#endif
+#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
+#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
+#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
static const struct intel_02_cache_info
{
@@ -235,21 +206,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
/* Intel reused this value. For family 15, model 6 it
specifies the 3rd level cache. Otherwise the 2nd
level cache. */
- unsigned int family;
- unsigned int model;
-#ifdef USE_MULTIARCH
- family = __cpu_features.family;
- model = __cpu_features.model;
-#else
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- __cpuid (1, eax, ebx, ecx, edx);
-
- family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
- model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
-#endif
+ unsigned int family = GLRO(dl_x86_cpu_features).family;
+ unsigned int model = GLRO(dl_x86_cpu_features).model;
if (family == 15 && model == 6)
{
@@ -476,18 +434,6 @@ long int
attribute_hidden
__cache_sysconf (int name)
{
-#ifdef USE_MULTIARCH
- if (__cpu_features.kind == arch_kind_unknown)
- __init_cpu_features ();
-#else
- /* Find out what brand of processor. */
- unsigned int max_cpuid;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- __cpuid (0, max_cpuid, ebx, ecx, edx);
-#endif
-
if (is_intel)
return handle_intel (name, max_cpuid);
@@ -523,18 +469,6 @@ long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
int __x86_prefetchw attribute_hidden;
#endif
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
-/* Instructions preferred for memory and string routines.
-
- 0: Regular instructions
- 1: MMX instructions
- 2: SSE2 instructions
- 3: SSSE3 instructions
-
- */
-int __x86_preferred_memory_instruction attribute_hidden;
-#endif
-
static void
__attribute__((constructor))
@@ -551,14 +485,6 @@ init_cacheinfo (void)
unsigned int level;
unsigned int threads = 0;
-#ifdef USE_MULTIARCH
- if (__cpu_features.kind == arch_kind_unknown)
- __init_cpu_features ();
-#else
- int max_cpuid;
- __cpuid (0, max_cpuid, ebx, ecx, edx);
-#endif
-
if (is_intel)
{
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
@@ -574,34 +500,13 @@ init_cacheinfo (void)
shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
}
- unsigned int ebx_1;
-
-#ifdef USE_MULTIARCH
- eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
- edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
-#else
- __cpuid (1, eax, ebx_1, ecx, edx);
-#endif
-
- unsigned int family = (eax >> 8) & 0x0f;
- unsigned int model = (eax >> 4) & 0x0f;
- unsigned int extended_model = (eax >> 12) & 0xf0;
-
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
- /* Intel prefers SSSE3 instructions for memory/string routines
- if they are available. */
- if ((ecx & 0x200))
- __x86_preferred_memory_instruction = 3;
- else
- __x86_preferred_memory_instruction = 2;
-#endif
-
/* Figure out the number of logical threads that share the
highest cache level. */
if (max_cpuid >= 4)
{
+ unsigned int family = GLRO(dl_x86_cpu_features).family;
+ unsigned int model = GLRO(dl_x86_cpu_features).model;
+
int i = 0;
/* Query until desired cache level is enumerated. */
@@ -653,7 +558,6 @@ init_cacheinfo (void)
threads += 1;
if (threads > 2 && level == 2 && family == 6)
{
- model += extended_model;
switch (model)
{
case 0x57:
@@ -676,7 +580,9 @@ init_cacheinfo (void)
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache level. */
- threads = (ebx_1 >> 16) & 0xff;
+ threads
+ = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
+ >> 16) & 0xff);
}
/* Cap usage of highest cache level to the number of supported
@@ -691,25 +597,6 @@ init_cacheinfo (void)
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
-#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
-# ifdef USE_MULTIARCH
- eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
- edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
-# else
- __cpuid (1, eax, ebx, ecx, edx);
-# endif
-
- /* AMD prefers SSSE3 instructions for memory/string routines
- if they are avaiable, otherwise it prefers integer
- instructions. */
- if ((ecx & 0x200))
- __x86_preferred_memory_instruction = 3;
- else
- __x86_preferred_memory_instruction = 0;
-#endif
-
/* Get maximum extended function. */
__cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index 552f535ac6..c72b9d3184 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -1,100 +1,6 @@
-
-# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists and can be compiled using the include files in
-# INCLUDES, setting the cache variable VAR accordingly.
-ac_fn_c_check_header_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- eval "$3=yes"
-else
- eval "$3=no"
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-
-} # ac_fn_c_check_header_compile
# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/x86_64.
-
-ac_fn_c_check_header_compile "$LINENO" "cpuid.h" "ac_cv_header_cpuid_h" "/* No default includes. */
-"
-if test "x$ac_cv_header_cpuid_h" = xyes; then :
-
-else
- as_fn_error $? "gcc must provide the <cpuid.h> header" "$LINENO" 5
-fi
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SSE4 support" >&5
-$as_echo_n "checking for SSE4 support... " >&6; }
-if ${libc_cv_cc_sse4+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if { ac_try='${CC-cc} -msse4 -xc /dev/null -S -o /dev/null'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- libc_cv_cc_sse4=yes
-else
- libc_cv_cc_sse4=no
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse4" >&5
-$as_echo "$libc_cv_cc_sse4" >&6; }
-if test $libc_cv_cc_sse4 = yes; then
- $as_echo "#define HAVE_SSE4_SUPPORT 1" >>confdefs.h
-
-fi
-config_vars="$config_vars
-config-cflags-sse4 = $libc_cv_cc_sse4"
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX support" >&5
-$as_echo_n "checking for AVX support... " >&6; }
-if ${libc_cv_cc_avx+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if { ac_try='${CC-cc} -mavx -xc /dev/null -S -o /dev/null'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- libc_cv_cc_avx=yes
-else
- libc_cv_cc_avx=no
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx" >&5
-$as_echo "$libc_cv_cc_avx" >&6; }
-if test $libc_cv_cc_avx = yes; then
- $as_echo "#define HAVE_AVX_SUPPORT 1" >>confdefs.h
-
-fi
-config_vars="$config_vars
-config-cflags-avx = $libc_cv_cc_avx"
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5
$as_echo_n "checking for AVX512 support in assembler... " >&6; }
if ${libc_cv_asm_avx512+:} false; then :
@@ -149,80 +55,6 @@ fi
config_vars="$config_vars
config-cflags-avx512 = $libc_cv_cc_avx512"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
-$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
-if ${libc_cv_cc_sse2avx+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- libc_cv_cc_sse2avx=yes
-else
- libc_cv_cc_sse2avx=no
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
-$as_echo "$libc_cv_cc_sse2avx" >&6; }
-if test $libc_cv_cc_sse2avx = yes; then
- $as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
-
-fi
-config_vars="$config_vars
-config-cflags-sse2avx = $libc_cv_cc_sse2avx"
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
-$as_echo_n "checking for FMA4 support... " >&6; }
-if ${libc_cv_cc_fma4+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if { ac_try='${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- libc_cv_cc_fma4=yes
-else
- libc_cv_cc_fma4=no
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_fma4" >&5
-$as_echo "$libc_cv_cc_fma4" >&6; }
-if test $libc_cv_cc_fma4 = yes; then
- $as_echo "#define HAVE_FMA4_SUPPORT 1" >>confdefs.h
-
-fi
-config_vars="$config_vars
-have-mfma4 = $libc_cv_cc_fma4"
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -mno-vzeroupper support" >&5
-$as_echo_n "checking for -mno-vzeroupper support... " >&6; }
-if ${libc_cv_cc_novzeroupper+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if { ac_try='${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- libc_cv_cc_novzeroupper=yes
-else
- libc_cv_cc_novzeroupper=no
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_novzeroupper" >&5
-$as_echo "$libc_cv_cc_novzeroupper" >&6; }
-config_vars="$config_vars
-config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
$as_echo_n "checking for Intel MPX support... " >&6; }
if ${libc_cv_asm_mpx+:} false; then :
@@ -250,32 +82,6 @@ if test $libc_cv_asm_mpx == yes; then
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
-$as_echo_n "checking for AVX2 support... " >&6; }
-if ${libc_cv_cc_avx2+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if { ac_try='${CC-cc} -mavx2 -xc /dev/null -S -o /dev/null'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- libc_cv_cc_avx2=yes
-else
- libc_cv_cc_avx2=no
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx2" >&5
-$as_echo "$libc_cv_cc_avx2" >&6; }
-if test $libc_cv_cc_avx2 = yes; then
- $as_echo "#define HAVE_AVX2_SUPPORT 1" >>confdefs.h
-
-fi
-config_vars="$config_vars
-config-cflags-avx2 = $libc_cv_cc_avx2"
-
if test x"$build_mathvec" = xnotset; then
build_mathvec=yes
fi
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index e7208c9b30..37b1059af3 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -1,28 +1,6 @@
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/x86_64.
-AC_CHECK_HEADER([cpuid.h], ,
- [AC_MSG_ERROR([gcc must provide the <cpuid.h> header])],
- [/* No default includes. */])
-
-dnl Check if -msse4 works.
-AC_CACHE_CHECK(for SSE4 support, libc_cv_cc_sse4, [dnl
-LIBC_TRY_CC_OPTION([-msse4], [libc_cv_cc_sse4=yes], [libc_cv_cc_sse4=no])
-])
-if test $libc_cv_cc_sse4 = yes; then
- AC_DEFINE(HAVE_SSE4_SUPPORT)
-fi
-LIBC_CONFIG_VAR([config-cflags-sse4], [$libc_cv_cc_sse4])
-
-dnl Check if -mavx works.
-AC_CACHE_CHECK(for AVX support, libc_cv_cc_avx, [dnl
-LIBC_TRY_CC_OPTION([-mavx], [libc_cv_cc_avx=yes], [libc_cv_cc_avx=no])
-])
-if test $libc_cv_cc_avx = yes; then
- AC_DEFINE(HAVE_AVX_SUPPORT)
-fi
-LIBC_CONFIG_VAR([config-cflags-avx], [$libc_cv_cc_avx])
-
dnl Check if asm supports AVX512.
AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl
cat > conftest.s <<\EOF
@@ -48,34 +26,6 @@ if test $libc_cv_cc_avx512 = yes; then
fi
LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512])
-dnl Check if -msse2avx works.
-AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
-LIBC_TRY_CC_OPTION([-msse2avx],
- [libc_cv_cc_sse2avx=yes],
- [libc_cv_cc_sse2avx=no])
-])
-if test $libc_cv_cc_sse2avx = yes; then
- AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
-fi
-LIBC_CONFIG_VAR([config-cflags-sse2avx], [$libc_cv_cc_sse2avx])
-
-dnl Check if -mfma4 works.
-AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
-LIBC_TRY_CC_OPTION([-mfma4], [libc_cv_cc_fma4=yes], [libc_cv_cc_fma4=no])
-])
-if test $libc_cv_cc_fma4 = yes; then
- AC_DEFINE(HAVE_FMA4_SUPPORT)
-fi
-LIBC_CONFIG_VAR([have-mfma4], [$libc_cv_cc_fma4])
-
-dnl Check if -mno-vzeroupper works.
-AC_CACHE_CHECK(for -mno-vzeroupper support, libc_cv_cc_novzeroupper, [dnl
-LIBC_TRY_CC_OPTION([-mno-vzeroupper],
- [libc_cv_cc_novzeroupper=yes],
- [libc_cv_cc_novzeroupper=no])
-])
-LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
-
dnl Check whether asm supports Intel MPX
AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
cat > conftest.s <<\EOF
@@ -91,15 +41,6 @@ if test $libc_cv_asm_mpx == yes; then
AC_DEFINE(HAVE_MPX_SUPPORT)
fi
-dnl Check if -mavx2 works.
-AC_CACHE_CHECK(for AVX2 support, libc_cv_cc_avx2, [dnl
-LIBC_TRY_CC_OPTION([-mavx2], [libc_cv_cc_avx2=yes], [libc_cv_cc_avx2=no])
-])
-if test $libc_cv_cc_avx2 = yes; then
- AC_DEFINE(HAVE_AVX2_SUPPORT)
-fi
-LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2])
-
if test x"$build_mathvec" = xnotset; then
build_mathvec=yes
fi
diff --git a/sysdeps/x86_64/crti.S b/sysdeps/x86_64/crti.S
index 595b0fe83b..a34525974a 100644
--- a/sysdeps/x86_64/crti.S
+++ b/sysdeps/x86_64/crti.S
@@ -1,5 +1,5 @@
/* Special .init and .fini section support for x86-64.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/crtn.S b/sysdeps/x86_64/crtn.S
index e2d6de73e4..b2fa0c6765 100644
--- a/sysdeps/x86_64/crtn.S
+++ b/sysdeps/x86_64/crtn.S
@@ -1,5 +1,5 @@
/* Special .init and .fini section support for x86-64.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/dl-irel.h b/sysdeps/x86_64/dl-irel.h
index d0fa4330cc..80d7d1dd78 100644
--- a/sysdeps/x86_64/dl-irel.h
+++ b/sysdeps/x86_64/dl-irel.h
@@ -1,6 +1,6 @@
/* Machine-dependent ELF indirect relocation inline functions.
x86-64 version.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/dl-lookupcfg.h b/sysdeps/x86_64/dl-lookupcfg.h
index 310f261fec..033b475889 100644
--- a/sysdeps/x86_64/dl-lookupcfg.h
+++ b/sysdeps/x86_64/dl-lookupcfg.h
@@ -1,5 +1,5 @@
/* Configuration of lookup functions.
- Copyright (C) 2005-2015 Free Software Foundation, Inc.
+ Copyright (C) 2005-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -26,6 +26,7 @@
struct link_map;
-extern void internal_function _dl_unmap (struct link_map *map);
+extern void _dl_unmap (struct link_map *map)
+ internal_function attribute_hidden;
#define DL_UNMAP(map) _dl_unmap (map)
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index cae6db3560..980ca73cf2 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -1,5 +1,5 @@
/* Machine-dependent ELF dynamic relocation inline functions. x86-64 version.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>.
@@ -26,6 +26,7 @@
#include <sysdep.h>
#include <tls.h>
#include <dl-tlsdesc.h>
+#include <cpu-features.c>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused))
@@ -65,8 +66,12 @@ static inline int __attribute__ ((unused, always_inline))
elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
{
Elf64_Addr *got;
- extern void _dl_runtime_resolve (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_profile (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
if (l->l_info[DT_JMPREL] && lazy)
{
@@ -94,7 +99,12 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
end in this function. */
if (__glibc_unlikely (profile))
{
- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile;
+ if (HAS_ARCH_FEATURE (AVX512F_Usable))
+ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
+ else if (HAS_ARCH_FEATURE (AVX_Usable))
+ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx;
+ else
+ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse;
if (GLRO(dl_profile) != NULL
&& _dl_name_match_p (GLRO(dl_profile), l))
@@ -103,9 +113,17 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
GL(dl_profile_map) = l;
}
else
- /* This function will get called to fix up the GOT entry indicated by
- the offset on the stack, and then jump to the resolved address. */
- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve;
+ {
+ /* This function will get called to fix up the GOT entry
+ indicated by the offset on the stack, and then jump to
+ the resolved address. */
+ if (HAS_ARCH_FEATURE (AVX512F_Usable))
+ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_avx512;
+ else if (HAS_ARCH_FEATURE (AVX_Usable))
+ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_avx;
+ else
+ *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse;
+ }
}
if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
@@ -205,6 +223,8 @@ dl_platform_init (void)
if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
/* Avoid an empty string which would disturb us. */
GLRO(dl_platform) = NULL;
+
+ init_cpu_features (&GLRO(dl_x86_cpu_features));
}
static inline ElfW(Addr)
diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
new file mode 100644
index 0000000000..4625695dfb
--- /dev/null
+++ b/sysdeps/x86_64/dl-procinfo.c
@@ -0,0 +1,57 @@
+/* Data for x86-64 version of processor capability information.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* If anything is added here, check whether the size of each string
+ still fits within the given array size.
+
+ All the #ifdefs in the definitions are quite irritating but
+ necessary if we want to avoid duplicating the information. There
+ are three different modes:
+
+ - PROCINFO_DECL is defined. This means we are only interested in
+ declarations.
+
+ - PROCINFO_DECL is not defined:
+
+ + if SHARED is defined the file is included in an array
+ initializer. The .element = { ... } syntax is needed.
+
+ + if SHARED is not defined a normal array initialization is
+ needed.
+ */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !defined PROCINFO_DECL && defined SHARED
+ ._dl_x86_cpu_features
+#else
+PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
+#endif
+#ifndef PROCINFO_DECL
+= { }
+#endif
+#if !defined SHARED || defined PROCINFO_DECL
+;
+#else
+,
+#endif
+
+#undef PROCINFO_DECL
+#undef PROCINFO_CLASS
diff --git a/sysdeps/x86_64/dl-tls.h b/sysdeps/x86_64/dl-tls.h
index 285799b674..0f101e6ac6 100644
--- a/sysdeps/x86_64/dl-tls.h
+++ b/sysdeps/x86_64/dl-tls.h
@@ -1,5 +1,5 @@
/* Thread-local storage handling in the ELF dynamic linker. x86-64 version.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index edb6328f8e..3cb7c3d031 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -1,5 +1,5 @@
/* Thread-local storage handling in the ELF dynamic linker. x86_64 version.
- Copyright (C) 2004-2015 Free Software Foundation, Inc.
+ Copyright (C) 2004-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/dl-tlsdesc.h b/sysdeps/x86_64/dl-tlsdesc.h
index cf32328264..11e1a50b8f 100644
--- a/sysdeps/x86_64/dl-tlsdesc.h
+++ b/sysdeps/x86_64/dl-tlsdesc.h
@@ -1,6 +1,6 @@
/* Thread-local storage descriptor handling in the ELF dynamic linker.
x86_64 version.
- Copyright (C) 2005-2015 Free Software Foundation, Inc.
+ Copyright (C) 2005-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -60,8 +60,9 @@ extern ptrdiff_t attribute_hidden
_dl_tlsdesc_resolve_hold(struct tlsdesc *on_rax);
# ifdef SHARED
-extern void *internal_function _dl_make_tlsdesc_dynamic (struct link_map *map,
- size_t ti_offset);
+extern void *_dl_make_tlsdesc_dynamic (struct link_map *map,
+ size_t ti_offset)
+ internal_function attribute_hidden;
extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic(struct tlsdesc *);
# endif
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 678c57fc24..9fb6b13983 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -1,5 +1,5 @@
/* PLT trampolines. x86-64 version.
- Copyright (C) 2004-2015 Free Software Foundation, Inc.
+ Copyright (C) 2004-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,23 +20,40 @@
#include <sysdep.h>
#include <link-defines.h>
-#if (RTLD_SAVESPACE_SSE % 32) != 0
-# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
+#ifndef DL_STACK_ALIGNMENT
+/* Due to GCC bug:
+
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+
+ __tls_get_addr may be called with 8-byte stack alignment. Although
+ this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
+ that the stack will always be aligned to 16 bytes. We use unaligned
+ 16-byte moves to load and store SSE registers, which have no penalty
+ on modern processors if the stack is 16-byte aligned. */
+# define DL_STACK_ALIGNMENT 8
+#endif
+
+#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE
+/* The maximum size of unaligned vector load and store. */
+# define DL_RUNIME_UNALIGNED_VEC_SIZE 16
#endif
+/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */
+#define DL_RUNIME_RESOLVE_REALIGN_STACK \
+ (VEC_SIZE > DL_STACK_ALIGNMENT \
+ && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE)
+
+/* Align vector register save area to 16 bytes. */
+#define REGISTER_SAVE_VEC_OFF 0
+
/* Area on stack to save and restore registers used for parameter
passing when calling _dl_fixup. */
#ifdef __ILP32__
-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX. */
-# define REGISTER_SAVE_AREA (8 * 7)
-# define REGISTER_SAVE_RAX 0
+# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
# define PRESERVE_BND_REGS_PREFIX
#else
-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0,
- BND1, BND2, BND3. */
-# define REGISTER_SAVE_AREA (8 * 7 + 16 * 4)
/* Align bound register save area to 16 bytes. */
-# define REGISTER_SAVE_BND0 0
+# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
@@ -54,386 +71,61 @@
#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8)
#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
- .text
- .globl _dl_runtime_resolve
- .type _dl_runtime_resolve, @function
- .align 16
- cfi_startproc
-_dl_runtime_resolve:
- cfi_adjust_cfa_offset(16) # Incorporate PLT
- subq $REGISTER_SAVE_AREA,%rsp
- cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
- # Preserve registers otherwise clobbered.
- movq %rax, REGISTER_SAVE_RAX(%rsp)
- movq %rcx, REGISTER_SAVE_RCX(%rsp)
- movq %rdx, REGISTER_SAVE_RDX(%rsp)
- movq %rsi, REGISTER_SAVE_RSI(%rsp)
- movq %rdi, REGISTER_SAVE_RDI(%rsp)
- movq %r8, REGISTER_SAVE_R8(%rsp)
- movq %r9, REGISTER_SAVE_R9(%rsp)
-#ifndef __ILP32__
- # We also have to preserve bound registers. These are nops if
- # Intel MPX isn't available or disabled.
-# ifdef HAVE_MPX_SUPPORT
- bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
- bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
- bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
- bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
-# else
-# if REGISTER_SAVE_BND0 == 0
- .byte 0x66,0x0f,0x1b,0x04,0x24
-# else
- .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
-# endif
- .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
- .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
- .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
-# endif
-#endif
- # Copy args pushed by PLT in register.
- # %rdi: link_map, %rsi: reloc_index
- movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi
- movq REGISTER_SAVE_AREA(%rsp), %rdi
- call _dl_fixup # Call resolver.
- movq %rax, %r11 # Save return value
-#ifndef __ILP32__
- # Restore bound registers. These are nops if Intel MPX isn't
- # avaiable or disabled.
-# ifdef HAVE_MPX_SUPPORT
- bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
- bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
- bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
- bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
-# else
- .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
- .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
- .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
-# if REGISTER_SAVE_BND0 == 0
- .byte 0x66,0x0f,0x1a,0x04,0x24
-# else
- .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
-# endif
-# endif
-#endif
- # Get register content back.
- movq REGISTER_SAVE_R9(%rsp), %r9
- movq REGISTER_SAVE_R8(%rsp), %r8
- movq REGISTER_SAVE_RDI(%rsp), %rdi
- movq REGISTER_SAVE_RSI(%rsp), %rsi
- movq REGISTER_SAVE_RDX(%rsp), %rdx
- movq REGISTER_SAVE_RCX(%rsp), %rcx
- movq REGISTER_SAVE_RAX(%rsp), %rax
- # Adjust stack(PLT did 2 pushes)
- addq $(REGISTER_SAVE_AREA + 16), %rsp
- cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16))
- # Preserve bound registers.
- PRESERVE_BND_REGS_PREFIX
- jmp *%r11 # Jump to function address.
- cfi_endproc
- .size _dl_runtime_resolve, .-_dl_runtime_resolve
-
-
-#ifndef PROF
- .globl _dl_runtime_profile
- .type _dl_runtime_profile, @function
- .align 16
- cfi_startproc
-
-_dl_runtime_profile:
- cfi_adjust_cfa_offset(16) # Incorporate PLT
- /* The La_x86_64_regs data structure pointed to by the
- fourth paramater must be 16-byte aligned. This must
- be explicitly enforced. We have the set up a dynamically
- sized stack frame. %rbx points to the top half which
- has a fixed size and preserves the original stack pointer. */
-
- subq $32, %rsp # Allocate the local storage.
- cfi_adjust_cfa_offset(32)
- movq %rbx, (%rsp)
- cfi_rel_offset(%rbx, 0)
+#define RESTORE_AVX
- /* On the stack:
- 56(%rbx) parameter #1
- 48(%rbx) return address
-
- 40(%rbx) reloc index
- 32(%rbx) link_map
-
- 24(%rbx) La_x86_64_regs pointer
- 16(%rbx) framesize
- 8(%rbx) rax
- (%rbx) rbx
- */
-
- movq %rax, 8(%rsp)
- movq %rsp, %rbx
- cfi_def_cfa_register(%rbx)
-
- /* Actively align the La_x86_64_regs structure. */
- andq $0xfffffffffffffff0, %rsp
-# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
- /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers
- to detect if any xmm0-xmm7 registers are changed by audit
- module. */
- subq $(LR_SIZE + XMM_SIZE*8), %rsp
+#ifdef HAVE_AVX512_ASM_SUPPORT
+# define VEC_SIZE 64
+# define VMOVA vmovdqa64
+# if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+# define VMOV vmovdqa64
# else
- subq $LR_SIZE, %rsp # sizeof(La_x86_64_regs)
-# endif
- movq %rsp, 24(%rbx)
-
- /* Fill the La_x86_64_regs structure. */
- movq %rdx, LR_RDX_OFFSET(%rsp)
- movq %r8, LR_R8_OFFSET(%rsp)
- movq %r9, LR_R9_OFFSET(%rsp)
- movq %rcx, LR_RCX_OFFSET(%rsp)
- movq %rsi, LR_RSI_OFFSET(%rsp)
- movq %rdi, LR_RDI_OFFSET(%rsp)
- movq %rbp, LR_RBP_OFFSET(%rsp)
-
- leaq 48(%rbx), %rax
- movq %rax, LR_RSP_OFFSET(%rsp)
-
- /* We always store the XMM registers even if AVX is available.
- This is to provide backward binary compatibility for existing
- audit modules. */
- movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
- movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
- movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
- movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
- movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
- movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
- movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
- movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
-
-# ifndef __ILP32__
-# ifdef HAVE_MPX_SUPPORT
- bndmov %bnd0, (LR_BND_OFFSET)(%rsp) # Preserve bound
- bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp) # registers. Nops if
- bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
- bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
-# else
- .byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
- .byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
- .byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
- .byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
-# endif
-# endif
-
-# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
- .data
-L(have_avx):
- .zero 4
- .size L(have_avx), 4
- .previous
-
- cmpl $0, L(have_avx)(%rip)
- jne L(defined)
- movq %rbx, %r11 # Save rbx
- movl $1, %eax
- cpuid
- movq %r11,%rbx # Restore rbx
- xorl %eax, %eax
- // AVX and XSAVE supported?
- andl $((1 << 28) | (1 << 27)), %ecx
- cmpl $((1 << 28) | (1 << 27)), %ecx
- jne 10f
-# ifdef HAVE_AVX512_ASM_SUPPORT
- // AVX512 supported in processor?
- movq %rbx, %r11 # Save rbx
- xorl %ecx, %ecx
- mov $0x7, %eax
- cpuid
- andl $(1 << 16), %ebx
-# endif
- xorl %ecx, %ecx
- // Get XFEATURE_ENABLED_MASK
- xgetbv
-# ifdef HAVE_AVX512_ASM_SUPPORT
- test %ebx, %ebx
- movq %r11, %rbx # Restore rbx
- je 20f
- // Verify that XCR0[7:5] = '111b' and
- // XCR0[2:1] = '11b' which means
- // that zmm state is enabled
- andl $0xe6, %eax
- cmpl $0xe6, %eax
- jne 20f
- movl %eax, L(have_avx)(%rip)
-L(avx512):
-# define RESTORE_AVX
-# define VMOV vmovdqu64
-# define VEC(i) zmm##i
-# define MORE_CODE
-# include "dl-trampoline.h"
-# undef VMOV
-# undef VEC
-# undef RESTORE_AVX
-# endif
-20: andl $0x6, %eax
-10: subl $0x5, %eax
- movl %eax, L(have_avx)(%rip)
- cmpl $0, %eax
-
-L(defined):
- js L(no_avx)
-# ifdef HAVE_AVX512_ASM_SUPPORT
- cmpl $0xe6, L(have_avx)(%rip)
- je L(avx512)
-# endif
-
-# define RESTORE_AVX
-# define VMOV vmovdqu
-# define VEC(i) ymm##i
-# define MORE_CODE
-# include "dl-trampoline.h"
-
- .align 16
-L(no_avx):
+# define VMOV vmovdqu64
# endif
-
-# undef RESTORE_AVX
+# define VEC(i) zmm##i
+# define _dl_runtime_resolve _dl_runtime_resolve_avx512
+# define _dl_runtime_profile _dl_runtime_profile_avx512
# include "dl-trampoline.h"
-
- cfi_endproc
- .size _dl_runtime_profile, .-_dl_runtime_profile
+# undef _dl_runtime_resolve
+# undef _dl_runtime_profile
+# undef VEC
+# undef VMOV
+# undef VMOVA
+# undef VEC_SIZE
+#else
+strong_alias (_dl_runtime_resolve_avx, _dl_runtime_resolve_avx512)
+ .hidden _dl_runtime_resolve_avx512
+strong_alias (_dl_runtime_profile_avx, _dl_runtime_profile_avx512)
+ .hidden _dl_runtime_profile_avx512
#endif
-
-#ifdef SHARED
- .globl _dl_x86_64_save_sse
- .type _dl_x86_64_save_sse, @function
- .align 16
- cfi_startproc
-_dl_x86_64_save_sse:
-# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- jne L(defined_5)
- movq %rbx, %r11 # Save rbx
- movl $1, %eax
- cpuid
- movq %r11,%rbx # Restore rbx
- xorl %eax, %eax
- // AVX and XSAVE supported?
- andl $((1 << 28) | (1 << 27)), %ecx
- cmpl $((1 << 28) | (1 << 27)), %ecx
- jne 1f
-# ifdef HAVE_AVX512_ASM_SUPPORT
- // AVX512 supported in a processor?
- movq %rbx, %r11 # Save rbx
- xorl %ecx,%ecx
- mov $0x7,%eax
- cpuid
- andl $(1 << 16), %ebx
-# endif
- xorl %ecx, %ecx
- // Get XFEATURE_ENABLED_MASK
- xgetbv
-# ifdef HAVE_AVX512_ASM_SUPPORT
- test %ebx, %ebx
- movq %r11, %rbx # Restore rbx
- je 2f
- // Verify that XCR0[7:5] = '111b' and
- // XCR0[2:1] = '11b' which means
- // that zmm state is enabled
- andl $0xe6, %eax
- movl %eax, L(have_avx)(%rip)
- cmpl $0xe6, %eax
- je L(avx512_5)
-# endif
-
-2: andl $0x6, %eax
-1: subl $0x5, %eax
- movl %eax, L(have_avx)(%rip)
- cmpl $0, %eax
-
-L(defined_5):
- js L(no_avx5)
-# ifdef HAVE_AVX512_ASM_SUPPORT
- cmpl $0xe6, L(have_avx)(%rip)
- je L(avx512_5)
-# endif
-
- vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
- vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
- vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
- vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
- vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
- vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
- vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
- vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
- ret
-# ifdef HAVE_AVX512_ASM_SUPPORT
-L(avx512_5):
- vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
- vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
- vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
- vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
- vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
- vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
- vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
- vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
- ret
-# endif
-L(no_avx5):
-# endif
- movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
- movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
- movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
- movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
- movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
- movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
- movdqa %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
- movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
- ret
- cfi_endproc
- .size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
-
-
- .globl _dl_x86_64_restore_sse
- .type _dl_x86_64_restore_sse, @function
- .align 16
- cfi_startproc
-_dl_x86_64_restore_sse:
-# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- js L(no_avx6)
-# ifdef HAVE_AVX512_ASM_SUPPORT
- cmpl $0xe6, L(have_avx)(%rip)
- je L(avx512_6)
-# endif
-
- vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
- vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
- vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
- vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
- vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
- vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
- vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
- vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
- ret
-# ifdef HAVE_AVX512_ASM_SUPPORT
-L(avx512_6):
- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
- vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
- ret
-# endif
-L(no_avx6):
-# endif
- movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
- movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
- movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
- movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
- movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
- movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
- movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
- movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
- ret
- cfi_endproc
- .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
+#define VEC_SIZE 32
+#define VMOVA vmovdqa
+#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+# define VMOV vmovdqa
+#else
+# define VMOV vmovdqu
+#endif
+#define VEC(i) ymm##i
+#define _dl_runtime_resolve _dl_runtime_resolve_avx
+#define _dl_runtime_profile _dl_runtime_profile_avx
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
+#undef VEC
+#undef VMOV
+#undef VMOVA
+#undef VEC_SIZE
+
+/* movaps/movups are 1 byte shorter than movdqa/movdqu. */
+#define VEC_SIZE 16
+#define VMOVA movaps
+#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+# define VMOV movaps
+#else
+# define VMOV movups
#endif
+#define VEC(i) xmm##i
+#define _dl_runtime_resolve _dl_runtime_resolve_sse
+#define _dl_runtime_profile _dl_runtime_profile_sse
+#undef RESTORE_AVX
+#include "dl-trampoline.h"
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index d542428ac2..f4191833ab 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -1,6 +1,5 @@
-/* Partial PLT profile trampoline to save and restore x86-64 vector
- registers.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+/* PLT trampolines. x86-64 version.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,16 +16,248 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#ifdef RESTORE_AVX
+#undef REGISTER_SAVE_AREA_RAW
+#ifdef __ILP32__
+/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to
+ VEC7. */
+# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8)
+#else
+/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as
+ BND0, BND1, BND2, BND3 and VEC0 to VEC7. */
+# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8)
+#endif
+
+#undef REGISTER_SAVE_AREA
+#undef LOCAL_STORAGE_AREA
+#undef BASE
+#if DL_RUNIME_RESOLVE_REALIGN_STACK
+# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8)
+/* Local stack area before jumping to function address: RBX. */
+# define LOCAL_STORAGE_AREA 8
+# define BASE rbx
+# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0
+# error REGISTER_SAVE_AREA must be multples of VEC_SIZE
+# endif
+#else
+# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW
+/* Local stack area before jumping to function address: All saved
+ registers. */
+# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
+# define BASE rsp
+# if (REGISTER_SAVE_AREA % 16) != 8
+# error REGISTER_SAVE_AREA must be odd multples of 8
+# endif
+#endif
+
+ .text
+ .globl _dl_runtime_resolve
+ .hidden _dl_runtime_resolve
+ .type _dl_runtime_resolve, @function
+ .align 16
+ cfi_startproc
+_dl_runtime_resolve:
+ cfi_adjust_cfa_offset(16) # Incorporate PLT
+#if DL_RUNIME_RESOLVE_REALIGN_STACK
+# if LOCAL_STORAGE_AREA != 8
+# error LOCAL_STORAGE_AREA must be 8
+# endif
+ pushq %rbx # push subtracts stack by 8.
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%rbx, 0)
+ mov %RSP_LP, %RBX_LP
+ cfi_def_cfa_register(%rbx)
+ and $-VEC_SIZE, %RSP_LP
+#endif
+ sub $REGISTER_SAVE_AREA, %RSP_LP
+ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+ # Preserve registers otherwise clobbered.
+ movq %rax, REGISTER_SAVE_RAX(%rsp)
+ movq %rcx, REGISTER_SAVE_RCX(%rsp)
+ movq %rdx, REGISTER_SAVE_RDX(%rsp)
+ movq %rsi, REGISTER_SAVE_RSI(%rsp)
+ movq %rdi, REGISTER_SAVE_RDI(%rsp)
+ movq %r8, REGISTER_SAVE_R8(%rsp)
+ movq %r9, REGISTER_SAVE_R9(%rsp)
+ VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp)
+ VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp)
+ VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp)
+ VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp)
+ VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp)
+ VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp)
+ VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp)
+ VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp)
+#ifndef __ILP32__
+ # We also have to preserve bound registers. These are nops if
+ # Intel MPX isn't available or disabled.
+# ifdef HAVE_MPX_SUPPORT
+ bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
+ bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
+ bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
+ bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
+# else
+# if REGISTER_SAVE_BND0 == 0
+ .byte 0x66,0x0f,0x1b,0x04,0x24
+# else
+ .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
+# endif
+ .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
+ .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
+ .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
+# endif
+#endif
+ # Copy args pushed by PLT in register.
+ # %rdi: link_map, %rsi: reloc_index
+ mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
+ mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
+ call _dl_fixup # Call resolver.
+ mov %RAX_LP, %R11_LP # Save return value
+#ifndef __ILP32__
+ # Restore bound registers. These are nops if Intel MPX isn't
+ # available or disabled.
+# ifdef HAVE_MPX_SUPPORT
+ bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
+ bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
+ bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
+ bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
+# else
+ .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
+ .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
+ .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
+# if REGISTER_SAVE_BND0 == 0
+ .byte 0x66,0x0f,0x1a,0x04,0x24
+# else
+ .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
+# endif
+# endif
+#endif
+ # Get register content back.
+ movq REGISTER_SAVE_R9(%rsp), %r9
+ movq REGISTER_SAVE_R8(%rsp), %r8
+ movq REGISTER_SAVE_RDI(%rsp), %rdi
+ movq REGISTER_SAVE_RSI(%rsp), %rsi
+ movq REGISTER_SAVE_RDX(%rsp), %rdx
+ movq REGISTER_SAVE_RCX(%rsp), %rcx
+ movq REGISTER_SAVE_RAX(%rsp), %rax
+ VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0)
+ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1)
+ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2)
+ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3)
+ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4)
+ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
+ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
+ VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
+#if DL_RUNIME_RESOLVE_REALIGN_STACK
+ mov %RBX_LP, %RSP_LP
+ cfi_def_cfa_register(%rsp)
+ movq (%rsp), %rbx
+ cfi_restore(%rbx)
+#endif
+ # Adjust stack(PLT did 2 pushes)
+ add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
+ cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
+ # Preserve bound registers.
+ PRESERVE_BND_REGS_PREFIX
+ jmp *%r11 # Jump to function address.
+ cfi_endproc
+ .size _dl_runtime_resolve, .-_dl_runtime_resolve
+
+
+#ifndef PROF
+# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
+# error LR_VECTOR_OFFSET must be multples of VEC_SIZE
+# endif
+
+ .globl _dl_runtime_profile
+ .hidden _dl_runtime_profile
+ .type _dl_runtime_profile, @function
+ .align 16
+_dl_runtime_profile:
+ cfi_startproc
+ cfi_adjust_cfa_offset(16) # Incorporate PLT
+ /* The La_x86_64_regs data structure pointed to by the
+ fourth parameter must be VEC_SIZE-byte aligned. This must
+ be explicitly enforced. We have to set up a dynamically
+ sized stack frame. %rbx points to the top half which
+ has a fixed size and preserves the original stack pointer. */
+
+ sub $32, %RSP_LP # Allocate the local storage.
+ cfi_adjust_cfa_offset(32)
+ movq %rbx, (%rsp)
+ cfi_rel_offset(%rbx, 0)
+
+ /* On the stack:
+ 56(%rbx) parameter #1
+ 48(%rbx) return address
+
+ 40(%rbx) reloc index
+ 32(%rbx) link_map
+
+ 24(%rbx) La_x86_64_regs pointer
+ 16(%rbx) framesize
+ 8(%rbx) rax
+ (%rbx) rbx
+ */
+
+ movq %rax, 8(%rsp)
+ mov %RSP_LP, %RBX_LP
+ cfi_def_cfa_register(%rbx)
+
+ /* Actively align the La_x86_64_regs structure. */
+ and $-VEC_SIZE, %RSP_LP
+ /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers
+ to detect if any xmm0-xmm7 registers are changed by audit
+ module. */
+ sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
+ movq %rsp, 24(%rbx)
+
+ /* Fill the La_x86_64_regs structure. */
+ movq %rdx, LR_RDX_OFFSET(%rsp)
+ movq %r8, LR_R8_OFFSET(%rsp)
+ movq %r9, LR_R9_OFFSET(%rsp)
+ movq %rcx, LR_RCX_OFFSET(%rsp)
+ movq %rsi, LR_RSI_OFFSET(%rsp)
+ movq %rdi, LR_RDI_OFFSET(%rsp)
+ movq %rbp, LR_RBP_OFFSET(%rsp)
+
+ lea 48(%rbx), %RAX_LP
+ movq %rax, LR_RSP_OFFSET(%rsp)
+
+ /* We always store the XMM registers even if AVX is available.
+ This is to provide backward binary compatibility for existing
+ audit modules. */
+ movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
+ movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
+ movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
+ movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
+ movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
+ movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
+ movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
+ movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
+
+# ifndef __ILP32__
+# ifdef HAVE_MPX_SUPPORT
+ bndmov %bnd0, (LR_BND_OFFSET)(%rsp) # Preserve bound
+ bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp) # registers. Nops if
+ bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
+ bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
+# else
+ .byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
+ .byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
+ .byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
+ .byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
+# endif
+# endif
+
+# ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
- VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
- VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
- VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
- VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
- VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
- VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
- VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
- VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+ VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
+ VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
+ VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+ VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+ VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+ VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+ VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+ VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
/* Save xmm0-xmm7 registers to detect if any of them are
changed by audit module. */
@@ -38,7 +269,7 @@
vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
-#endif
+# endif
mov %RSP_LP, %RCX_LP # La_x86_64_regs pointer to %rcx.
mov 48(%rbx), %RDX_LP # Load return address if needed.
@@ -63,7 +294,7 @@
movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
-#ifdef RESTORE_AVX
+# ifdef RESTORE_AVX
/* Check if any xmm0-xmm7 registers are changed by audit
module. */
vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
@@ -72,7 +303,7 @@
je 2f
vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
jmp 1f
-2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
+2: VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
@@ -81,7 +312,7 @@
je 2f
vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
jmp 1f
-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
+2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
@@ -90,7 +321,7 @@
je 2f
vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
jmp 1f
-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
+2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
@@ -99,7 +330,7 @@
je 2f
vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
jmp 1f
-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
+2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
@@ -108,7 +339,7 @@
je 2f
vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
jmp 1f
-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
+2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
@@ -117,7 +348,7 @@
je 2f
vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
jmp 1f
-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
+2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
@@ -126,7 +357,7 @@
je 2f
vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
jmp 1f
-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
+2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
@@ -135,25 +366,25 @@
je 2f
vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
jmp 1f
-2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
+2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
1:
-#endif
+# endif
-#ifndef __ILP32__
-# ifdef HAVE_MPX_SUPPORT
+# ifndef __ILP32__
+# ifdef HAVE_MPX_SUPPORT
bndmov (LR_BND_OFFSET)(%rsp), %bnd0 # Restore bound
bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1 # registers.
bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2
bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3
-# else
+# else
.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET)
.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
.byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
.byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
+# endif
# endif
-#endif
mov 16(%rbx), %R10_LP # Anything in framesize?
test %R10_LP, %R10_LP
@@ -168,12 +399,12 @@
movq LR_RSI_OFFSET(%rsp), %rsi
movq LR_RDI_OFFSET(%rsp), %rdi
- movq %rbx, %rsp
+ mov %RBX_LP, %RSP_LP
movq (%rsp), %rbx
- cfi_restore(rbx)
+ cfi_restore(%rbx)
cfi_def_cfa_register(%rsp)
- addq $48, %rsp # Adjust the stack to the return value
+ add $48, %RSP_LP # Adjust the stack to the return value
# (eats the reloc index and link_map)
cfi_adjust_cfa_offset(-48)
PRESERVE_BND_REGS_PREFIX
@@ -189,13 +420,13 @@
temporary buffer of the size specified by the 'framesize'
returned from _dl_profile_fixup */
- leaq LR_RSP_OFFSET(%rbx), %rsi # stack
- addq $8, %r10
- andq $0xfffffffffffffff0, %r10
- movq %r10, %rcx
- subq %r10, %rsp
- movq %rsp, %rdi
- shrq $3, %rcx
+ lea LR_RSP_OFFSET(%rbx), %RSI_LP # stack
+ add $8, %R10_LP
+ and $-16, %R10_LP
+ mov %R10_LP, %RCX_LP
+ sub %R10_LP, %RSP_LP
+ mov %RSP_LP, %RDI_LP
+ shr $3, %RCX_LP
rep
movsq
@@ -206,21 +437,21 @@
PRESERVE_BND_REGS_PREFIX
call *%r11
- mov 24(%rbx), %rsp # Drop the copied stack content
+ mov 24(%rbx), %RSP_LP # Drop the copied stack content
/* Now we have to prepare the La_x86_64_retval structure for the
_dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
so we just need to allocate the sizeof(La_x86_64_retval) space on
the stack, since the alignment has already been taken care of. */
-#ifdef RESTORE_AVX
+# ifdef RESTORE_AVX
/* sizeof(La_x86_64_retval). Need extra space for 2 SSE
registers to detect if xmm0/xmm1 registers are changed
by audit module. */
- subq $(LRV_SIZE + XMM_SIZE*2), %rsp
-#else
- subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
-#endif
- movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
+ sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP
+# else
+ sub $LRV_SIZE, %RSP_LP # sizeof(La_x86_64_retval)
+# endif
+ mov %RSP_LP, %RCX_LP # La_x86_64_retval argument to %rcx.
/* Fill in the La_x86_64_retval structure. */
movq %rax, LRV_RAX_OFFSET(%rcx)
@@ -229,26 +460,26 @@
movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
-#ifdef RESTORE_AVX
+# ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
- VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
- VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx)
+ VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
+ VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)
/* Save xmm0/xmm1 registers to detect if they are changed
by audit module. */
vmovdqa %xmm0, (LRV_SIZE)(%rcx)
vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
-#endif
+# endif
-#ifndef __ILP32__
-# ifdef HAVE_MPX_SUPPORT
+# ifndef __ILP32__
+# ifdef HAVE_MPX_SUPPORT
bndmov %bnd0, LRV_BND0_OFFSET(%rcx) # Preserve returned bounds.
bndmov %bnd1, LRV_BND1_OFFSET(%rcx)
-# else
+# else
.byte 0x66,0x0f,0x1b,0x81;.long (LRV_BND0_OFFSET)
.byte 0x66,0x0f,0x1b,0x89;.long (LRV_BND1_OFFSET)
+# endif
# endif
-#endif
fstpt LRV_ST0_OFFSET(%rcx)
fstpt LRV_ST1_OFFSET(%rcx)
@@ -265,50 +496,47 @@
movaps LRV_XMM0_OFFSET(%rsp), %xmm0
movaps LRV_XMM1_OFFSET(%rsp), %xmm1
-#ifdef RESTORE_AVX
+# ifdef RESTORE_AVX
/* Check if xmm0/xmm1 registers are changed by audit module. */
vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
jne 1f
- VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0)
+ VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)
1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
jne 1f
- VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1)
+ VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)
1:
-#endif
+# endif
-#ifndef __ILP32__
-# ifdef HAVE_MPX_SUPPORT
+# ifndef __ILP32__
+# ifdef HAVE_MPX_SUPPORT
bndmov LRV_BND0_OFFSET(%rsp), %bnd0 # Restore bound registers.
bndmov LRV_BND1_OFFSET(%rsp), %bnd1
-# else
+# else
.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LRV_BND0_OFFSET)
.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LRV_BND1_OFFSET)
+# endif
# endif
-#endif
fldt LRV_ST1_OFFSET(%rsp)
fldt LRV_ST0_OFFSET(%rsp)
- movq %rbx, %rsp
+ mov %RBX_LP, %RSP_LP
movq (%rsp), %rbx
- cfi_restore(rbx)
+ cfi_restore(%rbx)
cfi_def_cfa_register(%rsp)
- addq $48, %rsp # Adjust the stack to the return value
+ add $48, %RSP_LP # Adjust the stack to the return value
# (eats the reloc index and link_map)
cfi_adjust_cfa_offset(-48)
PRESERVE_BND_REGS_PREFIX
retq
-#ifdef MORE_CODE
- cfi_adjust_cfa_offset(48)
- cfi_rel_offset(%rbx, 0)
- cfi_def_cfa_register(%rbx)
-# undef MORE_CODE
+ cfi_endproc
+ .size _dl_runtime_profile, .-_dl_runtime_profile
#endif
diff --git a/sysdeps/x86_64/ffs.c b/sysdeps/x86_64/ffs.c
index 48feb4aba2..be5b6c8589 100644
--- a/sysdeps/x86_64/ffs.c
+++ b/sysdeps/x86_64/ffs.c
@@ -1,7 +1,7 @@
/* ffs -- find first set bit in a word, counted from least significant end.
For AMD x86-64.
This file is part of the GNU C Library.
- Copyright (C) 1991-2015 Free Software Foundation, Inc.
+ Copyright (C) 1991-2016 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@cygnus.com>.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/ffsll.c b/sysdeps/x86_64/ffsll.c
index 1c26679da7..c0f5abc446 100644
--- a/sysdeps/x86_64/ffsll.c
+++ b/sysdeps/x86_64/ffsll.c
@@ -1,7 +1,7 @@
/* ffsll -- find first set bit in a word, counted from least significant end.
For AMD x86-64.
This file is part of the GNU C Library.
- Copyright (C) 1991-2015 Free Software Foundation, Inc.
+ Copyright (C) 1991-2016 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@cygnus.com>.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index 1ebe5118bf..88742faff1 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -20,7 +20,9 @@ libmvec-support += svml_d_cos2_core svml_d_cos4_core_avx \
svml_d_pow_data svml_s_powf4_core svml_s_powf8_core_avx \
svml_s_powf8_core svml_s_powf16_core svml_s_powf_data \
svml_s_sincosf4_core svml_s_sincosf8_core_avx \
- svml_s_sincosf8_core svml_s_sincosf16_core init-arch
+ svml_s_sincosf8_core svml_s_sincosf16_core svml_finite_alias
+
+libmvec-static-only-routines = svml_finite_alias
endif
# Variables for libmvec tests.
diff --git a/sysdeps/x86_64/fpu/e_exp2l.S b/sysdeps/x86_64/fpu/e_exp2l.S
index 7d42a932db..0e059b7565 100644
--- a/sysdeps/x86_64/fpu/e_exp2l.S
+++ b/sysdeps/x86_64/fpu/e_exp2l.S
@@ -6,7 +6,17 @@
*/
#include <machine/asm.h>
+#include <x86_64-math-asm.h>
+DEFINE_LDBL_MIN
+
+#ifdef PIC
+# define MO(op) op##(%rip)
+#else
+# define MO(op) op
+#endif
+
+ .text
ENTRY(__ieee754_exp2l)
fldt 8(%rsp)
/* I added the following ugly construct because exp(+-Inf) resulted
@@ -36,6 +46,7 @@ ENTRY(__ieee754_exp2l)
faddp /* 2^(fract(x)) */
fscale /* e^x */
fstp %st(1)
+ LDBL_CHECK_FORCE_UFLOW_NONNEG_NAN
ret
1: testl $0x200, %eax /* Test sign. */
diff --git a/sysdeps/x86_64/fpu/e_expf.S b/sysdeps/x86_64/fpu/e_expf.S
index 34453ca409..d4b63a8d8e 100644
--- a/sysdeps/x86_64/fpu/e_expf.S
+++ b/sysdeps/x86_64/fpu/e_expf.S
@@ -1,5 +1,5 @@
/* Optimized __ieee754_expf function.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/fpu/e_expl.S b/sysdeps/x86_64/fpu/e_expl.S
index 14dd29dcad..8b3ddaec59 100644
--- a/sysdeps/x86_64/fpu/e_expl.S
+++ b/sysdeps/x86_64/fpu/e_expl.S
@@ -23,6 +23,7 @@
*/
#include <machine/asm.h>
+#include <x86_64-math-asm.h>
#ifdef USE_AS_EXP10L
# define IEEE754_EXPL __ieee754_exp10l
@@ -65,10 +66,7 @@ c1: .byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
csat: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40
.byte 0, 0, 0, 0, 0, 0
ASM_SIZE_DIRECTIVE(csat)
- .type cmin,@object
-cmin: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x1, 0
- .byte 0, 0, 0, 0, 0, 0
- ASM_SIZE_DIRECTIVE(cmin)
+DEFINE_LDBL_MIN
#endif
#ifdef PIC
@@ -192,17 +190,9 @@ ENTRY(IEEE754_EXPL)
fstp %st(1) /* 2 */
fscale /* 2 scale factor is st(1); base^x */
fstp %st(1) /* 1 */
- /* Ensure underflow for tiny result. */
- fldt MO(cmin) /* 2 cmin */
- fld %st(1) /* 3 */
- fcomip %st(1), %st /* 2 */
- fstp %st /* 1 */
- jnc 6f
- fld %st
- fmul %st
- fstp %st
+ LDBL_CHECK_FORCE_UFLOW_NONNEG
#endif
-6: fstp %st(1) /* 0 */
+ fstp %st(1) /* 0 */
jmp 2f
1:
#ifdef USE_AS_EXPM1L
diff --git a/sysdeps/x86_64/fpu/e_log10l.S b/sysdeps/x86_64/fpu/e_log10l.S
index 2607ad199b..8fa61644c1 100644
--- a/sysdeps/x86_64/fpu/e_log10l.S
+++ b/sysdeps/x86_64/fpu/e_log10l.S
@@ -79,7 +79,13 @@ ENTRY(__log10l_finite)
fnstsw // x-1 : x : log10(2)
andb $0x45, %ah
jz 2b
- fstp %st(1) // x-1 : log10(2)
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 6f
+ fabs // log10(1) is +0 in all rounding modes.
+6: fstp %st(1) // x-1 : log10(2)
fyl2xp1 // log10(x)
ret
END(__log10l_finite)
diff --git a/sysdeps/x86_64/fpu/e_log2l.S b/sysdeps/x86_64/fpu/e_log2l.S
index c12906d456..a063255ddd 100644
--- a/sysdeps/x86_64/fpu/e_log2l.S
+++ b/sysdeps/x86_64/fpu/e_log2l.S
@@ -78,7 +78,13 @@ ENTRY(__log2l_finite)
fnstsw // x-1 : x : 1
andb $0x45, %ah
jz 2b
- fstp %st(1) // x-1 : 1
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 6f
+ fabs // log2(1) is +0 in all rounding modes.
+6: fstp %st(1) // x-1 : 1
fyl2xp1 // log(x)
ret
END (__log2l_finite)
diff --git a/sysdeps/x86_64/fpu/e_logl.S b/sysdeps/x86_64/fpu/e_logl.S
index 047b8db88a..dbe6fd59dc 100644
--- a/sysdeps/x86_64/fpu/e_logl.S
+++ b/sysdeps/x86_64/fpu/e_logl.S
@@ -81,7 +81,13 @@ ENTRY(__logl_finite)
fnstsw // x-1 : x : log(2)
andb $0x45, %ah
jz 2b
- fstp %st(1) // x-1 : log(2)
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ jne 7f
+ fabs // log(1) is +0 in all rounding modes.
+7: fstp %st(1) // x-1 : log(2)
fyl2xp1 // log(x)
ret
END (__logl_finite)
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index 358abb8dcb..1f68cf0102 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996-2015 Free Software Foundation, Inc.
+ Copyright (C) 1996-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -18,6 +18,7 @@
<http://www.gnu.org/licenses/>. */
#include <machine/asm.h>
+#include <x86_64-math-asm.h>
.section .rodata.cst8,"aM",@progbits,8
@@ -59,6 +60,7 @@ minfinity:
mzero:
.byte 0, 0, 0, 0, 0, 0, 0, 0x80
ASM_SIZE_DIRECTIVE(minf_mzero)
+DEFINE_LDBL_MIN
#ifdef PIC
# define MO(op) op##(%rip)
@@ -175,6 +177,7 @@ ENTRY(__ieee754_powl)
orl %edx, %ecx
jnz 6b
fstp %st(0) // ST*x
+ LDBL_CHECK_FORCE_UFLOW_NONNAN
ret
/* y is ±NAN */
diff --git a/sysdeps/x86_64/fpu/e_sqrt.c b/sysdeps/x86_64/fpu/e_sqrt.c
index b587f1cfb7..4b86434913 100644
--- a/sysdeps/x86_64/fpu/e_sqrt.c
+++ b/sysdeps/x86_64/fpu/e_sqrt.c
@@ -1,5 +1,5 @@
/* Square root of floating point number.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/e_sqrtf.c b/sysdeps/x86_64/fpu/e_sqrtf.c
index 386ca1ce1d..639137b735 100644
--- a/sysdeps/x86_64/fpu/e_sqrtf.c
+++ b/sysdeps/x86_64/fpu/e_sqrtf.c
@@ -1,5 +1,5 @@
/* Square root of floating point number.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fclrexcpt.c b/sysdeps/x86_64/fpu/fclrexcpt.c
index ec03e1e2c2..a8e00c0141 100644
--- a/sysdeps/x86_64/fpu/fclrexcpt.c
+++ b/sysdeps/x86_64/fpu/fclrexcpt.c
@@ -1,5 +1,5 @@
/* Clear given exceptions in current floating-point environment.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fedisblxcpt.c b/sysdeps/x86_64/fpu/fedisblxcpt.c
index 95f585aefc..f1ea6cfa97 100644
--- a/sysdeps/x86_64/fpu/fedisblxcpt.c
+++ b/sysdeps/x86_64/fpu/fedisblxcpt.c
@@ -1,5 +1,5 @@
/* Disable floating-point exceptions.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
diff --git a/sysdeps/x86_64/fpu/feenablxcpt.c b/sysdeps/x86_64/fpu/feenablxcpt.c
index e04875fe21..df4c628b8d 100644
--- a/sysdeps/x86_64/fpu/feenablxcpt.c
+++ b/sysdeps/x86_64/fpu/feenablxcpt.c
@@ -1,5 +1,5 @@
/* Enable floating-point exceptions.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
diff --git a/sysdeps/x86_64/fpu/fegetenv.c b/sysdeps/x86_64/fpu/fegetenv.c
index 7314cee7e6..a28efb36f3 100644
--- a/sysdeps/x86_64/fpu/fegetenv.c
+++ b/sysdeps/x86_64/fpu/fegetenv.c
@@ -1,5 +1,5 @@
/* Store current floating-point environment.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fegetexcept.c b/sysdeps/x86_64/fpu/fegetexcept.c
index 27a0803aa2..8acd0382a0 100644
--- a/sysdeps/x86_64/fpu/fegetexcept.c
+++ b/sysdeps/x86_64/fpu/fegetexcept.c
@@ -1,5 +1,5 @@
/* Get enabled floating-point exceptions.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
diff --git a/sysdeps/x86_64/fpu/fegetround.c b/sysdeps/x86_64/fpu/fegetround.c
index b515d8afe7..296d366560 100644
--- a/sysdeps/x86_64/fpu/fegetround.c
+++ b/sysdeps/x86_64/fpu/fegetround.c
@@ -1,5 +1,5 @@
/* Return current rounding direction.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
diff --git a/sysdeps/x86_64/fpu/feholdexcpt.c b/sysdeps/x86_64/fpu/feholdexcpt.c
index 615b702135..a040c3dea5 100644
--- a/sysdeps/x86_64/fpu/feholdexcpt.c
+++ b/sysdeps/x86_64/fpu/feholdexcpt.c
@@ -1,5 +1,5 @@
/* Store current floating-point environment and clear exceptions.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fesetenv.c b/sysdeps/x86_64/fpu/fesetenv.c
index 3e3fd8492d..355d02aaa6 100644
--- a/sysdeps/x86_64/fpu/fesetenv.c
+++ b/sysdeps/x86_64/fpu/fesetenv.c
@@ -1,5 +1,5 @@
/* Install given floating-point environment.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,9 +17,15 @@
<http://www.gnu.org/licenses/>. */
#include <fenv.h>
+#include <fpu_control.h>
#include <assert.h>
+/* All exceptions, including the x86-specific "denormal operand"
+ exception. */
+#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM)
+
+
int
__fesetenv (const fenv_t *envp)
{
@@ -34,43 +40,61 @@ __fesetenv (const fenv_t *envp)
if (envp == FE_DFL_ENV)
{
- temp.__control_word |= FE_ALL_EXCEPT;
+ temp.__control_word |= FE_ALL_EXCEPT_X86;
temp.__control_word &= ~FE_TOWARDZERO;
- temp.__status_word &= ~FE_ALL_EXCEPT;
+ temp.__control_word |= _FPU_EXTENDED;
+ temp.__status_word &= ~FE_ALL_EXCEPT_X86;
temp.__eip = 0;
temp.__cs_selector = 0;
temp.__opcode = 0;
temp.__data_offset = 0;
temp.__data_selector = 0;
+ /* Clear SSE exceptions. */
+ temp.__mxcsr &= ~FE_ALL_EXCEPT_X86;
/* Set mask for SSE MXCSR. */
- temp.__mxcsr |= (FE_ALL_EXCEPT << 7);
+ temp.__mxcsr |= (FE_ALL_EXCEPT_X86 << 7);
/* Set rounding to FE_TONEAREST. */
temp.__mxcsr &= ~ 0x6000;
temp.__mxcsr |= (FE_TONEAREST << 3);
+ /* Clear the FZ and DAZ bits. */
+ temp.__mxcsr &= ~0x8040;
}
else if (envp == FE_NOMASK_ENV)
{
temp.__control_word &= ~(FE_ALL_EXCEPT | FE_TOWARDZERO);
- temp.__status_word &= ~FE_ALL_EXCEPT;
+ /* Keep the "denormal operand" exception masked. */
+ temp.__control_word |= __FE_DENORM;
+ temp.__control_word |= _FPU_EXTENDED;
+ temp.__status_word &= ~FE_ALL_EXCEPT_X86;
temp.__eip = 0;
temp.__cs_selector = 0;
temp.__opcode = 0;
temp.__data_offset = 0;
temp.__data_selector = 0;
+ /* Clear SSE exceptions. */
+ temp.__mxcsr &= ~FE_ALL_EXCEPT_X86;
/* Set mask for SSE MXCSR. */
/* Set rounding to FE_TONEAREST. */
temp.__mxcsr &= ~ 0x6000;
temp.__mxcsr |= (FE_TONEAREST << 3);
/* Do not mask exceptions. */
temp.__mxcsr &= ~(FE_ALL_EXCEPT << 7);
+ /* Keep the "denormal operand" exception masked. */
+ temp.__mxcsr |= (__FE_DENORM << 7);
+ /* Clear the FZ and DAZ bits. */
+ temp.__mxcsr &= ~0x8040;
}
else
{
- temp.__control_word &= ~(FE_ALL_EXCEPT | FE_TOWARDZERO);
+ temp.__control_word &= ~(FE_ALL_EXCEPT_X86
+ | FE_TOWARDZERO
+ | _FPU_EXTENDED);
temp.__control_word |= (envp->__control_word
- & (FE_ALL_EXCEPT | FE_TOWARDZERO));
- temp.__status_word &= ~FE_ALL_EXCEPT;
- temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT;
+ & (FE_ALL_EXCEPT_X86
+ | FE_TOWARDZERO
+ | _FPU_EXTENDED));
+ temp.__status_word &= ~FE_ALL_EXCEPT_X86;
+ temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT_X86;
temp.__eip = envp->__eip;
temp.__cs_selector = envp->__cs_selector;
temp.__opcode = envp->__opcode;
diff --git a/sysdeps/x86_64/fpu/fesetround.c b/sysdeps/x86_64/fpu/fesetround.c
index 2a9c351142..475d63f4db 100644
--- a/sysdeps/x86_64/fpu/fesetround.c
+++ b/sysdeps/x86_64/fpu/fesetround.c
@@ -1,5 +1,5 @@
/* Set current rounding direction.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/feupdateenv.c b/sysdeps/x86_64/fpu/feupdateenv.c
index 99dfdd8f5c..f035d57ca8 100644
--- a/sysdeps/x86_64/fpu/feupdateenv.c
+++ b/sysdeps/x86_64/fpu/feupdateenv.c
@@ -1,5 +1,5 @@
/* Install given floating-point environment and raise exceptions.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
diff --git a/sysdeps/x86_64/fpu/fgetexcptflg.c b/sysdeps/x86_64/fpu/fgetexcptflg.c
index e4f321e239..938cf3e62b 100644
--- a/sysdeps/x86_64/fpu/fgetexcptflg.c
+++ b/sysdeps/x86_64/fpu/fgetexcptflg.c
@@ -1,5 +1,5 @@
/* Store current representation for exceptions.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c
index 3cd924647e..e2abbbec33 100644
--- a/sysdeps/x86_64/fpu/fraiseexcpt.c
+++ b/sysdeps/x86_64/fpu/fraiseexcpt.c
@@ -1,5 +1,5 @@
/* Raise given exceptions.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fsetexcptflg.c b/sysdeps/x86_64/fpu/fsetexcptflg.c
index f7915e3fdd..76f7bad9a8 100644
--- a/sysdeps/x86_64/fpu/fsetexcptflg.c
+++ b/sysdeps/x86_64/fpu/fsetexcptflg.c
@@ -1,5 +1,5 @@
/* Set floating-point environment exception handling.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/ftestexcept.c b/sysdeps/x86_64/fpu/ftestexcept.c
index 1e67c2fe21..c8f2c01c67 100644
--- a/sysdeps/x86_64/fpu/ftestexcept.c
+++ b/sysdeps/x86_64/fpu/ftestexcept.c
@@ -1,5 +1,5 @@
/* Test exception in current environment.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index de7d420aef..445b47527d 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -32,34 +32,34 @@ ildouble: 2
ldouble: 2
Function: "acosh":
-double: 1
+double: 2
float: 2
-idouble: 1
+idouble: 2
ifloat: 2
ildouble: 2
ldouble: 2
Function: "acosh_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
ildouble: 4
ldouble: 4
Function: "acosh_towardzero":
double: 2
-float: 1
+float: 2
idouble: 2
-ifloat: 1
+ifloat: 2
ildouble: 4
ldouble: 4
Function: "acosh_upward":
double: 2
-float: 1
+float: 2
idouble: 2
-ifloat: 1
+ifloat: 2
ildouble: 3
ldouble: 3
@@ -98,8 +98,8 @@ double: 1
float: 1
idouble: 1
ifloat: 1
-ildouble: 2
-ldouble: 2
+ildouble: 3
+ldouble: 3
Function: "asinh_downward":
double: 3
@@ -122,8 +122,8 @@ double: 3
float: 3
idouble: 3
ifloat: 3
-ildouble: 4
-ldouble: 4
+ildouble: 5
+ldouble: 5
Function: "atan":
float: 1
@@ -186,18 +186,18 @@ ildouble: 1
ldouble: 1
Function: "atanh":
-double: 1
+double: 2
float: 2
-idouble: 1
+idouble: 2
ifloat: 2
ildouble: 3
ldouble: 3
Function: "atanh_downward":
double: 3
-float: 2
+float: 3
idouble: 3
-ifloat: 2
+ifloat: 3
ildouble: 5
ldouble: 5
@@ -210,9 +210,9 @@ ildouble: 4
ldouble: 4
Function: "atanh_upward":
-double: 2
+double: 3
float: 3
-idouble: 2
+idouble: 3
ifloat: 3
ildouble: 5
ldouble: 5
@@ -668,9 +668,9 @@ ildouble: 1
ldouble: 1
Function: "cbrt_upward":
-double: 4
+double: 5
float: 1
-idouble: 4
+idouble: 5
ifloat: 1
ildouble: 1
ldouble: 1
@@ -869,11 +869,11 @@ ldouble: 3
Function: Real part of "clog":
double: 3
-float: 2
+float: 3
idouble: 3
-ifloat: 2
-ildouble: 4
-ldouble: 4
+ifloat: 3
+ildouble: 3
+ldouble: 3
Function: Imaginary part of "clog":
float: 1
@@ -883,9 +883,9 @@ ldouble: 1
Function: Real part of "clog10":
double: 3
-float: 3
+float: 4
idouble: 3
-ifloat: 3
+ifloat: 4
ildouble: 4
ldouble: 4
@@ -898,10 +898,10 @@ ildouble: 2
ldouble: 2
Function: Real part of "clog10_downward":
-double: 6
-float: 6
-idouble: 6
-ifloat: 6
+double: 5
+float: 4
+idouble: 5
+ifloat: 4
ildouble: 8
ldouble: 8
@@ -910,14 +910,14 @@ double: 2
float: 4
idouble: 2
ifloat: 4
-ildouble: 2
-ldouble: 2
+ildouble: 3
+ldouble: 3
Function: Real part of "clog10_towardzero":
double: 5
-float: 4
+float: 5
idouble: 5
-ifloat: 4
+ifloat: 5
ildouble: 8
ldouble: 8
@@ -930,28 +930,28 @@ ildouble: 3
ldouble: 3
Function: Real part of "clog10_upward":
-double: 8
+double: 6
float: 5
-idouble: 8
+idouble: 6
ifloat: 5
-ildouble: 6
-ldouble: 6
+ildouble: 8
+ldouble: 8
Function: Imaginary part of "clog10_upward":
double: 2
-float: 3
+float: 4
idouble: 2
-ifloat: 3
+ifloat: 4
ildouble: 3
ldouble: 3
Function: Real part of "clog_downward":
-double: 7
-float: 5
-idouble: 7
-ifloat: 5
-ildouble: 7
-ldouble: 7
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
Function: Imaginary part of "clog_downward":
double: 1
@@ -962,28 +962,28 @@ ildouble: 1
ldouble: 1
Function: Real part of "clog_towardzero":
-double: 7
-float: 5
-idouble: 7
-ifloat: 5
-ildouble: 8
-ldouble: 8
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
Function: Imaginary part of "clog_towardzero":
double: 1
-float: 2
+float: 3
idouble: 1
-ifloat: 2
+ifloat: 3
ildouble: 1
ldouble: 1
Function: Real part of "clog_upward":
-double: 8
-float: 5
-idouble: 8
-ifloat: 5
-ildouble: 6
-ldouble: 6
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 4
+ldouble: 4
Function: Imaginary part of "clog_upward":
double: 1
@@ -1019,14 +1019,14 @@ Function: "cos_vlen16":
float: 1
Function: "cos_vlen2":
-double: 1
+double: 2
Function: "cos_vlen4":
-double: 1
+double: 2
float: 1
Function: "cos_vlen4_avx2":
-double: 1
+double: 2
Function: "cos_vlen8":
double: 1
@@ -1040,7 +1040,7 @@ double: 1
float: 1
idouble: 1
ifloat: 1
-ildouble: 1
+ildouble: 2
ldouble: 2
Function: "cosh_downward":
@@ -1264,25 +1264,25 @@ ildouble: 2
ldouble: 2
Function: Real part of "csqrt_downward":
-double: 4
+double: 5
float: 4
-idouble: 4
+idouble: 5
ifloat: 4
-ildouble: 4
-ldouble: 4
+ildouble: 5
+ldouble: 5
Function: Imaginary part of "csqrt_downward":
double: 4
float: 3
idouble: 4
ifloat: 3
-ildouble: 3
-ldouble: 3
+ildouble: 4
+ldouble: 4
Function: Real part of "csqrt_towardzero":
-double: 3
+double: 4
float: 3
-idouble: 3
+idouble: 4
ifloat: 3
ildouble: 4
ldouble: 4
@@ -1292,8 +1292,8 @@ double: 4
float: 3
idouble: 4
ifloat: 3
-ildouble: 3
-ldouble: 3
+ildouble: 4
+ldouble: 4
Function: Real part of "csqrt_upward":
double: 5
@@ -1308,8 +1308,8 @@ double: 3
float: 3
idouble: 3
ifloat: 3
-ildouble: 3
-ldouble: 3
+ildouble: 4
+ldouble: 4
Function: Real part of "ctan":
double: 1
@@ -1472,17 +1472,17 @@ ildouble: 1
ldouble: 1
Function: "erfc":
-double: 2
+double: 3
float: 2
-idouble: 2
+idouble: 3
ifloat: 2
-ildouble: 2
-ldouble: 2
+ildouble: 3
+ldouble: 3
Function: "erfc_downward":
-double: 4
+double: 5
float: 6
-idouble: 4
+idouble: 5
ifloat: 6
ildouble: 4
ldouble: 4
@@ -1496,12 +1496,12 @@ ildouble: 4
ldouble: 4
Function: "erfc_upward":
-double: 4
+double: 5
float: 6
-idouble: 4
+idouble: 5
ifloat: 6
-ildouble: 4
-ldouble: 4
+ildouble: 5
+ldouble: 5
Function: "exp":
ildouble: 1
@@ -1578,12 +1578,14 @@ ldouble: 1
Function: "exp_towardzero":
double: 1
idouble: 1
-ildouble: 1
-ldouble: 1
+ildouble: 2
+ldouble: 2
Function: "exp_upward":
double: 1
+float: 1
idouble: 1
+ifloat: 1
ildouble: 1
ldouble: 1
@@ -1625,9 +1627,9 @@ ldouble: 4
Function: "expm1_towardzero":
double: 1
-float: 1
+float: 2
idouble: 1
-ifloat: 1
+ifloat: 2
ildouble: 4
ldouble: 4
@@ -1640,36 +1642,36 @@ ildouble: 4
ldouble: 4
Function: "gamma":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: "gamma_downward":
double: 4
-float: 3
+float: 4
idouble: 4
-ifloat: 3
-ildouble: 6
-ldouble: 6
+ifloat: 4
+ildouble: 4
+ldouble: 4
+
+Function: "gamma_downward":
+double: 5
+float: 4
+idouble: 5
+ifloat: 4
+ildouble: 7
+ldouble: 7
Function: "gamma_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 6
-ldouble: 6
+double: 5
+float: 4
+idouble: 5
+ifloat: 4
+ildouble: 7
+ldouble: 7
Function: "gamma_upward":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 4
-ldouble: 4
+double: 5
+float: 5
+idouble: 5
+ifloat: 5
+ildouble: 6
+ldouble: 6
Function: "hypot":
double: 1
@@ -1792,36 +1794,36 @@ ildouble: 5
ldouble: 5
Function: "lgamma":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: "lgamma_downward":
double: 4
-float: 3
+float: 4
idouble: 4
-ifloat: 3
-ildouble: 6
-ldouble: 6
+ifloat: 4
+ildouble: 4
+ldouble: 4
+
+Function: "lgamma_downward":
+double: 5
+float: 4
+idouble: 5
+ifloat: 4
+ildouble: 7
+ldouble: 7
Function: "lgamma_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 6
-ldouble: 6
+double: 5
+float: 4
+idouble: 5
+ifloat: 4
+ildouble: 7
+ldouble: 7
Function: "lgamma_upward":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 4
-ldouble: 4
+double: 5
+float: 5
+idouble: 5
+ifloat: 5
+ildouble: 6
+ldouble: 6
Function: "log":
float: 1
@@ -1874,16 +1876,16 @@ double: 2
float: 2
idouble: 2
ifloat: 2
-ildouble: 3
-ldouble: 3
+ildouble: 4
+ldouble: 4
Function: "log1p_towardzero":
double: 2
float: 2
idouble: 2
ifloat: 2
-ildouble: 3
-ldouble: 3
+ildouble: 4
+ldouble: 4
Function: "log1p_upward":
double: 2
@@ -1938,7 +1940,9 @@ ildouble: 2
ldouble: 2
Function: "log_upward":
+double: 1
float: 2
+idouble: 1
ifloat: 2
ildouble: 1
ldouble: 1
@@ -1964,8 +1968,8 @@ Function: "log_vlen8_avx2":
float: 2
Function: "pow":
-float: 3
-ifloat: 3
+float: 1
+ifloat: 1
ildouble: 1
ldouble: 1
@@ -2001,25 +2005,25 @@ ldouble: 2
Function: "pow_downward":
double: 1
-float: 3
+float: 1
idouble: 1
-ifloat: 3
+ifloat: 1
ildouble: 4
ldouble: 4
Function: "pow_towardzero":
double: 1
-float: 4
+float: 1
idouble: 1
-ifloat: 4
+ifloat: 1
ildouble: 1
ldouble: 1
Function: "pow_upward":
double: 1
-float: 4
+float: 1
idouble: 1
-ifloat: 4
+ifloat: 1
ildouble: 2
ldouble: 2
@@ -2050,14 +2054,14 @@ ldouble: 1
Function: "sin_downward":
double: 1
idouble: 1
-ildouble: 2
-ldouble: 2
+ildouble: 3
+ldouble: 3
Function: "sin_towardzero":
double: 1
idouble: 1
-ildouble: 1
-ldouble: 1
+ildouble: 2
+ldouble: 2
Function: "sin_upward":
double: 1
@@ -2111,14 +2115,14 @@ Function: "sincos_vlen16":
float: 1
Function: "sincos_vlen2":
-double: 1
+double: 2
Function: "sincos_vlen4":
-double: 1
+double: 2
float: 1
Function: "sincos_vlen4_avx2":
-double: 1
+double: 2
Function: "sincos_vlen8":
double: 1
@@ -2162,24 +2166,24 @@ ldouble: 5
Function: "tan":
float: 1
ifloat: 1
-ildouble: 1
-ldouble: 1
+ildouble: 2
+ldouble: 2
Function: "tan_downward":
double: 1
float: 2
idouble: 1
ifloat: 2
-ildouble: 2
-ldouble: 2
+ildouble: 3
+ldouble: 3
Function: "tan_towardzero":
double: 1
float: 1
idouble: 1
ifloat: 1
-ildouble: 2
-ldouble: 2
+ildouble: 3
+ldouble: 3
Function: "tan_upward":
double: 1
@@ -2194,8 +2198,8 @@ double: 2
float: 2
idouble: 2
ifloat: 2
-ildouble: 2
-ldouble: 2
+ildouble: 3
+ldouble: 3
Function: "tanh_downward":
double: 3
@@ -2222,36 +2226,36 @@ ildouble: 4
ldouble: 4
Function: "tgamma":
-double: 4
+double: 5
float: 5
-idouble: 4
+idouble: 5
ifloat: 5
-ildouble: 3
-ldouble: 3
+ildouble: 5
+ldouble: 5
Function: "tgamma_downward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 3
-ldouble: 3
+double: 5
+float: 5
+idouble: 5
+ifloat: 5
+ildouble: 5
+ldouble: 5
Function: "tgamma_towardzero":
double: 5
float: 5
idouble: 5
ifloat: 5
-ildouble: 3
-ldouble: 3
+ildouble: 5
+ldouble: 5
Function: "tgamma_upward":
double: 5
float: 5
idouble: 5
ifloat: 5
-ildouble: 3
-ldouble: 3
+ildouble: 5
+ldouble: 5
Function: "y0":
double: 2
diff --git a/sysdeps/x86_64/fpu/math-tests-arch.h b/sysdeps/x86_64/fpu/math-tests-arch.h
index e8833bfe0a..867152046e 100644
--- a/sysdeps/x86_64/fpu/math-tests-arch.h
+++ b/sysdeps/x86_64/fpu/math-tests-arch.h
@@ -1,5 +1,5 @@
/* Runtime architecture check for math tests. x86_64 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,66 +19,36 @@
#if defined REQUIRE_AVX
# include <init-arch.h>
-/* Set to 1 if AVX supported. */
-static int avx_usable;
-
-# define INIT_ARCH_EXT \
- do \
- { \
- __init_cpu_features (); \
- avx_usable = __cpu_features.feature[index_AVX_Usable] \
- & bit_AVX_Usable; \
- } \
- while (0)
+# define INIT_ARCH_EXT
# define CHECK_ARCH_EXT \
do \
{ \
- if (!avx_usable) return; \
+ if (!HAS_ARCH_FEATURE (AVX_Usable)) return; \
} \
while (0)
#elif defined REQUIRE_AVX2
# include <init-arch.h>
- /* Set to 1 if AVX2 supported. */
- static int avx2_usable;
-
-# define INIT_ARCH_EXT \
- do \
- { \
- __init_cpu_features (); \
- avx2_usable = __cpu_features.feature[index_AVX2_Usable] \
- & bit_AVX2_Usable; \
- } \
- while (0)
+# define INIT_ARCH_EXT
# define CHECK_ARCH_EXT \
do \
{ \
- if (!avx2_usable) return; \
+ if (!HAS_ARCH_FEATURE (AVX2_Usable)) return; \
} \
while (0)
#elif defined REQUIRE_AVX512F
# include <init-arch.h>
- /* Set to 1 if supported. */
- static int avx512f_usable;
-
-# define INIT_ARCH_EXT \
- do \
- { \
- __init_cpu_features (); \
- avx512f_usable = __cpu_features.feature[index_AVX512F_Usable] \
- & bit_AVX512F_Usable; \
- } \
- while (0)
+# define INIT_ARCH_EXT
# define CHECK_ARCH_EXT \
do \
{ \
- if (!avx512f_usable) return; \
+ if (!HAS_ARCH_FEATURE (AVX512F_Usable)) return; \
} \
while (0)
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 86ea473b4f..34542155aa 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -2,7 +2,6 @@ ifeq ($(subdir),math)
libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
-ifeq ($(have-mfma4),yes)
libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
@@ -16,7 +15,7 @@ CFLAGS-e_asin-fma4.c = -mfma4
CFLAGS-e_atan2-fma4.c = -mfma4
CFLAGS-e_exp-fma4.c = -mfma4
CFLAGS-e_log-fma4.c = -mfma4
-CFLAGS-e_pow-fma4.c = -mfma4
+CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma)
CFLAGS-halfulp-fma4.c = -mfma4
CFLAGS-mpa-fma4.c = -mfma4
CFLAGS-mpatan-fma4.c = -mfma4
@@ -31,9 +30,7 @@ CFLAGS-slowexp-fma4.c = -mfma4
CFLAGS-slowpow-fma4.c = -mfma4
CFLAGS-s_sin-fma4.c = -mfma4
CFLAGS-s_tan-fma4.c = -mfma4
-endif
-ifeq ($(config-cflags-sse2avx),yes)
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
e_atan2-avx s_sin-avx s_tan-avx \
mplog-avx mpa-avx slowexp-avx \
@@ -50,7 +47,6 @@ CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
endif
-endif
ifeq ($(subdir),mathvec)
libmvec-sysdep_routines += svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 \
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
index 55865c02f3..111a5b99bd 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
@@ -1,7 +1,6 @@
-#ifdef HAVE_FMA4_SUPPORT
-# include <init-arch.h>
-# include <math.h>
-# include <math_private.h>
+#include <init-arch.h>
+#include <math.h>
+#include <math_private.h>
extern double __ieee754_acos_sse2 (double);
extern double __ieee754_asin_sse2 (double);
@@ -9,16 +8,19 @@ extern double __ieee754_acos_fma4 (double);
extern double __ieee754_asin_fma4 (double);
libm_ifunc (__ieee754_acos,
- HAS_FMA4 ? __ieee754_acos_fma4 : __ieee754_acos_sse2);
+ HAS_ARCH_FEATURE (FMA4_Usable)
+ ? __ieee754_acos_fma4
+ : __ieee754_acos_sse2);
strong_alias (__ieee754_acos, __acos_finite)
libm_ifunc (__ieee754_asin,
- HAS_FMA4 ? __ieee754_asin_fma4 : __ieee754_asin_sse2);
+ HAS_ARCH_FEATURE (FMA4_Usable)
+ ? __ieee754_asin_fma4
+ : __ieee754_asin_sse2);
strong_alias (__ieee754_asin, __asin_finite)
-# define __ieee754_acos __ieee754_acos_sse2
-# define __ieee754_asin __ieee754_asin_sse2
-#endif
+#define __ieee754_acos __ieee754_acos_sse2
+#define __ieee754_asin __ieee754_asin_sse2
#include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 547681cb59..9ca3c02a44 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -1,25 +1,18 @@
-#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT
-# include <init-arch.h>
-# include <math.h>
-# include <math_private.h>
+#include <init-arch.h>
+#include <math.h>
+#include <math_private.h>
extern double __ieee754_atan2_sse2 (double, double);
extern double __ieee754_atan2_avx (double, double);
-# ifdef HAVE_FMA4_SUPPORT
extern double __ieee754_atan2_fma4 (double, double);
-# else
-# undef HAS_FMA4
-# define HAS_FMA4 0
-# define __ieee754_atan2_fma4 ((void *) 0)
-# endif
libm_ifunc (__ieee754_atan2,
- HAS_FMA4 ? __ieee754_atan2_fma4
- : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
+ HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_atan2_fma4
+ : (HAS_ARCH_FEATURE (AVX_Usable)
+ ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
strong_alias (__ieee754_atan2, __atan2_finite)
-# define __ieee754_atan2 __ieee754_atan2_sse2
-#endif
+#define __ieee754_atan2 __ieee754_atan2_sse2
#include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index d244954056..b7d7b5ff27 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -1,25 +1,18 @@
-#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT
-# include <init-arch.h>
-# include <math.h>
-# include <math_private.h>
+#include <init-arch.h>
+#include <math.h>
+#include <math_private.h>
extern double __ieee754_exp_sse2 (double);
extern double __ieee754_exp_avx (double);
-# ifdef HAVE_FMA4_SUPPORT
extern double __ieee754_exp_fma4 (double);
-# else
-# undef HAS_FMA4
-# define HAS_FMA4 0
-# define __ieee754_exp_fma4 ((void *) 0)
-# endif
libm_ifunc (__ieee754_exp,
- HAS_FMA4 ? __ieee754_exp_fma4
- : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
+ HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_exp_fma4
+ : (HAS_ARCH_FEATURE (AVX_Usable)
+ ? __ieee754_exp_avx : __ieee754_exp_sse2));
strong_alias (__ieee754_exp, __exp_finite)
-# define __ieee754_exp __ieee754_exp_sse2
-#endif
+#define __ieee754_exp __ieee754_exp_sse2
#include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index 98054737bd..cf9533d6c0 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -1,25 +1,18 @@
-#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT
-# include <init-arch.h>
-# include <math.h>
-# include <math_private.h>
+#include <init-arch.h>
+#include <math.h>
+#include <math_private.h>
extern double __ieee754_log_sse2 (double);
extern double __ieee754_log_avx (double);
-# ifdef HAVE_FMA4_SUPPORT
extern double __ieee754_log_fma4 (double);
-# else
-# undef HAS_FMA4
-# define HAS_FMA4 0
-# define __ieee754_log_fma4 ((void *) 0)
-# endif
libm_ifunc (__ieee754_log,
- HAS_FMA4 ? __ieee754_log_fma4
- : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
+ HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_log_fma4
+ : (HAS_ARCH_FEATURE (AVX_Usable)
+ ? __ieee754_log_avx : __ieee754_log_sse2));
strong_alias (__ieee754_log, __log_finite)
-# define __ieee754_log __ieee754_log_sse2
-#endif
+#define __ieee754_log __ieee754_log_sse2
#include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
index 433cce0de6..a5c5d89c3e 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@@ -1,16 +1,17 @@
-#ifdef HAVE_FMA4_SUPPORT
-# include <init-arch.h>
-# include <math.h>
-# include <math_private.h>
+#include <init-arch.h>
+#include <math.h>
+#include <math_private.h>
extern double __ieee754_pow_sse2 (double, double);
extern double __ieee754_pow_fma4 (double, double);
-libm_ifunc (__ieee754_pow, HAS_FMA4 ? __ieee754_pow_fma4 : __ieee754_pow_sse2);
+libm_ifunc (__ieee754_pow,
+ HAS_ARCH_FEATURE (FMA4_Usable)
+ ? __ieee754_pow_fma4
+ : __ieee754_pow_sse2);
strong_alias (__ieee754_pow, __pow_finite)
-# define __ieee754_pow __ieee754_pow_sse2
-#endif
+#define __ieee754_pow __ieee754_pow_sse2
#include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index ae16d7c9bb..742e95cb96 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -1,22 +1,15 @@
-#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT
-# include <init-arch.h>
-# include <math.h>
+#include <init-arch.h>
+#include <math.h>
extern double __atan_sse2 (double);
extern double __atan_avx (double);
-# ifdef HAVE_FMA4_SUPPORT
extern double __atan_fma4 (double);
-# else
-# undef HAS_FMA4
-# define HAS_FMA4 0
-# define __atan_fma4 ((void *) 0)
-# endif
-libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
- HAS_AVX ? __atan_avx : __atan_sse2));
+libm_ifunc (atan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __atan_fma4 :
+ HAS_ARCH_FEATURE (AVX_Usable)
+ ? __atan_avx : __atan_sse2));
-# define atan __atan_sse2
-#endif
+#define atan __atan_sse2
#include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/s_ceil.S
index 00ecede74d..40fa729955 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
@@ -22,10 +22,9 @@
ENTRY(__ceil)
.type __ceil, @gnu_indirect_function
- call __get_cpu_features@plt
- movq %rax, %rdx
+ LOAD_RTLD_GLOBAL_RO_RDX
leaq __ceil_sse41(%rip), %rax
- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+ HAS_CPU_FEATURE (SSE4_1)
jnz 2f
leaq __ceil_c(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf.S
index c8ed70553e..9a06a5c174 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
@@ -22,10 +22,9 @@
ENTRY(__ceilf)
.type __ceilf, @gnu_indirect_function
- call __get_cpu_features@plt
- movq %rax, %rdx
+ LOAD_RTLD_GLOBAL_RO_RDX
leaq __ceilf_sse41(%rip), %rax
- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+ HAS_CPU_FEATURE (SSE4_1)
jnz 2f
leaq __ceilf_c(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/fpu/multiarch/s_floor.S
index 952ffaa314..57a0eee5ba 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
@@ -22,10 +22,9 @@
ENTRY(__floor)
.type __floor, @gnu_indirect_function
- call __get_cpu_features@plt
- movq %rax, %rdx
+ LOAD_RTLD_GLOBAL_RO_RDX
leaq __floor_sse41(%rip), %rax
- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+ HAS_CPU_FEATURE (SSE4_1)
jnz 2f
leaq __floor_c(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.S b/sysdeps/x86_64/fpu/multiarch/s_floorf.S
index c8231e86b3..74a149a950 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
@@ -22,10 +22,9 @@
ENTRY(__floorf)
.type __floorf, @gnu_indirect_function
- call __get_cpu_features@plt
- movq %rax, %rdx
+ LOAD_RTLD_GLOBAL_RO_RDX
leaq __floorf_sse41(%rip), %rax
- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+ HAS_CPU_FEATURE (SSE4_1)
jnz 2f
leaq __floorf_c(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 0963a0b36a..1de1a84cbe 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -1,5 +1,5 @@
/* FMA version of fma.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -21,8 +21,6 @@
#include <math.h>
#include <init-arch.h>
-#ifdef HAVE_AVX_SUPPORT
-
extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
@@ -34,25 +32,19 @@ __fma_fma3 (double x, double y, double z)
}
-# ifdef HAVE_FMA4_SUPPORT
static double
__fma_fma4 (double x, double y, double z)
{
asm ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
return x;
}
-# else
-# undef HAS_FMA4
-# define HAS_FMA4 0
-# define __fma_fma4 ((void *) 0)
-# endif
-libm_ifunc (__fma, HAS_FMA
- ? __fma_fma3 : (HAS_FMA4 ? __fma_fma4 : __fma_sse2));
+libm_ifunc (__fma, HAS_ARCH_FEATURE (FMA_Usable)
+ ? __fma_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
+ ? __fma_fma4 : __fma_sse2));
weak_alias (__fma, fma)
-# define __fma __fma_sse2
-#endif
+#define __fma __fma_sse2
#include <sysdeps/ieee754/dbl-64/s_fma.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 6046961f86..8905e4b54f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -1,5 +1,5 @@
/* FMA version of fmaf.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,6 @@
#include <math.h>
#include <init-arch.h>
-#ifdef HAVE_AVX_SUPPORT
-
extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
@@ -33,25 +31,19 @@ __fmaf_fma3 (float x, float y, float z)
}
-# ifdef HAVE_FMA4_SUPPORT
static float
__fmaf_fma4 (float x, float y, float z)
{
asm ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
return x;
}
-# else
-# undef HAS_FMA4
-# define HAS_FMA4 0
-# define __fmaf_fma4 ((void *) 0)
-# endif
-libm_ifunc (__fmaf, HAS_FMA
- ? __fmaf_fma3 : (HAS_FMA4 ? __fmaf_fma4 : __fmaf_sse2));
+libm_ifunc (__fmaf, HAS_ARCH_FEATURE (FMA_Usable)
+ ? __fmaf_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
+ ? __fmaf_fma4 : __fmaf_sse2));
weak_alias (__fmaf, fmaf)
-# define __fmaf __fmaf_sse2
-#endif
+#define __fmaf __fmaf_sse2
#include <sysdeps/ieee754/dbl-64/s_fmaf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
index b5d32b5873..5091cf5813 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
@@ -22,10 +22,9 @@
ENTRY(__nearbyint)
.type __nearbyint, @gnu_indirect_function
- call __get_cpu_features@plt
- movq %rax, %rdx
+ LOAD_RTLD_GLOBAL_RO_RDX
leaq __nearbyint_sse41(%rip), %rax
- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+ HAS_CPU_FEATURE (SSE4_1)
jnz 2f
leaq __nearbyint_c(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
index cd7e177a55..4a13700001 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
@@ -22,10 +22,9 @@
ENTRY(__nearbyintf)
.type __nearbyintf, @gnu_indirect_function
- call __get_cpu_features@plt
- movq %rax, %rdx
+ LOAD_RTLD_GLOBAL_RO_RDX
leaq __nearbyintf_sse41(%rip), %rax
- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+ HAS_CPU_FEATURE (SSE4_1)
jnz 2f
leaq __nearbyintf_c(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.S b/sysdeps/x86_64/fpu/multiarch/s_rint.S
index f52cef65db..1c0d1e14b7 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
@@ -22,10 +22,9 @@
ENTRY(__rint)
.type __rint, @gnu_indirect_function
- call __get_cpu_features@plt
- movq %rax, %rdx
+ LOAD_RTLD_GLOBAL_RO_RDX
leaq __rint_sse41(%rip), %rax
- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+ HAS_CPU_FEATURE (SSE4_1)
jnz 2f
leaq __rint_c(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.S b/sysdeps/x86_64/fpu/multiarch/s_rintf.S
index e2608d4c4e..8e42fa561f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
@@ -22,10 +22,9 @@
ENTRY(__rintf)
.type __rintf, @gnu_indirect_function
- call __get_cpu_features@plt
- movq %rax, %rdx
+ LOAD_RTLD_GLOBAL_RO_RDX
leaq __rintf_sse41(%rip), %rax
- testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+ HAS_CPU_FEATURE (SSE4_1)
jnz 2f
leaq __rintf_c(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index a0c2521c98..8ffd3e7125 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -1,33 +1,26 @@
-#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT
-# include <init-arch.h>
-# include <math.h>
-# undef NAN
+#include <init-arch.h>
+#include <math.h>
+#undef NAN
extern double __cos_sse2 (double);
extern double __sin_sse2 (double);
extern double __cos_avx (double);
extern double __sin_avx (double);
-# ifdef HAVE_FMA4_SUPPORT
extern double __cos_fma4 (double);
extern double __sin_fma4 (double);
-# else
-# undef HAS_FMA4
-# define HAS_FMA4 0
-# define __cos_fma4 ((void *) 0)
-# define __sin_fma4 ((void *) 0)
-# endif
-libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
- HAS_AVX ? __cos_avx : __cos_sse2));
+libm_ifunc (__cos, (HAS_ARCH_FEATURE (FMA4_Usable) ? __cos_fma4 :
+ HAS_ARCH_FEATURE (AVX_Usable)
+ ? __cos_avx : __cos_sse2));
weak_alias (__cos, cos)
-libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
- HAS_AVX ? __sin_avx : __sin_sse2));
+libm_ifunc (__sin, (HAS_ARCH_FEATURE (FMA4_Usable) ? __sin_fma4 :
+ HAS_ARCH_FEATURE (AVX_Usable)
+ ? __sin_avx : __sin_sse2));
weak_alias (__sin, sin)
-# define __cos __cos_sse2
-# define __sin __sin_sse2
-#endif
+#define __cos __cos_sse2
+#define __sin __sin_sse2
#include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index 904308fada..25f3bca07e 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -1,22 +1,15 @@
-#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT
-# include <init-arch.h>
-# include <math.h>
+#include <init-arch.h>
+#include <math.h>
extern double __tan_sse2 (double);
extern double __tan_avx (double);
-# ifdef HAVE_FMA4_SUPPORT
extern double __tan_fma4 (double);
-# else
-# undef HAS_FMA4
-# define HAS_FMA4 0
-# define __tan_fma4 ((void *) 0)
-# endif
-libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
- HAS_AVX ? __tan_avx : __tan_sse2));
+libm_ifunc (tan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __tan_fma4 :
+ HAS_ARCH_FEATURE (AVX_Usable)
+ ? __tan_avx : __tan_sse2));
-# define tan __tan_sse2
-#endif
+#define tan __tan_sse2
#include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
index 5f67d83bd4..7d720e2fcb 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized cos, vector length is 2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2v_cos)
.type _ZGVbN2v_cos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2v_cos_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2v_cos_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN2v_cos_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
index 4420edcae0..088fcae067 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
index 5babb834ad..65a3570d2e 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized cos, vector length is 4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4v_cos)
.type _ZGVdN4v_cos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4v_cos_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4v_cos_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN4v_cos_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
index 9a776e7df7..4e653216d9 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
index d0f4f27f46..3e7f16d44e 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized cos, vector length is 8.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8v_cos)
.type _ZGVeN8v_cos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
+ LOAD_RTLD_GLOBAL_RO_RDX
1: leaq _ZGVeN8v_cos_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN8v_cos_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN8v_cos_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
index b376155210..1cac1d827a 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with AVX-512, KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
index ef3dc49a1c..136c67a550 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized exp.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2v_exp)
.type _ZGVbN2v_exp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2v_exp_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2v_exp_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN2v_exp_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
index 1f5445924a..445b230152 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
index 7f2ebdef67..9d6a47be0a 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized exp.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4v_exp)
.type _ZGVdN4v_exp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4v_exp_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4v_exp_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN4v_exp_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
index a34e267433..25f9e28941 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
index 7b7c07d926..317ee36e61 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized exp.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8v_exp)
.type _ZGVeN8v_exp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8v_exp_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8v_exp_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN8v_exp_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN8v_exp_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
index 049a7e49cd..74f1d2ce7b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
index 38d369fc3c..03d86a3e63 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized log.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2v_log)
.type _ZGVbN2v_log, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2v_log_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2v_log_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN2v_log_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
index 82f3d8215d..5d254288f6 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function log vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
index ddb6105405..9f6ddbef15 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized log.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4v_log)
.type _ZGVdN4v_log, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4v_log_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4v_log_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN4v_log_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
index 816aede395..5da298747d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function log vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
index 76375fdae0..2e1a1da1a5 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized log.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8v_log)
.type _ZGVeN8v_log, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8v_log_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8v_log_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN8v_log_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN8v_log_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
index b0f3dd580c..dca8e61f34 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function log vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
index f111388922..4a50246889 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized pow.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2vv_pow)
.type _ZGVbN2vv_pow, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2vv_pow_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2vv_pow_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN2vv_pow_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
index 9f6ec29ac5..064d170878 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S
index 21e3070a42..fb9f989adc 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized pow.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4vv_pow)
.type _ZGVdN4vv_pow, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4vv_pow_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4vv_pow_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN4vv_pow_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
index f1f1f35ca2..f2a73ffe1e 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
index c1e5e76f92..30bc53f2f7 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized pow.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8vv_pow)
.type _ZGVeN8vv_pow, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8vv_pow_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8vv_pow_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN8vv_pow_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN8vv_pow_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
index 8dd89c8ebb..4a515233fc 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
index 29bd0a7b4d..112bec2224 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sin.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2v_sin)
.type _ZGVbN2v_sin, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2v_sin_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2v_sin_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN2v_sin_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
index 3a1ccbf139..5755ce6f74 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
index c3a453a477..700a1c629d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sin, vector length is 4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4v_sin)
.type _ZGVdN4v_sin, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4v_sin_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4v_sin_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN4v_sin_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
index 6bf8b32b4f..46b557158a 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
index 131f2f47c5..5afce0ed88 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sin.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8v_sin)
.type _ZGVeN8v_sin, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8v_sin_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8v_sin_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN8v_sin_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN8v_sin_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
index 422f6e8b0f..6c565f3861 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with AVX-512, KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
index e8e5771808..883d7d33a4 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sincos.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN2vvv_sincos)
.type _ZGVbN2vvv_sincos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN2vvv_sincos_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
index b504d1d732..65ad540122 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
index 64744ffa62..69a3f74650 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sincos.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN4vvv_sincos)
.type _ZGVdN4vvv_sincos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN4vvv_sincos_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
index dca5604111..60d03e9f8b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
index e33109099e..64cb08c5d1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sincos.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN8vvv_sincos)
.type _ZGVeN8vvv_sincos, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN8vvv_sincos_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN8vvv_sincos_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN8vvv_sincos_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN8vvv_sincos_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
index e8388325f7..44700f90b8 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
index 0654d3c19b..755254a280 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized cosf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16v_cosf)
.type _ZGVeN16v_cosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16v_cosf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16v_cosf_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN16v_cosf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
index e777476d73..5004cd4758 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
index fa2363bb1f..ad7de18851 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized cosf, vector length is 4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4v_cosf)
.type _ZGVbN4v_cosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4v_cosf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4v_cosf_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN4v_cosf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
index bdb6591905..d23ff72a30 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
index e14bba4a76..602c70e324 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized cosf, vector length is 8.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8v_cosf)
.type _ZGVdN8v_cosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8v_cosf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8v_cosf_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN8v_cosf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
index 1efc943295..513f3c0a29 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
index 62858eb39e..f990d36483 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized expf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16v_expf)
.type _ZGVeN16v_expf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16v_expf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16v_expf_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN16v_expf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN16v_expf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
index ec69055351..7eb7a1b775 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
index 37d38bc6f8..2fbe6d475e 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized expf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4v_expf)
.type _ZGVbN4v_expf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4v_expf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4v_expf_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN4v_expf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
index fcc1859c3a..c6f91e8dc1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
index e3dc1b1038..7d19bb423d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized expf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8v_expf)
.type _ZGVdN8v_expf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8v_expf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8v_expf_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN8v_expf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
index c876ecc03e..c6be6954f7 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
index 68c57e4386..9efb2fb7df 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized logf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16v_logf)
.type _ZGVeN16v_logf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16v_logf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16v_logf_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN16v_logf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN16v_logf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
index 86fcab6e63..6209058381 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
index 153ed8ebc2..c85615ac25 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized logf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4v_logf)
.type _ZGVbN4v_logf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4v_logf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4v_logf_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN4v_logf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
index 68f11033d9..1ce9838513 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
index 6f50bf6bdb..8f6d83dd56 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized logf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8v_logf)
.type _ZGVdN8v_logf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8v_logf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8v_logf_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN8v_logf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
index 1f08b4218a..91fb549ce6 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
index 3aa9f952ce..80048ce977 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized powf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16vv_powf)
.type _ZGVeN16vv_powf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16vv_powf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16vv_powf_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN16vv_powf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN16vv_powf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
index 4b61974cb6..45d48723af 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
index f88b9ca6d4..b46821189b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized powf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4vv_powf)
.type _ZGVbN4vv_powf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4vv_powf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4vv_powf_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN4vv_powf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
index 6068f51c46..420f98c6a6 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
index 4552e573a9..945908a2ff 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized powf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8vv_powf)
.type _ZGVdN8vv_powf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8vv_powf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8vv_powf_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN8vv_powf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
index cfb86c7851..4446859130 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
index bdcabab6e2..16cee0c676 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sincosf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16vvv_sincosf)
.type _ZGVeN16vvv_sincosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN16vvv_sincosf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN16vvv_sincosf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
index efff91bb0d..758aeeaeed 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
index 610046b587..d72b4049e2 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sincosf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4vvv_sincosf)
.type _ZGVbN4vvv_sincosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN4vvv_sincosf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
index 4d846b5d7e..643fc0ca3b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
index 9e5be67fc9..0123b8024e 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sincosf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8vvv_sincosf)
.type _ZGVdN8vvv_sincosf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN8vvv_sincosf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
index 0108fd5126..f2a0ba7116 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
index 3ec78a0b5e..2212cdd94d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sinf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,14 +22,12 @@
.text
ENTRY (_ZGVeN16v_sinf)
.type _ZGVeN16v_sinf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVeN16v_sinf_skx(%rip), %rax
- testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVeN16v_sinf_skx(%rip), %rax
+ HAS_ARCH_FEATURE (AVX512DQ_Usable)
jnz 2f
leaq _ZGVeN16v_sinf_knl(%rip), %rax
- testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX512F_Usable)
jnz 2f
leaq _ZGVeN16v_sinf_avx2_wrapper(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
index f13ed96af8..61d8d3793a 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with AVX-512. KNL and SKX versions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
index cf1e4df406..b31554730d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sinf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVbN4v_sinf)
.type _ZGVbN4v_sinf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq _ZGVbN4v_sinf_sse4(%rip), %rax
- testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ leaq _ZGVbN4v_sinf_sse4(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_1)
jz 2f
ret
2: leaq _ZGVbN4v_sinf_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
index b8b852bcae..5268ab1f09 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with SSE4.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
index b28bf3cabc..47fe0a4adc 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
@@ -1,5 +1,5 @@
/* Multiple versions of vectorized sinf, vector length is 8.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,11 +22,9 @@
.text
ENTRY (_ZGVdN8v_sinf)
.type _ZGVdN8v_sinf, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
+ LOAD_RTLD_GLOBAL_RO_RDX
1: leaq _ZGVdN8v_sinf_avx2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX2_Usable)
jz 2f
ret
2: leaq _ZGVdN8v_sinf_sse_wrapper(%rip), %rax
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
index a130d25fce..9fdaadb2e8 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/printf_fphex.c b/sysdeps/x86_64/fpu/printf_fphex.c
index 7b900caa88..0fbaa3748e 100644
--- a/sysdeps/x86_64/fpu/printf_fphex.c
+++ b/sysdeps/x86_64/fpu/printf_fphex.c
@@ -1,5 +1,5 @@
/* Print floating point number in hexadecimal notation according to ISO C99.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/s_copysign.S b/sysdeps/x86_64/fpu/s_copysign.S
index 0576343595..18f568f46f 100644
--- a/sysdeps/x86_64/fpu/s_copysign.S
+++ b/sysdeps/x86_64/fpu/s_copysign.S
@@ -1,5 +1,5 @@
/* copy sign, double version.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/fpu/s_copysignf.S b/sysdeps/x86_64/fpu/s_copysignf.S
index 4961afca46..00a1fabaee 100644
--- a/sysdeps/x86_64/fpu/s_copysignf.S
+++ b/sysdeps/x86_64/fpu/s_copysignf.S
@@ -1,5 +1,5 @@
/* copy sign, float version.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S
index b7868ceb20..31968e498f 100644
--- a/sysdeps/x86_64/fpu/s_cosf.S
+++ b/sysdeps/x86_64/fpu/s_cosf.S
@@ -1,5 +1,5 @@
/* Optimized cosf function.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -310,8 +310,14 @@ L(arg_inf_or_nan):
/* Here if |x| is Inf or NAN */
jne L(skip_errno_setting) /* in case of x is NaN */
+ /* Align stack to 16 bytes. */
+ subq $8, %rsp
+ cfi_adjust_cfa_offset (8)
/* Here if x is Inf. Set errno to EDOM. */
call JUMPTARGET(__errno_location)
+ addq $8, %rsp
+ cfi_adjust_cfa_offset (-8)
+
movl $EDOM, (%rax)
.p2align 4
diff --git a/sysdeps/x86_64/fpu/s_fabs.c b/sysdeps/x86_64/fpu/s_fabs.c
index 5e4f1b390f..d3a313fdf5 100644
--- a/sysdeps/x86_64/fpu/s_fabs.c
+++ b/sysdeps/x86_64/fpu/s_fabs.c
@@ -1,5 +1,5 @@
/* Absolute value of floating point number.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/s_fabsf.c b/sysdeps/x86_64/fpu/s_fabsf.c
index a80c2589fa..e6dcda9433 100644
--- a/sysdeps/x86_64/fpu/s_fabsf.c
+++ b/sysdeps/x86_64/fpu/s_fabsf.c
@@ -1,5 +1,5 @@
/* Absolute value of floating point number.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/s_fabsl.S b/sysdeps/x86_64/fpu/s_fabsl.S
index 8d4694b978..6881ff11c7 100644
--- a/sysdeps/x86_64/fpu/s_fabsl.S
+++ b/sysdeps/x86_64/fpu/s_fabsl.S
@@ -1,5 +1,5 @@
/* Absolute value of floating point number.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/s_fdiml.S b/sysdeps/x86_64/fpu/s_fdiml.S
index ae7490a2a9..f9f1e20259 100644
--- a/sysdeps/x86_64/fpu/s_fdiml.S
+++ b/sysdeps/x86_64/fpu/s_fdiml.S
@@ -1,5 +1,5 @@
/* Compute positive difference.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
diff --git a/sysdeps/x86_64/fpu/s_fmax.S b/sysdeps/x86_64/fpu/s_fmax.S
index 9857ab0b30..02096c0aea 100644
--- a/sysdeps/x86_64/fpu/s_fmax.S
+++ b/sysdeps/x86_64/fpu/s_fmax.S
@@ -1,5 +1,5 @@
/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/fpu/s_fmaxf.S b/sysdeps/x86_64/fpu/s_fmaxf.S
index 0aa9d20cd2..28e129701e 100644
--- a/sysdeps/x86_64/fpu/s_fmaxf.S
+++ b/sysdeps/x86_64/fpu/s_fmaxf.S
@@ -1,5 +1,5 @@
/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/fpu/s_fmaxl.S b/sysdeps/x86_64/fpu/s_fmaxl.S
index 11827134c0..f0c2bc0d56 100644
--- a/sysdeps/x86_64/fpu/s_fmaxl.S
+++ b/sysdeps/x86_64/fpu/s_fmaxl.S
@@ -1,5 +1,5 @@
/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
diff --git a/sysdeps/x86_64/fpu/s_fmin.S b/sysdeps/x86_64/fpu/s_fmin.S
index 9bd00a70b2..fb14e2f3ed 100644
--- a/sysdeps/x86_64/fpu/s_fmin.S
+++ b/sysdeps/x86_64/fpu/s_fmin.S
@@ -1,5 +1,5 @@
/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/fpu/s_fminf.S b/sysdeps/x86_64/fpu/s_fminf.S
index 996c34b1a0..c8d6d0fd33 100644
--- a/sysdeps/x86_64/fpu/s_fminf.S
+++ b/sysdeps/x86_64/fpu/s_fminf.S
@@ -1,5 +1,5 @@
/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/fpu/s_fminl.S b/sysdeps/x86_64/fpu/s_fminl.S
index be9571b4f1..f1a06d29d7 100644
--- a/sysdeps/x86_64/fpu/s_fminl.S
+++ b/sysdeps/x86_64/fpu/s_fminl.S
@@ -1,5 +1,5 @@
/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
diff --git a/sysdeps/x86_64/fpu/s_llrint.S b/sysdeps/x86_64/fpu/s_llrint.S
index e822c06070..6634c653ea 100644
--- a/sysdeps/x86_64/fpu/s_llrint.S
+++ b/sysdeps/x86_64/fpu/s_llrint.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -26,5 +26,7 @@ ENTRY(__llrint)
ret
END(__llrint)
weak_alias (__llrint, llrint)
+#ifndef __ILP32__
strong_alias (__llrint, __lrint)
weak_alias (__llrint, lrint)
+#endif
diff --git a/sysdeps/x86_64/fpu/s_llrintf.S b/sysdeps/x86_64/fpu/s_llrintf.S
index 6825511a57..5ac03dffd9 100644
--- a/sysdeps/x86_64/fpu/s_llrintf.S
+++ b/sysdeps/x86_64/fpu/s_llrintf.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -26,5 +26,7 @@ ENTRY(__llrintf)
ret
END(__llrintf)
weak_alias (__llrintf, llrintf)
+#ifndef __ILP32__
strong_alias (__llrintf, __lrintf)
weak_alias (__llrintf, lrintf)
+#endif
diff --git a/sysdeps/x86_64/fpu/s_llrintl.S b/sysdeps/x86_64/fpu/s_llrintl.S
index abe3a5bc0b..5f4d827dff 100644
--- a/sysdeps/x86_64/fpu/s_llrintl.S
+++ b/sysdeps/x86_64/fpu/s_llrintl.S
@@ -1,6 +1,6 @@
/* Round argument to nearest integral value according to current rounding
direction.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -28,6 +28,7 @@ ENTRY(__llrintl)
ret
END(__llrintl)
weak_alias (__llrintl, llrintl)
+#ifndef __ILP32__
strong_alias (__llrintl, __lrintl)
weak_alias (__llrintl, lrintl)
-
+#endif
diff --git a/sysdeps/x86_64/fpu/s_nearbyintl.S b/sysdeps/x86_64/fpu/s_nearbyintl.S
index dab2750a23..76d41bdd52 100644
--- a/sysdeps/x86_64/fpu/s_nearbyintl.S
+++ b/sysdeps/x86_64/fpu/s_nearbyintl.S
@@ -8,14 +8,16 @@
ENTRY(__nearbyintl)
fldt 8(%rsp)
- fnstcw -4(%rsp)
- movl -4(%rsp), %eax
+ fnstenv -28(%rsp)
+ movl -28(%rsp), %eax
orl $0x20, %eax
- movl %eax, -8(%rsp)
- fldcw -8(%rsp)
+ movl %eax, -32(%rsp)
+ fldcw -32(%rsp)
frndint
- fclex
- fldcw -4(%rsp)
+ fnstsw
+ andl $0x1, %eax
+ orl %eax, -24(%rsp)
+ fldenv -28(%rsp)
ret
END (__nearbyintl)
weak_alias (__nearbyintl, nearbyintl)
diff --git a/sysdeps/x86_64/fpu/s_scalbnl.S b/sysdeps/x86_64/fpu/s_scalbnl.S
index d0e9301eed..6c7683c32b 100644
--- a/sysdeps/x86_64/fpu/s_scalbnl.S
+++ b/sysdeps/x86_64/fpu/s_scalbnl.S
@@ -15,4 +15,3 @@ ENTRY(__scalbnl)
fstp %st(1)
ret
END (__scalbnl)
-weak_alias (__scalbnl, scalbnl)
diff --git a/sysdeps/x86_64/fpu/s_signbit.S b/sysdeps/x86_64/fpu/s_signbit.S
index a327c45330..92a79d3123 100644
--- a/sysdeps/x86_64/fpu/s_signbit.S
+++ b/sysdeps/x86_64/fpu/s_signbit.S
@@ -1,5 +1,5 @@
/* Return nonzero value if number is negative.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2009.
diff --git a/sysdeps/x86_64/fpu/s_signbitf.S b/sysdeps/x86_64/fpu/s_signbitf.S
index 90994705c7..885645372e 100644
--- a/sysdeps/x86_64/fpu/s_signbitf.S
+++ b/sysdeps/x86_64/fpu/s_signbitf.S
@@ -1,5 +1,5 @@
/* Return nonzero value if number is negative.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2009.
diff --git a/sysdeps/x86_64/fpu/s_sincosf.S b/sysdeps/x86_64/fpu/s_sincosf.S
index 21db70a88b..5e7cbe57e3 100644
--- a/sysdeps/x86_64/fpu/s_sincosf.S
+++ b/sysdeps/x86_64/fpu/s_sincosf.S
@@ -1,5 +1,5 @@
/* Optimized sincosf function.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -354,8 +354,14 @@ L(arg_inf_or_nan):
/* Here if |x| is Inf or NAN */
jne L(skip_errno_setting) /* in case of x is NaN */
+ /* Align stack to 16 bytes. */
+ subq $8, %rsp
+ cfi_adjust_cfa_offset (8)
/* Here if x is Inf. Set errno to EDOM. */
call JUMPTARGET(__errno_location)
+ addq $8, %rsp
+ cfi_adjust_cfa_offset (-8)
+
movl $EDOM, (%rax)
.p2align 4
diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S
index dc921641de..c980c6e207 100644
--- a/sysdeps/x86_64/fpu/s_sinf.S
+++ b/sysdeps/x86_64/fpu/s_sinf.S
@@ -1,5 +1,5 @@
/* Optimized sinf function.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -336,8 +336,14 @@ L(arg_inf_or_nan):
/* Here if |x| is Inf or NAN */
jne L(skip_errno_setting) /* in case of x is NaN */
+ /* Align stack to 16 bytes. */
+ subq $8, %rsp
+ cfi_adjust_cfa_offset (8)
/* Here if x is Inf. Set errno to EDOM. */
call JUMPTARGET(__errno_location)
+ addq $8, %rsp
+ cfi_adjust_cfa_offset (-8)
+
movl $EDOM, (%rax)
.p2align 4
diff --git a/sysdeps/x86_64/fpu/s_truncl.S b/sysdeps/x86_64/fpu/s_truncl.S
index 6ba4a27cad..c37cf00241 100644
--- a/sysdeps/x86_64/fpu/s_truncl.S
+++ b/sysdeps/x86_64/fpu/s_truncl.S
@@ -1,5 +1,5 @@
/* Truncate long double value.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
diff --git a/sysdeps/x86_64/fpu/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/svml_d_cos2_core.S
index a26beca4a1..7f62d29917 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos2_core.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
index 35996b7318..b92ff13b86 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
index bf10b01cc5..a3da721e35 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
@@ -1,5 +1,5 @@
/* Function cos vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/svml_d_cos8_core.S
index 1ba10e8c9b..e5d986d11a 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos8_core.S
@@ -1,5 +1,5 @@
/* Function cos vectorized with AVX-512, wrapper to AVX2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/svml_d_exp2_core.S
index ca3dd76364..9e511037a1 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp2_core.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/svml_d_exp4_core.S
index d497811980..8cac8adbc7 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp4_core.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
index 5dd2f6cd17..1a0fbf574a 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
@@ -1,5 +1,5 @@
/* Function exp vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/svml_d_exp8_core.S
index 3e273a3e71..2486e888a4 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp8_core.S
@@ -1,5 +1,5 @@
/* Function exp vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.S b/sysdeps/x86_64/fpu/svml_d_exp_data.S
index 66fa3b88d7..6d1acbdd21 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp_data.S
@@ -1,5 +1,5 @@
/* Data for vector function exp.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.h b/sysdeps/x86_64/fpu/svml_d_exp_data.h
index 71ebdb799e..f993403d47 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_exp_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for function exp.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log2_core.S b/sysdeps/x86_64/fpu/svml_d_log2_core.S
index daa63b583f..8ea40fee56 100644
--- a/sysdeps/x86_64/fpu/svml_d_log2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log2_core.S
@@ -1,5 +1,5 @@
/* Function log vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core.S b/sysdeps/x86_64/fpu/svml_d_log4_core.S
index 009c93c837..72813d8921 100644
--- a/sysdeps/x86_64/fpu/svml_d_log4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log4_core.S
@@ -1,5 +1,5 @@
/* Function log vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
index 554fc45712..6ca1139931 100644
--- a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
@@ -1,5 +1,5 @@
/* Function log vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log8_core.S b/sysdeps/x86_64/fpu/svml_d_log8_core.S
index 9728305f17..6850fd9a44 100644
--- a/sysdeps/x86_64/fpu/svml_d_log8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log8_core.S
@@ -1,5 +1,5 @@
/* Function log vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.S b/sysdeps/x86_64/fpu/svml_d_log_data.S
index 1ce78e2c8b..9ab541b23f 100644
--- a/sysdeps/x86_64/fpu/svml_d_log_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_log_data.S
@@ -1,5 +1,5 @@
/* Data for function log.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.h b/sysdeps/x86_64/fpu/svml_d_log_data.h
index 8ca55a8010..30c2b54a4b 100644
--- a/sysdeps/x86_64/fpu/svml_d_log_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_log_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for function log.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/svml_d_pow2_core.S
index 0b726a1eaf..b25515c825 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow2_core.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/svml_d_pow4_core.S
index 9eb47ab8c9..547993799e 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow4_core.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
index 6c7b59995d..4e4e9867b4 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
@@ -1,5 +1,5 @@
/* Function pow vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/svml_d_pow8_core.S
index cd99457843..372e5a9c83 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow8_core.S
@@ -1,5 +1,5 @@
/* Function pow vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.S b/sysdeps/x86_64/fpu/svml_d_pow_data.S
index 1e0733e0a6..8481f95455 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow_data.S
@@ -1,5 +1,5 @@
/* Data for function pow.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.h b/sysdeps/x86_64/fpu/svml_d_pow_data.h
index a1b9f9bc46..239ba96984 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_pow_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for function pow.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/svml_d_sin2_core.S
index c619dab966..f6ec13104b 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin2_core.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/svml_d_sin4_core.S
index f650d461a5..95a1dec6f6 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin4_core.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
index a21ffafa32..29d1526a12 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
@@ -1,5 +1,5 @@
/* Function sin vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/svml_d_sin8_core.S
index 2e78b5e35a..abd86b3d98 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin8_core.S
@@ -1,5 +1,5 @@
/* Function sin vectorized with AVX-512, wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
index bd089e1ed0..74afa0a677 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
index d67cd30132..2c0b011fb3 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
index 4f3f15aea6..e4320a97c7 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
index e7f7121fa0..68d490e5bc 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
@@ -1,5 +1,5 @@
/* Function sincos vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.S b/sysdeps/x86_64/fpu/svml_d_trig_data.S
index d3b30598cc..887dacee91 100644
--- a/sysdeps/x86_64/fpu/svml_d_trig_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_trig_data.S
@@ -1,5 +1,5 @@
/* Data for vectorized sin, cos, sincos.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.h b/sysdeps/x86_64/fpu/svml_d_trig_data.h
index 1395337c7e..4617b5e0c3 100644
--- a/sysdeps/x86_64/fpu/svml_d_trig_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_trig_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for vectorized sin, cos, sincos.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
index 5c0ff897c0..54f4f58371 100644
--- a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
@@ -1,5 +1,5 @@
/* Wrapper implementations of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_finite_alias.S b/sysdeps/x86_64/fpu/svml_finite_alias.S
new file mode 100644
index 0000000000..2dcfc37590
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_finite_alias.S
@@ -0,0 +1,58 @@
+/* These aliases are added as a workaround to exclude unnecessary symbol
+ aliases in libmvec.so while the compiler creates the vector names
+ based on the scalar asm name. Corresponding discussion is at
+ <https://gcc.gnu.org/ml/gcc/2015-06/msg00173.html>.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define ALIAS_IMPL(alias, target) \
+ENTRY (alias); \
+ jmp *target@GOTPCREL(%rip); \
+END (alias)
+
+ .text
+ALIAS_IMPL (_ZGVbN2v___log_finite, _ZGVbN2v_log)
+ALIAS_IMPL (_ZGVcN4v___log_finite, _ZGVcN4v_log)
+ALIAS_IMPL (_ZGVdN4v___log_finite, _ZGVdN4v_log)
+ALIAS_IMPL (_ZGVeN8v___log_finite, _ZGVeN8v_log)
+
+ALIAS_IMPL (_ZGVbN4v___logf_finite, _ZGVbN4v_logf)
+ALIAS_IMPL (_ZGVcN8v___logf_finite, _ZGVcN8v_logf)
+ALIAS_IMPL (_ZGVdN8v___logf_finite, _ZGVdN8v_logf)
+ALIAS_IMPL (_ZGVeN16v___logf_finite, _ZGVeN16v_logf)
+
+ALIAS_IMPL (_ZGVbN2v___exp_finite, _ZGVbN2v_exp)
+ALIAS_IMPL (_ZGVcN4v___exp_finite, _ZGVcN4v_exp)
+ALIAS_IMPL (_ZGVdN4v___exp_finite, _ZGVdN4v_exp)
+ALIAS_IMPL (_ZGVeN8v___exp_finite, _ZGVeN8v_exp)
+
+ALIAS_IMPL (_ZGVbN4v___expf_finite, _ZGVbN4v_expf)
+ALIAS_IMPL (_ZGVcN8v___expf_finite, _ZGVcN8v_expf)
+ALIAS_IMPL (_ZGVdN8v___expf_finite, _ZGVdN8v_expf)
+ALIAS_IMPL (_ZGVeN16v___expf_finite, _ZGVeN16v_expf)
+
+ALIAS_IMPL (_ZGVbN2vv___pow_finite, _ZGVbN2vv_pow)
+ALIAS_IMPL (_ZGVcN4vv___pow_finite, _ZGVcN4vv_pow)
+ALIAS_IMPL (_ZGVdN4vv___pow_finite, _ZGVdN4vv_pow)
+ALIAS_IMPL (_ZGVeN8vv___pow_finite, _ZGVeN8vv_pow)
+
+ALIAS_IMPL (_ZGVbN4vv___powf_finite, _ZGVbN4vv_powf)
+ALIAS_IMPL (_ZGVcN8vv___powf_finite, _ZGVcN8vv_powf)
+ALIAS_IMPL (_ZGVdN8vv___powf_finite, _ZGVdN8vv_powf)
+ALIAS_IMPL (_ZGVeN16vv___powf_finite, _ZGVeN16vv_powf)
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
index e623df5dc3..9ca4fbfaa8 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
index 9875cd7f71..363090c54a 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with SSE2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
index 376ee358ae..26a6a4e4d6 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
index a443fd28ad..6c210d98ce 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function cosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/svml_s_expf16_core.S
index d9d355c372..d8eecac674 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf16_core.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/svml_s_expf4_core.S
index 71c5da4657..65b5d1a3ce 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf4_core.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/svml_s_expf8_core.S
index d254a992a4..e3cf975bf6 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf8_core.S
@@ -1,5 +1,5 @@
/* Function expf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
index ece40ba972..90469d7dcf 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function expf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.S b/sysdeps/x86_64/fpu/svml_s_expf_data.S
index eee9d69e31..4b644082b6 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf_data.S
@@ -1,5 +1,5 @@
/* Data for function expf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.h b/sysdeps/x86_64/fpu/svml_s_expf_data.h
index beaa290540..3610633c96 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_expf_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for vector function expf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/svml_s_logf16_core.S
index 47ae7855a3..cc2e97df78 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf16_core.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/svml_s_logf4_core.S
index 09be406d3c..195f328d92 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf4_core.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/svml_s_logf8_core.S
index cf4e9be537..8bb6926667 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf8_core.S
@@ -1,5 +1,5 @@
/* Function logf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
index 7ab572bb30..c2efba23f2 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function logf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.S b/sysdeps/x86_64/fpu/svml_s_logf_data.S
index 1e7f7015d3..a5675f5c7a 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf_data.S
@@ -1,5 +1,5 @@
/* Data for vector function logf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.h b/sysdeps/x86_64/fpu/svml_s_logf_data.h
index d42411a849..619d5c4bd1 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_logf_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for vectorized function logf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/svml_s_powf16_core.S
index efd84c2fff..cb52af0c6b 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf16_core.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/svml_s_powf4_core.S
index 81f0f530de..88fae60892 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf4_core.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/svml_s_powf8_core.S
index 8fed6c7c86..8ea44897c1 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf8_core.S
@@ -1,5 +1,5 @@
/* Function powf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
index eeeb66d46e..b5e4e5e6ef 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function powf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.S b/sysdeps/x86_64/fpu/svml_s_powf_data.S
index 4a4799ae4f..fc1a3d9390 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf_data.S
@@ -1,5 +1,5 @@
/* Data for function powf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.h b/sysdeps/x86_64/fpu/svml_s_powf_data.h
index d847368e4b..514004238a 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_powf_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for function powf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
index 992f9a91cc..5cbf10b8da 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
index d402ffba15..1a7d2733af 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
index eec7de87d5..74d1dfd1a8 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
index c247444dfc..55b8b2d768 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function sincosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
index add6e0fd43..d7a31e1ea6 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with AVX-512. Wrapper to AVX2 version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
index 2349c7b788..6f10137134 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with SSE2.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
index fe31e3793e..c459658688 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized with AVX2, wrapper version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
index f54be48ee3..5e95aa2e02 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
@@ -1,5 +1,5 @@
/* Function sinf vectorized in AVX ISA as wrapper to SSE4 ISA version.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.S b/sysdeps/x86_64/fpu/svml_s_trig_data.S
index 07fc7d272d..b61aa6abb9 100644
--- a/sysdeps/x86_64/fpu/svml_s_trig_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_trig_data.S
@@ -1,5 +1,5 @@
/* Data for function cosf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.h b/sysdeps/x86_64/fpu/svml_s_trig_data.h
index 5a91dad41c..2e469a918a 100644
--- a/sysdeps/x86_64/fpu/svml_s_trig_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_trig_data.h
@@ -1,5 +1,5 @@
/* Offsets for data table for vectorized sinf, cosf, sincosf.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
index d255d195ee..b1a03be3d9 100644
--- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
@@ -1,5 +1,5 @@
/* Wrapper implementations of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
index 4e764f2475..a9d15979aa 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for SSE ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2.c b/sysdeps/x86_64/fpu/test-double-vlen2.c
index 2b6896425e..c7a3dff747 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2.c
@@ -1,5 +1,5 @@
/* Tests for SSE ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
index bc2fd16c5a..eb6a531502 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX2 ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c
index 56723ab4d7..0cadef03d6 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c
@@ -1,5 +1,5 @@
/* Tests for AVX2 ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
index a711c9e1c3..52b81da3ee 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.c
index f0813437b4..9ae97f1388 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4.c
@@ -1,5 +1,5 @@
/* Tests for AVX ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
index 942c42b83b..c10bb9cb4a 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX-512 versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8.c b/sysdeps/x86_64/fpu/test-double-vlen8.c
index 1e23b83418..4fb6c8d196 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8.c
@@ -1,5 +1,5 @@
/* Tests for AVX-512 versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
index bc98e78ff0..dc09e4a338 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX-512 ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16.c b/sysdeps/x86_64/fpu/test-float-vlen16.c
index d7f683f09c..882bfc840d 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen16.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16.c
@@ -1,5 +1,5 @@
/* Tests for AVX-512 ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
index 39254efed4..0bb9818146 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for SSE ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4.c b/sysdeps/x86_64/fpu/test-float-vlen4.c
index e56d64260e..f6a4cf5c1e 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen4.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen4.c
@@ -1,5 +1,5 @@
/* Tests for SSE ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
index 6bd0d50779..4985ac2379 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX2 ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c
index 0012082b8e..7a416385b6 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c
@@ -1,5 +1,5 @@
/* Tests for AVX2 ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
index 2fec906de0..9cc2883399 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
@@ -1,5 +1,5 @@
/* Wrapper part of tests for AVX ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8.c b/sysdeps/x86_64/fpu/test-float-vlen8.c
index 891e58ff88..c92a50ae7e 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8.c
@@ -1,5 +1,5 @@
/* Tests for AVX ISA versions of vector math functions.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/x86_64-math-asm.h b/sysdeps/x86_64/fpu/x86_64-math-asm.h
new file mode 100644
index 0000000000..db3f9f78b0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/x86_64-math-asm.h
@@ -0,0 +1,74 @@
+/* Helper macros for x86_64 libm functions.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86_64_MATH_ASM_H
+#define _X86_64_MATH_ASM_H 1
+
+/* Define constants for the minimum value of a floating-point
+ type. */
+#define DEFINE_LDBL_MIN \
+ .section .rodata.cst16,"aM",@progbits,16; \
+ .p2align 4; \
+ .type ldbl_min,@object; \
+ldbl_min: \
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x1, 0; \
+ .byte 0, 0, 0, 0, 0, 0; \
+ .size ldbl_min, .-ldbl_min;
+
+/* Force an underflow exception if the given value (nonnegative or
+ NaN) is subnormal. The relevant constant for the minimum of the
+ type must have been defined, the MO macro must have been defined
+ for access to memory operands, and, if PIC, the PIC register must
+ have been loaded. */
+#define LDBL_CHECK_FORCE_UFLOW_NONNEG_NAN \
+ fldt MO(ldbl_min); \
+ fld %st(1); \
+ fucomip %st(1), %st(0); \
+ fstp %st(0); \
+ jnc 6464f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstp %st(0); \
+6464:
+
+/* Likewise, but the argument is not a NaN. */
+#define LDBL_CHECK_FORCE_UFLOW_NONNAN \
+ fldt MO(ldbl_min); \
+ fld %st(1); \
+ fabs; \
+ fcomip %st(1), %st(0); \
+ fstp %st(0); \
+ jnc 6464f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstp %st(0); \
+6464:
+
+/* Likewise, but the argument is nonnegative and not a NaN. */
+#define LDBL_CHECK_FORCE_UFLOW_NONNEG \
+ fldt MO(ldbl_min); \
+ fld %st(1); \
+ fcomip %st(1), %st(0); \
+ fstp %st(0); \
+ jnc 6464f; \
+ fld %st(0); \
+ fmul %st(0); \
+ fstp %st(0); \
+6464:
+
+#endif /* x86_64-math-asm.h. */
diff --git a/sysdeps/x86_64/hp-timing.h b/sysdeps/x86_64/hp-timing.h
index 493f9735bd..65381b314d 100644
--- a/sysdeps/x86_64/hp-timing.h
+++ b/sysdeps/x86_64/hp-timing.h
@@ -1,5 +1,5 @@
/* High precision, low overhead timing functions. x86-64 version.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/htonl.S b/sysdeps/x86_64/htonl.S
index 85a690f5b1..c92fae8791 100644
--- a/sysdeps/x86_64/htonl.S
+++ b/sysdeps/x86_64/htonl.S
@@ -1,5 +1,5 @@
/* Change byte order in word. For AMD x86-64.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/ifuncmain8.c b/sysdeps/x86_64/ifuncmain8.c
new file mode 100644
index 0000000000..448ab96bfa
--- /dev/null
+++ b/sysdeps/x86_64/ifuncmain8.c
@@ -0,0 +1,32 @@
+/* Test IFUNC selector with floating-point parameters.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+extern float foo (float);
+
+static int
+do_test (void)
+{
+ if (foo (2) != 3)
+ abort ();
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/sysdeps/x86_64/ifuncmod8.c b/sysdeps/x86_64/ifuncmod8.c
new file mode 100644
index 0000000000..c00436799c
--- /dev/null
+++ b/sysdeps/x86_64/ifuncmod8.c
@@ -0,0 +1,36 @@
+/* Test IFUNC selector with floating-point parameters.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <emmintrin.h>
+
+void * foo_ifunc (void) __asm__ ("foo");
+__asm__(".type foo, %gnu_indirect_function");
+
+static float
+foo_impl (float x)
+{
+ return x + 1;
+}
+
+void *
+foo_ifunc (void)
+{
+ __m128i xmm = _mm_set1_epi32 (-1);
+ asm volatile ("movdqa %0, %%xmm0" : : "x" (xmm) : "xmm0" );
+ return foo_impl;
+}
diff --git a/sysdeps/x86_64/jmpbuf-offsets.h b/sysdeps/x86_64/jmpbuf-offsets.h
index 03176a91f0..da71e555f7 100644
--- a/sysdeps/x86_64/jmpbuf-offsets.h
+++ b/sysdeps/x86_64/jmpbuf-offsets.h
@@ -1,5 +1,5 @@
/* Private macros for accessing __jmp_buf contents. x86-64 version.
- Copyright (C) 2006-2015 Free Software Foundation, Inc.
+ Copyright (C) 2006-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/jmpbuf-unwind.h b/sysdeps/x86_64/jmpbuf-unwind.h
index 3d9b2b589f..aa0642b54a 100644
--- a/sysdeps/x86_64/jmpbuf-unwind.h
+++ b/sysdeps/x86_64/jmpbuf-unwind.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
diff --git a/sysdeps/x86_64/ldsodefs.h b/sysdeps/x86_64/ldsodefs.h
index 84d36e82be..6a96c53721 100644
--- a/sysdeps/x86_64/ldsodefs.h
+++ b/sysdeps/x86_64/ldsodefs.h
@@ -1,5 +1,5 @@
/* Run-time dynamic linker data structures for loaded ELF shared objects.
- Copyright (C) 1995-2015 Free Software Foundation, Inc.
+ Copyright (C) 1995-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,7 @@
#define _X86_64_LDSODEFS_H 1
#include <elf.h>
+#include <cpu-features.h>
struct La_x86_64_regs;
struct La_x86_64_retval;
diff --git a/sysdeps/x86_64/localplt.data b/sysdeps/x86_64/localplt.data
index d140476dfe..f168b143ff 100644
--- a/sysdeps/x86_64/localplt.data
+++ b/sysdeps/x86_64/localplt.data
@@ -3,17 +3,17 @@
# users can define their own functions and have library internals call them.
# Linker in binutils 2.26 and newer consolidates R_X86_64_JUMP_SLOT
# relocation with R_X86_64_GLOB_DAT relocation against the same symbol.
-libc.so: calloc
+libc.so: calloc + RELA R_X86_64_GLOB_DAT
libc.so: free + RELA R_X86_64_GLOB_DAT
libc.so: malloc + RELA R_X86_64_GLOB_DAT
-libc.so: memalign
-libc.so: realloc
+libc.so: memalign + RELA R_X86_64_GLOB_DAT
+libc.so: realloc + RELA R_X86_64_GLOB_DAT
libm.so: matherr
# The dynamic loader uses __libc_memalign internally to allocate aligned
# TLS storage. The other malloc family of functions are expected to allow
# user symbol interposition.
-ld.so: __libc_memalign
-ld.so: malloc
-ld.so: calloc
-ld.so: realloc
+ld.so: __libc_memalign + RELA R_X86_64_GLOB_DAT
+ld.so: malloc + RELA R_X86_64_GLOB_DAT
+ld.so: calloc + RELA R_X86_64_GLOB_DAT
+ld.so: realloc + RELA R_X86_64_GLOB_DAT
ld.so: free + RELA R_X86_64_GLOB_DAT
diff --git a/sysdeps/x86_64/lshift.S b/sysdeps/x86_64/lshift.S
index 03fb631207..49cbfbaf3d 100644
--- a/sysdeps/x86_64/lshift.S
+++ b/sysdeps/x86_64/lshift.S
@@ -1,5 +1,5 @@
/* x86-64 __mpn_lshift --
- Copyright (C) 2007-2015 Free Software Foundation, Inc.
+ Copyright (C) 2007-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/machine-gmon.h b/sysdeps/x86_64/machine-gmon.h
index 51cf7793b4..3d9ce5c44e 100644
--- a/sysdeps/x86_64/machine-gmon.h
+++ b/sysdeps/x86_64/machine-gmon.h
@@ -1,5 +1,5 @@
/* x86-64-specific implementation of profiling support.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index fae85caae1..132eacba8f 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index f636716b64..3fb018a772 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -1,5 +1,5 @@
/* memcmp with SSE2
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/memcpy.S b/sysdeps/x86_64/memcpy.S
index eea8c2a5af..f6e3d9396c 100644
--- a/sysdeps/x86_64/memcpy.S
+++ b/sysdeps/x86_64/memcpy.S
@@ -1,7 +1,7 @@
/*
Optimized memcpy for x86-64.
- Copyright (C) 2007-2015 Free Software Foundation, Inc.
+ Copyright (C) 2007-2016 Free Software Foundation, Inc.
Contributed by Evandro Menezes <evandro.menezes@amd.com>, 2007.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/memcpy_chk.S b/sysdeps/x86_64/memcpy_chk.S
index f1a5ac4b23..2296b55119 100644
--- a/sysdeps/x86_64/memcpy_chk.S
+++ b/sysdeps/x86_64/memcpy_chk.S
@@ -1,5 +1,5 @@
/* Checking memcpy for x86-64.
- Copyright (C) 2004-2015 Free Software Foundation, Inc.
+ Copyright (C) 2004-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/memmove.c b/sysdeps/x86_64/memmove.c
index e0694a859f..07f81852d6 100644
--- a/sysdeps/x86_64/memmove.c
+++ b/sysdeps/x86_64/memmove.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/mempcpy_chk.S b/sysdeps/x86_64/mempcpy_chk.S
index 968e7edf3f..390abc68dd 100644
--- a/sysdeps/x86_64/mempcpy_chk.S
+++ b/sysdeps/x86_64/mempcpy_chk.S
@@ -1,5 +1,5 @@
/* Checking mempcpy for x86-64.
- Copyright (C) 2004-2015 Free Software Foundation, Inc.
+ Copyright (C) 2004-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S
index 707b8390db..840de30cd7 100644
--- a/sysdeps/x86_64/memrchr.S
+++ b/sysdeps/x86_64/memrchr.S
@@ -1,6 +1,6 @@
/* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index e4962546c4..4cf0da0fb8 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -1,6 +1,6 @@
/* memset/bzero -- set memory area to CH/0
Optimized version for x86-64.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -24,7 +24,7 @@
ENTRY(__bzero)
movq %rdi, %rax /* Set return value. */
movq %rsi, %rdx /* Set n. */
- pxor %xmm8, %xmm8
+ pxor %xmm0, %xmm0
jmp L(entry_from_bzero)
END(__bzero)
weak_alias (__bzero, bzero)
@@ -33,10 +33,10 @@ weak_alias (__bzero, bzero)
ENTRY(__memset_tail)
movq %rcx, %rax /* Set return value. */
- movd %esi, %xmm8
- punpcklbw %xmm8, %xmm8
- punpcklwd %xmm8, %xmm8
- pshufd $0, %xmm8, %xmm8
+ movd %esi, %xmm0
+ punpcklbw %xmm0, %xmm0
+ punpcklwd %xmm0, %xmm0
+ pshufd $0, %xmm0, %xmm0
jmp L(entry_from_bzero)
END(__memset_tail)
@@ -50,57 +50,57 @@ END_CHK (__memset_chk)
#endif
ENTRY (memset)
- movd %esi, %xmm8
+ movd %esi, %xmm0
movq %rdi, %rax
- punpcklbw %xmm8, %xmm8
- punpcklwd %xmm8, %xmm8
- pshufd $0, %xmm8, %xmm8
+ punpcklbw %xmm0, %xmm0
+ punpcklwd %xmm0, %xmm0
+ pshufd $0, %xmm0, %xmm0
L(entry_from_bzero):
cmpq $64, %rdx
ja L(loop_start)
cmpq $16, %rdx
jbe L(less_16_bytes)
cmpq $32, %rdx
- movdqu %xmm8, (%rdi)
- movdqu %xmm8, -16(%rdi,%rdx)
+ movdqu %xmm0, (%rdi)
+ movdqu %xmm0, -16(%rdi,%rdx)
ja L(between_32_64_bytes)
L(return):
rep
ret
.p2align 4
L(between_32_64_bytes):
- movdqu %xmm8, 16(%rdi)
- movdqu %xmm8, -32(%rdi,%rdx)
+ movdqu %xmm0, 16(%rdi)
+ movdqu %xmm0, -32(%rdi,%rdx)
ret
.p2align 4
L(loop_start):
leaq 64(%rdi), %rcx
- movdqu %xmm8, (%rdi)
+ movdqu %xmm0, (%rdi)
andq $-64, %rcx
- movdqu %xmm8, -16(%rdi,%rdx)
- movdqu %xmm8, 16(%rdi)
- movdqu %xmm8, -32(%rdi,%rdx)
- movdqu %xmm8, 32(%rdi)
- movdqu %xmm8, -48(%rdi,%rdx)
- movdqu %xmm8, 48(%rdi)
- movdqu %xmm8, -64(%rdi,%rdx)
+ movdqu %xmm0, -16(%rdi,%rdx)
+ movdqu %xmm0, 16(%rdi)
+ movdqu %xmm0, -32(%rdi,%rdx)
+ movdqu %xmm0, 32(%rdi)
+ movdqu %xmm0, -48(%rdi,%rdx)
+ movdqu %xmm0, 48(%rdi)
+ movdqu %xmm0, -64(%rdi,%rdx)
addq %rdi, %rdx
andq $-64, %rdx
cmpq %rdx, %rcx
je L(return)
.p2align 4
L(loop):
- movdqa %xmm8, (%rcx)
- movdqa %xmm8, 16(%rcx)
- movdqa %xmm8, 32(%rcx)
- movdqa %xmm8, 48(%rcx)
+ movdqa %xmm0, (%rcx)
+ movdqa %xmm0, 16(%rcx)
+ movdqa %xmm0, 32(%rcx)
+ movdqa %xmm0, 48(%rcx)
addq $64, %rcx
cmpq %rcx, %rdx
jne L(loop)
rep
ret
L(less_16_bytes):
- movq %xmm8, %rcx
+ movq %xmm0, %rcx
testb $24, %dl
jne L(between8_16bytes)
testb $4, %dl
diff --git a/sysdeps/x86_64/memset_chk.S b/sysdeps/x86_64/memset_chk.S
index 70204267ca..95bb5d0e94 100644
--- a/sysdeps/x86_64/memset_chk.S
+++ b/sysdeps/x86_64/memset_chk.S
@@ -1,5 +1,5 @@
/* Checking memset for x86-64.
- Copyright (C) 2004-2015 Free Software Foundation, Inc.
+ Copyright (C) 2004-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/memusage.h b/sysdeps/x86_64/memusage.h
index e915c1a672..fc102c4252 100644
--- a/sysdeps/x86_64/memusage.h
+++ b/sysdeps/x86_64/memusage.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/mul_1.S b/sysdeps/x86_64/mul_1.S
index 2fb8fad2bd..88b8f920a1 100644
--- a/sysdeps/x86_64/mul_1.S
+++ b/sysdeps/x86_64/mul_1.S
@@ -1,6 +1,6 @@
/* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
the result in a second limb vector.
- Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Copyright (C) 2003-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index d7002a9df3..d234f4ab66 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -1,5 +1,4 @@
ifeq ($(subdir),csu)
-aux += init-arch
tests += test-multiarch
gen-as-const-headers += ifunc-defines.sym
endif
@@ -8,31 +7,26 @@ ifeq ($(subdir),string)
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcmp-sse2-unaligned strncmp-ssse3 \
- memcmp-sse4 memcpy-ssse3 \
- memcpy-sse2-unaligned mempcpy-ssse3 \
- memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
- memmove-avx-unaligned memcpy-avx-unaligned mempcpy-avx-unaligned \
- memmove-ssse3-back strcasecmp_l-ssse3 \
+ memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned \
+ memcpy-avx512-no-vzeroupper mempcpy-ssse3 memmove-ssse3 \
+ memcpy-ssse3-back mempcpy-ssse3-back memmove-avx-unaligned \
+ memcpy-avx-unaligned mempcpy-avx-unaligned \
+ mempcpy-avx512-no-vzeroupper memmove-ssse3-back \
+ memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
strcat-sse2-unaligned strncat-sse2-unaligned \
- strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned
-
-ifeq (yes,$(config-cflags-sse4))
-sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
+ strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
+ strcspn-c strpbrk-c strspn-c varshift memset-avx2 \
+ memset-avx512-no-vzeroupper
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
CFLAGS-strspn-c.c += -msse4
endif
-ifeq (yes,$(config-cflags-avx2))
-sysdep_routines += memset-avx2
-endif
-endif
-
ifeq ($(subdir),wcsmbs)
sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
endif
diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions
deleted file mode 100644
index 59b185ac8d..0000000000
--- a/sysdeps/x86_64/multiarch/Versions
+++ /dev/null
@@ -1,5 +0,0 @@
-libc {
- GLIBC_PRIVATE {
- __get_cpu_features;
- }
-}
diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c
deleted file mode 100644
index f87b8dce6b..0000000000
--- a/sysdeps/x86_64/multiarch/cacheinfo.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
-#include "../cacheinfo.c"
diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
index a410d8808f..3df946f343 100644
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
@@ -4,7 +4,6 @@
--
CPU_FEATURES_SIZE sizeof (struct cpu_features)
-KIND_OFFSET offsetof (struct cpu_features, kind)
CPUID_OFFSET offsetof (struct cpu_features, cpuid)
CPUID_SIZE sizeof (struct cpuid_registers)
CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index b64e4f1532..188b6d36c6 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -1,5 +1,5 @@
/* Enumerate available IFUNC implementations of a function. x86-64 version.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,10 +20,11 @@
#include <string.h>
#include <wchar.h>
#include <ifunc-impl-list.h>
+#include <sysdep.h>
#include "init-arch.h"
/* Maximum number of IFUNC implementations. */
-#define MAX_IFUNC 4
+#define MAX_IFUNC 5
/* Fill ARRAY of MAX elements with IFUNC implementations for function
NAME supported on target machine and return the number of valid
@@ -39,48 +40,77 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/memcmp.S. */
IFUNC_IMPL (i, name, memcmp,
- IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSE4_1,
+ IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1),
__memcmp_sse4_1)
- IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3)
+ IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
+ __memcmp_ssse3)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
- /* Support sysdeps/x86_64/multiarch/memmove_chk.S. */
+ /* Support sysdeps/x86_64/multiarch/memmove_chk.c. */
IFUNC_IMPL (i, name, __memmove_chk,
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_AVX,
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_chk_avx512_no_vzeroupper)
+#endif
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
__memmove_chk_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_CPU_FEATURE (SSSE3),
__memmove_chk_ssse3_back)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_CPU_FEATURE (SSSE3),
__memmove_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
__memmove_chk_sse2))
/* Support sysdeps/x86_64/multiarch/memmove.S. */
IFUNC_IMPL (i, name, memmove,
- IFUNC_IMPL_ADD (array, i, memmove, HAS_AVX,
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX_Usable),
__memmove_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_avx512_no_vzeroupper)
+#endif
+ IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
__memmove_ssse3_back)
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
__memmove_ssse3)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
-#ifdef HAVE_AVX2_SUPPORT
/* Support sysdeps/x86_64/multiarch/memset_chk.S. */
IFUNC_IMPL (i, name, __memset_chk,
- IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2)
- IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_AVX2,
- __memset_chk_avx2))
+ IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+ __memset_chk_sse2)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __memset_chk_avx2)
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_chk_avx512_no_vzeroupper)
+#endif
+ )
/* Support sysdeps/x86_64/multiarch/memset.S. */
IFUNC_IMPL (i, name, memset,
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
- IFUNC_IMPL_ADD (array, i, memset, HAS_AVX2, __memset_avx2))
+ IFUNC_IMPL_ADD (array, i, memset,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __memset_avx2)
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, memset,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_avx512_no_vzeroupper)
#endif
+ )
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
IFUNC_IMPL (i, name, stpncpy,
- IFUNC_IMPL_ADD (array, i, stpncpy, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3),
__stpncpy_ssse3)
IFUNC_IMPL_ADD (array, i, stpncpy, 1,
__stpncpy_sse2_unaligned)
@@ -88,38 +118,42 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/stpcpy.S. */
IFUNC_IMPL (i, name, stpcpy,
- IFUNC_IMPL_ADD (array, i, stpcpy, HAS_SSSE3, __stpcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3),
+ __stpcpy_ssse3)
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2))
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.S. */
IFUNC_IMPL (i, name, strcasecmp,
- IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_AVX,
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
+ HAS_ARCH_FEATURE (AVX_Usable),
__strcasecmp_avx)
- IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_SSE4_2,
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
+ HAS_CPU_FEATURE (SSE4_2),
__strcasecmp_sse42)
- IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
+ HAS_CPU_FEATURE (SSSE3),
__strcasecmp_ssse3)
IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2))
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.S. */
IFUNC_IMPL (i, name, strcasecmp_l,
- IFUNC_IMPL_ADD (array, i, strcasecmp_l, HAS_AVX,
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ HAS_ARCH_FEATURE (AVX_Usable),
__strcasecmp_l_avx)
- IFUNC_IMPL_ADD (array, i, strcasecmp_l, HAS_SSE4_2,
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ HAS_CPU_FEATURE (SSE4_2),
__strcasecmp_l_sse42)
- IFUNC_IMPL_ADD (array, i, strcasecmp_l, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ HAS_CPU_FEATURE (SSSE3),
__strcasecmp_l_ssse3)
IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
__strcasecmp_l_sse2))
- /* Support sysdeps/x86_64/multiarch/strcasestr.c. */
- IFUNC_IMPL (i, name, strcasestr,
- IFUNC_IMPL_ADD (array, i, strcasestr, 1, __strcasestr_sse2))
-
/* Support sysdeps/x86_64/multiarch/strcat.S. */
IFUNC_IMPL (i, name, strcat,
- IFUNC_IMPL_ADD (array, i, strcat, HAS_SSSE3, __strcat_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
+ __strcat_ssse3)
IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2))
@@ -130,48 +164,57 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strcmp.S. */
IFUNC_IMPL (i, name, strcmp,
- IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSE4_2, __strcmp_sse42)
- IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSSE3, __strcmp_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
+ __strcmp_sse42)
+ IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
+ __strcmp_ssse3)
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2))
/* Support sysdeps/x86_64/multiarch/strcpy.S. */
IFUNC_IMPL (i, name, strcpy,
- IFUNC_IMPL_ADD (array, i, strcpy, HAS_SSSE3, __strcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
+ __strcpy_ssse3)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2))
/* Support sysdeps/x86_64/multiarch/strcspn.S. */
IFUNC_IMPL (i, name, strcspn,
- IFUNC_IMPL_ADD (array, i, strcspn, HAS_SSE4_2,
+ IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2),
__strcspn_sse42)
IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
/* Support sysdeps/x86_64/multiarch/strncase_l.S. */
IFUNC_IMPL (i, name, strncasecmp,
- IFUNC_IMPL_ADD (array, i, strncasecmp, HAS_AVX,
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
+ HAS_ARCH_FEATURE (AVX_Usable),
__strncasecmp_avx)
- IFUNC_IMPL_ADD (array, i, strncasecmp, HAS_SSE4_2,
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
+ HAS_CPU_FEATURE (SSE4_2),
__strncasecmp_sse42)
- IFUNC_IMPL_ADD (array, i, strncasecmp, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
+ HAS_CPU_FEATURE (SSSE3),
__strncasecmp_ssse3)
IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
__strncasecmp_sse2))
/* Support sysdeps/x86_64/multiarch/strncase_l.S. */
IFUNC_IMPL (i, name, strncasecmp_l,
- IFUNC_IMPL_ADD (array, i, strncasecmp_l, HAS_AVX,
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ HAS_ARCH_FEATURE (AVX_Usable),
__strncasecmp_l_avx)
- IFUNC_IMPL_ADD (array, i, strncasecmp_l, HAS_SSE4_2,
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ HAS_CPU_FEATURE (SSE4_2),
__strncasecmp_l_sse42)
- IFUNC_IMPL_ADD (array, i, strncasecmp_l, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ HAS_CPU_FEATURE (SSSE3),
__strncasecmp_l_ssse3)
IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
__strncasecmp_l_sse2))
/* Support sysdeps/x86_64/multiarch/strncat.S. */
IFUNC_IMPL (i, name, strncat,
- IFUNC_IMPL_ADD (array, i, strncat, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
__strncat_ssse3)
IFUNC_IMPL_ADD (array, i, strncat, 1,
__strncat_sse2_unaligned)
@@ -179,7 +222,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strncpy.S. */
IFUNC_IMPL (i, name, strncpy,
- IFUNC_IMPL_ADD (array, i, strncpy, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
__strncpy_ssse3)
IFUNC_IMPL_ADD (array, i, strncpy, 1,
__strncpy_sse2_unaligned)
@@ -187,14 +230,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strpbrk.S. */
IFUNC_IMPL (i, name, strpbrk,
- IFUNC_IMPL_ADD (array, i, strpbrk, HAS_SSE4_2,
+ IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2),
__strpbrk_sse42)
IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
/* Support sysdeps/x86_64/multiarch/strspn.S. */
IFUNC_IMPL (i, name, strspn,
- IFUNC_IMPL_ADD (array, i, strspn, HAS_SSE4_2, __strspn_sse42)
+ IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2),
+ __strspn_sse42)
IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
/* Support sysdeps/x86_64/multiarch/strstr.c. */
@@ -204,65 +248,95 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wcscpy.S. */
IFUNC_IMPL (i, name, wcscpy,
- IFUNC_IMPL_ADD (array, i, wcscpy, HAS_SSSE3, __wcscpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
+ __wcscpy_ssse3)
IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
/* Support sysdeps/x86_64/multiarch/wmemcmp.S. */
IFUNC_IMPL (i, name, wmemcmp,
- IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_SSE4_1,
+ IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1),
__wmemcmp_sse4_1)
- IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
__wmemcmp_ssse3)
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */
IFUNC_IMPL (i, name, __memcpy_chk,
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_AVX,
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_chk_avx512_no_vzeroupper)
+#endif
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
__memcpy_chk_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
__memcpy_chk_ssse3_back)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
__memcpy_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
__memcpy_chk_sse2))
/* Support sysdeps/x86_64/multiarch/memcpy.S. */
IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_AVX,
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ HAS_ARCH_FEATURE (AVX_Usable),
__memcpy_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
__memcpy_ssse3_back)
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
+ __memcpy_ssse3)
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_avx512_no_vzeroupper)
+#endif
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2))
/* Support sysdeps/x86_64/multiarch/mempcpy_chk.S. */
IFUNC_IMPL (i, name, __mempcpy_chk,
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_AVX,
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_chk_avx512_no_vzeroupper)
+#endif
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
__mempcpy_chk_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
__mempcpy_chk_ssse3_back)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
__mempcpy_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
__mempcpy_chk_sse2))
/* Support sysdeps/x86_64/multiarch/mempcpy.S. */
IFUNC_IMPL (i, name, mempcpy,
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_AVX,
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_avx512_no_vzeroupper)
+#endif
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX_Usable),
__mempcpy_avx_unaligned)
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
__mempcpy_ssse3_back)
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
__mempcpy_ssse3)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
IFUNC_IMPL (i, name, strncmp,
- IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSE4_2,
+ IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
__strncmp_sse42)
- IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSSE3,
+ IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
__strncmp_ssse3)
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
#endif
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
deleted file mode 100644
index aaad5fa841..0000000000
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/* Initialize CPU feature data.
- This file is part of the GNU C Library.
- Copyright (C) 2008-2015 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@redhat.com>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <atomic.h>
-#include <cpuid.h>
-#include "init-arch.h"
-
-
-struct cpu_features __cpu_features attribute_hidden;
-
-
-static void
-get_common_indeces (unsigned int *family, unsigned int *model)
-{
- __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
-
- unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- *family = (eax >> 8) & 0x0f;
- *model = (eax >> 4) & 0x0f;
-}
-
-
-void
-__init_cpu_features (void)
-{
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- unsigned int family = 0;
- unsigned int model = 0;
- enum cpu_features_kind kind;
-
- __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);
-
- /* This spells out "GenuineIntel". */
- if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
- {
- kind = arch_kind_intel;
-
- get_common_indeces (&family, &model);
-
- unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- unsigned int extended_family = (eax >> 20) & 0xff;
- unsigned int extended_model = (eax >> 12) & 0xf0;
- if (family == 0x0f)
- {
- family += extended_family;
- model += extended_model;
- }
- else if (family == 0x06)
- {
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
- model += extended_model;
- switch (model)
- {
- case 0x1c:
- case 0x26:
- /* BSF is slow on Atom. */
- __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
- break;
-
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Unaligned load versions are faster than SSSE3
- on Silvermont. */
-#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
-# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
-#endif
-#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
-# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
-#endif
- __cpu_features.feature[index_Fast_Unaligned_Load]
- |= (bit_Fast_Unaligned_Load
- | bit_Prefer_PMINUB_for_stringop
- | bit_Slow_SSE4_2);
- break;
-
- default:
- /* Unknown family 0x06 processors. Assuming this is one
- of Core i3/i5/i7 processors if AVX is available. */
- if ((ecx & bit_AVX) == 0)
- break;
-
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x25:
- case 0x2c:
- case 0x2e:
- case 0x2f:
- /* Rep string instructions, copy backward, unaligned loads
- and pminub are fast on Intel Core i3, i5 and i7. */
-#if index_Fast_Rep_String != index_Fast_Copy_Backward
-# error index_Fast_Rep_String != index_Fast_Copy_Backward
-#endif
-#if index_Fast_Rep_String != index_Fast_Unaligned_Load
-# error index_Fast_Rep_String != index_Fast_Unaligned_Load
-#endif
-#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
-# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
-#endif
- __cpu_features.feature[index_Fast_Rep_String]
- |= (bit_Fast_Rep_String
- | bit_Fast_Copy_Backward
- | bit_Fast_Unaligned_Load
- | bit_Prefer_PMINUB_for_stringop);
- break;
- }
- }
- }
- /* This spells out "AuthenticAMD". */
- else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
- {
- kind = arch_kind_amd;
-
- get_common_indeces (&family, &model);
-
- ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
-
- unsigned int eax;
- __cpuid (0x80000000, eax, ebx, ecx, edx);
- if (eax >= 0x80000001)
- __cpuid (0x80000001,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
- }
- else
- kind = arch_kind_other;
-
- if (__cpu_features.max_cpuid >= 7)
- __cpuid_count (7, 0,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
- __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
-
- /* Can we call xgetbv? */
- if (CPUID_OSXSAVE)
- {
- unsigned int xcrlow;
- unsigned int xcrhigh;
- asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- /* Is YMM and XMM state usable? */
- if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
- (bit_YMM_state | bit_XMM_state))
- {
- /* Determine if AVX is usable. */
- if (CPUID_AVX)
- __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
-#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
-# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
-#endif
- /* Determine if AVX2 is usable. Unaligned load with 256-bit
- AVX registers are faster on processors with AVX2. */
- if (CPUID_AVX2)
- __cpu_features.feature[index_AVX2_Usable]
- |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
- /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
- ZMM16-ZMM31 state are enabled. */
- if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
- | bit_ZMM16_31_state)) ==
- (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
- {
- /* Determine if AVX512F is usable. */
- if (CPUID_AVX512F)
- {
- __cpu_features.feature[index_AVX512F_Usable]
- |= bit_AVX512F_Usable;
- /* Determine if AVX512DQ is usable. */
- if (CPUID_AVX512DQ)
- __cpu_features.feature[index_AVX512DQ_Usable]
- |= bit_AVX512DQ_Usable;
- }
- }
- /* Determine if FMA is usable. */
- if (CPUID_FMA)
- __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
- /* Determine if FMA4 is usable. */
- if (CPUID_FMA4)
- __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
- }
- }
-
- __cpu_features.family = family;
- __cpu_features.model = model;
- atomic_write_barrier ();
- __cpu_features.kind = kind;
-}
-
-#undef __get_cpu_features
-
-const struct cpu_features *
-__get_cpu_features (void)
-{
- if (__cpu_features.kind == arch_kind_unknown)
- __init_cpu_features ();
-
- return &__cpu_features;
-}
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
deleted file mode 100644
index cfc6e7049e..0000000000
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/* This file is part of the GNU C Library.
- Copyright (C) 2008-2015 Free Software Foundation, Inc.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define bit_Fast_Rep_String (1 << 0)
-#define bit_Fast_Copy_Backward (1 << 1)
-#define bit_Slow_BSF (1 << 2)
-#define bit_Fast_Unaligned_Load (1 << 4)
-#define bit_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_AVX_Usable (1 << 6)
-#define bit_FMA_Usable (1 << 7)
-#define bit_FMA4_Usable (1 << 8)
-#define bit_Slow_SSE4_2 (1 << 9)
-#define bit_AVX2_Usable (1 << 10)
-#define bit_AVX_Fast_Unaligned_Load (1 << 11)
-#define bit_AVX512F_Usable (1 << 12)
-#define bit_AVX512DQ_Usable (1 << 13)
-
-/* CPUID Feature flags. */
-
-/* COMMON_CPUID_INDEX_1. */
-#define bit_SSE2 (1 << 26)
-#define bit_SSSE3 (1 << 9)
-#define bit_SSE4_1 (1 << 19)
-#define bit_SSE4_2 (1 << 20)
-#define bit_OSXSAVE (1 << 27)
-#define bit_AVX (1 << 28)
-#define bit_POPCOUNT (1 << 23)
-#define bit_FMA (1 << 12)
-#define bit_FMA4 (1 << 16)
-
-/* COMMON_CPUID_INDEX_7. */
-#define bit_RTM (1 << 11)
-#define bit_AVX2 (1 << 5)
-#define bit_AVX512F (1 << 16)
-#define bit_AVX512DQ (1 << 17)
-
-/* XCR0 Feature flags. */
-#define bit_XMM_state (1 << 1)
-#define bit_YMM_state (2 << 1)
-#define bit_Opmask_state (1 << 5)
-#define bit_ZMM0_15_state (1 << 6)
-#define bit_ZMM16_31_state (1 << 7)
-
-/* The integer bit array index for the first set of internal feature bits. */
-# define FEATURE_INDEX_1 0
-
-/* The current maximum size of the feature integer bit array. */
-# define FEATURE_INDEX_MAX 1
-
-#ifdef __ASSEMBLER__
-
-# include <ifunc-defines.h>
-
-# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
-# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
-# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
-
-# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE
-# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE
-
-#else /* __ASSEMBLER__ */
-
-# include <sys/param.h>
-
-enum
- {
- COMMON_CPUID_INDEX_1 = 0,
- COMMON_CPUID_INDEX_7,
- COMMON_CPUID_INDEX_80000001, /* for AMD */
- /* Keep the following line at the end. */
- COMMON_CPUID_INDEX_MAX
- };
-
-extern struct cpu_features
-{
- enum cpu_features_kind
- {
- arch_kind_unknown = 0,
- arch_kind_intel,
- arch_kind_amd,
- arch_kind_other
- } kind;
- int max_cpuid;
- struct cpuid_registers
- {
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- } cpuid[COMMON_CPUID_INDEX_MAX];
- unsigned int family;
- unsigned int model;
- unsigned int feature[FEATURE_INDEX_MAX];
-} __cpu_features attribute_hidden;
-
-
-extern void __init_cpu_features (void) attribute_hidden;
-# define INIT_ARCH() \
- do \
- if (__cpu_features.kind == arch_kind_unknown) \
- __init_cpu_features (); \
- while (0)
-
-/* Used from outside libc.so to get access to the CPU features structure. */
-extern const struct cpu_features *__get_cpu_features (void)
- __attribute__ ((const));
-
-# if IS_IN (libc)
-# define __get_cpu_features() (&__cpu_features)
-# endif
-
-# define HAS_CPU_FEATURE(idx, reg, bit) \
- ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0)
-
-/* Following are the feature tests used throughout libc. */
-
-/* CPUID_* evaluates to true if the feature flag is enabled.
- We always use &__cpu_features because the HAS_CPUID_* macros
- are called only within __init_cpu_features, where we can't
- call __get_cpu_features without infinite recursion. */
-# define HAS_CPUID_FLAG(idx, reg, bit) \
- (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
-
-# define CPUID_OSXSAVE \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
-# define CPUID_AVX \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define CPUID_FMA \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define CPUID_FMA4 \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
-# define CPUID_RTM \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
-# define CPUID_AVX2 \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
-# define CPUID_AVX512F \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512F)
-# define CPUID_AVX512DQ \
- HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512DQ)
-
-/* HAS_* evaluates to true if we may use the feature at runtime. */
-# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
-# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
-# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
-# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
-# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
-# define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
-
-# define index_Fast_Rep_String FEATURE_INDEX_1
-# define index_Fast_Copy_Backward FEATURE_INDEX_1
-# define index_Slow_BSF FEATURE_INDEX_1
-# define index_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
-# define index_AVX_Usable FEATURE_INDEX_1
-# define index_FMA_Usable FEATURE_INDEX_1
-# define index_FMA4_Usable FEATURE_INDEX_1
-# define index_Slow_SSE4_2 FEATURE_INDEX_1
-# define index_AVX2_Usable FEATURE_INDEX_1
-# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_AVX512F_Usable FEATURE_INDEX_1
-# define index_AVX512DQ_Usable FEATURE_INDEX_1
-
-# define HAS_ARCH_FEATURE(name) \
- ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-
-# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
-# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
-# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
-# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
-# define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable)
-# define HAS_AVX512F HAS_ARCH_FEATURE (AVX512F_Usable)
-# define HAS_AVX512DQ HAS_ARCH_FEATURE (AVX512DQ_Usable)
-# define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable)
-# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
-# define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-
-#endif /* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 533fece51a..786f87282c 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -1,5 +1,5 @@
/* memcmp with SSE4.1, wmemcmp with SSE4.1
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
index 948148b1cd..a22f399e02 100644
--- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
@@ -1,5 +1,5 @@
/* memcmp with SSSE3, wmemcmp with SSSE3
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S
index f8b46363d0..b5a1cc202e 100644
--- a/sysdeps/x86_64/multiarch/memcmp.S
+++ b/sysdeps/x86_64/multiarch/memcmp.S
@@ -1,6 +1,6 @@
/* Multiple versions of memcmp
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -26,16 +26,13 @@
.text
ENTRY(memcmp)
.type memcmp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ HAS_CPU_FEATURE (SSSE3)
jnz 2f
leaq __memcmp_sse2(%rip), %rax
ret
-2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+2: HAS_CPU_FEATURE (SSE4_1)
jz 3f
leaq __memcmp_sse4_1(%rip), %rax
ret
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
index 9f033f5456..74fed186e9 100644
--- a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
@@ -1,5 +1,5 @@
/* memcpy with AVX
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
new file mode 100644
index 0000000000..1bb12e81b0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
@@ -0,0 +1,408 @@
+/* memcpy optimized with AVX512 for KNL hardware.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc) \
+ && (defined SHARED \
+ || defined USE_AS_MEMMOVE \
+ || !defined USE_MULTIARCH)
+
+#include "asm-syntax.h"
+#ifndef MEMCPY
+# define MEMCPY __memcpy_avx512_no_vzeroupper
+# define MEMCPY_CHK __memcpy_chk_avx512_no_vzeroupper
+#endif
+
+ .section .text,"ax",@progbits
+#if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK) /* Checked entry point placed directly in front of MEMCPY. */
+ cmpq %rdx, %rcx /* Flags from %rcx - %rdx; presumably %rcx = destination object size, %rdx = length. */
+ jb HIDDEN_JUMPTARGET (__chk_fail) /* Unsigned %rcx < %rdx: go to __chk_fail. */
+END (MEMCPY_CHK) /* No ret here: otherwise execution continues into the code that follows. */
+#endif
+
+ENTRY (MEMCPY)
+ mov %rdi, %rax /* Return value: start of the destination ... */
+#ifdef USE_AS_MEMPCPY
+ add %rdx, %rax /* ... or dst + len when built as mempcpy. */
+#endif
+ lea (%rsi, %rdx), %rcx /* %rcx = src + len (one past the end of the source). */
+ lea (%rdi, %rdx), %r9 /* %r9 = dst + len (one past the end of the destination). */
+ cmp $512, %rdx
+ ja L(512bytesormore)
+/* At most 512 bytes from here on.  The non-temporal loops also jump to
+   L(check) to finish, with %rsi/%rdi/%rdx (forward) or %rcx/%r9/%rdx
+   (backward) adjusted to describe the bytes still to be copied.  */
+L(check):
+ cmp $16, %rdx
+ jbe L(less_16bytes)
+ cmp $256, %rdx
+ jb L(less_256bytes)
+ vmovups (%rsi), %zmm0 /* 256..512 bytes: load the first 256 bytes ... */
+ vmovups 0x40(%rsi), %zmm1
+ vmovups 0x80(%rsi), %zmm2
+ vmovups 0xC0(%rsi), %zmm3
+ vmovups -0x100(%rcx), %zmm4 /* ... and the last 256 bytes (the two ranges may overlap). */
+ vmovups -0xC0(%rcx), %zmm5
+ vmovups -0x80(%rcx), %zmm6
+ vmovups -0x40(%rcx), %zmm7
+ vmovups %zmm0, (%rdi) /* Stores begin only after every load has been issued. */
+ vmovups %zmm1, 0x40(%rdi)
+ vmovups %zmm2, 0x80(%rdi)
+ vmovups %zmm3, 0xC0(%rdi)
+ vmovups %zmm4, -0x100(%r9)
+ vmovups %zmm5, -0xC0(%r9)
+ vmovups %zmm6, -0x80(%r9)
+ vmovups %zmm7, -0x40(%r9)
+ ret
+
+L(less_256bytes):
+ cmp $128, %dl
+ jb L(less_128bytes)
+ vmovups (%rsi), %zmm0
+ vmovups 0x40(%rsi), %zmm1
+ vmovups -0x80(%rcx), %zmm2
+ vmovups -0x40(%rcx), %zmm3
+ vmovups %zmm0, (%rdi)
+ vmovups %zmm1, 0x40(%rdi)
+ vmovups %zmm2, -0x80(%r9)
+ vmovups %zmm3, -0x40(%r9)
+ ret
+
+L(less_128bytes):
+ cmp $64, %dl
+ jb L(less_64bytes)
+ vmovdqu (%rsi), %ymm0
+ vmovdqu 0x20(%rsi), %ymm1
+ vmovdqu -0x40(%rcx), %ymm2
+ vmovdqu -0x20(%rcx), %ymm3
+ vmovdqu %ymm0, (%rdi)
+ vmovdqu %ymm1, 0x20(%rdi)
+ vmovdqu %ymm2, -0x40(%r9)
+ vmovdqu %ymm3, -0x20(%r9)
+ ret
+
+L(less_64bytes):
+ cmp $32, %dl
+ jb L(less_32bytes)
+ vmovdqu (%rsi), %ymm0
+ vmovdqu -0x20(%rcx), %ymm1
+ vmovdqu %ymm0, (%rdi)
+ vmovdqu %ymm1, -0x20(%r9)
+ ret
+
+L(less_32bytes):
+ vmovdqu (%rsi), %xmm0
+ vmovdqu -0x10(%rcx), %xmm1
+ vmovdqu %xmm0, (%rdi)
+ vmovdqu %xmm1, -0x10(%r9)
+ ret
+
+L(less_16bytes):
+ cmp $8, %dl
+ jb L(less_8bytes)
+ movq (%rsi), %rsi
+ movq -0x8(%rcx), %rcx
+ movq %rsi, (%rdi)
+ movq %rcx, -0x8(%r9)
+ ret
+
+L(less_8bytes):
+ cmp $4, %dl
+ jb L(less_4bytes)
+ mov (%rsi), %esi
+ mov -0x4(%rcx), %ecx
+ mov %esi, (%rdi)
+ mov %ecx, -0x4(%r9)
+ ret
+
+L(less_4bytes):
+ cmp $2, %dl
+ jb L(less_2bytes)
+ mov (%rsi), %si
+ mov -0x2(%rcx), %cx
+ mov %si, (%rdi)
+ mov %cx, -0x2(%r9)
+ ret
+
+L(less_2bytes):
+ cmp $1, %dl
+ jb L(less_1bytes)
+ mov (%rsi), %cl
+ mov %cl, (%rdi)
+L(less_1bytes):
+ ret
+
+L(512bytesormore):
+#ifdef SHARED_CACHE_SIZE_HALF
+ mov $SHARED_CACHE_SIZE_HALF, %r8
+#else
+ mov __x86_shared_cache_size_half(%rip), %r8
+#endif
+ cmp %r8, %rdx
+ jae L(preloop_large)
+ cmp $1024, %rdx
+ ja L(1024bytesormore)
+ prefetcht1 (%rsi)
+ prefetcht1 0x40(%rsi)
+ prefetcht1 0x80(%rsi)
+ prefetcht1 0xC0(%rsi)
+ prefetcht1 0x100(%rsi)
+ prefetcht1 0x140(%rsi)
+ prefetcht1 0x180(%rsi)
+ prefetcht1 0x1C0(%rsi)
+ prefetcht1 -0x200(%rcx)
+ prefetcht1 -0x1C0(%rcx)
+ prefetcht1 -0x180(%rcx)
+ prefetcht1 -0x140(%rcx)
+ prefetcht1 -0x100(%rcx)
+ prefetcht1 -0xC0(%rcx)
+ prefetcht1 -0x80(%rcx)
+ prefetcht1 -0x40(%rcx)
+ vmovups (%rsi), %zmm0
+ vmovups 0x40(%rsi), %zmm1
+ vmovups 0x80(%rsi), %zmm2
+ vmovups 0xC0(%rsi), %zmm3
+ vmovups 0x100(%rsi), %zmm4
+ vmovups 0x140(%rsi), %zmm5
+ vmovups 0x180(%rsi), %zmm6
+ vmovups 0x1C0(%rsi), %zmm7
+ vmovups -0x200(%rcx), %zmm8
+ vmovups -0x1C0(%rcx), %zmm9
+ vmovups -0x180(%rcx), %zmm10
+ vmovups -0x140(%rcx), %zmm11
+ vmovups -0x100(%rcx), %zmm12
+ vmovups -0xC0(%rcx), %zmm13
+ vmovups -0x80(%rcx), %zmm14
+ vmovups -0x40(%rcx), %zmm15
+ vmovups %zmm0, (%rdi)
+ vmovups %zmm1, 0x40(%rdi)
+ vmovups %zmm2, 0x80(%rdi)
+ vmovups %zmm3, 0xC0(%rdi)
+ vmovups %zmm4, 0x100(%rdi)
+ vmovups %zmm5, 0x140(%rdi)
+ vmovups %zmm6, 0x180(%rdi)
+ vmovups %zmm7, 0x1C0(%rdi)
+ vmovups %zmm8, -0x200(%r9)
+ vmovups %zmm9, -0x1C0(%r9)
+ vmovups %zmm10, -0x180(%r9)
+ vmovups %zmm11, -0x140(%r9)
+ vmovups %zmm12, -0x100(%r9)
+ vmovups %zmm13, -0xC0(%r9)
+ vmovups %zmm14, -0x80(%r9)
+ vmovups %zmm15, -0x40(%r9)
+ ret
+
+L(1024bytesormore):
+ cmp %rsi, %rdi
+ ja L(1024bytesormore_bkw)
+ sub $512, %r9
+ vmovups -0x200(%rcx), %zmm8
+ vmovups -0x1C0(%rcx), %zmm9
+ vmovups -0x180(%rcx), %zmm10
+ vmovups -0x140(%rcx), %zmm11
+ vmovups -0x100(%rcx), %zmm12
+ vmovups -0xC0(%rcx), %zmm13
+ vmovups -0x80(%rcx), %zmm14
+ vmovups -0x40(%rcx), %zmm15
+ prefetcht1 (%rsi)
+ prefetcht1 0x40(%rsi)
+ prefetcht1 0x80(%rsi)
+ prefetcht1 0xC0(%rsi)
+ prefetcht1 0x100(%rsi)
+ prefetcht1 0x140(%rsi)
+ prefetcht1 0x180(%rsi)
+ prefetcht1 0x1C0(%rsi)
+
+/* Loop with unaligned memory access. */
+L(gobble_512bytes_loop):
+ vmovups (%rsi), %zmm0
+ vmovups 0x40(%rsi), %zmm1
+ vmovups 0x80(%rsi), %zmm2
+ vmovups 0xC0(%rsi), %zmm3
+ vmovups 0x100(%rsi), %zmm4
+ vmovups 0x140(%rsi), %zmm5
+ vmovups 0x180(%rsi), %zmm6
+ vmovups 0x1C0(%rsi), %zmm7
+ add $512, %rsi
+ prefetcht1 (%rsi)
+ prefetcht1 0x40(%rsi)
+ prefetcht1 0x80(%rsi)
+ prefetcht1 0xC0(%rsi)
+ prefetcht1 0x100(%rsi)
+ prefetcht1 0x140(%rsi)
+ prefetcht1 0x180(%rsi)
+ prefetcht1 0x1C0(%rsi)
+ vmovups %zmm0, (%rdi)
+ vmovups %zmm1, 0x40(%rdi)
+ vmovups %zmm2, 0x80(%rdi)
+ vmovups %zmm3, 0xC0(%rdi)
+ vmovups %zmm4, 0x100(%rdi)
+ vmovups %zmm5, 0x140(%rdi)
+ vmovups %zmm6, 0x180(%rdi)
+ vmovups %zmm7, 0x1C0(%rdi)
+ add $512, %rdi
+ cmp %r9, %rdi
+ jb L(gobble_512bytes_loop)
+ vmovups %zmm8, (%r9)
+ vmovups %zmm9, 0x40(%r9)
+ vmovups %zmm10, 0x80(%r9)
+ vmovups %zmm11, 0xC0(%r9)
+ vmovups %zmm12, 0x100(%r9)
+ vmovups %zmm13, 0x140(%r9)
+ vmovups %zmm14, 0x180(%r9)
+ vmovups %zmm15, 0x1C0(%r9)
+ ret
+
+L(1024bytesormore_bkw):
+ add $512, %rdi
+ vmovups 0x1C0(%rsi), %zmm8
+ vmovups 0x180(%rsi), %zmm9
+ vmovups 0x140(%rsi), %zmm10
+ vmovups 0x100(%rsi), %zmm11
+ vmovups 0xC0(%rsi), %zmm12
+ vmovups 0x80(%rsi), %zmm13
+ vmovups 0x40(%rsi), %zmm14
+ vmovups (%rsi), %zmm15
+ prefetcht1 -0x40(%rcx)
+ prefetcht1 -0x80(%rcx)
+ prefetcht1 -0xC0(%rcx)
+ prefetcht1 -0x100(%rcx)
+ prefetcht1 -0x140(%rcx)
+ prefetcht1 -0x180(%rcx)
+ prefetcht1 -0x1C0(%rcx)
+ prefetcht1 -0x200(%rcx)
+
+/* Backward loop with unaligned memory access. */
+L(gobble_512bytes_loop_bkw):
+ vmovups -0x40(%rcx), %zmm0
+ vmovups -0x80(%rcx), %zmm1
+ vmovups -0xC0(%rcx), %zmm2
+ vmovups -0x100(%rcx), %zmm3
+ vmovups -0x140(%rcx), %zmm4
+ vmovups -0x180(%rcx), %zmm5
+ vmovups -0x1C0(%rcx), %zmm6
+ vmovups -0x200(%rcx), %zmm7
+ sub $512, %rcx
+ prefetcht1 -0x40(%rcx)
+ prefetcht1 -0x80(%rcx)
+ prefetcht1 -0xC0(%rcx)
+ prefetcht1 -0x100(%rcx)
+ prefetcht1 -0x140(%rcx)
+ prefetcht1 -0x180(%rcx)
+ prefetcht1 -0x1C0(%rcx)
+ prefetcht1 -0x200(%rcx)
+ vmovups %zmm0, -0x40(%r9)
+ vmovups %zmm1, -0x80(%r9)
+ vmovups %zmm2, -0xC0(%r9)
+ vmovups %zmm3, -0x100(%r9)
+ vmovups %zmm4, -0x140(%r9)
+ vmovups %zmm5, -0x180(%r9)
+ vmovups %zmm6, -0x1C0(%r9)
+ vmovups %zmm7, -0x200(%r9)
+ sub $512, %r9
+ cmp %rdi, %r9
+ ja L(gobble_512bytes_loop_bkw)
+ vmovups %zmm8, -0x40(%rdi)
+ vmovups %zmm9, -0x80(%rdi)
+ vmovups %zmm10, -0xC0(%rdi)
+ vmovups %zmm11, -0x100(%rdi)
+ vmovups %zmm12, -0x140(%rdi)
+ vmovups %zmm13, -0x180(%rdi)
+ vmovups %zmm14, -0x1C0(%rdi)
+ vmovups %zmm15, -0x200(%rdi)
+ ret
+
+L(preloop_large):
+ cmp %rsi, %rdi
+ ja L(preloop_large_bkw)
+/* Forward copy with non-temporal stores (dst <= src).  Reached only for
+   lengths above 512 bytes that are also at least the shared-cache-half
+   threshold.  Load the first 128 source bytes now; they are stored after
+   the loop and cover the bytes skipped by the alignment step below.  */
+ vmovups (%rsi), %zmm4
+ vmovups 0x40(%rsi), %zmm5
+
+/* Remember the start of the destination in %r11.  %rax must not be used
+   for this: when built as mempcpy it holds dst + len, so storing the
+   saved head through it would write 128 bytes past the end of the
+   destination buffer and leave the head of the buffer uncopied.  */
+ mov %rdi, %r11
+/* Align destination for access with non-temporal stores in the loop. */
+ mov %rdi, %r8
+ and $-0x80, %rdi
+ add $0x80, %rdi /* %rdi = next 128-byte boundary above dst. */
+ sub %rdi, %r8 /* %r8 = -(bytes skipped), in the range -128..-1. */
+ sub %r8, %rsi /* Advance the source by the skipped bytes. */
+ add %r8, %rdx /* Reduce the remaining length accordingly. */
+L(gobble_256bytes_nt_loop):
+ prefetcht1 0x200(%rsi)
+ prefetcht1 0x240(%rsi)
+ prefetcht1 0x280(%rsi)
+ prefetcht1 0x2C0(%rsi)
+ prefetcht1 0x300(%rsi)
+ prefetcht1 0x340(%rsi)
+ prefetcht1 0x380(%rsi)
+ prefetcht1 0x3C0(%rsi)
+ vmovdqu64 (%rsi), %zmm0
+ vmovdqu64 0x40(%rsi), %zmm1
+ vmovdqu64 0x80(%rsi), %zmm2
+ vmovdqu64 0xC0(%rsi), %zmm3
+ vmovntdq %zmm0, (%rdi)
+ vmovntdq %zmm1, 0x40(%rdi)
+ vmovntdq %zmm2, 0x80(%rdi)
+ vmovntdq %zmm3, 0xC0(%rdi)
+ sub $256, %rdx
+ add $256, %rsi
+ add $256, %rdi
+ cmp $256, %rdx
+ ja L(gobble_256bytes_nt_loop) /* Loop while more than 256 bytes remain. */
+ sfence /* Order the non-temporal stores before the regular stores below. */
+ vmovups %zmm4, (%r11) /* Store the saved head at the original start of dst. */
+ vmovups %zmm5, 0x40(%r11)
+ jmp L(check) /* Finish the last (at most 256) bytes; %rcx and %r9 still mark the ends. */
+
+L(preloop_large_bkw):
+ vmovups -0x80(%rcx), %zmm4 /* Save the last 128 source bytes; they are stored after the loop. */
+ vmovups -0x40(%rcx), %zmm5
+
+/* Align end of destination for access with non-temporal stores. */
+ mov %r9, %r8
+ and $-0x80, %r9 /* %r9 = end of dst rounded down to a 128-byte boundary. */
+ sub %r9, %r8 /* %r8 = number of trailing bytes cut off by the rounding (0..127). */
+ sub %r8, %rcx /* Move the source end back by the same amount ... */
+ sub %r8, %rdx /* ... and shrink the remaining length. */
+ add %r9, %r8 /* %r8 = original end of dst again, kept for the tail store. */
+L(gobble_256bytes_nt_loop_bkw):
+ prefetcht1 -0x400(%rcx)
+ prefetcht1 -0x3C0(%rcx)
+ prefetcht1 -0x380(%rcx)
+ prefetcht1 -0x340(%rcx)
+ prefetcht1 -0x300(%rcx)
+ prefetcht1 -0x2C0(%rcx)
+ prefetcht1 -0x280(%rcx)
+ prefetcht1 -0x240(%rcx)
+ vmovdqu64 -0x100(%rcx), %zmm0
+ vmovdqu64 -0xC0(%rcx), %zmm1
+ vmovdqu64 -0x80(%rcx), %zmm2
+ vmovdqu64 -0x40(%rcx), %zmm3
+ vmovntdq %zmm0, -0x100(%r9)
+ vmovntdq %zmm1, -0xC0(%r9)
+ vmovntdq %zmm2, -0x80(%r9)
+ vmovntdq %zmm3, -0x40(%r9)
+ sub $256, %rdx
+ sub $256, %rcx
+ sub $256, %r9
+ cmp $256, %rdx
+ ja L(gobble_256bytes_nt_loop_bkw) /* Loop while more than 256 bytes remain. */
+ sfence
+ vmovups %zmm4, -0x80(%r8) /* Write the saved tail at the original end of dst. */
+ vmovups %zmm5, -0x40(%r8)
+ jmp L(check) /* Copy the remaining leading bytes (at most 256) from %rsi to %rdi. */
+END (MEMCPY)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index c5450af25a..c4509831fa 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* memcpy with unaliged loads
- Copyright (C) 2013-2015 Free Software Foundation, Inc.
+ Copyright (C) 2013-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,6 +16,8 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#if IS_IN (libc)
+
#include <sysdep.h>
#include "asm-syntax.h"
@@ -169,3 +171,5 @@ L(between_5_8):
movl %eax, -4(%rdi,%rdx)
jmp L(return)
END(__memcpy_sse2_unaligned)
+
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 30e0d1c575..08b41e9e5a 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -1,5 +1,5 @@
/* memcpy with SSSE3 and REP string
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 33cc493dd4..95de9695f9 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -1,5 +1,5 @@
/* memcpy with SSSE3
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 4e18cd3070..64a1bcd137 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -1,6 +1,6 @@
/* Multiple versions of memcpy
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -29,22 +29,28 @@
.text
ENTRY(__new_memcpy)
.type __new_memcpy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
+ LOAD_RTLD_GLOBAL_RO_RDX
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ HAS_ARCH_FEATURE (AVX512F_Usable)
+ jz 1f
+ HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
+ jz 1f
+ leaq __memcpy_avx512_no_vzeroupper(%rip), %rax
+ ret
+#endif
1: leaq __memcpy_avx_unaligned(%rip), %rax
- testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
- jz 1f
+ HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+ jz 2f
ret
-1: leaq __memcpy_sse2(%rip), %rax
- testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
- jnz 2f
+2: leaq __memcpy_sse2(%rip), %rax
+ HAS_ARCH_FEATURE (Slow_BSF)
+ jnz 3f
leaq __memcpy_sse2_unaligned(%rip), %rax
ret
-2: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
- jz 3f
+3: HAS_CPU_FEATURE (SSSE3)
+ jz 4f
leaq __memcpy_ssse3(%rip), %rax
-3: ret
+4: ret
END(__new_memcpy)
# undef ENTRY
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 1e756ea0c2..648217e971 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -1,6 +1,6 @@
/* Multiple versions of __memcpy_chk
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -29,17 +29,23 @@
.text
ENTRY(__memcpy_chk)
.type __memcpy_chk, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
+ LOAD_RTLD_GLOBAL_RO_RDX
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ HAS_ARCH_FEATURE (AVX512F_Usable)
+ jz 1f
+ HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
+ jz 1f
+ leaq __memcpy_chk_avx512_no_vzeroupper(%rip), %rax
+ ret
+#endif
1: leaq __memcpy_chk_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_CPU_FEATURE (SSSE3)
jz 2f
leaq __memcpy_chk_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ HAS_ARCH_FEATURE (Fast_Copy_Backward)
jz 2f
leaq __memcpy_chk_ssse3_back(%rip), %rax
- testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
+ HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
jz 2f
leaq __memcpy_chk_avx_unaligned(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S
index 01eac94889..75e35f2957 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S
@@ -1,5 +1,5 @@
/* memmove with AVX
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
new file mode 100644
index 0000000000..518d1fec35
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -0,0 +1,22 @@
+/* memmove optimized with AVX512 for KNL hardware.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define USE_AS_MEMMOVE
+#define MEMCPY __memmove_avx512_no_vzeroupper
+#define MEMCPY_CHK __memmove_chk_avx512_no_vzeroupper
+#include "memcpy-avx512-no-vzeroupper.S"
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index dd153a3eaa..8da5640bb0 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -1,6 +1,6 @@
/* Multiple versions of memmove.
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -36,6 +36,9 @@ extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden;
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ extern __typeof (__redirect_memmove) __memmove_avx512_no_vzeroupper attribute_hidden;
+# endif
#endif
@@ -49,12 +52,18 @@ extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden;
ifunc symbol properly. */
extern __typeof (__redirect_memmove) __libc_memmove;
libc_ifunc (__libc_memmove,
- HAS_AVX_FAST_UNALIGNED_LOAD
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ HAS_ARCH_FEATURE (AVX512F_Usable)
+ && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
+ ? __memmove_avx512_no_vzeroupper
+ :
+#endif
+ (HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
? __memmove_avx_unaligned
- : (HAS_SSSE3
- ? (HAS_FAST_COPY_BACKWARD
+ : (HAS_CPU_FEATURE (SSSE3)
+ ? (HAS_ARCH_FEATURE (Fast_Copy_Backward)
? __memmove_ssse3_back : __memmove_ssse3)
- : __memmove_sse2));
+ : __memmove_sse2)));
strong_alias (__libc_memmove, memmove)
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 8b12d002dc..f64da63180 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -1,6 +1,6 @@
/* Multiple versions of __memmove_chk.
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -26,12 +26,21 @@ extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden;
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ extern __typeof (__memmove_chk) __memmove_chk_avx512_no_vzeroupper attribute_hidden;
+# endif
#include "debug/memmove_chk.c"
libc_ifunc (__memmove_chk,
- HAS_AVX_FAST_UNALIGNED_LOAD ? __memmove_chk_avx_unaligned :
- (HAS_SSSE3
- ? (HAS_FAST_COPY_BACKWARD
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ HAS_ARCH_FEATURE (AVX512F_Usable)
+ && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
+ ? __memmove_chk_avx512_no_vzeroupper
+ :
+#endif
+ HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) ? __memmove_chk_avx_unaligned :
+ (HAS_CPU_FEATURE (SSSE3)
+ ? (HAS_ARCH_FEATURE (Fast_Copy_Backward)
? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
: __memmove_chk_sse2));
diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S
index 128ff832fb..241378e770 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S
+++ b/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S
@@ -1,5 +1,5 @@
/* mempcpy with AVX
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S
new file mode 100644
index 0000000000..fcc0945ea7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S
@@ -0,0 +1,22 @@
+/* mempcpy optimized with AVX512 for KNL hardware.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define USE_AS_MEMPCPY
+#define MEMCPY __mempcpy_avx512_no_vzeroupper
+#define MEMCPY_CHK __mempcpy_chk_avx512_no_vzeroupper
+#include "memcpy-avx512-no-vzeroupper.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index 2eaacdf049..ed78623565 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -1,6 +1,6 @@
/* Multiple versions of mempcpy
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -27,17 +27,23 @@
#if defined SHARED && IS_IN (libc)
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
+ LOAD_RTLD_GLOBAL_RO_RDX
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ HAS_ARCH_FEATURE (AVX512F_Usable)
+ jz 1f
+ HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
+ jz 1f
+ leaq __mempcpy_avx512_no_vzeroupper(%rip), %rax
+ ret
+#endif
1: leaq __mempcpy_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_CPU_FEATURE (SSSE3)
jz 2f
leaq __mempcpy_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ HAS_ARCH_FEATURE (Fast_Copy_Backward)
jz 2f
leaq __mempcpy_ssse3_back(%rip), %rax
- testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
+ HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
jz 2f
leaq __mempcpy_avx_unaligned(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 17b84701b0..6e8a89d38c 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -1,6 +1,6 @@
/* Multiple versions of __mempcpy_chk
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -29,17 +29,23 @@
.text
ENTRY(__mempcpy_chk)
.type __mempcpy_chk, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
+ LOAD_RTLD_GLOBAL_RO_RDX
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ HAS_ARCH_FEATURE (AVX512F_Usable)
+ jz 1f
+ HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
+ jz 1f
+ leaq __mempcpy_chk_avx512_no_vzeroupper(%rip), %rax
+ ret
+#endif
1: leaq __mempcpy_chk_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_CPU_FEATURE (SSSE3)
jz 2f
leaq __mempcpy_chk_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
+ HAS_ARCH_FEATURE (Fast_Copy_Backward)
jz 2f
leaq __mempcpy_chk_ssse3_back(%rip), %rax
- testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip)
+ HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
jz 2f
leaq __mempcpy_chk_avx_unaligned(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/memset-avx2.S b/sysdeps/x86_64/multiarch/memset-avx2.S
index 28eabade35..df634728d4 100644
--- a/sysdeps/x86_64/multiarch/memset-avx2.S
+++ b/sysdeps/x86_64/multiarch/memset-avx2.S
@@ -1,5 +1,5 @@
/* memset with AVX2
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
new file mode 100644
index 0000000000..1e638d7ac2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
@@ -0,0 +1,194 @@
+/* memset optimized with AVX512 for KNL hardware.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc)
+
+#include "asm-syntax.h"
+#ifndef MEMSET
+# define MEMSET __memset_avx512_no_vzeroupper
+# define MEMSET_CHK __memset_chk_avx512_no_vzeroupper
+#endif
+
+ .section .text,"ax",@progbits
+#if defined PIC
+ENTRY (MEMSET_CHK)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMSET_CHK)
+#endif
+
+ENTRY (MEMSET)
+ vpxor %xmm0, %xmm0, %xmm0 /* All-zero shuffle control for vpshufb below. */
+ vmovd %esi, %xmm1 /* Fill value into the low dword of %xmm1. */
+ lea (%rdi, %rdx), %rsi /* %rsi = dst + len (one past the end of the buffer). */
+ mov %rdi, %rax /* Return value: start of the destination. */
+ vpshufb %xmm0, %xmm1, %xmm0 /* Zero control selects byte 0 everywhere: %xmm0 = 16 copies of the fill byte. */
+ cmp $16, %rdx
+ jb L(less_16bytes)
+ cmp $512, %rdx
+ vbroadcastss %xmm0, %zmm2 /* %zmm2 = 64 copies of the fill byte; leaves the flags from the cmp above intact. */
+ ja L(512bytesormore)
+ cmp $256, %rdx
+ jb L(less_256bytes)
+ vmovups %zmm2, (%rdi) /* 256..512 bytes: first 256 bytes ... */
+ vmovups %zmm2, 0x40(%rdi)
+ vmovups %zmm2, 0x80(%rdi)
+ vmovups %zmm2, 0xC0(%rdi)
+ vmovups %zmm2, -0x100(%rsi) /* ... and last 256 bytes (the two ranges may overlap). */
+ vmovups %zmm2, -0xC0(%rsi)
+ vmovups %zmm2, -0x80(%rsi)
+ vmovups %zmm2, -0x40(%rsi)
+ ret
+
+L(less_256bytes):
+ cmp $128, %dl
+ jb L(less_128bytes)
+ vmovups %zmm2, (%rdi)
+ vmovups %zmm2, 0x40(%rdi)
+ vmovups %zmm2, -0x80(%rsi)
+ vmovups %zmm2, -0x40(%rsi)
+ ret
+
+L(less_128bytes):
+ cmp $64, %dl
+ jb L(less_64bytes)
+ vmovups %zmm2, (%rdi)
+ vmovups %zmm2, -0x40(%rsi)
+ ret
+
+L(less_64bytes):
+ cmp $32, %dl
+ jb L(less_32bytes)
+ vmovdqu %ymm2, (%rdi)
+ vmovdqu %ymm2, -0x20(%rsi)
+ ret
+
+L(less_32bytes):
+ vmovdqu %xmm0, (%rdi)
+ vmovdqu %xmm0, -0x10(%rsi)
+ ret
+
+L(less_16bytes):
+ cmp $8, %dl
+ jb L(less_8bytes)
+ vmovq %xmm0, (%rdi)
+ vmovq %xmm0, -0x08(%rsi)
+ ret
+
+L(less_8bytes):
+ vmovd %xmm0, %ecx
+ cmp $4, %dl
+ jb L(less_4bytes)
+ mov %ecx, (%rdi)
+ mov %ecx, -0x04(%rsi)
+ ret
+
+L(less_4bytes):
+ cmp $2, %dl
+ jb L(less_2bytes)
+ mov %cx, (%rdi)
+ mov %cx, -0x02(%rsi)
+ ret
+
+L(less_2bytes):
+ cmp $1, %dl
+ jb L(less_1bytes)
+ mov %cl, (%rdi)
+L(less_1bytes):
+ ret
+
+L(512bytesormore):
+ mov __x86_shared_cache_size_half(%rip), %rcx
+ cmp %rcx, %rdx
+ ja L(preloop_large)
+ cmp $1024, %rdx
+ ja L(1024bytesormore)
+
+ vmovups %zmm2, (%rdi)
+ vmovups %zmm2, 0x40(%rdi)
+ vmovups %zmm2, 0x80(%rdi)
+ vmovups %zmm2, 0xC0(%rdi)
+ vmovups %zmm2, 0x100(%rdi)
+ vmovups %zmm2, 0x140(%rdi)
+ vmovups %zmm2, 0x180(%rdi)
+ vmovups %zmm2, 0x1C0(%rdi)
+ vmovups %zmm2, -0x200(%rsi)
+ vmovups %zmm2, -0x1C0(%rsi)
+ vmovups %zmm2, -0x180(%rsi)
+ vmovups %zmm2, -0x140(%rsi)
+ vmovups %zmm2, -0x100(%rsi)
+ vmovups %zmm2, -0xC0(%rsi)
+ vmovups %zmm2, -0x80(%rsi)
+ vmovups %zmm2, -0x40(%rsi)
+ ret
+
+/* Align on 64 and loop with aligned stores.  Taken for lengths above
+   1024 bytes that do not exceed the shared-cache-half threshold.  */
+L(1024bytesormore):
+ sub $0x100, %rsi /* %rsi = end - 256: loop bound and start of the final 256-byte block. */
+ vmovups %zmm2, (%rax) /* Unaligned store of the first 64 bytes (%rax = dst). */
+ and $-0x40, %rdi
+ add $0x40, %rdi /* %rdi = next 64-byte boundary above dst. */
+
+L(gobble_256bytes_loop):
+ vmovaps %zmm2, (%rdi)
+ vmovaps %zmm2, 0x40(%rdi)
+ vmovaps %zmm2, 0x80(%rdi)
+ vmovaps %zmm2, 0xC0(%rdi)
+ add $0x100, %rdi
+ cmp %rsi, %rdi
+ jb L(gobble_256bytes_loop) /* Continue while %rdi is below end - 256. */
+ vmovups %zmm2, (%rsi) /* Final 256 bytes, unaligned; may overlap what the loop wrote. */
+ vmovups %zmm2, 0x40(%rsi)
+ vmovups %zmm2, 0x80(%rsi)
+ vmovups %zmm2, 0xC0(%rsi)
+ ret
+
+/* Align on 128 and loop with non-temporal stores.  Taken for lengths
+   above both 512 bytes and the shared-cache-half threshold.  */
+L(preloop_large):
+ and $-0x80, %rdi
+ add $0x80, %rdi /* %rdi = next 128-byte boundary above dst. */
+ vmovups %zmm2, (%rax) /* Unaligned stores of the first 128 bytes (%rax = dst). */
+ vmovups %zmm2, 0x40(%rax)
+ sub $0x200, %rsi /* %rsi = end - 512: loop bound and start of the final 512-byte block. */
+
+L(gobble_512bytes_nt_loop):
+ vmovntdq %zmm2, (%rdi)
+ vmovntdq %zmm2, 0x40(%rdi)
+ vmovntdq %zmm2, 0x80(%rdi)
+ vmovntdq %zmm2, 0xC0(%rdi)
+ vmovntdq %zmm2, 0x100(%rdi)
+ vmovntdq %zmm2, 0x140(%rdi)
+ vmovntdq %zmm2, 0x180(%rdi)
+ vmovntdq %zmm2, 0x1C0(%rdi)
+ add $0x200, %rdi
+ cmp %rsi, %rdi
+ jb L(gobble_512bytes_nt_loop) /* Continue while %rdi is below end - 512. */
+ sfence /* Order the non-temporal stores before the regular stores below. */
+ vmovups %zmm2, (%rsi) /* Final 512 bytes, unaligned; may overlap what the loop wrote. */
+ vmovups %zmm2, 0x40(%rsi)
+ vmovups %zmm2, 0x80(%rsi)
+ vmovups %zmm2, 0xC0(%rsi)
+ vmovups %zmm2, 0x100(%rsi)
+ vmovups %zmm2, 0x140(%rsi)
+ vmovups %zmm2, 0x180(%rsi)
+ vmovups %zmm2, 0x1C0(%rsi)
+ ret
+END (MEMSET)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index c5f1fb340e..8e3b9b9764 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -1,6 +1,6 @@
/* Multiple versions of memset
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,45 +17,48 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#ifdef HAVE_AVX2_SUPPORT
#include <sysdep.h>
#include <shlib-compat.h>
#include <init-arch.h>
/* Define multiple versions only for the definition in lib. */
-# if IS_IN (libc)
+#if IS_IN (libc)
ENTRY(memset)
.type memset, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __memset_sse2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed lazy __init_cpu_features call; presumably leaves the feature-data pointer in %rdx (macro is defined outside this patch -- confirm). */
+ leaq __memset_sse2(%rip), %rax /* Default selection returned in %rax: the SSE2 version. */
+ HAS_ARCH_FEATURE (AVX2_Usable) /* Takes the place of the removed open-coded testl of bit_AVX2_Usable; presumably sets ZF the same way. */
jz 2f
leaq __memset_avx2(%rip), %rax
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ HAS_ARCH_FEATURE (AVX512F_Usable)
+ jz 2f /* Test above yielded zero: keep the AVX2 version already in %rax. */
+ HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
+ jz 2f /* Likewise keep AVX2 unless Prefer_No_VZEROUPPER is also set. */
+ leaq __memset_avx512_no_vzeroupper(%rip), %rax /* Both checks passed: select the AVX-512 version. */
+#endif
2: ret
END(memset)
-# endif
+#endif
-# if IS_IN (libc)
-# undef memset
-# define memset __memset_sse2
+#if IS_IN (libc)
+# undef memset
+# define memset __memset_sse2
-# undef __memset_chk
-# define __memset_chk __memset_chk_sse2
+# undef __memset_chk
+# define __memset_chk __memset_chk_sse2
-# ifdef SHARED
-# undef libc_hidden_builtin_def
+# ifdef SHARED
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal memset calls through a PLT.
The speedup we get from using GPR instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI_memset; __GI_memset = __memset_sse2
-# endif
-
-# undef strong_alias
-# define strong_alias(original, alias)
# endif
+
+# undef strong_alias
+# define strong_alias(original, alias)
#endif
#include "../memset.S"
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
index 64fed3118a..9a7b270274 100644
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ b/sysdeps/x86_64/multiarch/memset_chk.S
@@ -1,6 +1,6 @@
/* Multiple versions of memset_chk
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -22,16 +22,21 @@
/* Define multiple versions only for the definition in lib. */
#if IS_IN (libc)
-# if defined SHARED && defined HAVE_AVX2_SUPPORT
+# ifdef SHARED
ENTRY(__memset_chk)
.type __memset_chk, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __memset_chk_sse2(%rip), %rax
- testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed lazy __init_cpu_features call (macro is defined outside this patch). */
+ leaq __memset_chk_sse2(%rip), %rax /* Default selection returned in %rax: the SSE2 version. */
+ HAS_ARCH_FEATURE (AVX2_Usable) /* Takes the place of the removed testl of bit_AVX2_Usable; presumably sets ZF the same way. */
jz 2f
leaq __memset_chk_avx2(%rip), %rax
+#ifdef HAVE_AVX512_ASM_SUPPORT
+ HAS_ARCH_FEATURE (AVX512F_Usable)
+ jz 2f /* Test above yielded zero: keep the AVX2 version already in %rax. */
+ HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
+ jz 2f /* Likewise keep AVX2 unless Prefer_No_VZEROUPPER is also set. */
+ leaq __memset_chk_avx512_no_vzeroupper(%rip), %rax /* Both checks passed: select the AVX-512 version. */
+#endif
2: ret
END(__memset_chk)
diff --git a/sysdeps/x86_64/multiarch/rtld-memcmp.c b/sysdeps/x86_64/multiarch/rtld-memcmp.c
deleted file mode 100644
index 0f271356c2..0000000000
--- a/sysdeps/x86_64/multiarch/rtld-memcmp.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../rtld-memcmp.c"
diff --git a/sysdeps/x86_64/multiarch/rtld-memset.S b/sysdeps/x86_64/multiarch/rtld-memset.S
deleted file mode 100644
index 8092aa07da..0000000000
--- a/sysdeps/x86_64/multiarch/rtld-memset.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../rtld-memset.S"
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
index 72ad7b01a8..b75aeb79b2 100644
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -1,6 +1,6 @@
/* Count bits in CPU set. x86-64 multi-arch version.
This file is part of the GNU C Library.
- Copyright (C) 2008-2015 Free Software Foundation, Inc.
+ Copyright (C) 2008-2016 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@redhat.com>.
The GNU C Library is free software; you can redistribute it and/or
@@ -33,4 +33,4 @@
#undef __sched_cpucount
libc_ifunc (__sched_cpucount,
- HAS_POPCOUNT ? popcount_cpucount : generic_cpucount);
+ HAS_CPU_FEATURE (POPCOUNT) ? popcount_cpucount : generic_cpucount);
diff --git a/sysdeps/x86_64/multiarch/strcasestr.c b/sysdeps/x86_64/multiarch/strcasestr.c
deleted file mode 100644
index 834e656a2c..0000000000
--- a/sysdeps/x86_64/multiarch/strcasestr.c
+++ /dev/null
@@ -1,13 +0,0 @@
-/* Multiple versions of strcasestr
- All versions must be listed in ifunc-impl-list.c. */
-
-#include "init-arch.h"
-
-#define STRCASESTR __strcasestr_sse2
-
-#include "string/strcasestr.c"
-
-extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden;
-
-libc_ifunc (__strcasestr,
- __strcasestr_sse2);
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 81f1b40ef6..3a694d45c2 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* strcat with SSE2
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
index d7b990725a..96184d0f0f 100644
--- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcat-ssse3.S
@@ -1,5 +1,5 @@
/* strcat with SSSE3
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
index 44993fade5..7bb38e68ad 100644
--- a/sysdeps/x86_64/multiarch/strcat.S
+++ b/sysdeps/x86_64/multiarch/strcat.S
@@ -1,6 +1,6 @@
/* Multiple versions of strcat
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -47,14 +47,12 @@
.text
ENTRY(STRCAT)
.type STRCAT, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq STRCAT_SSE2_UNALIGNED(%rip), %rax
- testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed lazy __init_cpu_features call (macro is defined outside this patch). */
+ leaq STRCAT_SSE2_UNALIGNED(%rip), %rax /* First candidate: the unaligned SSE2 version. */
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load) /* Takes the place of the removed testl against the FEATURE_OFFSET word. */
jnz 2f
leaq STRCAT_SSE2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_CPU_FEATURE (SSSE3) /* Takes the place of the removed testl against the CPUID_OFFSET word. */
jz 2f
leaq STRCAT_SSSE3(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
index 0398650a01..979d112b28 100644
--- a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
+++ b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
@@ -1,5 +1,5 @@
/* strchr with SSE2 without bsf
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index af55fac398..40683ad32b 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -1,5 +1,5 @@
/* Multiple versions of strchr
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -25,11 +25,9 @@
.text
ENTRY(strchr)
.type strchr, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __strchr_sse2(%rip), %rax
-2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed lazy __init_cpu_features call (macro is defined outside this patch). */
+ leaq __strchr_sse2(%rip), %rax /* Default selection returned in %rax. */
+2: HAS_ARCH_FEATURE (Slow_BSF) /* Takes the place of the removed testl of bit_Slow_BSF; the 2: label is carried over unchanged. */
jz 3f
leaq __strchr_sse2_no_bsf(%rip), %rax
3: ret
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
index 20b65fa775..bf555b4066 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* strcmp with unaligned loads
- Copyright (C) 2013-2015 Free Software Foundation, Inc.
+ Copyright (C) 2013-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,6 +16,8 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#if IS_IN (libc)
+
#include "sysdep.h"
ENTRY ( __strcmp_sse2_unaligned)
@@ -207,3 +209,5 @@ L(different):
subl %ecx, %eax
ret
END (__strcmp_sse2_unaligned)
+
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index 4dff0a564b..70df84ae32 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -1,5 +1,5 @@
/* strcmp with SSE4.2
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f50f26c393..0e4a113f61 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -1,5 +1,5 @@
/* Multiple versions of strcmp
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -84,24 +84,20 @@
.text
ENTRY(STRCMP)
.type STRCMP, @gnu_indirect_function
- /* Manually inlined call to __get_cpu_features. */
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1:
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed manually inlined __get_cpu_features sequence (macro is defined outside this patch). */
#ifdef USE_AS_STRCMP
leaq __strcmp_sse2_unaligned(%rip), %rax
- testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load) /* HAS_ARCH_FEATURE stands in for tests of the FEATURE_OFFSET word. */
jnz 3f
#else
- testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
+ HAS_ARCH_FEATURE (Slow_SSE4_2) /* Set: skip the SSE4.2 candidate and go straight to the SSSE3 check at 2:. */
jnz 2f
leaq STRCMP_SSE42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ HAS_CPU_FEATURE (SSE4_2) /* HAS_CPU_FEATURE stands in for tests of the CPUID_OFFSET word. */
jnz 3f
#endif
2: leaq STRCMP_SSSE3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_CPU_FEATURE (SSSE3) /* Clear: fall through to the SSE2 version below. */
jnz 3f
leaq STRCMP_SSE2(%rip), %rax
3: ret
@@ -110,23 +106,17 @@ END(STRCMP)
# ifdef USE_AS_STRCASECMP_L
ENTRY(__strcasecmp)
.type __strcasecmp, @gnu_indirect_function
- /* Manually inlined call to __get_cpu_features. */
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1:
-# ifdef HAVE_AVX_SUPPORT
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed manually inlined __get_cpu_features sequence (macro is defined outside this patch). */
leaq __strcasecmp_avx(%rip), %rax
- testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX_Usable) /* Now assembled unconditionally: this patch drops the surrounding HAVE_AVX_SUPPORT guard. */
jnz 3f
-# endif
- testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
+ HAS_ARCH_FEATURE (Slow_SSE4_2) /* Set: skip the SSE4.2 candidate and go straight to the SSSE3 check at 2:. */
jnz 2f
leaq __strcasecmp_sse42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ HAS_CPU_FEATURE (SSE4_2) /* HAS_CPU_FEATURE stands in for tests of the CPUID_OFFSET word. */
jnz 3f
2: leaq __strcasecmp_ssse3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_CPU_FEATURE (SSSE3) /* Clear: fall through to the SSE2 version below. */
jnz 3f
leaq __strcasecmp_sse2(%rip), %rax
3: ret
@@ -136,23 +126,17 @@ weak_alias (__strcasecmp, strcasecmp)
# ifdef USE_AS_STRNCASECMP_L
ENTRY(__strncasecmp)
.type __strncasecmp, @gnu_indirect_function
- /* Manually inlined call to __get_cpu_features. */
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1:
-# ifdef HAVE_AVX_SUPPORT
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed manually inlined __get_cpu_features sequence (macro is defined outside this patch). */
leaq __strncasecmp_avx(%rip), %rax
- testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
+ HAS_ARCH_FEATURE (AVX_Usable) /* Now assembled unconditionally: this patch drops the surrounding HAVE_AVX_SUPPORT guard. */
jnz 3f
-# endif
- testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
+ HAS_ARCH_FEATURE (Slow_SSE4_2) /* Set: skip the SSE4.2 candidate and go straight to the SSSE3 check at 2:. */
jnz 2f
leaq __strncasecmp_sse42(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ HAS_CPU_FEATURE (SSE4_2) /* HAS_CPU_FEATURE stands in for tests of the CPUID_OFFSET word. */
jnz 3f
2: leaq __strncasecmp_ssse3(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_CPU_FEATURE (SSSE3) /* Clear: fall through to the SSE2 version below. */
jnz 3f
leaq __strncasecmp_sse2(%rip), %rax
3: ret
@@ -167,16 +151,14 @@ weak_alias (__strncasecmp, strncasecmp)
# include "strcmp-sse42.S"
-# ifdef HAVE_AVX_SUPPORT
-# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# define LABEL(l) .L##l##_avx
-# define GLABEL(l) l##_avx
-# define USE_AVX 1
-# undef STRCMP_SSE42
-# define STRCMP_SSE42 STRCMP_AVX
-# define SECTION avx
-# include "strcmp-sse42.S"
-# endif
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# define LABEL(l) .L##l##_avx
+# define GLABEL(l) l##_avx
+# define USE_AVX 1
+# undef STRCMP_SSE42
+# define STRCMP_SSE42 STRCMP_AVX
+# define SECTION avx
+# include "strcmp-sse42.S"
# endif
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 8f03d1db24..caa74be2c2 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* strcpy with SSE2 and unaligned load
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
index 1f22c9a918..5bdb7671cf 100644
--- a/sysdeps/x86_64/multiarch/strcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
@@ -1,5 +1,5 @@
/* strcpy with SSSE3
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S
index 9464ee8b63..024f6ef899 100644
--- a/sysdeps/x86_64/multiarch/strcpy.S
+++ b/sysdeps/x86_64/multiarch/strcpy.S
@@ -1,6 +1,6 @@
/* Multiple versions of strcpy
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -61,14 +61,12 @@
.text
ENTRY(STRCPY)
.type STRCPY, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq STRCPY_SSE2_UNALIGNED(%rip), %rax
- testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed lazy __init_cpu_features call (macro is defined outside this patch). */
+ leaq STRCPY_SSE2_UNALIGNED(%rip), %rax /* First candidate: the unaligned SSE2 version. */
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load) /* Takes the place of the removed testl against the FEATURE_OFFSET word. */
jnz 2f
leaq STRCPY_SSE2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ HAS_CPU_FEATURE (SSSE3) /* Takes the place of the removed testl against the CPUID_OFFSET word. */
jz 2f
leaq STRCPY_SSSE3(%rip), %rax
2: ret
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index 60b2ed7a3f..91b804ddd6 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -1,5 +1,5 @@
/* strcspn with SSE4.2 intrinsics
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strcspn.S b/sysdeps/x86_64/multiarch/strcspn.S
index 95e882c443..8e7ff1c663 100644
--- a/sysdeps/x86_64/multiarch/strcspn.S
+++ b/sysdeps/x86_64/multiarch/strcspn.S
@@ -1,6 +1,6 @@
/* Multiple versions of strcspn
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -19,9 +19,6 @@
<http://www.gnu.org/licenses/>. */
#include <config.h>
-
-#ifdef HAVE_SSE4_SUPPORT
-
#include <sysdep.h>
#include <init-arch.h>
@@ -45,11 +42,9 @@
.text
ENTRY(STRCSPN)
.type STRCSPN, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq STRCSPN_SSE2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed lazy __init_cpu_features call (macro is defined outside this patch). */
+ leaq STRCSPN_SSE2(%rip), %rax /* Default selection returned in %rax: the SSE2 version. */
+ HAS_CPU_FEATURE (SSE4_2) /* Takes the place of the removed testl of bit_SSE4_2 in the CPUID_OFFSET word. */
jz 2f
leaq STRCSPN_SSE42(%rip), %rax
2: ret
@@ -66,7 +61,6 @@ END(STRCSPN)
# define END(name) \
cfi_endproc; .size STRCSPN_SSE2, .-STRCSPN_SSE2
#endif
-#endif /* HAVE_SSE4_SUPPORT */
#ifdef USE_AS_STRPBRK
#include "../strpbrk.S"
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index 6b0c80aa43..9675f9360e 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -1,5 +1,5 @@
/* strspn with SSE4.2 intrinsics
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/strspn.S b/sysdeps/x86_64/multiarch/strspn.S
index b734c1729a..4942826b24 100644
--- a/sysdeps/x86_64/multiarch/strspn.S
+++ b/sysdeps/x86_64/multiarch/strspn.S
@@ -1,6 +1,6 @@
/* Multiple versions of strspn
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -19,9 +19,6 @@
<http://www.gnu.org/licenses/>. */
#include <config.h>
-
-#ifdef HAVE_SSE4_SUPPORT
-
#include <sysdep.h>
#include <init-arch.h>
@@ -30,11 +27,9 @@
.text
ENTRY(strspn)
.type strspn, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __strspn_sse2(%rip), %rax
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX /* Takes the place of the removed lazy __init_cpu_features call (macro is defined outside this patch). */
+ leaq __strspn_sse2(%rip), %rax /* Default selection returned in %rax: the SSE2 version. */
+ HAS_CPU_FEATURE (SSE4_2) /* Takes the place of the removed testl of bit_SSE4_2 in the CPUID_OFFSET word. */
jz 2f
leaq __strspn_sse42(%rip), %rax
2: ret
@@ -52,6 +47,4 @@ END(strspn)
cfi_endproc; .size __strspn_sse2, .-__strspn_sse2
#endif
-#endif /* HAVE_SSE4_SUPPORT */
-
#include "../strspn.S"
diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
index 4f0e2ebdab..4ead1dfaf5 100644
--- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
@@ -1,5 +1,5 @@
/* strstr with unaligned loads
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index 507994bd38..eecba2243e 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -1,6 +1,6 @@
/* Multiple versions of strstr.
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -41,7 +41,10 @@ extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
extern __typeof (__redirect_strstr) __libc_strstr;
-libc_ifunc (__libc_strstr, HAS_FAST_UNALIGNED_LOAD ? __strstr_sse2_unaligned : __strstr_sse2)
+libc_ifunc (__libc_strstr,
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ ? __strstr_sse2_unaligned
+ : __strstr_sse2)
#undef strstr
strong_alias (__libc_strstr, strstr)
diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c
index 949d26e550..4eb0c16cd8 100644
--- a/sysdeps/x86_64/multiarch/test-multiarch.c
+++ b/sysdeps/x86_64/multiarch/test-multiarch.c
@@ -1,6 +1,6 @@
/* Test CPU feature data.
This file is part of the GNU C Library.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -75,12 +75,18 @@ do_test (int argc, char **argv)
int fails;
get_cpuinfo ();
- fails = check_proc ("avx", HAS_AVX, "HAS_AVX");
- fails += check_proc ("fma4", HAS_FMA4, "HAS_FMA4");
- fails += check_proc ("sse4_2", HAS_SSE4_2, "HAS_SSE4_2");
- fails += check_proc ("sse4_1", HAS_SSE4_1, "HAS_SSE4_1");
- fails += check_proc ("ssse3", HAS_SSSE3, "HAS_SSSE3");
- fails += check_proc ("popcnt", HAS_POPCOUNT, "HAS_POPCOUNT");
+ fails = check_proc ("avx", HAS_ARCH_FEATURE (AVX_Usable),
+ "HAS_ARCH_FEATURE (AVX_Usable)");
+ fails += check_proc ("fma4", HAS_ARCH_FEATURE (FMA4_Usable),
+ "HAS_ARCH_FEATURE (FMA4_Usable)");
+ fails += check_proc ("sse4_2", HAS_CPU_FEATURE (SSE4_2),
+ "HAS_CPU_FEATURE (SSE4_2)");
+ fails += check_proc ("sse4_1", HAS_CPU_FEATURE (SSE4_1)
+ , "HAS_CPU_FEATURE (SSE4_1)");
+ fails += check_proc ("ssse3", HAS_CPU_FEATURE (SSSE3),
+ "HAS_CPU_FEATURE (SSSE3)");
+ fails += check_proc ("popcnt", HAS_CPU_FEATURE (POPCOUNT),
+ "HAS_CPU_FEATURE (POPCOUNT)");
printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails);
diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
index 0007ef79e5..7921be5b57 100644
--- a/sysdeps/x86_64/multiarch/varshift.c
+++ b/sysdeps/x86_64/multiarch/varshift.c
@@ -1,5 +1,5 @@
/* Helper for variable shifts of SSE registers.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
index 30ace3d914..7b27d0e9dd 100644
--- a/sysdeps/x86_64/multiarch/varshift.h
+++ b/sysdeps/x86_64/multiarch/varshift.h
@@ -1,5 +1,5 @@
/* Helper for variable shifts of SSE registers.
- Copyright (C) 2010-2015 Free Software Foundation, Inc.
+ Copyright (C) 2010-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index 8097862574..341e57a5ca 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -1,5 +1,5 @@
/* wcscpy with SSSE3
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/multiarch/wcscpy.S b/sysdeps/x86_64/multiarch/wcscpy.S
index ff2f5a73d1..8e7270b9c7 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.S
+++ b/sysdeps/x86_64/multiarch/wcscpy.S
@@ -1,6 +1,6 @@
/* Multiple versions of wcscpy
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -27,11 +27,8 @@
.text
ENTRY(wcscpy)
.type wcscpy, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ HAS_CPU_FEATURE (SSSE3)
jnz 2f
leaq __wcscpy_sse2(%rip), %rax
ret
diff --git a/sysdeps/x86_64/multiarch/wmemcmp.S b/sysdeps/x86_64/multiarch/wmemcmp.S
index 109e2457fe..b510f756e2 100644
--- a/sysdeps/x86_64/multiarch/wmemcmp.S
+++ b/sysdeps/x86_64/multiarch/wmemcmp.S
@@ -1,6 +1,6 @@
/* Multiple versions of wmemcmp
All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -26,16 +26,13 @@
.text
ENTRY(wmemcmp)
.type wmemcmp, @gnu_indirect_function
- cmpl $0, KIND_OFFSET+__cpu_features(%rip)
- jne 1f
- call __init_cpu_features
-
-1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ LOAD_RTLD_GLOBAL_RO_RDX
+ HAS_CPU_FEATURE (SSSE3)
jnz 2f
leaq __wmemcmp_sse2(%rip), %rax
ret
-2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+2: HAS_CPU_FEATURE (SSE4_1)
jz 3f
leaq __wmemcmp_sse4_1(%rip), %rax
ret
diff --git a/sysdeps/x86_64/nptl/Makefile b/sysdeps/x86_64/nptl/Makefile
index 14fb69a94d..9b64b533ee 100644
--- a/sysdeps/x86_64/nptl/Makefile
+++ b/sysdeps/x86_64/nptl/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 2002-2015 Free Software Foundation, Inc.
+# Copyright (C) 2002-2016 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S
index d1a9b68028..b871241617 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_lock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_lock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,11 +16,9 @@
<http://www.gnu.org/licenses/>. */
#include <lowlevellock.h>
+#include <sysdep.h>
- .globl pthread_spin_lock
- .type pthread_spin_lock,@function
- .align 16
-pthread_spin_lock:
+ENTRY(pthread_spin_lock)
1: LOCK
decl 0(%rdi)
jne 2f
@@ -33,4 +31,4 @@ pthread_spin_lock:
cmpl $0, 0(%rdi)
jg 1b
jmp 2b
- .size pthread_spin_lock,.-pthread_spin_lock
+END(pthread_spin_lock)
diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
index 6b58929ef4..c9c53171fe 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_trylock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -17,6 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <pthread-errnos.h>
+#include <sysdep.h>
#ifdef UP
@@ -25,10 +26,7 @@
# define LOCK lock
#endif
- .globl pthread_spin_trylock
- .type pthread_spin_trylock,@function
- .align 16
-pthread_spin_trylock:
+ENTRY(pthread_spin_trylock)
movl $1, %eax
xorl %ecx, %ecx
LOCK
@@ -36,4 +34,4 @@ pthread_spin_trylock:
movl $EBUSY, %eax
cmovel %ecx, %eax
retq
- .size pthread_spin_trylock,.-pthread_spin_trylock
+END(pthread_spin_trylock)
diff --git a/sysdeps/x86_64/nptl/pthread_spin_unlock.S b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
index 74d7dd6430..188de2e8cb 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_unlock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -16,14 +16,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
- .globl pthread_spin_unlock
- .type pthread_spin_unlock,@function
- .align 16
-pthread_spin_unlock:
+#include <sysdep.h>
+
+ENTRY(pthread_spin_unlock) /* Replaces the hand-written .globl/.type/.align directives and label removed by this hunk. */
movl $1, (%rdi)
xorl %eax, %eax
retq
- .size pthread_spin_unlock,.-pthread_spin_unlock
+END(pthread_spin_unlock) /* Replaces the explicit .size directive removed on the line above. */
/* The implementation of pthread_spin_init is identical. */
.globl pthread_spin_init
diff --git a/sysdeps/x86_64/nptl/pthreaddef.h b/sysdeps/x86_64/nptl/pthreaddef.h
index 9c7130dae2..9397efc631 100644
--- a/sysdeps/x86_64/nptl/pthreaddef.h
+++ b/sysdeps/x86_64/nptl/pthreaddef.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym
index 729d1da38f..aeb752673a 100644
--- a/sysdeps/x86_64/nptl/tcb-offsets.sym
+++ b/sysdeps/x86_64/nptl/tcb-offsets.sym
@@ -16,7 +16,6 @@ VGETCPU_CACHE_OFFSET offsetof (tcbhead_t, vgetcpu_cache)
#ifndef __ASSUME_PRIVATE_FUTEX
PRIVATE_FUTEX offsetof (tcbhead_t, private_futex)
#endif
-RTLD_SAVESPACE_SSE offsetof (tcbhead_t, rtld_savespace_sse)
-- Not strictly offsets, but these values are also used in the TCB.
TCB_CANCELSTATE_BITMASK CANCELSTATE_BITMASK
diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h
index d7543c651f..2b061a07c6 100644
--- a/sysdeps/x86_64/nptl/tls.h
+++ b/sysdeps/x86_64/nptl/tls.h
@@ -1,5 +1,5 @@
/* Definition for thread-local data handling. nptl/x86_64 version.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -67,14 +67,15 @@ typedef struct
# else
int __glibc_reserved1;
# endif
- int rtld_must_xmm_save;
+ int __glibc_unused1;
/* Reservation of some values for the TM ABI. */
void *__private_tm[4];
/* GCC split stack support. */
void *__private_ss;
long int __glibc_reserved2;
- /* Have space for the post-AVX register size. */
- __128bits rtld_savespace_sse[8][4] __attribute__ ((aligned (32)));
+ /* Must be kept even if it is no longer used by glibc since programs,
+ like AddressSanitizer, depend on the size of tcbhead_t. */
+ __128bits __glibc_unused2[8][4] __attribute__ ((aligned (32)));
void *__padding[8];
} tcbhead_t;
@@ -384,41 +385,6 @@ typedef struct
# define THREAD_GSCOPE_WAIT() \
GL(dl_wait_lookup_done) ()
-
-# ifdef SHARED
-/* Defined in dl-trampoline.S. */
-extern void _dl_x86_64_save_sse (void);
-extern void _dl_x86_64_restore_sse (void);
-
-# define RTLD_CHECK_FOREIGN_CALL \
- (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0)
-
-/* NB: Don't use the xchg operation because that would imply a lock
- prefix which is expensive and unnecessary. The cache line is also
- not contested at all. */
-# define RTLD_ENABLE_FOREIGN_CALL \
- int old_rtld_must_xmm_save = THREAD_GETMEM (THREAD_SELF, \
- header.rtld_must_xmm_save); \
- THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1)
-
-# define RTLD_PREPARE_FOREIGN_CALL \
- do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save)) \
- { \
- _dl_x86_64_save_sse (); \
- THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \
- } \
- while (0)
-
-# define RTLD_FINALIZE_FOREIGN_CALL \
- do { \
- if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0) \
- _dl_x86_64_restore_sse (); \
- THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, \
- old_rtld_must_xmm_save); \
- } while (0)
-# endif
-
-
#endif /* __ASSEMBLER__ */
#endif /* tls.h */
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index ec2cb9c76c..f90b7921a1 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -1,6 +1,6 @@
/* fast SSE2 memchr with 64 byte loop and pmaxub instruction using
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/rshift.S b/sysdeps/x86_64/rshift.S
index 4166e612d5..c88c6d82bb 100644
--- a/sysdeps/x86_64/rshift.S
+++ b/sysdeps/x86_64/rshift.S
@@ -1,5 +1,5 @@
/* x86-64 __mpn_rshift --
- Copyright (C) 2007-2015 Free Software Foundation, Inc.
+ Copyright (C) 2007-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/rtld-memcmp.c b/sysdeps/x86_64/rtld-memcmp.c
deleted file mode 100644
index 2ee40328b8..0000000000
--- a/sysdeps/x86_64/rtld-memcmp.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <string/memcmp.c>
diff --git a/sysdeps/x86_64/rtld-strchr.S b/sysdeps/x86_64/rtld-strchr.S
deleted file mode 100644
index cc694d71b6..0000000000
--- a/sysdeps/x86_64/rtld-strchr.S
+++ /dev/null
@@ -1,288 +0,0 @@
-/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
- For AMD x86-64.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-
- .text
-ENTRY (strchr)
-
- /* Before we start with the main loop we process single bytes
- until the source pointer is aligned. This has two reasons:
- 1. aligned 64-bit memory access is faster
- and (more important)
- 2. we process in the main loop 64 bit in one step although
- we don't know the end of the string. But accessing at
- 8-byte alignment guarantees that we never access illegal
- memory if this would not also be done by the trivial
- implementation (this is because all processor inherent
- boundaries are multiples of 8). */
-
- movq %rdi, %rdx
- andl $7, %edx /* Mask alignment bits */
- movq %rdi, %rax /* duplicate destination. */
- jz 1f /* aligned => start loop */
- neg %edx
- addl $8, %edx /* Align to 8 bytes. */
-
- /* Search the first bytes directly. */
-0: movb (%rax), %cl /* load byte */
- cmpb %cl,%sil /* compare byte. */
- je 6f /* target found */
- testb %cl,%cl /* is byte NUL? */
- je 7f /* yes => return NULL */
- incq %rax /* increment pointer */
- decl %edx
- jnz 0b
-
-
-1:
- /* At the moment %rsi contains C. What we need for the
- algorithm is C in all bytes of the register. Avoid
- operations on 16 bit words because these require an
- prefix byte (and one more cycle). */
- /* Populate 8 bit data to full 64-bit. */
- movabs $0x0101010101010101,%r9
- movzbl %sil,%edx
- imul %rdx,%r9
-
- movq $0xfefefefefefefeff, %r8 /* Save magic. */
-
- /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
- change any of the hole bits of LONGWORD.
-
- 1) Is this safe? Will it catch all the zero bytes?
- Suppose there is a byte with all zeros. Any carry bits
- propagating from its left will fall into the hole at its
- least significant bit and stop. Since there will be no
- carry from its most significant bit, the LSB of the
- byte to the left will be unchanged, and the zero will be
- detected.
-
- 2) Is this worthwhile? Will it ignore everything except
- zero bytes? Suppose every byte of QUARDWORD has a bit set
- somewhere. There will be a carry into bit 8. If bit 8
- is set, this will carry into bit 16. If bit 8 is clear,
- one of bits 9-15 must be set, so there will be a carry
- into bit 16. Similarly, there will be a carry into bit
- 24 tec.. If one of bits 54-63 is set, there will be a carry
- into bit 64 (=carry flag), so all of the hole bits will
- be changed.
-
- 3) But wait! Aren't we looking for C, not zero?
- Good point. So what we do is XOR LONGWORD with a longword,
- each of whose bytes is C. This turns each byte that is C
- into a zero. */
-
- .p2align 4
-4:
- /* Main Loop is unrolled 4 times. */
- /* First unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
- are now 0 */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found c => return pointer */
-
- /* The quadword we looked at does not contain the value we're looking
- for. Let's search now whether we have reached the end of the
- string. */
- xorq %r9, %rcx /* restore original dword without reload */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 7f /* highest byte is NUL => return NULL */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 7f /* found NUL => return NULL */
-
- /* Second unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
- are now 0 */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found c => return pointer */
-
- /* The quadword we looked at does not contain the value we're looking
- for. Let's search now whether we have reached the end of the
- string. */
- xorq %r9, %rcx /* restore original dword without reload */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 7f /* highest byte is NUL => return NULL */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 7f /* found NUL => return NULL */
- /* Third unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
- are now 0 */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found c => return pointer */
-
- /* The quadword we looked at does not contain the value we're looking
- for. Let's search now whether we have reached the end of the
- string. */
- xorq %r9, %rcx /* restore original dword without reload */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 7f /* highest byte is NUL => return NULL */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 7f /* found NUL => return NULL */
- /* Fourth unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
- are now 0 */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found c => return pointer */
-
- /* The quadword we looked at does not contain the value we're looking
- for. Let's search now whether we have reached the end of the
- string. */
- xorq %r9, %rcx /* restore original dword without reload */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 7f /* highest byte is NUL => return NULL */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jz 4b /* no NUL found => restart loop */
-
-
-7: /* Return NULL. */
- xorl %eax, %eax
- retq
-
-
- /* We now scan for the byte in which the character was matched.
- But we have to take care of the case that a NUL char is
- found before this in the dword. Note that we XORed %rcx
- with the byte we're looking for, therefore the tests below look
- reversed. */
-
-
- .p2align 4 /* Align, it's a jump target. */
-3: movq %r9,%rdx /* move to %rdx so that we can access bytes */
- subq $8,%rax /* correct pointer increment. */
- testb %cl, %cl /* is first byte C? */
- jz 6f /* yes => return pointer */
- cmpb %dl, %cl /* is first byte NUL? */
- je 7b /* yes => return NULL */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is second byte C? */
- jz 6f /* yes => return pointer */
- cmpb %dl, %ch /* is second byte NUL? */
- je 7b /* yes => return NULL? */
- incq %rax /* increment pointer */
-
- shrq $16, %rcx /* make upper bytes accessible */
- testb %cl, %cl /* is third byte C? */
- jz 6f /* yes => return pointer */
- cmpb %dl, %cl /* is third byte NUL? */
- je 7b /* yes => return NULL */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is fourth byte C? */
- jz 6f /* yes => return pointer */
- cmpb %dl, %ch /* is fourth byte NUL? */
- je 7b /* yes => return NULL? */
- incq %rax /* increment pointer */
-
- shrq $16, %rcx /* make upper bytes accessible */
- testb %cl, %cl /* is fifth byte C? */
- jz 6f /* yes => return pointer */
- cmpb %dl, %cl /* is fifth byte NUL? */
- je 7b /* yes => return NULL */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is sixth byte C? */
- jz 6f /* yes => return pointer */
- cmpb %dl, %ch /* is sixth byte NUL? */
- je 7b /* yes => return NULL? */
- incq %rax /* increment pointer */
-
- shrq $16, %rcx /* make upper bytes accessible */
- testb %cl, %cl /* is seventh byte C? */
- jz 6f /* yes => return pointer */
- cmpb %dl, %cl /* is seventh byte NUL? */
- je 7b /* yes => return NULL */
-
- /* It must be in the eigth byte and it cannot be NUL. */
- incq %rax
-
-6:
- nop
- retq
-END (strchr)
-
-weak_alias (strchr, index)
-libc_hidden_builtin_def (strchr)
diff --git a/sysdeps/x86_64/rtld-strlen.S b/sysdeps/x86_64/rtld-strlen.S
deleted file mode 100644
index 1328652154..0000000000
--- a/sysdeps/x86_64/rtld-strlen.S
+++ /dev/null
@@ -1,136 +0,0 @@
-/* strlen(str) -- determine the length of the string STR.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
- Based on i486 version contributed by Ulrich Drepper <drepper@redhat.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-
- .text
-ENTRY (strlen)
- movq %rdi, %rcx /* Duplicate source pointer. */
- andl $7, %ecx /* mask alignment bits */
- movq %rdi, %rax /* duplicate destination. */
- jz 1f /* aligned => start loop */
-
- neg %ecx /* We need to align to 8 bytes. */
- addl $8,%ecx
- /* Search the first bytes directly. */
-0: cmpb $0x0,(%rax) /* is byte NUL? */
- je 2f /* yes => return */
- incq %rax /* increment pointer */
- decl %ecx
- jnz 0b
-
-1: movq $0xfefefefefefefeff,%r8 /* Save magic. */
-
- .p2align 4 /* Align loop. */
-4: /* Main Loop is unrolled 4 times. */
- /* First unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Second unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Third unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jnz 3f /* found NUL => return pointer */
-
- /* Fourth unroll. */
- movq (%rax), %rcx /* get double word (= 8 bytes) in question */
- addq $8,%rax /* adjust pointer for next word */
- movq %r8, %rdx /* magic value */
- addq %rcx, %rdx /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rcx, %rdx /* (word+magic)^word */
- orq %r8, %rdx /* set all non-carry bits */
- incq %rdx /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
- jz 4b /* no NUL found => continue loop */
-
- .p2align 4 /* Align, it's a jump target. */
-3: subq $8,%rax /* correct pointer increment. */
-
- testb %cl, %cl /* is first byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is second byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testl $0x00ff0000, %ecx /* is third byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-
- testl $0xff000000, %ecx /* is fourth byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-
- shrq $32, %rcx /* look at other half. */
-
- testb %cl, %cl /* is first byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testb %ch, %ch /* is second byte NUL? */
- jz 2f /* yes => return */
- incq %rax /* increment pointer */
-
- testl $0xff0000, %ecx /* is third byte NUL? */
- jz 2f /* yes => return pointer */
- incq %rax /* increment pointer */
-2:
- subq %rdi, %rax /* compute difference to string start */
- ret
-END (strlen)
-libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c
index 72e67aa999..0834e711b3 100644
--- a/sysdeps/x86_64/sched_cpucount.c
+++ b/sysdeps/x86_64/sched_cpucount.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2007-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/setjmp.S b/sysdeps/x86_64/setjmp.S
index 774aaf1e8d..3e93967c2f 100644
--- a/sysdeps/x86_64/setjmp.S
+++ b/sysdeps/x86_64/setjmp.S
@@ -1,5 +1,5 @@
/* setjmp for x86-64.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/stackinfo.h b/sysdeps/x86_64/stackinfo.h
index 5b81d808d0..848aa7754c 100644
--- a/sysdeps/x86_64/stackinfo.h
+++ b/sysdeps/x86_64/stackinfo.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/start.S b/sysdeps/x86_64/start.S
index 0d27a38e9c..1374974307 100644
--- a/sysdeps/x86_64/start.S
+++ b/sysdeps/x86_64/start.S
@@ -1,5 +1,5 @@
/* Startup code compliant to the ELF x86-64 ABI.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
diff --git a/sysdeps/x86_64/stpcpy_chk.S b/sysdeps/x86_64/stpcpy_chk.S
deleted file mode 100644
index 905e8d7ee3..0000000000
--- a/sysdeps/x86_64/stpcpy_chk.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STPCPY_CHK
-#define STRCPY_CHK __stpcpy_chk
-#include <sysdeps/x86_64/strcpy_chk.S>
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
index affb15a32e..dadf4c76b2 100644
--- a/sysdeps/x86_64/strcat.S
+++ b/sysdeps/x86_64/strcat.S
@@ -1,6 +1,6 @@
/* strcat(dest, src) -- Append SRC on the end of DEST.
Optimized for x86-64.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index d7955af8d2..4431fee648 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -1,6 +1,6 @@
/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
For AMD x86-64.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S
index e8cab0dd16..7b52d699ee 100644
--- a/sysdeps/x86_64/strchrnul.S
+++ b/sysdeps/x86_64/strchrnul.S
@@ -1,7 +1,7 @@
/* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR
or terminating NUL byte.
For AMD x86-64.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index 1329649d3a..c5c44d4e27 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -1,5 +1,5 @@
/* Highly optimized version for x86-64.
- Copyright (C) 1999-2015 Free Software Foundation, Inc.
+ Copyright (C) 1999-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Based on i686 version contributed by Ulrich Drepper
<drepper@cygnus.com>, 1999.
@@ -29,13 +29,6 @@
#endif
#ifdef USE_AS_STRNCMP
-/* The simplified code below is not set up to handle strncmp() so far.
- Should this become necessary it has to be implemented. For now
- just report the problem. */
-# if !IS_IN (libc)
-# error "strncmp not implemented so far"
-# endif
-
/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
if the new counter > the old one or is 0. */
# define UPDATE_STRNCMP_COUNTER \
@@ -50,20 +43,10 @@
#elif defined USE_AS_STRCASECMP_L
# include "locale-defines.h"
-/* No support for strcasecmp outside libc so far since it is not needed. */
-# if !IS_IN (libc)
-# error "strcasecmp_l not implemented so far"
-# endif
-
# define UPDATE_STRNCMP_COUNTER
#elif defined USE_AS_STRNCASECMP_L
# include "locale-defines.h"
-/* No support for strncasecmp outside libc so far since it is not needed. */
-# if !IS_IN (libc)
-# error "strncasecmp_l not implemented so far"
-# endif
-
# define UPDATE_STRNCMP_COUNTER \
/* calculate left number to compare */ \
lea -16(%rcx, %r11), %r9; \
@@ -126,63 +109,44 @@ libc_hidden_def (__strncasecmp)
#endif
ENTRY (STRCMP)
-#if !IS_IN (libc)
-/* Simple version since we can't use SSE registers in ld.so. */
-L(oop): movb (%rdi), %al
- cmpb (%rsi), %al
- jne L(neq)
- incq %rdi
- incq %rsi
- testb %al, %al
- jnz L(oop)
-
- xorl %eax, %eax
- ret
-
-L(neq): movl $1, %eax
- movl $-1, %ecx
- cmovbl %ecx, %eax
- ret
-END (STRCMP)
-#else /* !IS_IN (libc) */
-# ifdef USE_AS_STRCASECMP_L
+#ifdef USE_AS_STRCASECMP_L
/* We have to fall back on the C implementation for locales
with encodings not matching ASCII for single bytes. */
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP
-# else
+# else
mov (%rdx), %RAX_LP
-# endif
+# endif
testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
jne __strcasecmp_l_nonascii
-# elif defined USE_AS_STRNCASECMP_L
+#elif defined USE_AS_STRNCASECMP_L
/* We have to fall back on the C implementation for locales
with encodings not matching ASCII for single bytes. */
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP
-# else
+# else
mov (%rcx), %RAX_LP
-# endif
+# endif
testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
jne __strncasecmp_l_nonascii
-# endif
+#endif
/*
* This implementation uses SSE to compare up to 16 bytes at a time.
*/
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
test %rdx, %rdx
je LABEL(strcmp_exitz)
cmp $1, %rdx
je LABEL(Byte0)
mov %rdx, %r11
-# endif
+#endif
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
and $0x3f, %rcx /* rsi alignment in cache line */
and $0x3f, %rax /* rdi alignment in cache line */
-# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
.section .rodata.cst16,"aM",@progbits,16
.align 16
.Lbelowupper:
@@ -196,12 +160,12 @@ END (STRCMP)
.quad 0x2020202020202020
.previous
movdqa .Lbelowupper(%rip), %xmm5
-# define UCLOW_reg %xmm5
+# define UCLOW_reg %xmm5
movdqa .Ltopupper(%rip), %xmm6
-# define UCHIGH_reg %xmm6
+# define UCHIGH_reg %xmm6
movdqa .Ltouppermask(%rip), %xmm7
-# define LCQWORD_reg %xmm7
-# endif
+# define LCQWORD_reg %xmm7
+#endif
cmp $0x30, %ecx
ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
cmp $0x30, %eax
@@ -210,8 +174,8 @@ END (STRCMP)
movlpd (%rsi), %xmm2
movhpd 8(%rdi), %xmm1
movhpd 8(%rsi), %xmm2
-# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# define TOLOWER(reg1, reg2) \
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# define TOLOWER(reg1, reg2) \
movdqa reg1, %xmm8; \
movdqa UCHIGH_reg, %xmm9; \
movdqa reg2, %xmm10; \
@@ -227,9 +191,9 @@ END (STRCMP)
por %xmm8, reg1; \
por %xmm10, reg2
TOLOWER (%xmm1, %xmm2)
-# else
-# define TOLOWER(reg1, reg2)
-# endif
+#else
+# define TOLOWER(reg1, reg2)
+#endif
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
@@ -237,10 +201,10 @@ END (STRCMP)
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
jnz LABEL(less16bytes) /* If not, find different value or null char */
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz) /* finish comparision */
-# endif
+#endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
@@ -282,13 +246,13 @@ LABEL(ashr_0):
movdqa (%rsi), %xmm1
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
-# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
+#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
-# else
+#else
movdqa (%rdi), %xmm2
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
-# endif
+#endif
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
@@ -321,10 +285,10 @@ LABEL(loop_ashr_0):
sub $0xffff, %edx
jnz LABEL(exit) /* mismatch or null char seen */
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
@@ -336,10 +300,10 @@ LABEL(loop_ashr_0):
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
jmp LABEL(loop_ashr_0)
@@ -388,13 +352,13 @@ LABEL(gobble_ashr_1):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4 /* store for next cycle */
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $1, %xmm3
pslldq $15, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -404,10 +368,10 @@ LABEL(gobble_ashr_1):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -418,13 +382,13 @@ LABEL(gobble_ashr_1):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4 /* store for next cycle */
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $1, %xmm3
pslldq $15, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -434,10 +398,10 @@ LABEL(gobble_ashr_1):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
jmp LABEL(loop_ashr_1)
@@ -453,10 +417,10 @@ LABEL(nibble_ashr_1):
test $0xfffe, %edx
jnz LABEL(ashr_1_exittail) /* find null char*/
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $15, %r11
jbe LABEL(ashr_1_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* substract 4K from %r10 */
@@ -518,13 +482,13 @@ LABEL(gobble_ashr_2):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $2, %xmm3
pslldq $14, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -534,10 +498,10 @@ LABEL(gobble_ashr_2):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -549,13 +513,13 @@ LABEL(gobble_ashr_2):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $2, %xmm3
pslldq $14, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -565,10 +529,10 @@ LABEL(gobble_ashr_2):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -581,10 +545,10 @@ LABEL(nibble_ashr_2):
test $0xfffc, %edx
jnz LABEL(ashr_2_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $14, %r11
jbe LABEL(ashr_2_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -643,13 +607,13 @@ LABEL(gobble_ashr_3):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $3, %xmm3
pslldq $13, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -659,10 +623,10 @@ LABEL(gobble_ashr_3):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -674,13 +638,13 @@ LABEL(gobble_ashr_3):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $3, %xmm3
pslldq $13, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -690,10 +654,10 @@ LABEL(gobble_ashr_3):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -706,10 +670,10 @@ LABEL(nibble_ashr_3):
test $0xfff8, %edx
jnz LABEL(ashr_3_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $13, %r11
jbe LABEL(ashr_3_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -768,13 +732,13 @@ LABEL(gobble_ashr_4):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $4, %xmm3
pslldq $12, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -784,10 +748,10 @@ LABEL(gobble_ashr_4):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -799,13 +763,13 @@ LABEL(gobble_ashr_4):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $4, %xmm3
pslldq $12, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -815,10 +779,10 @@ LABEL(gobble_ashr_4):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -831,10 +795,10 @@ LABEL(nibble_ashr_4):
test $0xfff0, %edx
jnz LABEL(ashr_4_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $12, %r11
jbe LABEL(ashr_4_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -893,13 +857,13 @@ LABEL(gobble_ashr_5):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $5, %xmm3
pslldq $11, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -909,10 +873,10 @@ LABEL(gobble_ashr_5):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -924,13 +888,13 @@ LABEL(gobble_ashr_5):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $5, %xmm3
pslldq $11, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -940,10 +904,10 @@ LABEL(gobble_ashr_5):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -956,10 +920,10 @@ LABEL(nibble_ashr_5):
test $0xffe0, %edx
jnz LABEL(ashr_5_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $11, %r11
jbe LABEL(ashr_5_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -1018,13 +982,13 @@ LABEL(gobble_ashr_6):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $6, %xmm3
pslldq $10, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1034,10 +998,10 @@ LABEL(gobble_ashr_6):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1049,13 +1013,13 @@ LABEL(gobble_ashr_6):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $6, %xmm3
pslldq $10, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1065,10 +1029,10 @@ LABEL(gobble_ashr_6):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1081,10 +1045,10 @@ LABEL(nibble_ashr_6):
test $0xffc0, %edx
jnz LABEL(ashr_6_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $10, %r11
jbe LABEL(ashr_6_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -1143,13 +1107,13 @@ LABEL(gobble_ashr_7):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $7, %xmm3
pslldq $9, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1159,10 +1123,10 @@ LABEL(gobble_ashr_7):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1174,13 +1138,13 @@ LABEL(gobble_ashr_7):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $7, %xmm3
pslldq $9, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1190,10 +1154,10 @@ LABEL(gobble_ashr_7):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1206,10 +1170,10 @@ LABEL(nibble_ashr_7):
test $0xff80, %edx
jnz LABEL(ashr_7_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $9, %r11
jbe LABEL(ashr_7_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -1268,13 +1232,13 @@ LABEL(gobble_ashr_8):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $8, %xmm3
pslldq $8, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1284,10 +1248,10 @@ LABEL(gobble_ashr_8):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1299,13 +1263,13 @@ LABEL(gobble_ashr_8):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $8, %xmm3
pslldq $8, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1315,10 +1279,10 @@ LABEL(gobble_ashr_8):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1331,10 +1295,10 @@ LABEL(nibble_ashr_8):
test $0xff00, %edx
jnz LABEL(ashr_8_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $8, %r11
jbe LABEL(ashr_8_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -1393,13 +1357,13 @@ LABEL(gobble_ashr_9):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $9, %xmm3
pslldq $7, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1409,10 +1373,10 @@ LABEL(gobble_ashr_9):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1424,13 +1388,13 @@ LABEL(gobble_ashr_9):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $9, %xmm3
pslldq $7, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1440,10 +1404,10 @@ LABEL(gobble_ashr_9):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3 /* store for next cycle */
@@ -1456,10 +1420,10 @@ LABEL(nibble_ashr_9):
test $0xfe00, %edx
jnz LABEL(ashr_9_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $7, %r11
jbe LABEL(ashr_9_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -1518,13 +1482,13 @@ LABEL(gobble_ashr_10):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $10, %xmm3
pslldq $6, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1534,10 +1498,10 @@ LABEL(gobble_ashr_10):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1549,13 +1513,13 @@ LABEL(gobble_ashr_10):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $10, %xmm3
pslldq $6, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1565,10 +1529,10 @@ LABEL(gobble_ashr_10):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1581,10 +1545,10 @@ LABEL(nibble_ashr_10):
test $0xfc00, %edx
jnz LABEL(ashr_10_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $6, %r11
jbe LABEL(ashr_10_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -1643,13 +1607,13 @@ LABEL(gobble_ashr_11):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $11, %xmm3
pslldq $5, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1659,10 +1623,10 @@ LABEL(gobble_ashr_11):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1674,13 +1638,13 @@ LABEL(gobble_ashr_11):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $11, %xmm3
pslldq $5, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1690,10 +1654,10 @@ LABEL(gobble_ashr_11):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1706,10 +1670,10 @@ LABEL(nibble_ashr_11):
test $0xf800, %edx
jnz LABEL(ashr_11_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $5, %r11
jbe LABEL(ashr_11_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -1768,13 +1732,13 @@ LABEL(gobble_ashr_12):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $12, %xmm3
pslldq $4, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1784,10 +1748,10 @@ LABEL(gobble_ashr_12):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1799,13 +1763,13 @@ LABEL(gobble_ashr_12):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $12, %xmm3
pslldq $4, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1815,10 +1779,10 @@ LABEL(gobble_ashr_12):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1831,10 +1795,10 @@ LABEL(nibble_ashr_12):
test $0xf000, %edx
jnz LABEL(ashr_12_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $4, %r11
jbe LABEL(ashr_12_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -1893,13 +1857,13 @@ LABEL(gobble_ashr_13):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $13, %xmm3
pslldq $3, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1909,10 +1873,10 @@ LABEL(gobble_ashr_13):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1924,13 +1888,13 @@ LABEL(gobble_ashr_13):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $13, %xmm3
pslldq $3, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -1940,10 +1904,10 @@ LABEL(gobble_ashr_13):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -1956,10 +1920,10 @@ LABEL(nibble_ashr_13):
test $0xe000, %edx
jnz LABEL(ashr_13_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $3, %r11
jbe LABEL(ashr_13_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -2018,13 +1982,13 @@ LABEL(gobble_ashr_14):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $14, %xmm3
pslldq $2, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -2034,10 +1998,10 @@ LABEL(gobble_ashr_14):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -2049,13 +2013,13 @@ LABEL(gobble_ashr_14):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $14, %xmm3
pslldq $2, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -2065,10 +2029,10 @@ LABEL(gobble_ashr_14):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP | defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP | defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -2081,10 +2045,10 @@ LABEL(nibble_ashr_14):
test $0xc000, %edx
jnz LABEL(ashr_14_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $2, %r11
jbe LABEL(ashr_14_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -2145,13 +2109,13 @@ LABEL(gobble_ashr_15):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $15, %xmm3
pslldq $1, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -2161,10 +2125,10 @@ LABEL(gobble_ashr_15):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -2176,13 +2140,13 @@ LABEL(gobble_ashr_15):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
-# ifndef USE_SSSE3
+#ifndef USE_SSSE3
psrldq $15, %xmm3
pslldq $1, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
-# else
+#else
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
-# endif
+#endif
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm1, %xmm0
@@ -2192,10 +2156,10 @@ LABEL(gobble_ashr_15):
sub $0xffff, %edx
jnz LABEL(exit)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
add $16, %rcx
movdqa %xmm4, %xmm3
@@ -2208,10 +2172,10 @@ LABEL(nibble_ashr_15):
test $0x8000, %edx
jnz LABEL(ashr_15_exittail)
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmpq $1, %r11
jbe LABEL(ashr_15_exittail)
-# endif
+#endif
pxor %xmm0, %xmm0
sub $0x1000, %r10
@@ -2246,18 +2210,18 @@ LABEL(ret):
LABEL(less16bytes):
bsf %rdx, %rdx /* find and store bit index in %rdx */
-# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
-# endif
+#endif
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
-# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
movl (%rdx,%rcx,4), %ecx
movl (%rdx,%rax,4), %eax
-# endif
+#endif
sub %ecx, %eax
ret
@@ -2271,11 +2235,11 @@ LABEL(Byte0):
movzx (%rsi), %ecx
movzx (%rdi), %eax
-# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
movl (%rdx,%rcx,4), %ecx
movl (%rdx,%rax,4), %eax
-# endif
+#endif
sub %ecx, %eax
ret
@@ -2300,5 +2264,4 @@ LABEL(unaligned_table):
.int LABEL(ashr_14) - LABEL(unaligned_table)
.int LABEL(ashr_15) - LABEL(unaligned_table)
.int LABEL(ashr_0) - LABEL(unaligned_table)
-#endif /* !IS_IN (libc) */
libc_hidden_builtin_def (STRCMP)
diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S
index 23231088fd..3f90c0020a 100644
--- a/sysdeps/x86_64/strcpy.S
+++ b/sysdeps/x86_64/strcpy.S
@@ -1,5 +1,5 @@
/* strcpy/stpcpy implementation for x86-64.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S
deleted file mode 100644
index 24e51c66f1..0000000000
--- a/sysdeps/x86_64/strcpy_chk.S
+++ /dev/null
@@ -1,208 +0,0 @@
-/* strcpy/stpcpy checking implementation for x86-64.
- Copyright (C) 2002-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Andreas Jaeger <aj@suse.de>, 2002.
- Adopted into checking version by Jakub Jelinek <jakub@redhat.com>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#ifndef USE_AS_STPCPY_CHK
-# define STRCPY_CHK __strcpy_chk
-#endif
-
- .text
-ENTRY (STRCPY_CHK)
- movq %rsi, %rcx /* Source register. */
- andl $7, %ecx /* mask alignment bits */
-#ifndef USE_AS_STPCPY_CHK
- movq %rdi, %r10 /* Duplicate destination pointer. */
-#endif
- jz 5f /* aligned => start loop */
-
- cmpq $8, %rdx /* Check if only few bytes left in
- destination. */
- jb 50f
-
- subq $8, %rcx /* We need to align to 8 bytes. */
- addq %rcx, %rdx /* Subtract count of stored bytes
- in the cycle below from destlen. */
-
- /* Search the first bytes directly. */
-0:
- movb (%rsi), %al /* Fetch a byte */
- testb %al, %al /* Is it NUL? */
- movb %al, (%rdi) /* Store it */
- jz 4f /* If it was NUL, done! */
- incq %rsi
- incq %rdi
- incl %ecx
- jnz 0b
-
-5:
- movq $0xfefefefefefefeff,%r8
- cmpq $32, %rdx /* Are there enough bytes in destination
- for the next unrolled round? */
- jb 60f /* If not, avoid the unrolled loop. */
-
- /* Now the sources is aligned. Unfortunatly we cannot force
- to have both source and destination aligned, so ignore the
- alignment of the destination. */
- .p2align 4
-1:
- /* 1st unroll. */
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 3f /* found NUL => return pointer */
-
- movq %rax, (%rdi) /* Write value to destination. */
- addq $8, %rdi /* Adjust pointer. */
-
- /* 2nd unroll. */
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 3f /* found NUL => return pointer */
-
- movq %rax, (%rdi) /* Write value to destination. */
- addq $8, %rdi /* Adjust pointer. */
-
- /* 3rd unroll. */
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 3f /* found NUL => return pointer */
-
- movq %rax, (%rdi) /* Write value to destination. */
- addq $8, %rdi /* Adjust pointer. */
-
- /* 4th unroll. */
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 3f /* found NUL => return pointer */
-
- subq $32, %rdx /* Adjust destlen. */
- movq %rax, (%rdi) /* Write value to destination. */
- addq $8, %rdi /* Adjust pointer. */
- cmpq $32, %rdx /* Are there enough bytes in destination
- for the next unrolled round? */
- jae 1b /* Next iteration. */
-
-60:
- cmpq $8, %rdx /* Are there enough bytes in destination
- for the next unrolled round? */
- jb 50f /* Now, copy and check byte by byte. */
-
- movq (%rsi), %rax /* Read double word (8 bytes). */
- addq $8, %rsi /* Adjust pointer for next word. */
- movq %rax, %r9 /* Save a copy for NUL finding. */
- addq %r8, %r9 /* add the magic value to the word. We get
- carry bits reported for each byte which
- is *not* 0 */
- jnc 3f /* highest byte is NUL => return pointer */
- xorq %rax, %r9 /* (word+magic)^word */
- orq %r8, %r9 /* set all non-carry bits */
- incq %r9 /* add 1: if one carry bit was *not* set
- the addition will not result in 0. */
-
- jnz 3f /* found NUL => return pointer */
-
- subq $8, %rdx /* Adjust destlen. */
- movq %rax, (%rdi) /* Write value to destination. */
- addq $8, %rdi /* Adjust pointer. */
- jmp 60b /* Next iteration. */
-
- /* Do the last few bytes. %rax contains the value to write.
- The loop is unrolled twice. */
- .p2align 4
-3:
- /* Note that stpcpy needs to return with the value of the NUL
- byte. */
- movb %al, (%rdi) /* 1st byte. */
- testb %al, %al /* Is it NUL. */
- jz 4f /* yes, finish. */
- incq %rdi /* Increment destination. */
- movb %ah, (%rdi) /* 2nd byte. */
- testb %ah, %ah /* Is it NUL?. */
- jz 4f /* yes, finish. */
- incq %rdi /* Increment destination. */
- shrq $16, %rax /* Shift... */
- jmp 3b /* and look at next two bytes in %rax. */
-
-51:
- /* Search the bytes directly, checking for overflows. */
- incq %rsi
- incq %rdi
- decq %rdx
- jz HIDDEN_JUMPTARGET (__chk_fail)
-52:
- movb (%rsi), %al /* Fetch a byte */
- testb %al, %al /* Is it NUL? */
- movb %al, (%rdi) /* Store it */
- jnz 51b /* If it was NUL, done! */
-4:
-#ifdef USE_AS_STPCPY_CHK
- movq %rdi, %rax /* Destination is return value. */
-#else
- movq %r10, %rax /* Source is return value. */
-#endif
- retq
-
-50:
- testq %rdx, %rdx
- jnz 52b
- jmp HIDDEN_JUMPTARGET (__chk_fail)
-
-END (STRCPY_CHK)
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
index c6c20f001c..de526c8fdd 100644
--- a/sysdeps/x86_64/strcspn.S
+++ b/sysdeps/x86_64/strcspn.S
@@ -1,7 +1,7 @@
/* strcspn (str, ss) -- Return the length of the initial segment of STR
which contains no characters from SS.
For AMD x86-64.
- Copyright (C) 1994-2015 Free Software Foundation, Inc.
+ Copyright (C) 1994-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index c382c8d23e..12f63ad1bb 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,5 +1,5 @@
/* SSE2 version of strlen.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
/* Long lived register in strlen(s), strnlen(s, n) are:
- %xmm11 - zero
+ %xmm3 - zero
%rdi - s
%r10 (s+n) & (~(64-1))
%r11 s+n
@@ -32,14 +32,14 @@ ENTRY(strlen)
/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
#define FIND_ZERO \
- pcmpeqb (%rax), %xmm8; \
- pcmpeqb 16(%rax), %xmm9; \
- pcmpeqb 32(%rax), %xmm10; \
- pcmpeqb 48(%rax), %xmm11; \
- pmovmskb %xmm8, %esi; \
- pmovmskb %xmm9, %edx; \
- pmovmskb %xmm10, %r8d; \
- pmovmskb %xmm11, %ecx; \
+ pcmpeqb (%rax), %xmm0; \
+ pcmpeqb 16(%rax), %xmm1; \
+ pcmpeqb 32(%rax), %xmm2; \
+ pcmpeqb 48(%rax), %xmm3; \
+ pmovmskb %xmm0, %esi; \
+ pmovmskb %xmm1, %edx; \
+ pmovmskb %xmm2, %r8d; \
+ pmovmskb %xmm3, %ecx; \
salq $16, %rdx; \
salq $16, %rcx; \
orq %rsi, %rdx; \
@@ -63,10 +63,10 @@ L(n_nonzero):
mov %rsi, %r11
#endif
- pxor %xmm8, %xmm8
- pxor %xmm9, %xmm9
- pxor %xmm10, %xmm10
- pxor %xmm11, %xmm11
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
movq %rdi, %rax
movq %rdi, %rcx
andq $4095, %rcx
@@ -103,9 +103,9 @@ L(n_nonzero):
FIND_ZERO
#else
/* Test first 16 bytes unaligned. */
- movdqu (%rax), %xmm12
- pcmpeqb %xmm8, %xmm12
- pmovmskb %xmm12, %edx
+ movdqu (%rax), %xmm4
+ pcmpeqb %xmm0, %xmm4
+ pmovmskb %xmm4, %edx
test %edx, %edx
je L(next48_bytes)
bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
@@ -114,12 +114,12 @@ L(n_nonzero):
L(next48_bytes):
/* Same as FIND_ZERO except we do not check first 16 bytes. */
andq $-16, %rax
- pcmpeqb 16(%rax), %xmm9
- pcmpeqb 32(%rax), %xmm10
- pcmpeqb 48(%rax), %xmm11
- pmovmskb %xmm9, %edx
- pmovmskb %xmm10, %r8d
- pmovmskb %xmm11, %ecx
+ pcmpeqb 16(%rax), %xmm1
+ pcmpeqb 32(%rax), %xmm2
+ pcmpeqb 48(%rax), %xmm3
+ pmovmskb %xmm1, %edx
+ pmovmskb %xmm2, %r8d
+ pmovmskb %xmm3, %ecx
salq $16, %rdx
salq $16, %rcx
orq %r8, %rcx
@@ -127,7 +127,7 @@ L(next48_bytes):
orq %rcx, %rdx
#endif
- /* When no zero byte is found xmm9-11 are zero so we do not have to
+ /* When no zero byte is found xmm1-3 are zero so we do not have to
zero them. */
PROLOG(loop)
@@ -149,9 +149,9 @@ L(strnlen_ret):
#endif
.p2align 4
L(loop_init):
- pxor %xmm9, %xmm9
- pxor %xmm10, %xmm10
- pxor %xmm11, %xmm11
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
#ifdef AS_STRNLEN
.p2align 4
L(loop):
@@ -160,12 +160,12 @@ L(loop):
cmpq %rax, %r10
je L(exit_end)
- movdqa (%rax), %xmm8
- pminub 16(%rax), %xmm8
- pminub 32(%rax), %xmm8
- pminub 48(%rax), %xmm8
- pcmpeqb %xmm11, %xmm8
- pmovmskb %xmm8, %edx
+ movdqa (%rax), %xmm0
+ pminub 16(%rax), %xmm0
+ pminub 32(%rax), %xmm0
+ pminub 48(%rax), %xmm0
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
testl %edx, %edx
jne L(exit)
jmp L(loop)
@@ -174,7 +174,7 @@ L(loop):
L(exit_end):
cmp %rax, %r11
je L(first) /* Do not read when end is at page boundary. */
- pxor %xmm8, %xmm8
+ pxor %xmm0, %xmm0
FIND_ZERO
L(first):
@@ -186,7 +186,7 @@ L(first):
.p2align 4
L(exit):
- pxor %xmm8, %xmm8
+ pxor %xmm0, %xmm0
FIND_ZERO
bsfq %rdx, %rdx
@@ -200,23 +200,23 @@ L(exit):
.p2align 4
L(loop):
- movdqa 64(%rax), %xmm8
- pminub 80(%rax), %xmm8
- pminub 96(%rax), %xmm8
- pminub 112(%rax), %xmm8
- pcmpeqb %xmm11, %xmm8
- pmovmskb %xmm8, %edx
+ movdqa 64(%rax), %xmm0
+ pminub 80(%rax), %xmm0
+ pminub 96(%rax), %xmm0
+ pminub 112(%rax), %xmm0
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
testl %edx, %edx
jne L(exit64)
subq $-128, %rax
- movdqa (%rax), %xmm8
- pminub 16(%rax), %xmm8
- pminub 32(%rax), %xmm8
- pminub 48(%rax), %xmm8
- pcmpeqb %xmm11, %xmm8
- pmovmskb %xmm8, %edx
+ movdqa (%rax), %xmm0
+ pminub 16(%rax), %xmm0
+ pminub 32(%rax), %xmm0
+ pminub 48(%rax), %xmm0
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %edx
testl %edx, %edx
jne L(exit0)
jmp L(loop)
@@ -225,7 +225,7 @@ L(loop):
L(exit64):
addq $64, %rax
L(exit0):
- pxor %xmm8, %xmm8
+ pxor %xmm0, %xmm0
FIND_ZERO
bsfq %rdx, %rdx
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index 14a3abafb3..de0be762ed 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -1,5 +1,5 @@
/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
- Copyright (C) 2013-2015 Free Software Foundation, Inc.
+ Copyright (C) 2013-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
index 62f061bc8f..49dd4ba9f5 100644
--- a/sysdeps/x86_64/strspn.S
+++ b/sysdeps/x86_64/strspn.S
@@ -1,7 +1,7 @@
/* strspn (str, ss) -- Return the length of the initial segment of STR
which contains only characters from SS.
For AMD x86-64.
- Copyright (C) 1994-2015 Free Software Foundation, Inc.
+ Copyright (C) 1994-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
index f24be7aae7..bd5b103d50 100644
--- a/sysdeps/x86_64/strtok.S
+++ b/sysdeps/x86_64/strtok.S
@@ -1,6 +1,6 @@
/* strtok (str, delim) -- Return next DELIM separated token from STR.
For AMD x86-64.
- Copyright (C) 1998-2015 Free Software Foundation, Inc.
+ Copyright (C) 1998-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Based on i686 version contributed by Ulrich Drepper
<drepper@cygnus.com>, 1998.
diff --git a/sysdeps/x86_64/sub_n.S b/sysdeps/x86_64/sub_n.S
index 4879ace6a3..cc9bc48b01 100644
--- a/sysdeps/x86_64/sub_n.S
+++ b/sysdeps/x86_64/sub_n.S
@@ -1,6 +1,6 @@
/* x86-64 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
store difference in a third limb vector.
- Copyright (C) 2006-2015 Free Software Foundation, Inc.
+ Copyright (C) 2006-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/submul_1.S b/sysdeps/x86_64/submul_1.S
index f5468b97e3..3037cb9c45 100644
--- a/sysdeps/x86_64/submul_1.S
+++ b/sysdeps/x86_64/submul_1.S
@@ -1,6 +1,6 @@
/* x86-64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
the result from a second limb vector.
- Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Copyright (C) 2003-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
index e79a3974fd..fbe3560588 100644
--- a/sysdeps/x86_64/sysdep.h
+++ b/sysdeps/x86_64/sysdep.h
@@ -1,5 +1,5 @@
/* Assembler macros for x86-64.
- Copyright (C) 2001-2015 Free Software Foundation, Inc.
+ Copyright (C) 2001-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tlsdesc.c b/sysdeps/x86_64/tlsdesc.c
index 6807fa26ec..aff8b67941 100644
--- a/sysdeps/x86_64/tlsdesc.c
+++ b/sysdeps/x86_64/tlsdesc.c
@@ -1,5 +1,5 @@
/* Manage TLS descriptors. x86_64 version.
- Copyright (C) 2005-2015 Free Software Foundation, Inc.
+ Copyright (C) 2005-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-audit.h b/sysdeps/x86_64/tst-audit.h
index f2bf3aa008..94e9dd5282 100644
--- a/sysdeps/x86_64/tst-audit.h
+++ b/sysdeps/x86_64/tst-audit.h
@@ -1,6 +1,6 @@
/* Definitions for testing PLT entry/exit auditing. x86_64 version.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/tst-audit10.c b/sysdeps/x86_64/tst-audit10.c
index 6919871564..d104341be8 100644
--- a/sysdeps/x86_64/tst-audit10.c
+++ b/sysdeps/x86_64/tst-audit10.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-auditmod10a.c b/sysdeps/x86_64/tst-auditmod10a.c
index dc0d276c54..e94dbaf7fe 100644
--- a/sysdeps/x86_64/tst-auditmod10a.c
+++ b/sysdeps/x86_64/tst-auditmod10a.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-auditmod10b.c b/sysdeps/x86_64/tst-auditmod10b.c
index 0eb36747d2..ad6fcafdda 100644
--- a/sysdeps/x86_64/tst-auditmod10b.c
+++ b/sysdeps/x86_64/tst-auditmod10b.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-mallocalign1.c b/sysdeps/x86_64/tst-mallocalign1.c
index 89f4bed0be..3897af86c1 100644
--- a/sysdeps/x86_64/tst-mallocalign1.c
+++ b/sysdeps/x86_64/tst-mallocalign1.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-quad1.c b/sysdeps/x86_64/tst-quad1.c
index c24182c6b6..1cb63a748f 100644
--- a/sysdeps/x86_64/tst-quad1.c
+++ b/sysdeps/x86_64/tst-quad1.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-quadmod1.S b/sysdeps/x86_64/tst-quadmod1.S
index 3902850654..588c5016b6 100644
--- a/sysdeps/x86_64/tst-quadmod1.S
+++ b/sysdeps/x86_64/tst-quadmod1.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-quadmod2.S b/sysdeps/x86_64/tst-quadmod2.S
index 1d515a8530..7409a9eaa3 100644
--- a/sysdeps/x86_64/tst-quadmod2.S
+++ b/sysdeps/x86_64/tst-quadmod2.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-split-dynreloc.c b/sysdeps/x86_64/tst-split-dynreloc.c
new file mode 100644
index 0000000000..2f9e9b9477
--- /dev/null
+++ b/sysdeps/x86_64/tst-split-dynreloc.c
@@ -0,0 +1,28 @@
+/* This test will be used to create an executable with a specific
+ section layout in which .rela.dyn and .rela.plt are not contiguous.
+ For the x86 case, readelf reports something like:
+
+ ...
+ [10] .rela.dyn RELA
+ [11] .bar PROGBITS
+ [12] .rela.plt RELA
+ ...
+
+ This is important as this case was not correctly handled by the
+ dynamic linker in the bind-now case, and the second section was never
+ processed. */
+
+#include <stdio.h>
+
+const int __attribute__ ((section(".bar"))) bar = 0x12345678;
+static const char foo[] = "foo";
+
+static int
+do_test (void) /* Test body; invoked through the TEST_FUNCTION macro defined below. */
+{
+ printf ("%s %d\n", foo, bar); /* Read both objects: the string foo and the int placed in section .bar. */
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-split-dynreloc.lds b/sysdeps/x86_64/tst-split-dynreloc.lds
new file mode 100644
index 0000000000..2229e698c9
--- /dev/null
+++ b/sysdeps/x86_64/tst-split-dynreloc.lds
@@ -0,0 +1,5 @@
+SECTIONS
+{
+ .bar : { *(.bar) } /* Gather every input .bar section into an output section named .bar. */
+}
+INSERT AFTER .rela.dyn; /* Splice the statements above into the default script right after .rela.dyn. */
diff --git a/sysdeps/x86_64/tst-stack-align.h b/sysdeps/x86_64/tst-stack-align.h
index 8d91a4c81e..24e8e61c35 100644
--- a/sysdeps/x86_64/tst-stack-align.h
+++ b/sysdeps/x86_64/tst-stack-align.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S
index dad111ff38..8604289e46 100644
--- a/sysdeps/x86_64/wcschr.S
+++ b/sysdeps/x86_64/wcschr.S
@@ -1,5 +1,5 @@
/* wcschr with SSSE3
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S
index bd36d9f5d9..705a73b10e 100644
--- a/sysdeps/x86_64/wcscmp.S
+++ b/sysdeps/x86_64/wcscmp.S
@@ -1,5 +1,5 @@
/* Optimized wcscmp for x86-64 with SSE2.
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S
index 8c1210ed8d..7a9175eefe 100644
--- a/sysdeps/x86_64/wcslen.S
+++ b/sysdeps/x86_64/wcslen.S
@@ -1,5 +1,5 @@
/* Optimized wcslen for x86-64 with SSE2.
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index 9e28aac7f0..fb192f3ecf 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -1,5 +1,5 @@
/* wcsrchr with SSSE3
- Copyright (C) 2011-2015 Free Software Foundation, Inc.
+ Copyright (C) 2011-2016 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
diff --git a/sysdeps/x86_64/x32/dl-machine.h b/sysdeps/x86_64/x32/dl-machine.h
index f5efaa5060..47132fcd96 100644
--- a/sysdeps/x86_64/x32/dl-machine.h
+++ b/sysdeps/x86_64/x32/dl-machine.h
@@ -1,5 +1,5 @@
/* Machine-dependent ELF dynamic relocation inline functions. x32 version.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/rtld-memset.S b/sysdeps/x86_64/x32/fpu/s_lrint.S
index f8df3334bc..aa68863553 100644
--- a/sysdeps/x86_64/rtld-memset.S
+++ b/sysdeps/x86_64/x32/fpu/s_lrint.S
@@ -1,6 +1,6 @@
-/* memset implementation for the dynamic linker. This is separate from the
- libc implementation to avoid writing to SSE registers.
- Copyright (C) 2013-2015 Free Software Foundation, Inc.
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,20 +18,10 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "asm-syntax.h"
-
.text
-/* void *memset (void *dest, char c, size_t count)
- dest => %rdi
- c => %rsi
- count => %rdx */
-ENTRY (memset)
- mov %rdx, %rcx
- movzbl %sil, %eax
- mov %rdi, %rdx
- rep stosb
- mov %rdx, %rax
+ENTRY(__lrint)
+ cvtsd2si %xmm0,%eax
ret
-END (memset)
-libc_hidden_builtin_def (memset)
+END(__lrint)
+weak_alias (__lrint, lrint)
diff --git a/sysdeps/x86_64/x32/fpu/s_lrintf.S b/sysdeps/x86_64/x32/fpu/s_lrintf.S
new file mode 100644
index 0000000000..bb5b1665bd
--- /dev/null
+++ b/sysdeps/x86_64/x32/fpu/s_lrintf.S
@@ -0,0 +1,27 @@
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__lrintf)
+ cvtss2si %xmm0,%eax /* Convert the float in %xmm0 to a 32-bit integer, rounding per the current MXCSR mode. */
+ ret /* The converted value is returned in %eax. */
+END(__lrintf)
+weak_alias (__lrintf, lrintf)
diff --git a/sysdeps/x86_64/x32/fpu/s_lrintl.S b/sysdeps/x86_64/x32/fpu/s_lrintl.S
new file mode 100644
index 0000000000..6bc8f6fdb9
--- /dev/null
+++ b/sysdeps/x86_64/x32/fpu/s_lrintl.S
@@ -0,0 +1,30 @@
+/* Round argument to nearest integral value according to current rounding
+ direction.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .text
+ENTRY(__lrintl)
+ fldt 8(%rsp) /* Push the 80-bit value stored just above the return address onto the x87 stack. */
+ fistpl -4(%rsp) /* Round per the x87 control word, store as a 32-bit integer below %rsp, and pop. */
+ fwait /* Let any pending x87 exception be raised before the result is read. */
+ movl -4(%rsp),%eax /* Reload the stored integer as the return value. */
+ ret
+END(__lrintl)
+weak_alias (__lrintl, lrintl)
diff --git a/sysdeps/x86_64/x32/gmp-mparam.h b/sysdeps/x86_64/x32/gmp-mparam.h
index 2125a70a85..df37442bfb 100644
--- a/sysdeps/x86_64/x32/gmp-mparam.h
+++ b/sysdeps/x86_64/x32/gmp-mparam.h
@@ -1,6 +1,6 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright (C) 2012-2015 Free Software Foundation, Inc.
+Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
diff --git a/sysdeps/x86_64/x32/nptl/tls.h b/sysdeps/x86_64/x32/nptl/tls.h
index bdc74a10a4..245623494b 100644
--- a/sysdeps/x86_64/x32/nptl/tls.h
+++ b/sysdeps/x86_64/x32/nptl/tls.h
@@ -1,5 +1,5 @@
/* Definition for thread-local data handling. nptl/x32 version.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/x32/sysdep.h b/sysdeps/x86_64/x32/sysdep.h
index 0cbc1a083f..17a1446796 100644
--- a/sysdeps/x86_64/x32/sysdep.h
+++ b/sysdeps/x86_64/x32/sysdep.h
@@ -1,5 +1,5 @@
/* Assembler macros for x32.
- Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ Copyright (C) 2012-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -90,7 +90,3 @@
# define R15_LP "r15d"
#endif /* __ASSEMBLER__ */
-
-/* On x32, it is not required to normalize a 64-bit value before using
- it as a 32-bit value. */
-#define REGISTER_CAST_INT32_TO_INT64 0