diff options
author | Thomas Schwinge <thomas@codesourcery.com> | 2013-12-20 09:32:04 +0100 |
---|---|---|
committer | Thomas Schwinge <thomas@codesourcery.com> | 2013-12-20 09:32:04 +0100 |
commit | 1ff35137add0e9637df9e3fcc21133674188c8c4 (patch) | |
tree | 8adaab857154e5d87bf4bcaa60fa71517b9cb6a2 /sysdeps/x86_64 | |
parent | c3e519d5360f96ecd7afe32c34e74483852d278d (diff) | |
parent | ddd183dfa34297ea2660882ba01f9f9cbb59f646 (diff) |
Merge commit 'refs/top-bases/t/hurdsig-SA_SIGINFO' into t/hurdsig-SA_SIGINFO
Diffstat (limited to 'sysdeps/x86_64')
35 files changed, 5440 insertions, 1952 deletions
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index 9232411951..5a83a53aae 100644 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -29,7 +29,7 @@ $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_compile -# This file is generated from configure.in by Autoconf. DO NOT EDIT! +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! # Local configure fragment for sysdeps/x86_64. diff --git a/sysdeps/x86_64/configure.in b/sysdeps/x86_64/configure.ac index c682f93c3e..c682f93c3e 100644 --- a/sysdeps/x86_64/configure.in +++ b/sysdeps/x86_64/configure.ac diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S index de5219a3b2..34680450c2 100644 --- a/sysdeps/x86_64/dl-tlsdesc.S +++ b/sysdeps/x86_64/dl-tlsdesc.S @@ -239,6 +239,6 @@ _dl_tlsdesc_resolve_hold: movq 64(%rsp), %rcx addq $72, %rsp cfi_adjust_cfa_offset (-72) - jmp *(%eax) + jmp *(%rax) cfi_endproc .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold diff --git a/sysdeps/x86_64/ffs.c b/sysdeps/x86_64/ffs.c index 27013d6ae6..07ee7dd4ab 100644 --- a/sysdeps/x86_64/ffs.c +++ b/sysdeps/x86_64/ffs.c @@ -35,4 +35,5 @@ __ffs (int x) return cnt + 1; } weak_alias (__ffs, ffs) +libc_hidden_def (__ffs) libc_hidden_builtin_def (ffs) diff --git a/sysdeps/x86_64/fpu/e_expl.S b/sysdeps/x86_64/fpu/e_expl.S index a919780390..1c21f03ddc 100644 --- a/sysdeps/x86_64/fpu/e_expl.S +++ b/sysdeps/x86_64/fpu/e_expl.S @@ -127,9 +127,20 @@ ENTRY(IEEE754_EXPL) #endif 3: FLDLOG /* 1 log2(base) */ fmul %st(1), %st /* 1 x log2(base) */ +#ifdef USE_AS_EXPM1L + /* Set round-to-nearest temporarily. 
*/ + fstcw -4(%rsp) + movl $0xf3ff, %edx + andl -4(%rsp), %edx + movl %edx, -8(%rsp) + fldcw -8(%rsp) +#endif frndint /* 1 i */ fld %st(1) /* 2 x */ frndint /* 2 xi */ +#ifdef USE_AS_EXPM1L + fldcw -4(%rsp) +#endif fld %st(1) /* 3 i */ fldt MO(c0) /* 4 c0 */ fld %st(2) /* 5 xi */ diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S index e10172674e..590223b60c 100644 --- a/sysdeps/x86_64/fpu/e_powl.S +++ b/sysdeps/x86_64/fpu/e_powl.S @@ -96,6 +96,9 @@ ENTRY(__ieee754_powl) cmpb $0x05, %ah je 15f // x is ±inf + cmpb $0x01, %ah + je 31f // x is NaN + fxch // y : x /* fistpll raises invalid exception for |y| >= 1L<<63. */ diff --git a/sysdeps/x86_64/fpu/fegetround.c b/sysdeps/x86_64/fpu/fegetround.c index 1a52b7ea67..c7cd046f39 100644 --- a/sysdeps/x86_64/fpu/fegetround.c +++ b/sysdeps/x86_64/fpu/fegetround.c @@ -30,3 +30,4 @@ fegetround (void) return cw & 0xc00; } +libm_hidden_def (fegetround) diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index d02618a0a1..7e612b0775 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -4,6 +4,9 @@ Test "acos (0.75)": ildouble: 1 ldouble: 1 +Test "acos (0xcp-4)": +ildouble: 1 +ldouble: 1 # acos_downward Test "acos_downward (-0)": @@ -14,6 +17,9 @@ ldouble: 1 Test "acos_downward (-0.5)": double: 1 idouble: 1 +Test "acos_downward (-0x8p-4)": +float: 1 +ifloat: 1 Test "acos_downward (-1)": float: 1 ifloat: 1 @@ -29,6 +35,26 @@ double: 1 float: 1 idouble: 1 ifloat: 1 +Test "acos_downward (0xf.fffffff8p-4)": +ildouble: 1 +ldouble: 1 +Test "acos_downward (0xf.ffffffffffff8p-4)": +ildouble: 1 +ldouble: 1 +Test "acos_downward (0xf.fffffffffffffffp-4)": +ildouble: 1 +ldouble: 1 +Test "acos_downward (0xf.fffffffffffp-4)": +ildouble: 1 +ldouble: 1 +Test "acos_downward (0xf.fffffp-4)": +ildouble: 1 +ldouble: 1 + +# acos_tonearest +Test "acos_tonearest (0xcp-4)": +ildouble: 1 +ldouble: 1 # acos_towardzero Test "acos_towardzero (-0)": @@ -39,6 +65,9 @@ 
ldouble: 1 Test "acos_towardzero (-0.5)": double: 1 idouble: 1 +Test "acos_towardzero (-0x8p-4)": +float: 1 +ifloat: 1 Test "acos_towardzero (-1)": float: 1 ifloat: 1 @@ -54,22 +83,100 @@ double: 1 float: 1 idouble: 1 ifloat: 1 +Test "acos_towardzero (0xf.fffffff8p-4)": +ildouble: 1 +ldouble: 1 +Test "acos_towardzero (0xf.ffffffffffff8p-4)": +ildouble: 1 +ldouble: 1 +Test "acos_towardzero (0xf.fffffffffffffffp-4)": +ildouble: 1 +ldouble: 1 +Test "acos_towardzero (0xf.fffffffffffp-4)": +ildouble: 1 +ldouble: 1 +Test "acos_towardzero (0xf.fffffp-4)": +ildouble: 1 +ldouble: 1 # acos_upward +Test "acos_upward (+0)": +double: 1 +idouble: 1 +Test "acos_upward (-0)": +double: 1 +idouble: 1 Test "acos_upward (-0.5)": ildouble: 1 ldouble: 1 +Test "acos_upward (-0x1p+0)": +double: 1 +idouble: 1 Test "acos_upward (0.5)": ildouble: 1 ldouble: 1 +Test "acos_upward (0x1.70ef54646d496p-56)": +double: 1 +idouble: 1 +Test "acos_upward (0x1.70ef54646d497p-56)": +double: 1 +idouble: 1 +Test "acos_upward (0x1.70ef54p-56)": +double: 1 +idouble: 1 +Test "acos_upward (0x1.70ef56p-56)": +double: 1 +idouble: 1 +Test "acos_upward (0xcp-4)": +ildouble: 1 +ldouble: 1 +Test "acos_upward (0xf.fffffffffffffffp-4)": +ildouble: 1 +ldouble: 1 # asin_downward Test "asin_downward (-0.5)": double: 1 idouble: 1 +Test "asin_downward (-0x1p+0)": +double: 1 +idouble: 1 +Test "asin_downward (-0x8p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "asin_downward (-0xf.fffffff8p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "asin_downward (-0xf.ffffffffffff8p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "asin_downward (-0xf.fffffffffffffffp-4)": +ildouble: 1 +ldouble: 1 +Test "asin_downward (-0xf.fffffffffffp-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "asin_downward (-0xf.fffffp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "asin_downward (0.5)": double: 1 idouble: 1 +Test "asin_downward (0x8p-4)": +float: 1 +ifloat: 1 Test 
"asin_downward (1.0)": float: 1 ifloat: 1 @@ -80,6 +187,9 @@ ldouble: 1 Test "asin_towardzero (-0.5)": double: 1 idouble: 1 +Test "asin_towardzero (-0x8p-4)": +float: 1 +ifloat: 1 Test "asin_towardzero (-1.0)": float: 1 ifloat: 1 @@ -88,6 +198,9 @@ ldouble: 1 Test "asin_towardzero (0.5)": double: 1 idouble: 1 +Test "asin_towardzero (0x8p-4)": +float: 1 +ifloat: 1 Test "asin_towardzero (1.0)": float: 1 ifloat: 1 @@ -95,22 +208,79 @@ ildouble: 1 ldouble: 1 # asin_upward +Test "asin_upward (-0x8p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "asin_upward (-0xf.fffffff8p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "asin_upward (-0xf.ffffffffffff8p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "asin_upward (-0xf.fffffffffffffffp-4)": +ildouble: 1 +ldouble: 1 +Test "asin_upward (-0xf.fffffffffffp-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "asin_upward (-0xf.fffffp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "asin_upward (-1.0)": float: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "asin_upward (0x1p+0)": +double: 1 +idouble: 1 +Test "asin_upward (0x8p-4)": +ildouble: 1 +ldouble: 1 # atan2 Test "atan2 (-0.75, -1.0)": float: 1 ifloat: 1 +Test "atan2 (-0x1.effe81f852716ffep-8, -0x7.57d1de0e5124664p-12)": +ildouble: 1 +ldouble: 1 +Test "atan2 (-0x1.effe81f852717p-8, -0x7.57d1ep-12)": +ildouble: 1 +ldouble: 1 +Test "atan2 (-0x1.effe82p-8, -0x7.57d1d8p-12)": +float: 1 +ifloat: 1 +Test "atan2 (-0xcp-4, -0x1p+0)": +float: 1 +ifloat: 1 +Test "atan2 (-0xf.fffffp+124, -0x4p-128)": +float: 1 +ifloat: 1 Test "atan2 (-max_value, -min_value)": float: 1 ifloat: 1 Test "atan2 (0.75, -1.0)": float: 1 ifloat: 1 +Test "atan2 (0x1.64p+0, 0xe.ep-4)": +float: 1 +ifloat: 1 +Test "atan2 (0xcp-4, -0x1p+0)": +float: 1 +ifloat: 1 Test "atan2 (1.390625, 0.9296875)": float: 1 ifloat: 1 @@ -121,6 +291,11 @@ float: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "atanh (0xcp-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 # 
cacos Test "Imaginary part of: cacos (+0 + 0.5 i)": @@ -4830,6 +5005,15 @@ ldouble: 1 Test "cbrt (-0.001)": ildouble: 1 ldouble: 1 +Test "cbrt (-0x1.bp+4)": +double: 1 +idouble: 1 +Test "cbrt (-0x4.189374bc6a7ef9d8p-12)": +ildouble: 1 +ldouble: 1 +Test "cbrt (-0x4.18937p-12)": +float: 1 +ifloat: 1 Test "cbrt (-27.0)": double: 1 idouble: 1 @@ -4841,6 +5025,14 @@ double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "cbrt (0xcp-4)": +double: 1 +idouble: 1 +Test "cbrt (0xf.ep-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 # ccos Test "Imaginary part of: ccos (-0.75 + 710.5 i)": @@ -5511,6 +5703,9 @@ idouble: 1 ifloat: 1 # cos +Test "cos (0x1.921fb4p+0)": +ildouble: 1 +ldouble: 1 Test "cos (M_PI_6l * 2.0)": double: 1 idouble: 1 @@ -5521,6 +5716,151 @@ idouble: 2 ifloat: 1 # cos_downward +Test "cos_downward (0x1.000000cf4a2a2p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.0000010b239a9p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.00000162a932bp+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.000002d452a1p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.000002p+0)": +double: 1 +idouble: 1 +Test "cos_downward (0x1.0c152382d7365p+0)": +double: 1 +idouble: 1 +Test "cos_downward (0x1.921fb4p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.921fb54442d18468p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.921fb54442d1846ap+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.921fb54442d18p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.921fb54442d19p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1.921fb6p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1p+0)": +double: 1 +idouble: 1 +Test "cos_downward (0x1p+120)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x1p+28)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x2.182a44p+0)": +double: 1 
+idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x2.182a4705ae6cap+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x2.182a4705ae6cb08cp+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x2.182a4705ae6cb09p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x2.182a4705ae6ccp+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x2.182a48p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x2.1e19e0c9bab24p+72)": +double: 1 +idouble: 1 +Test "cos_downward (0x2.1e19e4p+72)": +double: 1 +idouble: 1 +Test "cos_downward (0x2.1e19ep+72)": +double: 1 +idouble: 1 +Test "cos_downward (0x2p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x3p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x4p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x4p+48)": +double: 1 +idouble: 1 +Test "cos_downward (0x8p+0)": +ildouble: 2 +ldouble: 2 +Test "cos_downward (0x8p+1020)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0x9p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0xa.217bap+12)": +ildouble: 1 +ldouble: 1 +Test "cos_downward (0xap+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0xc.d4966d92d1708p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0xc.d4966d92d171p-4)": +double: 1 +idouble: 1 +Test "cos_downward (0xc.d4966p-4)": +double: 1 +idouble: 1 +Test "cos_downward (0xcp-4)": +double: 1 +idouble: 1 +Test "cos_downward (0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_downward (0xf.fffffp+124)": +double: 1 +idouble: 1 Test "cos_downward (1)": ildouble: 1 ldouble: 1 @@ -5553,7 +5893,105 @@ Test "cos_downward (9)": float: 1 ifloat: 1 +# cos_tonearest +Test "cos_tonearest (0x1.921fb4p+0)": +ildouble: 1 +ldouble: 1 + # cos_towardzero +Test "cos_towardzero (0x1.000000cf4a2a2p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_towardzero (0x1.0000010b239a9p+0)": +double: 1 +idouble: 1 
+ildouble: 1 +ldouble: 1 +Test "cos_towardzero (0x1.00000162a932bp+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_towardzero (0x1.000002d452a1p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_towardzero (0x1.000002p+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x1.0c152382d7365p+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x1.921fb4p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_towardzero (0x1.921fb54442d18p+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x1.921fb54442d19p+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x1p+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x1p+120)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x2.182a4705ae6ccp+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x2.182a48p+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x2.1e19e0c9bab24p+72)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x2.1e19e4p+72)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x2.1e19ep+72)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x2p+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x4p+0)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x4p+48)": +double: 1 +idouble: 1 +Test "cos_towardzero (0x8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_towardzero (0x8p+1020)": +double: 1 +idouble: 1 +Test "cos_towardzero (0xa.217bap+12)": +ildouble: 1 +ldouble: 1 +Test "cos_towardzero (0xc.d4966d92d1708p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_towardzero (0xc.d4966d92d171p-4)": +double: 1 +idouble: 1 +Test "cos_towardzero (0xc.d4966p-4)": +double: 1 +idouble: 1 +Test "cos_towardzero (0xcp-4)": +double: 1 +idouble: 1 +Test "cos_towardzero (0xf.fffffp+124)": +double: 1 +idouble: 1 Test "cos_towardzero (1)": ildouble: 1 ldouble: 1 @@ -5585,6 +6023,139 @@ ildouble: 1 ldouble: 1 # cos_upward +Test "cos_upward (-0x2p+64)": +double: 1 +idouble: 1 +Test "cos_upward (0x1.000004p+0)": +double: 1 +idouble: 1 +Test "cos_upward (0x1.000005bc7d86dp+0)": +double: 1 
+idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x1.000006p+0)": +double: 1 +idouble: 1 +Test "cos_upward (0x1.0c1522p+0)": +double: 1 +idouble: 1 +Test "cos_upward (0x1.0c152382d7366p+0)": +double: 1 +idouble: 1 +Test "cos_upward (0x1.0c1524p+0)": +double: 1 +idouble: 1 +Test "cos_upward (0x1.921fb4p+0)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "cos_upward (0x1.921fb54442d18468p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x1.921fb54442d1846ap+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x1.921fb54442d18p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x1.921fb54442d19p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x1.921fb6p+0)": +ildouble: 2 +ldouble: 2 +Test "cos_upward (0x1p+120)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x1p+28)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x2.182a44p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x2.182a4705ae6cap+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x2.182a4705ae6cb08cp+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x2.182a4705ae6cb09p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x2.182a4705ae6ccp+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x2.182a48p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x2p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x2p+64)": +double: 1 +idouble: 1 +Test "cos_upward (0x3p+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x4p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x5p+0)": +double: 1 +idouble: 1 +Test "cos_upward (0x6p+0)": +double: 1 +idouble: 1 +Test "cos_upward (0x7p+0)": +double: 1 +idouble: 1 +Test "cos_upward (0x8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x8p+1020)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "cos_upward (0x8p+124)": +double: 1 +idouble: 1 +Test "cos_upward (0x9p+0)": +ildouble: 1 +ldouble: 1 +Test 
"cos_upward (0xa.217bap+12)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "cos_upward (0xap+0)": +ildouble: 1 +ldouble: 1 +Test "cos_upward (0xc.d4967p-4)": +double: 1 +idouble: 1 +Test "cos_upward (0xf.ffffcp+124)": +double: 1 +idouble: 1 +Test "cos_upward (0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 Test "cos_upward (1)": float: 1 ifloat: 1 @@ -5614,7 +6185,32 @@ Test "cos_upward (9)": ildouble: 1 ldouble: 1 +# cosh +Test "cosh (-0x2.c5e3acp+8)": +double: 1 +idouble: 1 +Test "cosh (0x1.6p+4)": +ildouble: 1 +ldouble: 1 +Test "cosh (0x2.c5e3acp+8)": +double: 1 +idouble: 1 + # cosh_downward +Test "cosh_downward (-0x2.c5e3bp+8)": +double: 1 +idouble: 1 +Test "cosh_downward (0x1.6p+4)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "cosh_downward (0x1.7p+4)": +double: 1 +idouble: 1 +Test "cosh_downward (0x2.c5e3bp+8)": +double: 1 +idouble: 1 Test "cosh_downward (22)": float: 1 ifloat: 1 @@ -5632,11 +6228,34 @@ ildouble: 1 ldouble: 1 # cosh_tonearest +Test "cosh_tonearest (-0x2.c5e3acp+8)": +double: 1 +idouble: 1 +Test "cosh_tonearest (0x1.6p+4)": +ildouble: 1 +ldouble: 1 +Test "cosh_tonearest (0x2.c5e3acp+8)": +double: 1 +idouble: 1 Test "cosh_tonearest (22)": ildouble: 1 ldouble: 1 # cosh_towardzero +Test "cosh_towardzero (-0x2.c5e3bp+8)": +double: 1 +idouble: 1 +Test "cosh_towardzero (0x1.6p+4)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "cosh_towardzero (0x1.7p+4)": +double: 1 +idouble: 1 +Test "cosh_towardzero (0x2.c5e3bp+8)": +double: 1 +idouble: 1 Test "cosh_towardzero (22)": float: 1 ifloat: 1 @@ -5654,6 +6273,27 @@ ildouble: 1 ldouble: 1 # cosh_upward +Test "cosh_upward (-0x2.c5e3acd2922a6p+8)": +ildouble: 1 +ldouble: 1 +Test "cosh_upward (-0x2.c5e3bp+8)": +double: 1 +idouble: 1 +Test "cosh_upward (0x1.6p+4)": +ildouble: 1 +ldouble: 1 +Test "cosh_upward (0x1.7p+4)": +ildouble: 1 +ldouble: 1 +Test "cosh_upward (0x1.8p+4)": +double: 1 +idouble: 1 +Test "cosh_upward (0x2.c5e3acd2922a6p+8)": +ildouble: 1 +ldouble: 1 +Test 
"cosh_upward (0x2.c5e3bp+8)": +double: 1 +idouble: 1 Test "cosh_upward (22)": ildouble: 1 ldouble: 1 @@ -5990,6 +6630,11 @@ ldouble: 1 Test "Imaginary part of: ctanh (0 + 0x3.243f6cp-1 i)": float: 1 ifloat: 1 +Test "Imaginary part of: ctanh (0 + M_PI_4l i)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 Test "Imaginary part of: ctanh (0 + pi/4 i)": double: 1 idouble: 1 @@ -6114,14 +6759,26 @@ ildouble: 2 ldouble: 2 # erf +Test "erf (-0x8p-4)": +ildouble: 1 +ldouble: 1 +Test "erf (0x1.4p+0)": +double: 1 +idouble: 1 Test "erf (1.25)": double: 1 idouble: 1 # erfc +Test "erfc (-0x8p-4)": +float: 1 +ifloat: 1 Test "erfc (0.75)": float: 1 ifloat: 1 +Test "erfc (0x1.4p+0)": +ildouble: 1 +ldouble: 1 Test "erfc (0x1.f7303cp+1)": double: 1 idouble: 1 @@ -6132,6 +6789,39 @@ float: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "erfc (0x2p+0)": +double: 1 +idouble: 1 +Test "erfc (0x3.ee6078p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "erfc (0x4.2p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "erfc (0x6.4p+4)": +ildouble: 1 +ldouble: 1 +Test "erfc (0x7.fe8008p+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "erfc (0x7.fffd58p+0)": +ildouble: 1 +ldouble: 1 +Test "erfc (0x7.fffd59e26af37bc8p+0)": +ildouble: 1 +ldouble: 1 +Test "erfc (0x7.fffd59e26af37bcp+0)": +ildouble: 1 +ldouble: 1 +Test "erfc (0x7.fffd6p+0)": +float: 1 +ifloat: 1 Test "erfc (1.25)": ildouble: 1 ldouble: 1 @@ -6145,6 +6835,15 @@ ildouble: 1 ldouble: 1 # exp10 +Test "exp10 (-0x1.31p+8)": +double: 1 +idouble: 1 +Test "exp10 (-0x1p+0)": +double: 1 +idouble: 1 +Test "exp10 (-0x2.4p+4)": +double: 1 +idouble: 1 Test "exp10 (-1)": double: 1 idouble: 1 @@ -6157,6 +6856,15 @@ idouble: 1 Test "exp10 (0.75)": ildouble: 1 ldouble: 1 +Test "exp10 (0x2.4p+4)": +double: 1 +idouble: 1 +Test "exp10 (0x3p+0)": +double: 1 +idouble: 1 +Test "exp10 (0xcp-4)": +ildouble: 1 +ldouble: 1 Test "exp10 (3)": double: 1 idouble: 1 @@ -6165,6 +6873,26 @@ double: 1 idouble: 1 # exp_downward +Test 
"exp_downward (0x2.c5cp+8)": +ildouble: 1 +ldouble: 1 +Test "exp_downward (0x2p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "exp_downward (0x3.e8p+8)": +ildouble: 1 +ldouble: 1 +Test "exp_downward (0x3p+0)": +double: 1 +idouble: 1 +Test "exp_downward (0x5.8b9028p+4)": +double: 1 +idouble: 1 +Test "exp_downward (0xcp-4)": +double: 1 +idouble: 1 Test "exp_downward (1)": ildouble: 1 ldouble: 1 @@ -6180,6 +6908,50 @@ ildouble: 1 ldouble: 1 # exp_towardzero +Test "exp_towardzero (-0x2.e870a4p+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (-0x2.e870a7e5e88c1f0cp+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (-0x2.e870a7e5e88c1f1p+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (-0x2.e870a7e5e88c2p+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (-0x2.e870a7e5e88cp+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (-0x2.e870a8p+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (-0x2.ebe224p+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (-0x2.ebe228p+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (0x2.c5cp+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (0x2p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (0x3.e8p+8)": +ildouble: 1 +ldouble: 1 +Test "exp_towardzero (0x3p+0)": +double: 1 +idouble: 1 +Test "exp_towardzero (0x5.8b9028p+4)": +double: 1 +idouble: 1 +Test "exp_towardzero (0xcp-4)": +double: 1 +idouble: 1 Test "exp_towardzero (1)": ildouble: 1 ldouble: 1 @@ -6195,17 +6967,86 @@ ildouble: 1 ldouble: 1 # exp_upward +Test "exp_upward (-0x2.e870a4p+8)": +double: 1 +idouble: 1 +Test "exp_upward (-0x2.e870a7e5e88c1f0cp+8)": +ildouble: 1 +ldouble: 1 +Test "exp_upward (-0x2.e870a7e5e88c2p+8)": +double: 1 +idouble: 1 +Test "exp_upward (-0x2.e870a7e5e88cp+8)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "exp_upward (-0x2.e870a8p+8)": +double: 1 +idouble: 1 +Test "exp_upward (-0x2.ebe224p+8)": +double: 1 +idouble: 1 +Test "exp_upward (-0x2.ebe227861639p+8)": +double: 1 +idouble: 1 
+Test "exp_upward (-0x2.ebe228p+8)": +double: 1 +idouble: 1 +Test "exp_upward (-0x4.d2p+8)": +double: 1 +idouble: 1 +Test "exp_upward (-0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 +Test "exp_upward (-0xf.fffffp+124)": +double: 1 +idouble: 1 +Test "exp_upward (0x1p+0)": +double: 1 +idouble: 1 +Test "exp_upward (0x2.c5cp+8)": +double: 1 +idouble: 1 +Test "exp_upward (0x3.2p+4)": +double: 1 +idouble: 1 Test "exp_upward (1)": float: 1 ifloat: 1 # expm1 +Test "expm1 (-0x1p-64)": +ildouble: 1 +ldouble: 1 +Test "expm1 (-0x2.dp+4)": +ildouble: 1 +ldouble: 1 +Test "expm1 (-0x4p-12)": +ildouble: 1 +ldouble: 1 Test "expm1 (-45.0)": ildouble: 1 ldouble: 1 Test "expm1 (0.75)": double: 1 idouble: 1 +Test "expm1 (0x1.f4p+8)": +double: 1 +idouble: 1 +Test "expm1 (0x1p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1 (0x2.c5c4p+12)": +ildouble: 1 +ldouble: 1 +Test "expm1 (0xcp-4)": +double: 1 +idouble: 1 Test "expm1 (1)": double: 1 float: 1 @@ -6218,15 +7059,358 @@ Test "expm1 (500.0)": double: 1 idouble: 1 +# expm1_downward +Test "expm1_downward (-0x1p-100)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (-0x2.ep+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (-0x4.9p+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (-0x4.bp+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (-0x4p-4)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (-0x5p+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (-0x6.4p+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (0x1.f4p+8)": +double: 1 +idouble: 1 +Test "expm1_downward (0x1p+0)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (0x1p-100)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (0x1p-32)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (0x3.2p+4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_downward (0x4p-12)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (0x4p-52)": +ildouble: 1 +ldouble: 1 +Test "expm1_downward (0x7.fp+4)": +double: 1 
+idouble: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_downward (0x8p-32)": +ildouble: 1 +ldouble: 1 + +# expm1_tonearest +Test "expm1_tonearest (-0x1p-64)": +ildouble: 1 +ldouble: 1 +Test "expm1_tonearest (-0x2.dp+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_tonearest (-0x4p-12)": +ildouble: 1 +ldouble: 1 +Test "expm1_tonearest (0x1.f4p+8)": +double: 1 +idouble: 1 +Test "expm1_tonearest (0x1p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_tonearest (0x2.c5c4p+12)": +ildouble: 1 +ldouble: 1 +Test "expm1_tonearest (0xcp-4)": +double: 1 +idouble: 1 + +# expm1_towardzero +Test "expm1_towardzero (-0x1.86ap+16)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x1p-100)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x1p-20)": +ildouble: 2 +ldouble: 2 +Test "expm1_towardzero (-0x1p-32)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x1p-64)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x2.71p+12)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x2.dp+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x3.e8p+8)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x4.ap+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x4.ep+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x4.fp+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x4p-12)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0x4p-52)": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 +Test "expm1_towardzero (-0x8p-32)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0xf.fffffffffffffffp+16380)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (-0xf.fffffp+124)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (0x1.f4p+8)": +double: 1 +idouble: 1 +Test "expm1_towardzero (0x1p+0)": +ildouble: 1 +ldouble: 1 +Test 
"expm1_towardzero (0x1p-100)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (0x1p-32)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (0x3.2p+4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (0x4p-12)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (0x4p-52)": +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (0x7.fp+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_towardzero (0x8p-32)": +ildouble: 1 +ldouble: 1 + +# expm1_upward +Test "expm1_upward (-0x1.86ap+16)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x1p-100)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x1p-20)": +ildouble: 2 +ldouble: 2 +Test "expm1_upward (-0x1p-32)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x1p-64)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x2.71p+12)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x2.dp+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x3.e8p+8)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x4.ap+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x4.ep+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x4.fp+4)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x4p-12)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0x4p-52)": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 +Test "expm1_upward (-0x8p-32)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0xf.fffffffffffffffp+16380)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (-0xf.fffffp+124)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (0x1.f4p+8)": +ildouble: 1 +ldouble: 1 +Test "expm1_upward (0x1p-100)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "expm1_upward (0x1p-32)": +float: 1 +ifloat: 1 +Test "expm1_upward (0x1p-64)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "expm1_upward (0x4p-4)": 
+ildouble: 1 +ldouble: 1 +Test "expm1_upward (0x4p-52)": +float: 1 +ifloat: 1 +Test "expm1_upward (0x8p-32)": +float: 1 +ifloat: 1 + # gamma Test "gamma (-0.5)": ildouble: 1 ldouble: 1 +Test "gamma (-0x1p-10)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "gamma (-0x1p-15)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "gamma (-0x1p-20)": +double: 1 +idouble: 1 +Test "gamma (-0x1p-30)": +ildouble: 1 +ldouble: 1 +Test "gamma (-0x1p-5)": +double: 1 +idouble: 1 +Test "gamma (-0x2p-16)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "gamma (-0x4p-12)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "gamma (-0x4p-32)": +ildouble: 1 +ldouble: 1 +Test "gamma (-0x8p-4)": +ildouble: 1 +ldouble: 1 +Test "gamma (-0x8p-8)": +double: 1 +idouble: 1 Test "gamma (0.7)": double: 1 float: 1 idouble: 1 ifloat: 1 +Test "gamma (0x1.3333333333334p+0)": +ildouble: 1 +ldouble: 1 +Test "gamma (0x1p-10)": +float: 1 +ifloat: 1 +Test "gamma (0x1p-30)": +double: 1 +idouble: 1 +Test "gamma (0x1p-40)": +ildouble: 1 +ldouble: 1 +Test "gamma (0x4p-12)": +float: 1 +ifloat: 1 +Test "gamma (0x4p-32)": +double: 1 +idouble: 1 +Test "gamma (0xb.333333333333334p-4)": +ildouble: 1 +ldouble: 1 +Test "gamma (0xb.3333333333338p-4)": +ildouble: 1 +ldouble: 1 +Test "gamma (0xb.333333333333p-4)": +double: 1 +idouble: 1 +Test "gamma (0xb.33333p-4)": +double: 1 +idouble: 1 Test "gamma (1.2)": double: 1 float: 2 @@ -6242,6 +7426,42 @@ ifloat: 1 Test "hypot (-0.7, 12.4)": float: 1 ifloat: 1 +Test "hypot (-0xb.3333333333338p-4, -0xc.6666666666668p+0)": +ildouble: 1 +ldouble: 1 +Test "hypot (-0xb.3333333333338p-4, 0xc.6666666666668p+0)": +ildouble: 1 +ldouble: 1 +Test "hypot (-0xb.33333p-4, -0xc.666666666666p+0)": +ildouble: 1 +ldouble: 1 +Test "hypot (-0xb.33333p-4, 0xc.666666666666p+0)": +ildouble: 1 +ldouble: 1 +Test "hypot (-0xb.33334p-4, -0xc.6666666666668p+0)": +double: 1 +idouble: 1 +Test "hypot (-0xb.33334p-4, 0xc.6666666666668p+0)": +double: 1 +idouble: 1 +Test "hypot 
(-0xc.6666666666668p+0, -0xb.3333333333338p-4)": +ildouble: 1 +ldouble: 1 +Test "hypot (-0xc.6666666666668p+0, -0xb.33334p-4)": +double: 1 +idouble: 1 +Test "hypot (-0xc.6666666666668p+0, 0xb.3333333333338p-4)": +ildouble: 1 +ldouble: 1 +Test "hypot (-0xc.6666666666668p+0, 0xb.33334p-4)": +double: 1 +idouble: 1 +Test "hypot (-0xc.666666666666p+0, -0xb.33333p-4)": +ildouble: 1 +ldouble: 1 +Test "hypot (-0xc.666666666666p+0, 0xb.33333p-4)": +ildouble: 1 +ldouble: 1 Test "hypot (-12.4, -0.7)": float: 1 ifloat: 1 @@ -6254,6 +7474,54 @@ ifloat: 1 Test "hypot (0.7, 12.4)": float: 1 ifloat: 1 +Test "hypot (0x1.23456789abcdef02p-500, 0x1.23456789abcdefp-500)": +ildouble: 1 +ldouble: 1 +Test "hypot (0x1.23456789abcdefp-500, 0x1.23456789abcdef02p-500)": +ildouble: 1 +ldouble: 1 +Test "hypot (0x1.23456789abcdefp-500, 0x1.23456789abcdfp-500)": +ildouble: 1 +ldouble: 1 +Test "hypot (0x1.23456789abcdfp-500, 0x1.23456789abcdefp-500)": +ildouble: 1 +ldouble: 1 +Test "hypot (0xb.3333333333338p-4, -0xc.6666666666668p+0)": +ildouble: 1 +ldouble: 1 +Test "hypot (0xb.3333333333338p-4, 0xc.6666666666668p+0)": +ildouble: 1 +ldouble: 1 +Test "hypot (0xb.33333p-4, -0xc.666666666666p+0)": +ildouble: 1 +ldouble: 1 +Test "hypot (0xb.33333p-4, 0xc.666666666666p+0)": +ildouble: 1 +ldouble: 1 +Test "hypot (0xb.33334p-4, -0xc.6666666666668p+0)": +double: 1 +idouble: 1 +Test "hypot (0xb.33334p-4, 0xc.6666666666668p+0)": +double: 1 +idouble: 1 +Test "hypot (0xc.6666666666668p+0, -0xb.3333333333338p-4)": +ildouble: 1 +ldouble: 1 +Test "hypot (0xc.6666666666668p+0, -0xb.33334p-4)": +double: 1 +idouble: 1 +Test "hypot (0xc.6666666666668p+0, 0xb.3333333333338p-4)": +ildouble: 1 +ldouble: 1 +Test "hypot (0xc.6666666666668p+0, 0xb.33334p-4)": +double: 1 +idouble: 1 +Test "hypot (0xc.666666666666p+0, -0xb.33333p-4)": +ildouble: 1 +ldouble: 1 +Test "hypot (0xc.666666666666p+0, 0xb.33333p-4)": +ildouble: 1 +ldouble: 1 Test "hypot (12.4, -0.7)": float: 1 ifloat: 1 @@ -6265,6 +7533,23 @@ ifloat: 1 Test "j0 
(-0x1.001000001p+593)": ildouble: 2 ldouble: 2 +Test "j0 (-0x2.002000002p+592)": +ildouble: 2 +ldouble: 2 +Test "j0 (-0x4p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 +Test "j0 (-0xf.fffffp+124)": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 Test "j0 (-4.0)": double: 1 float: 1 @@ -6281,6 +7566,43 @@ ifloat: 2 Test "j0 (0x1p16382)": ildouble: 1 ldouble: 1 +Test "j0 (0x2p+0)": +float: 2 +ifloat: 2 +Test "j0 (0x4p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 +Test "j0 (0x4p+16380)": +ildouble: 1 +ldouble: 1 +Test "j0 (0x8p+0)": +float: 1 +ifloat: 1 +Test "j0 (0xap+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +Test "j0 (0xcp-4)": +float: 1 +ifloat: 1 +Test "j0 (0xe.be71dp+104)": +float: 2 +ifloat: 2 +Test "j0 (0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 +Test "j0 (0xf.fffffp+124)": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 Test "j0 (10.0)": double: 2 float: 1 @@ -6312,6 +7634,33 @@ idouble: 1 Test "j1 (0x1p16382)": ildouble: 1 ldouble: 1 +Test "j1 (0x2p+0)": +double: 1 +idouble: 1 +Test "j1 (0x4.ffcp+72)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "j1 (0x4p+16380)": +ildouble: 1 +ldouble: 1 +Test "j1 (0x8p+0)": +double: 1 +idouble: 1 +Test "j1 (0xap+0)": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 +Test "j1 (0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 +Test "j1 (0xf.fffffp+124)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 Test "j1 (10.0)": float: 2 ifloat: 2 @@ -6325,6 +7674,13 @@ double: 1 idouble: 1 # jn +Test "jn (0, -0x4p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 Test "jn (0, -4.0)": double: 1 float: 1 @@ -6335,6 +7691,27 @@ ldouble: 2 Test "jn (0, 0.75)": float: 1 ifloat: 1 +Test "jn (0, 0x2p+0)": +float: 2 +ifloat: 2 +Test "jn (0, 0x4p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 +Test "jn (0, 0x8p+0)": +float: 1 +ifloat: 1 +Test "jn (0, 0xap+0)": +double: 2 
+float: 1 +idouble: 2 +ifloat: 1 +Test "jn (0, 0xcp-4)": +float: 1 +ifloat: 1 Test "jn (0, 10.0)": double: 2 float: 1 @@ -6353,6 +7730,17 @@ ldouble: 2 Test "jn (0, 8.0)": float: 1 ifloat: 1 +Test "jn (1, 0x2p+0)": +double: 1 +idouble: 1 +Test "jn (1, 0x8p+0)": +double: 1 +idouble: 1 +Test "jn (1, 0xap+0)": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 Test "jn (1, 10.0)": float: 2 ifloat: 2 @@ -6364,6 +7752,9 @@ idouble: 1 Test "jn (1, 8.0)": double: 1 idouble: 1 +Test "jn (10, -0x1p+0)": +ildouble: 1 +ldouble: 1 Test "jn (10, -1.0)": ildouble: 1 ldouble: 1 @@ -6379,6 +7770,35 @@ idouble: 1 ifloat: 1 ildouble: 2 ldouble: 2 +Test "jn (10, 0x1p+0)": +ildouble: 1 +ldouble: 1 +Test "jn (10, 0x2p+0)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 +Test "jn (10, 0x2p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "jn (10, 0xap+0)": +double: 4 +float: 2 +idouble: 4 +ifloat: 2 +ildouble: 2 +ldouble: 2 +Test "jn (10, 0xcp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 Test "jn (10, 1.0)": ildouble: 1 ldouble: 1 @@ -6403,6 +7823,61 @@ idouble: 2 ifloat: 2 ildouble: 1 ldouble: 1 +Test "jn (2, 0x1p127)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "jn (2, 0x2.67a2a4p+0)": +float: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 +Test "jn (2, 0x2.67a2a5d2e36800fcp+0)": +ildouble: 1 +ldouble: 1 +Test "jn (2, 0x2.67a2a5d2e36801p+0)": +ildouble: 2 +ldouble: 2 +Test "jn (2, 0x2.67a2a5d2e3682p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "jn (2, 0x2.67a2a5d2e368p+0)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 +Test "jn (2, 0x2.67a2a8p+0)": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 +Test "jn (2, 0x8p+124)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "jn (2, 0xf.fffb1p+96)": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 +Test "jn (2, 0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 +Test "jn (2, 0xf.fffffp+124)": +double: 2 +float: 2 +idouble: 2 
+ifloat: 2 +ildouble: 1 +ldouble: 1 Test "jn (2, 2.4048255576957729)": double: 2 float: 1 @@ -6410,6 +7885,9 @@ idouble: 2 ifloat: 1 ildouble: 1 ldouble: 1 +Test "jn (3, -0x1p+0)": +ildouble: 1 +ldouble: 1 Test "jn (3, -1.0)": ildouble: 1 ldouble: 1 @@ -6423,6 +7901,54 @@ double: 1 float: 1 idouble: 1 ifloat: 1 +Test "jn (3, 0x1p+0)": +ildouble: 1 +ldouble: 1 +Test "jn (3, 0x2.67a2a4p+0)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 +Test "jn (3, 0x2.67a2a5d2e36801p+0)": +ildouble: 3 +ldouble: 3 +Test "jn (3, 0x2.67a2a5d2e3682p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "jn (3, 0x2.67a2a5d2e368p+0)": +double: 3 +idouble: 3 +Test "jn (3, 0x2.67a2a8p+0)": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 1 +ldouble: 1 +Test "jn (3, 0x2p+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "jn (3, 0x2p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "jn (3, 0xap+0)": +double: 3 +idouble: 3 +ildouble: 1 +ldouble: 1 +Test "jn (3, 0xcp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 Test "jn (3, 1.0)": ildouble: 1 ldouble: 1 @@ -6443,11 +7969,59 @@ double: 3 idouble: 3 ildouble: 1 ldouble: 1 +Test "jn (4, 0x2.67a2a4p+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "jn (4, 0x2.67a2a5d2e36800fcp+0)": +ildouble: 2 +ldouble: 2 +Test "jn (4, 0x2.67a2a5d2e36801p+0)": +ildouble: 1 +ldouble: 1 +Test "jn (4, 0x2.67a2a5d2e3682p+0)": +double: 1 +idouble: 1 +Test "jn (4, 0x2.67a2a5d2e368p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "jn (4, 0x2.67a2a8p+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "jn (4, 2.4048255576957729)": double: 1 idouble: 1 ildouble: 2 ldouble: 2 +Test "jn (5, 0x2.67a2a4p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "jn (5, 0x2.67a2a5d2e36800fcp+0)": +ildouble: 2 +ldouble: 2 +Test "jn (5, 0x2.67a2a5d2e36801p+0)": +ildouble: 1 +ldouble: 1 +Test "jn (5, 0x2.67a2a5d2e3682p+0)": +double: 1 +idouble: 1 +Test "jn (5, 0x2.67a2a5d2e368p+0)": 
+double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 +Test "jn (5, 0x2.67a2a8p+0)": +float: 2 +ifloat: 2 Test "jn (5, 2.4048255576957729)": double: 3 float: 1 @@ -6455,6 +8029,34 @@ idouble: 3 ifloat: 1 ildouble: 3 ldouble: 3 +Test "jn (6, 0x2.67a2a4p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "jn (6, 0x2.67a2a5d2e36800fcp+0)": +ildouble: 1 +ldouble: 1 +Test "jn (6, 0x2.67a2a5d2e36801p+0)": +ildouble: 1 +ldouble: 1 +Test "jn (6, 0x2.67a2a5d2e3682p+0)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 +Test "jn (6, 0x2.67a2a5d2e368p+0)": +double: 4 +idouble: 4 +Test "jn (6, 0x2.67a2a8p+0)": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 1 +ldouble: 1 Test "jn (6, 2.4048255576957729)": double: 4 float: 3 @@ -6462,11 +8064,55 @@ idouble: 4 ifloat: 3 ildouble: 1 ldouble: 1 +Test "jn (7, 0x2.67a2a4p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +Test "jn (7, 0x2.67a2a5d2e36800fcp+0)": +ildouble: 2 +ldouble: 2 +Test "jn (7, 0x2.67a2a5d2e36801p+0)": +ildouble: 1 +ldouble: 1 +Test "jn (7, 0x2.67a2a5d2e3682p+0)": +ildouble: 4 +ldouble: 4 +Test "jn (7, 0x2.67a2a5d2e368p+0)": +double: 3 +idouble: 3 +ildouble: 1 +ldouble: 1 +Test "jn (7, 0x2.67a2a8p+0)": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 Test "jn (7, 2.4048255576957729)": double: 3 float: 5 idouble: 3 ifloat: 5 +Test "jn (8, 0x2.67a2a4p+0)": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 +Test "jn (8, 0x2.67a2a5d2e3682p+0)": +double: 1 +idouble: 1 +Test "jn (8, 0x2.67a2a5d2e368p+0)": +double: 3 +idouble: 3 +Test "jn (8, 0x2.67a2a8p+0)": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 1 +ldouble: 1 Test "jn (8, 2.4048255576957729)": double: 3 float: 2 @@ -6474,6 +8120,28 @@ idouble: 3 ifloat: 2 ildouble: 2 ldouble: 2 +Test "jn (9, 0x2.67a2a4p+0)": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 +Test "jn (9, 0x2.67a2a5d2e3682p+0)": +double: 4 +idouble: 4 +Test "jn (9, 0x2.67a2a5d2e368p+0)": +double: 1 +idouble: 1 +ildouble: 1 
+ldouble: 1 +Test "jn (9, 0x2.67a2a8p+0)": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 Test "jn (9, 2.4048255576957729)": double: 1 float: 2 @@ -6486,11 +8154,79 @@ ldouble: 2 Test "lgamma (-0.5)": ildouble: 1 ldouble: 1 +Test "lgamma (-0x1p-10)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "lgamma (-0x1p-15)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "lgamma (-0x1p-20)": +double: 1 +idouble: 1 +Test "lgamma (-0x1p-30)": +ildouble: 1 +ldouble: 1 +Test "lgamma (-0x1p-5)": +double: 1 +idouble: 1 +Test "lgamma (-0x2p-16)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "lgamma (-0x4p-12)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "lgamma (-0x4p-32)": +ildouble: 1 +ldouble: 1 +Test "lgamma (-0x8p-4)": +ildouble: 1 +ldouble: 1 +Test "lgamma (-0x8p-8)": +double: 1 +idouble: 1 Test "lgamma (0.7)": double: 1 float: 1 idouble: 1 ifloat: 1 +Test "lgamma (0x1.3333333333334p+0)": +ildouble: 1 +ldouble: 1 +Test "lgamma (0x1p-10)": +float: 1 +ifloat: 1 +Test "lgamma (0x1p-30)": +double: 1 +idouble: 1 +Test "lgamma (0x1p-40)": +ildouble: 1 +ldouble: 1 +Test "lgamma (0x4p-12)": +float: 1 +ifloat: 1 +Test "lgamma (0x4p-32)": +double: 1 +idouble: 1 +Test "lgamma (0xb.333333333333334p-4)": +ildouble: 1 +ldouble: 1 +Test "lgamma (0xb.3333333333338p-4)": +ildouble: 1 +ldouble: 1 +Test "lgamma (0xb.333333333333p-4)": +double: 1 +idouble: 1 +Test "lgamma (0xb.33333p-4)": +double: 1 +idouble: 1 Test "lgamma (1.2)": double: 1 float: 2 @@ -6499,6 +8235,20 @@ ifloat: 2 ildouble: 1 ldouble: 1 +# log +Test "log (0x2.b7e154p+0)": +ildouble: 1 +ldouble: 1 +Test "log (0x2.b7e15p+0)": +float: 1 +ifloat: 1 +Test "log (0x5.e2d58d8b3bcdf1bp-4)": +ildouble: 1 +ldouble: 1 +Test "log (0x5.e2d59p-4)": +ildouble: 1 +ldouble: 1 + # log10 Test "log10 (0.75)": double: 1 @@ -6507,6 +8257,34 @@ idouble: 1 ifloat: 2 ildouble: 1 ldouble: 1 +Test "log10 (0x1.999998p-4)": +ildouble: 1 +ldouble: 1 +Test "log10 (0x1.9999999999999998p-4)": +ildouble: 1 
+ldouble: 1 +Test "log10 (0x1.999999999999ap-4)": +ildouble: 1 +ldouble: 1 +Test "log10 (0x2.b7e151628aed2a68p+0)": +ildouble: 1 +ldouble: 1 +Test "log10 (0x2.b7e151628aed2p+0)": +ildouble: 1 +ldouble: 1 +Test "log10 (0x2.b7e154p+0)": +float: 1 +ifloat: 1 +Test "log10 (0x4p-128)": +ildouble: 1 +ldouble: 1 +Test "log10 (0xcp-4)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 Test "log10 (e)": float: 1 ifloat: 1 @@ -6517,6 +8295,12 @@ ldouble: 1 Test "log1p (-0.25)": float: 1 ifloat: 1 +Test "log1p (-0x4p-4)": +float: 1 +ifloat: 1 +Test "log1p (0x1.b7e15p+0)": +float: 1 +ifloat: 1 # pow Test "pow (0x0.ffffffp0, -0x1p24)": @@ -6527,11 +8311,37 @@ ldouble: 1 Test "pow (0x0.ffffffp0, 0x1p24)": float: 1 ifloat: 1 +Test "pow (0x1.0000000000001p+0, 0x2.468adp+60)": +ildouble: 1 +ldouble: 1 +Test "pow (0x1.000002p+0, 0x1p+24)": +float: 1 +ifloat: 1 Test "pow (0x1.000002p0, 0x1p24)": float: 1 ifloat: 1 +Test "pow (0xf.ffffffffffff8p-4, 0x4.8d15ap+60)": +ildouble: 1 +ldouble: 1 +Test "pow (0xf.fffffp-4, -0x1p+24)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "pow (0xf.fffffp-4, 0x1p+24)": +float: 1 +ifloat: 1 # pow10 +Test "pow10 (-0x1.31p+8)": +double: 1 +idouble: 1 +Test "pow10 (-0x1p+0)": +double: 1 +idouble: 1 +Test "pow10 (-0x2.4p+4)": +double: 1 +idouble: 1 Test "pow10 (-1)": double: 1 idouble: 1 @@ -6544,6 +8354,15 @@ idouble: 1 Test "pow10 (0.75)": ildouble: 1 ldouble: 1 +Test "pow10 (0x2.4p+4)": +double: 1 +idouble: 1 +Test "pow10 (0x3p+0)": +double: 1 +idouble: 1 +Test "pow10 (0xcp-4)": +ildouble: 1 +ldouble: 1 Test "pow10 (3)": double: 1 idouble: 1 @@ -6556,6 +8375,25 @@ Test "pow_downward (1.5, 1.03125)": float: 1 ifloat: 1 +# pow_tonearest +Test "pow_tonearest (0x1.0000000000001p+0, 0x2.468adp+60)": +ildouble: 1 +ldouble: 1 +Test "pow_tonearest (0x1.000002p+0, 0x1p+24)": +float: 1 +ifloat: 1 +Test "pow_tonearest (0xf.ffffffffffff8p-4, 0x4.8d15ap+60)": +ildouble: 1 +ldouble: 1 +Test "pow_tonearest (0xf.fffffp-4, -0x1p+24)": +float: 1 
+ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "pow_tonearest (0xf.fffffp-4, 0x1p+24)": +float: 1 +ifloat: 1 + # pow_towardzero Test "pow_towardzero (1.5, 1.03125)": float: 1 @@ -6571,7 +8409,246 @@ Test "pow_upward (1.5, 1.03125)": ildouble: 1 ldouble: 1 +# sin +Test "sin (0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 + # sin_downward +Test "sin_downward (-0x1.921fb4p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (-0x1.921fb6p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (-0x2p+64)": +double: 1 +idouble: 1 +Test "sin_downward (-0x8.60a91c16b9b28p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (-0x8.60a91c16b9b2c24p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (-0x8.60a91c16b9b3p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (-0x8.60a91p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (-0x8.60a92p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x1.921fb54442d18468p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x1.921fb54442d1846ap+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x1.921fb54442d18p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x1.921fb54442d19p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x1p+28)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x2.1e19e0c9bab24p+72)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x2.1e19e4p+72)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x2.1e19ep+72)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x2.553534p+0)": +double: 1 +idouble: 1 +Test "sin_downward (0x2.5535376715bap+0)": +double: 1 +idouble: 1 +Test "sin_downward (0x2p+0)": +double: 1 +idouble: 1 +Test "sin_downward (0x3.be735c19be9fep+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x3.be735c19be9ffffcp+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward 
(0x3.be735c19beap+0)": +ildouble: 2 +ldouble: 2 +Test "sin_downward (0x3.be735cp+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x3.be736p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x3.ec2a0250032a0004p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x3.ec2a0250032a2p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x3.ec2a0250032ap+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x3.ec2a04p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x3.ec2ap+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x3p+0)": +ildouble: 2 +ldouble: 2 +Test "sin_downward (0x4.093388p-4)": +double: 1 +idouble: 1 +Test "sin_downward (0x4.1237e153f7080008p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4.1237e153f7084p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4.1237e153f708p+0)": +double: 1 +idouble: 1 +Test "sin_downward (0x4.1237e8p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4.1237ep+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4.c92d08p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4.c92d0ffa4bf00008p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4.c92d0ffa4bf04p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4.c92d0ffa4bfp+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4.c92d1p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x4p+48)": +double: 1 +idouble: 1 +Test "sin_downward (0x5.fbec7477d4a80008p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x5.fbec7477d4a84p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x5.fbec7477d4a8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x5.fbec78p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x5.fbec7p+0)": +ildouble: 1 +ldouble: 1 
+Test "sin_downward (0x5p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x6p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0x8p+0)": +double: 1 +idouble: 1 +Test "sin_downward (0x8p+1020)": +double: 1 +idouble: 1 +Test "sin_downward (0x9p+0)": +double: 1 +idouble: 1 +Test "sin_downward (0xap+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0xc.d4966d92d1708p-4)": +double: 1 +idouble: 1 +Test "sin_downward (0xc.d4966p-4)": +double: 1 +idouble: 1 +Test "sin_downward (0xe.ef3af1b5d8p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_downward (0xf.ffffcp+124)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_downward (0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "sin_downward (0xf.fffffp+124)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 Test "sin_downward (1)": ildouble: 1 ldouble: 1 @@ -6593,7 +8670,137 @@ ifloat: 1 ildouble: 1 ldouble: 1 +# sin_tonearest +Test "sin_tonearest (0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 + # sin_towardzero +Test "sin_towardzero (-0x1.921fb54442d18468p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (-0x1.921fb54442d1846ap+0)": +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (-0x1.921fb54442d18p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (-0x1.921fb54442d19p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (-0x2p+64)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x1.921fb54442d18468p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x1.921fb54442d1846ap+0)": +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x1.921fb54442d18p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x1.921fb54442d19p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x2.1e19e4p+72)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x2.1e19ep+72)": +double: 1 +idouble: 1 +Test "sin_towardzero 
(0x2.553534p+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x2.5535376715bap+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x2p+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x2p+64)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x3.be735c19beap+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x3.be735cp+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x3.ec2a0250032ap+0)": +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x3.ec2a04p+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x4.093388p-4)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x4.1237e153f708p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0x4.1237e8p+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x4.1237ep+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x4.c92d0ffa4bf04p+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x4.c92d0ffa4bfp+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x4p+48)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x5.fbec7p+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x8p+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x8p+1020)": +double: 1 +idouble: 1 +Test "sin_towardzero (0x9p+0)": +double: 1 +idouble: 1 +Test "sin_towardzero (0xb.fa09ap+100)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0xc.d4966d92d1708p-4)": +double: 1 +idouble: 1 +Test "sin_towardzero (0xc.d4966p-4)": +double: 1 +idouble: 1 +Test "sin_towardzero (0xf.ffffcp+124)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_towardzero (0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 Test "sin_towardzero (1)": ildouble: 1 ldouble: 1 @@ -6622,6 +8829,258 @@ ildouble: 1 ldouble: 1 # sin_upward +Test "sin_upward (-0x1.921fb4p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x1.921fb54442d18468p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x1.921fb54442d1846ap+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x1.921fb54442d18p+0)": 
+double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x1.921fb54442d19p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x1.921fb6p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x2p+64)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x8.60a91c16b9b28p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x8.60a91c16b9b2c23p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x8.60a91c16b9b2c24p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x8.60a91c16b9b3p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x8.60a91p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (-0x8.60a92p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x1.921fb4p+0)": +double: 1 +idouble: 1 +Test "sin_upward (0x1.921fb6p+0)": +double: 1 +idouble: 1 +Test "sin_upward (0x1p+0)": +double: 1 +idouble: 1 +Test "sin_upward (0x1p+120)": +double: 1 +idouble: 1 +Test "sin_upward (0x1p+28)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x2.1e19e0c9bab24p+72)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x2.1e19e4p+72)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x2.1e19ep+72)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x2.5535376715b9ep+0)": +double: 1 +idouble: 1 +Test "sin_upward (0x2.5535376715bap+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x2.553538p+0)": +double: 1 +idouble: 1 +Test "sin_upward (0x2p+64)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.be735c19be9fep+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.be735c19be9ffffcp+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.be735c19beap+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.be735cp+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.be736p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.ec2a0250032a0004p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.ec2a0250032a2p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.ec2a0250032ap+0)": 
+ildouble: 2 +ldouble: 2 +Test "sin_upward (0x3.ec2a04p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3.ec2ap+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x3p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.093385688a2d1508p-4)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.093385688a2d4p-4)": +double: 1 +idouble: 1 +Test "sin_upward (0x4.093385688a2dp-4)": +double: 1 +idouble: 1 +Test "sin_upward (0x4.09338p-4)": +double: 1 +idouble: 1 +Test "sin_upward (0x4.1237e153f7080008p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.1237e153f7084p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.1237e153f708p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.1237e8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.1237ep+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.c92d08p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.c92d0ffa4bf00008p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.c92d0ffa4bf04p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4.c92d0ffa4bfp+0)": +double: 1 +idouble: 1 +Test "sin_upward (0x4.c92d1p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x4p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x5.fbec7477d4a80008p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x5.fbec7477d4a84p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x5.fbec7477d4a8p+0)": +ildouble: 2 +ldouble: 2 +Test "sin_upward (0x5.fbec78p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x5.fbec7p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x5p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x6p+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0x7p+0)": +double: 1 +idouble: 1 +Test "sin_upward (0x8.60a91c16b9b3p-4)": +double: 1 +idouble: 1 +Test "sin_upward (0x8.60a91p-4)": +double: 1 +idouble: 1 +Test "sin_upward (0x8.60a92p-4)": +double: 1 +idouble: 1 +Test "sin_upward 
(0x8p+124)": +double: 1 +idouble: 1 +Test "sin_upward (0xap+0)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0xb.fa09ap+100)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0xc.d4966d92d171p-4)": +double: 1 +idouble: 1 +Test "sin_upward (0xc.d4967p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sin_upward (0xcp-4)": +double: 1 +idouble: 1 +Test "sin_upward (0xe.ef3af1b5d8008p-4)": +double: 1 +idouble: 1 +Test "sin_upward (0xe.ef3af1b5d8p-4)": +double: 1 +idouble: 1 +Test "sin_upward (0xe.ef3afp-4)": +double: 1 +idouble: 1 +Test "sin_upward (0xe.ef3bp-4)": +double: 1 +idouble: 1 +Test "sin_upward (0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 +Test "sin_upward (0xf.fffffp+124)": +ildouble: 1 +ldouble: 1 Test "sin_upward (1)": float: 1 ifloat: 1 @@ -6661,6 +9120,12 @@ float: 1 ifloat: 1 # sincos +Test "sincos (0x1.921fb4p+0) extra output 2": +ildouble: 1 +ldouble: 1 +Test "sincos (0xf.ffffffffffff8p+1020) extra output 1": +ildouble: 1 +ldouble: 1 Test "sincos (M_PI_6l*2.0) extra output 1": double: 1 float: 1 @@ -6678,6 +9143,22 @@ ildouble: 1 ldouble: 1 # sinh_downward +Test "sinh_downward (0x1.6p+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sinh_downward (0x1.7p+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sinh_downward (0x1.8p+4)": +ildouble: 1 +ldouble: 1 +Test "sinh_downward (0x8p-32)": +ildouble: 1 +ldouble: 1 Test "sinh_downward (22)": float: 1 ifloat: 1 @@ -6695,6 +9176,22 @@ ildouble: 2 ldouble: 2 # sinh_towardzero +Test "sinh_towardzero (0x1.6p+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sinh_towardzero (0x1.7p+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "sinh_towardzero (0x1.8p+4)": +ildouble: 1 +ldouble: 1 +Test "sinh_towardzero (0x8p-32)": +ildouble: 1 +ldouble: 1 Test "sinh_towardzero (22)": float: 1 ifloat: 1 @@ -6712,6 +9209,23 @@ ildouble: 2 ldouble: 2 # sinh_upward +Test "sinh_upward (0x1.6p+4)": +ildouble: 1 +ldouble: 1 +Test "sinh_upward 
(0x1.7p+4)": +ildouble: 1 +ldouble: 1 +Test "sinh_upward (0x1.8p+4)": +double: 1 +idouble: 1 +Test "sinh_upward (0x8p-32)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "sinh_upward (0xcp-4)": +ildouble: 1 +ldouble: 1 Test "sinh_upward (22)": ildouble: 1 ldouble: 1 @@ -6720,14 +9234,209 @@ ildouble: 1 ldouble: 1 # tan +Test "tan (0x1p+0)": +ildouble: 1 +ldouble: 1 Test "tan (0x1p16383)": ildouble: 1 ldouble: 1 +Test "tan (0x2.1e19e0c9bab24p+72)": +ildouble: 1 +ldouble: 1 +Test "tan (0x2p+0)": +ildouble: 1 +ldouble: 1 +Test "tan (0x8p+0)": +ildouble: 1 +ldouble: 1 +Test "tan (0x8p+16380)": +ildouble: 1 +ldouble: 1 Test "tan (1e22)": ildouble: 1 ldouble: 1 # tan_downward +Test "tan_downward (-0x2p+64)": +double: 1 +idouble: 1 +Test "tan_downward (-0xc.908p-4)": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90cp-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90ep-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90f8p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90fcp-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90fd8p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90fdap-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90fdbp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90fdcp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90fdp-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90fep-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.90fp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.91p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 
+ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.92p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.94p-4)": +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.98p-4)": +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.9p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (-0xc.ap-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (0x1p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (0x2.1e19e0c9bab24p+72)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (0x2p+0)": +double: 1 +idouble: 1 +Test "tan_downward (0x3p+0)": +double: 1 +idouble: 1 +Test "tan_downward (0x4p+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (0x6p+0)": +double: 1 +idouble: 1 +Test "tan_downward (0x7p+0)": +double: 1 +idouble: 1 +Test "tan_downward (0x8p+0)": +ildouble: 1 +ldouble: 1 +Test "tan_downward (0x8p+1020)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_downward (0xc.908p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tan_downward (0xc.90cp-4)": +double: 1 +idouble: 1 +Test "tan_downward (0xc.90fcp-4)": +double: 1 +idouble: 1 +Test "tan_downward (0xc.90fdaa22168c8p-4)": +double: 1 +idouble: 1 +Test "tan_downward (0xc.90fdp-4)": +double: 1 +idouble: 1 +Test "tan_downward (0xc.92p-4)": +float: 1 +ifloat: 1 +Test "tan_downward (0xc.94p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tan_downward (0xc.98p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tan_downward (0xc.ap-4)": +float: 1 +ifloat: 1 +Test "tan_downward (0xcp-4)": +double: 1 +idouble: 1 Test "tan_downward (1)": float: 1 ifloat: 1 @@ -6760,6 +9469,21 @@ float: 1 ifloat: 1 # tan_tonearest +Test "tan_tonearest (0x1p+0)": +ildouble: 1 +ldouble: 1 +Test "tan_tonearest (0x2.1e19e0c9bab24p+72)": +ildouble: 1 +ldouble: 1 +Test "tan_tonearest (0x2p+0)": +ildouble: 1 +ldouble: 1 +Test "tan_tonearest (0x8p+0)": +ildouble: 1 
+ldouble: 1 +Test "tan_tonearest (0x8p+16380)": +ildouble: 1 +ldouble: 1 Test "tan_tonearest (1)": ildouble: 1 ldouble: 1 @@ -6771,6 +9495,96 @@ ildouble: 1 ldouble: 1 # tan_towardzero +Test "tan_towardzero (-0x2p+64)": +double: 1 +idouble: 1 +Test "tan_towardzero (-0xc.908p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tan_towardzero (-0xc.90cp-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (-0xc.90fcp-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (-0xc.90fdp-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (-0xc.94p-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (-0xc.98p-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (0x1p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_towardzero (0x2.1e19e0c9bab24p+72)": +ildouble: 1 +ldouble: 1 +Test "tan_towardzero (0x2.1e19e4p+72)": +double: 1 +idouble: 1 +Test "tan_towardzero (0x2.1e19ep+72)": +double: 1 +idouble: 1 +Test "tan_towardzero (0x2p+64)": +double: 1 +idouble: 1 +Test "tan_towardzero (0x5p+0)": +double: 1 +idouble: 1 +Test "tan_towardzero (0x7p+0)": +double: 1 +idouble: 1 +Test "tan_towardzero (0x8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_towardzero (0x9p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tan_towardzero (0xc.908p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tan_towardzero (0xc.90cp-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (0xc.90fcp-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (0xc.90fdaa22168c8p-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (0xc.90fdp-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (0xc.94p-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (0xc.98p-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (0xcp-4)": +double: 1 +idouble: 1 +Test "tan_towardzero (0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 +Test "tan_towardzero (0xf.fffffp+124)": +double: 1 +idouble: 1 Test "tan_towardzero (1)": ildouble: 1 ldouble: 1 @@ -6803,6 +9617,196 @@ ildouble: 1 ldouble: 1 
# tan_upward +Test "tan_upward (-0xc.908p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90cp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90ep-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90f8p-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90fcp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90fd8p-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90fdap-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90fdbp-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90fdcp-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90fdp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90fep-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.90fp-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.91p-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.92p-4)": +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.94p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.98p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.9p-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (-0xc.ap-4)": +ildouble: 1 +ldouble: 1 +Test "tan_upward (0x1p+0)": +float: 1 +ifloat: 1 +Test "tan_upward (0x2.1e19e4p+72)": +double: 1 +idouble: 1 +Test "tan_upward (0x2.1e19ep+72)": +double: 1 +idouble: 1 +Test "tan_upward (0x2p+64)": +double: 1 +idouble: 1 +Test "tan_upward (0x4p+0)": +double: 1 +idouble: 1 +Test "tan_upward (0x5p+0)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "tan_upward (0x7p+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (0x8p+0)": +double: 1 +idouble: 1 +Test "tan_upward (0x9p+0)": 
+double: 1 +idouble: 1 +Test "tan_upward (0xap+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (0xc.908p-4)": +float: 1 +ifloat: 1 +Test "tan_upward (0xc.90ep-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.90f8p-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.90fd8p-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.90fdap-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.90fdbp-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.90fdcp-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.90fep-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.90fp-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.91p-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.92p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tan_upward (0xc.94p-4)": +float: 1 +ifloat: 1 +Test "tan_upward (0xc.98p-4)": +float: 1 +ifloat: 1 +Test "tan_upward (0xc.9p-4)": +double: 1 +idouble: 1 +Test "tan_upward (0xc.ap-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tan_upward (0xcp-4)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tan_upward (0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 +Test "tan_upward (0xf.fffffp+124)": +double: 1 +idouble: 1 Test "tan_upward (1)": float: 1 ifloat: 1 @@ -6849,6 +9853,11 @@ ldouble: 1 Test "tgamma (-0x0.ffffffp0)": float: 1 ifloat: 1 +Test "tgamma (-0x1.000002p+0)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x1.000002p0)": double: 2 idouble: 2 @@ -6859,9 +9868,67 @@ float: 2 ifloat: 2 ildouble: 1 ldouble: 1 +Test "tgamma (-0x1.3ffffep+4)": +float: 2 +ifloat: 2 +Test "tgamma (-0x1.3ffffffffffffffep+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x1.3ffffffffffffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x1.4000000000001p+4)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "tgamma (-0x1.400002p+4)": +float: 1 +ifloat: 1 Test "tgamma (-0x1.5800000080001p+7)": ildouble: 2 ldouble: 2 +Test "tgamma (-0x1.8p+0)": +ildouble: 1 +ldouble: 1 +Test "tgamma 
(-0x1.dffffep+4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x1.dffffffffffffffep+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x1.e000000000000002p+4)": +ildouble: 2 +ldouble: 2 +Test "tgamma (-0x1.e000000000001p+4)": +double: 3 +idouble: 3 +Test "tgamma (-0x1.e00002p+4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 +Test "tgamma (-0x1.f3fffep+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x1.f3fffffffffffp+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x1.f400000000000002p+8)": +ildouble: 3 +ldouble: 3 +Test "tgamma (-0x1.f40002p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x1.fffffffffffffp+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x1.fffffffffffffp0)": ildouble: 1 ldouble: 1 @@ -6911,14 +9978,29 @@ ldouble: 3 Test "tgamma (-0x1p-24)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x2.0000000000000004p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x2.0000000000000004p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x2.0000000000002p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x2.0000000000002p0)": double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "tgamma (-0x2.000004p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x2.000004p0)": double: 2 float: 1 @@ -6926,6 +10008,71 @@ idouble: 2 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (-0x2.146544p+4)": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.7ffffcp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.7fffffffffffep+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.8000000000002p+4)": +double: 1 +idouble: 1 +Test "tgamma (-0x2.800004p+4)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.8ffffcp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.8fffffffffffep+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.8ffffffffffffffcp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.8p+0)": +double: 1 +float: 2 +idouble: 1 +ifloat: 
2 +Test "tgamma (-0x2.9000000000000004p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.9000000000002p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.900004p+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.9ffffcp+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.a00004p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.edfffffffffffffcp+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.ee00000000002p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x2.fffffcp+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 Test "tgamma (-0x2.fffffcp0)": double: 1 float: 1 @@ -6978,22 +10125,78 @@ ldouble: 1 Test "tgamma (-0x2ed.fffffffffffffcp0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x3.0000000000002p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x3.0000000000002p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x3.000004p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 Test "tgamma (-0x3.000004p0)": double: 2 float: 1 idouble: 2 ifloat: 1 +Test "tgamma (-0x3.1ffffcp+4)": +double: 1 +idouble: 1 +Test "tgamma (-0x3.1fffffffffffep+4)": +double: 3 +idouble: 3 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x3.2000000000000004p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x3.2000000000002p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x3.200004p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x3.8p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tgamma (-0x3.e7fffcp+8)": +ildouble: 3 +ldouble: 3 +Test "tgamma (-0x3.e7fffffffffffffcp+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x3.e800000000000004p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x3.e800000000002p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x3.e80004p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x3.fffffcp+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 Test "tgamma (-0x3.fffffcp0)": double: 1 float: 1 idouble: 1 ifloat: 1 +Test "tgamma (-0x3.ffffffffffffep+0)": +double: 2 +idouble: 2 Test "tgamma (-0x3.ffffffffffffep0)": double: 2 idouble: 2 +Test "tgamma 
(-0x3.fffffffffffffffcp+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x3.fffffffffffffffcp0)": ildouble: 1 ldouble: 1 @@ -7014,12 +10217,39 @@ ldouble: 1 Test "tgamma (-0x3e8.00000000000004p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x4.0000000000000008p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x4.0000000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x4.000008p+0)": +float: 1 +ifloat: 1 Test "tgamma (-0x4.000008p0)": float: 1 ifloat: 1 +Test "tgamma (-0x4.8p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x4.e200000000000008p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x4.e200000000004p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x4.e20008p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x4.fffff8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x4.fffff8p0)": double: 1 float: 1 @@ -7027,18 +10257,34 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (-0x4.ffffffffffffcp+0)": +double: 1 +idouble: 1 Test "tgamma (-0x4.ffffffffffffcp0)": double: 1 idouble: 1 Test "tgamma (-0x4e2.00000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x5.0000000000000008p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x5.0000000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x5.0000000000004p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x5.0000000000004p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x5.000008p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x5.000008p0)": double: 1 float: 2 @@ -7046,18 +10292,77 @@ idouble: 1 ifloat: 2 ildouble: 1 ldouble: 1 +Test "tgamma (-0x5.8p+0)": +double: 1 +idouble: 1 +Test "tgamma (-0x5.dbfff8p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x5.dbffffffffffcp+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x5.dbfffffffffffff8p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x5.dc00000000004p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x5.ffffffffffffcp+0)": +double: 1 +idouble: 1 Test "tgamma 
(-0x5.ffffffffffffcp0)": double: 1 idouble: 1 Test "tgamma (-0x5db.fffffffffffff8p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x6.0000000000000008p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x6.0000000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x6.000008p+0)": +float: 2 +ifloat: 2 Test "tgamma (-0x6.000008p0)": float: 2 ifloat: 2 +Test "tgamma (-0x6.3ffff8p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x6.3fffffffffffcp+4)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x6.3ffffffffffffff8p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x6.4000000000000008p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x6.4000000000004p+4)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "tgamma (-0x6.400008p+4)": +double: 1 +idouble: 1 +Test "tgamma (-0x6.8p+0)": +float: 1 +ifloat: 1 +Test "tgamma (-0x6.d600000000000008p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x6.fffff8p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 Test "tgamma (-0x6.fffff8p0)": double: 2 float: 1 @@ -7065,6 +10370,11 @@ idouble: 2 ifloat: 1 ildouble: 2 ldouble: 2 +Test "tgamma (-0x6.ffffffffffffcp+0)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x6.ffffffffffffcp0)": double: 2 idouble: 2 @@ -7089,14 +10399,29 @@ ldouble: 2 Test "tgamma (-0x6d6.00000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x7.0000000000000008p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0x7.0000000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0x7.0000000000004p+0)": +double: 3 +idouble: 3 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x7.0000000000004p0)": double: 3 idouble: 3 ildouble: 1 ldouble: 1 +Test "tgamma (-0x7.000008p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x7.000008p0)": double: 1 float: 1 @@ -7104,6 +10429,20 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (-0x7.8p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma 
(-0x7.fffff8p+0)": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x7.fffff8p0)": double: 3 float: 1 @@ -7111,25 +10450,80 @@ idouble: 3 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (-0x7.ffffffffffffcp+0)": +double: 3 +idouble: 3 Test "tgamma (-0x7.ffffffffffffcp0)": double: 3 idouble: 3 +Test "tgamma (-0x7.fffffffffffffff8p+0)": +ildouble: 4 +ldouble: 4 Test "tgamma (-0x7.fffffffffffffff8p0)": ildouble: 4 ldouble: 4 +Test "tgamma (-0x8.000000000000001p+0)": +ildouble: 2 +ldouble: 2 Test "tgamma (-0x8.000000000000001p0)": ildouble: 2 ldouble: 2 +Test "tgamma (-0x8.00001p+0)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x8.00001p0)": double: 2 idouble: 2 ildouble: 1 ldouble: 1 +Test "tgamma (-0x8.8p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x8p-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tgamma (-0x9.5fffffffffff8p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x9.5ffffffffffffffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x9.6000000000008p+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0x9.60001p+4)": +double: 1 +idouble: 1 +Test "tgamma (-0x9.8p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 +Test "tgamma (-0x9.ffffffffffff8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (-0x9.ffffffffffff8p0)": double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "tgamma (-0x9.fffffp+0)": +float: 1 +ifloat: 1 Test "tgamma (-0x9.fffffp0)": float: 1 ifloat: 1 @@ -7144,14 +10538,99 @@ double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "tgamma (-0xa.000000000000001p+0)": +ildouble: 2 +ldouble: 2 Test "tgamma (-0xa.000000000000001p0)": ildouble: 2 ldouble: 2 +Test "tgamma (-0xa.00001p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (-0xa.00001p0)": double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "tgamma (-0xa.c000000400008p+4)": +ildouble: 2 +ldouble: 2 +Test "tgamma 
(-0xa.c0001p+4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.4ffffffffffffffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.500000000000001p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.5000000000008p+4)": +ildouble: 2 +ldouble: 2 +Test "tgamma (-0xb.5ffffffffffffffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.600000000000001p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.6fffffffffff8p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.6ffffffffffffffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.7fffffffffff8p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.7ffffffffffffffp+4)": +ildouble: 2 +ldouble: 2 +Test "tgamma (-0xb.8000000000008p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.bffffffffffffffp+4)": +ildouble: 2 +ldouble: 2 +Test "tgamma (-0xb.c00000000000001p+4)": +ildouble: 3 +ldouble: 3 +Test "tgamma (-0xb.c000000000008p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.d00000000000001p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.d000000000008p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.dffffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.e00000000000001p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.e000000000008p+4)": +ildouble: 2 +ldouble: 2 +Test "tgamma (-0xb.e0001p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.effffffffffffffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.effffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.f00000000000001p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xb.f000000000008p+4)": +ildouble: 1 +ldouble: 1 Test "tgamma (-0xb4.ffffffffffffffp0)": ildouble: 1 ldouble: 1 @@ -7200,6 +10679,30 @@ ldouble: 1 Test "tgamma (-0xbf.00000000000001p0)": ildouble: 1 ldouble: 1 +Test "tgamma (-0xf.9fffffffffff8p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xf.9ffffffffffffffp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xf.a00000000000001p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xf.a000000000008p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma 
(-0xf.a0001p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xf.ffffffffffff8p-4)": +double: 1 +idouble: 1 +Test "tgamma (-0xf.fffffffffffffffp-4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (-0xf.fffffp-4)": +float: 1 +ifloat: 1 Test "tgamma (-0xf9.ffffffffffffffp0)": ildouble: 1 ldouble: 1 @@ -7261,14 +10764,52 @@ double: 1 float: 1 idouble: 1 ifloat: 1 +Test "tgamma (0x1.28p+4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (0x1.38p+4)": +double: 2 +idouble: 2 +Test "tgamma (0x1.78p+4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (0x1.d8p+4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (0x1.e8p+4)": +float: 1 +ifloat: 1 +Test "tgamma (0x1.fffffep+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (0x1.fffffep0)": float: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (0x1.fffffffffffffffep+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x1.fffffffffffffffep0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x1.fffffffffffffp+0)": +double: 1 +idouble: 1 Test "tgamma (0x1.fffffffffffffp0)": double: 1 idouble: 1 @@ -7281,67 +10822,164 @@ idouble: 1 Test "tgamma (0x1p-64)": ildouble: 1 ldouble: 1 +Test "tgamma (0x2.0000000000002p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x2.0000000000002p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x2.000004p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x2.000004p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x2.08p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (0x2.18p+4)": +float: 1 +ifloat: 1 +Test "tgamma (0x2.28p+4)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 Test "tgamma (0x2.30a43cp+4)": double: 1 float: 2 idouble: 1 ifloat: 2 +Test "tgamma (0x2.8p+0)": +float: 2 +ifloat: 2 +Test "tgamma (0x2.fffffcp+0)": +float: 3 +ifloat: 3 +ildouble: 1 +ldouble: 1 Test "tgamma (0x2.fffffcp0)": float: 3 ifloat: 3 ildouble: 1 ldouble: 1 +Test "tgamma (0x2.ffffffffffffep+0)": +ildouble: 1 +ldouble: 1 
Test "tgamma (0x2.ffffffffffffep0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x3.0000000000002p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x3.0000000000002p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x3.8p+0)": +float: 2 +ifloat: 2 +Test "tgamma (0x3.e8p+8)": +ildouble: 1 +ldouble: 1 +Test "tgamma (0x3.fffffcp+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (0x3.fffffcp0)": float: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (0x3.ffffffffffffep+0)": +double: 1 +idouble: 1 Test "tgamma (0x3.ffffffffffffep0)": double: 1 idouble: 1 +Test "tgamma (0x3.fffffffffffffffcp+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x3.fffffffffffffffcp0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x3p+0)": +float: 1 +ifloat: 1 +Test "tgamma (0x4.0000000000000008p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x4.0000000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x4.0000000000004p+0)": +double: 1 +idouble: 1 Test "tgamma (0x4.0000000000004p0)": double: 1 idouble: 1 +Test "tgamma (0x4.8p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tgamma (0x4.fffff8p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x4.fffff8p0)": float: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (0x4.ffffffffffffcp+0)": +double: 1 +idouble: 1 Test "tgamma (0x4.ffffffffffffcp0)": double: 1 idouble: 1 +Test "tgamma (0x4.fffffffffffffff8p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x4.fffffffffffffff8p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x4p+0)": +float: 1 +ifloat: 1 +Test "tgamma (0x5.0000000000000008p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x5.0000000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x5.0000000000004p+0)": +double: 1 +idouble: 1 Test "tgamma (0x5.0000000000004p0)": double: 1 idouble: 1 +Test "tgamma (0x5.000008p+0)": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 Test "tgamma (0x5.000008p0)": float: 3 ifloat: 3 ildouble: 1 ldouble: 1 +Test "tgamma (0x5.8p+0)": +ildouble: 1 +ldouble: 1 +Test "tgamma (0x5.fffff8p+0)": +double: 1 +float: 1 +idouble: 1 
+ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (0x5.fffff8p0)": double: 1 float: 1 @@ -7349,36 +10987,84 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (0x5.ffffffffffffcp+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x5.ffffffffffffcp0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x6.0000000000000008p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x6.0000000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x6.0000000000004p+0)": +double: 1 +idouble: 1 Test "tgamma (0x6.0000000000004p0)": double: 1 idouble: 1 +Test "tgamma (0x6.000008p+0)": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 Test "tgamma (0x6.000008p0)": float: 2 ifloat: 2 ildouble: 1 ldouble: 1 +Test "tgamma (0x6.8p+0)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (0x6.db8c603359a94p+8)": +ildouble: 2 +ldouble: 2 +Test "tgamma (0x6.fffff8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (0x6.fffff8p0)": double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "tgamma (0x6.ffffffffffffcp+0)": +double: 3 +idouble: 3 Test "tgamma (0x6.ffffffffffffcp0)": double: 3 idouble: 3 +Test "tgamma (0x6p+0)": +float: 1 +ifloat: 1 +Test "tgamma (0x7.0000000000000008p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x7.0000000000000008p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x7.0000000000004p+0)": +double: 4 +idouble: 4 +ildouble: 1 +ldouble: 1 Test "tgamma (0x7.0000000000004p0)": double: 4 idouble: 4 ildouble: 1 ldouble: 1 +Test "tgamma (0x7.000008p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (0x7.000008p0)": double: 1 float: 1 @@ -7386,6 +11072,20 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma (0x7.8p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (0x7.fffff8p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (0x7.fffff8p0)": double: 2 float: 1 @@ -7393,25 +11093,92 @@ idouble: 2 ifloat: 1 ildouble: 1 ldouble: 1 +Test "tgamma 
(0x7.ffffffffffffcp+0)": +double: 2 +idouble: 2 Test "tgamma (0x7.ffffffffffffcp0)": double: 2 idouble: 2 +Test "tgamma (0x7.fffffffffffffff8p+0)": +ildouble: 3 +ldouble: 3 Test "tgamma (0x7.fffffffffffffff8p0)": ildouble: 3 ldouble: 3 +Test "tgamma (0x7p+0)": +double: 1 +idouble: 1 +Test "tgamma (0x8.000000000000001p+0)": +ildouble: 1 +ldouble: 1 Test "tgamma (0x8.000000000000001p0)": ildouble: 1 ldouble: 1 +Test "tgamma (0x8.00001p+0)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 Test "tgamma (0x8.00001p0)": double: 2 idouble: 2 ildouble: 1 ldouble: 1 +Test "tgamma (0x8.8p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 +Test "tgamma (0x8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (0x8p-4)": +float: 1 +ifloat: 1 +Test "tgamma (0x8p-56)": +double: 1 +idouble: 1 +Test "tgamma (0x9.8p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "tgamma (0x9p+0)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 Test "tgamma (0xa.b9fd72b0fb238p+4)": double: 1 idouble: 1 ildouble: 2 ldouble: 2 +Test "tgamma (0xa.b9fd72b0fb23a9dp+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (0xa.b9fd72b0fb23a9ep+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (0xa.b9fd7p+4)": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 +Test "tgamma (0xa.b9fd8p+4)": +ildouble: 1 +ldouble: 1 +Test "tgamma (0xap+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "tgamma (0xb.333333333333p-4)": +ildouble: 1 +ldouble: 1 Test "tgamma (10)": double: 1 float: 1 @@ -7529,9 +11296,21 @@ double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "y0 (0x1.8p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 Test "y0 (0x1.ff00000000002p+840)": double: 1 idouble: 1 +Test "y0 (0x1p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "y0 (0x1p-10)": double: 1 idouble: 1 @@ -7574,6 +11353,54 @@ ifloat: 1 Test "y0 (0x1p16382)": ildouble: 1 ldouble: 1 +Test "y0 (0x2p-4)": +ildouble: 1 +ldouble: 1 +Test "y0 (0x4.ffcp+72)": +double: 1 +idouble: 1 
+ildouble: 1 +ldouble: 1 +Test "y0 (0x4p+16380)": +ildouble: 1 +ldouble: 1 +Test "y0 (0x4p-112)": +double: 1 +idouble: 1 +Test "y0 (0x4p-12)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "y0 (0x4p-32)": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "y0 (0x4p-52)": +float: 1 +ifloat: 1 +Test "y0 (0x4p-72)": +double: 1 +idouble: 1 +Test "y0 (0x8p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "y0 (0xap+0)": +float: 1 +ifloat: 1 +Test "y0 (0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 +Test "y0 (0xf.fffffp+124)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 Test "y0 (1.0)": double: 2 float: 1 @@ -7618,12 +11445,63 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "y1 (0x1.8p+0)": +float: 1 +ifloat: 1 Test "y1 (0x1p-10)": double: 1 idouble: 1 Test "y1 (0x1p16382)": ildouble: 1 ldouble: 1 +Test "y1 (0x2.002000002p+592)": +ildouble: 2 +ldouble: 2 +Test "y1 (0x2p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "y1 (0x2p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "y1 (0x4p+16380)": +ildouble: 1 +ldouble: 1 +Test "y1 (0x4p-12)": +double: 1 +idouble: 1 +Test "y1 (0x8p+0)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +Test "y1 (0x9.3f102p+96)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "y1 (0xap+0)": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +Test "y1 (0xf.ffffffffffff8p+1020)": +ildouble: 1 +ldouble: 1 +Test "y1 (0xf.fffffp+124)": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 Test "y1 (1.5)": float: 1 ifloat: 1 @@ -7646,9 +11524,44 @@ idouble: 1 ifloat: 2 # yn +Test "yn (-10, 0x1p+0)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +Test "yn (-10, 1.0)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 Test "yn (0, 0.125)": ildouble: 1 ldouble: 1 +Test "yn (0, 0x1.8p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +Test "yn (0, 0x1p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "yn (0, 
0x2p-4)": +ildouble: 1 +ldouble: 1 +Test "yn (0, 0x8p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "yn (0, 0xap+0)": +float: 1 +ifloat: 1 Test "yn (0, 1.0)": double: 2 float: 1 @@ -7681,6 +11594,31 @@ double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "yn (1, 0x1.8p+0)": +float: 1 +ifloat: 1 +Test "yn (1, 0x2p+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 +Test "yn (1, 0x2p-4)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "yn (1, 0x8p+0)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +Test "yn (1, 0xap+0)": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 Test "yn (1, 1.5)": float: 1 ifloat: 1 @@ -7713,6 +11651,42 @@ idouble: 1 ifloat: 2 ildouble: 4 ldouble: 4 +Test "yn (10, 0x1p+0)": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +Test "yn (10, 0x2p+0)": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +Test "yn (10, 0x2p-4)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 +Test "yn (10, 0x4p-1024)": +ildouble: 1 +ldouble: 1 +Test "yn (10, 0x4p-128)": +ildouble: 1 +ldouble: 1 +Test "yn (10, 0x8p-972)": +ildouble: 1 +ldouble: 1 +Test "yn (10, 0xap+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "yn (10, 0xcp-4)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 Test "yn (10, 1.0)": double: 1 float: 2 @@ -7728,6 +11702,34 @@ double: 3 float: 1 idouble: 3 ifloat: 1 +Test "yn (2, 0x1.ffff62p+99)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "yn (2, 0x1p127)": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +Test "yn (2, 0x8p+124)": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +Test "yn (2, 0xf.fffb1p+96)": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "yn (2, 0xf.ffffffffffff8p+1020)": +double: 1 +idouble: 1 +Test "yn (2, 0xf.fffffp+124)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 Test "yn (3, 0.125)": double: 1 idouble: 1 @@ -7740,6 +11742,24 @@ idouble: 1 ifloat: 1 ildouble: 2 ldouble: 2 +Test "yn (3, 0x2p+0)": +double: 1 +idouble: 1 +Test "yn (3, 0x2p-4)": +double: 
1 +idouble: 1 +ildouble: 1 +ldouble: 1 +Test "yn (3, 0xap+0)": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +Test "yn (3, 0xcp-4)": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 Test "yn (3, 10.0)": double: 1 float: 1 @@ -7762,6 +11782,10 @@ ifloat: 1 ildouble: 1 ldouble: 1 +Function: "acos_tonearest": +ildouble: 1 +ldouble: 1 + Function: "acos_towardzero": double: 1 float: 1 @@ -7771,6 +11795,8 @@ ildouble: 1 ldouble: 1 Function: "acos_upward": +double: 1 +idouble: 1 ildouble: 1 ldouble: 1 @@ -7791,7 +11817,9 @@ ildouble: 1 ldouble: 1 Function: "asin_upward": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 @@ -7799,6 +11827,8 @@ ldouble: 1 Function: "atan2": float: 1 ifloat: 1 +ildouble: 1 +ldouble: 1 Function: "atanh": float: 1 @@ -7896,7 +11926,9 @@ ifloat: 1 Function: "cbrt": double: 1 +float: 1 idouble: 1 +ifloat: 1 ildouble: 1 ldouble: 1 @@ -7985,40 +12017,64 @@ ildouble: 1 ldouble: 1 Function: "cos_downward": +double: 1 float: 1 +idouble: 1 ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cos_tonearest": ildouble: 1 ldouble: 1 Function: "cos_towardzero": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 Function: "cos_upward": +double: 1 float: 1 +idouble: 1 ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cosh": +double: 1 +idouble: 1 ildouble: 1 ldouble: 1 Function: "cosh_downward": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 2 ldouble: 2 Function: "cosh_tonearest": +double: 1 +idouble: 1 ildouble: 1 ldouble: 1 Function: "cosh_towardzero": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 2 ldouble: 2 Function: "cosh_upward": +double: 1 +idouble: 1 ildouble: 1 ldouble: 1 @@ -8207,6 +12263,8 @@ ldouble: 2 Function: "erf": double: 1 idouble: 1 +ildouble: 1 +ldouble: 1 Function: "erfc": double: 1 @@ -8223,20 +12281,28 @@ ildouble: 1 ldouble: 1 Function: "exp_downward": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 2 ldouble: 2 Function: "exp_towardzero": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 2 
ldouble: 2 Function: "exp_upward": +double: 1 float: 1 +idouble: 1 ifloat: 1 +ildouble: 1 +ldouble: 1 Function: "expm1": double: 1 @@ -8246,6 +12312,38 @@ ifloat: 1 ildouble: 1 ldouble: 1 +Function: "expm1_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1_tonearest": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "expm1_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + Function: "gamma": double: 1 float: 2 @@ -8255,8 +12353,12 @@ ildouble: 1 ldouble: 1 Function: "hypot": +double: 1 float: 1 +idouble: 1 ifloat: 1 +ildouble: 1 +ldouble: 1 Function: "j0": double: 2 @@ -8279,8 +12381,8 @@ double: 4 float: 5 idouble: 4 ifloat: 5 -ildouble: 3 -ldouble: 3 +ildouble: 4 +ldouble: 4 Function: "lgamma": double: 1 @@ -8290,6 +12392,12 @@ ifloat: 2 ildouble: 1 ldouble: 1 +Function: "log": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + Function: "log10": double: 1 float: 2 @@ -8301,6 +12409,8 @@ ldouble: 1 Function: "log1p": float: 1 ifloat: 1 +ildouble: 1 +ldouble: 1 Function: "pow": float: 1 @@ -8318,6 +12428,12 @@ Function: "pow_downward": float: 1 ifloat: 1 +Function: "pow_tonearest": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + Function: "pow_towardzero": float: 1 ifloat: 1 @@ -8328,23 +12444,37 @@ ifloat: 1 ildouble: 1 ldouble: 1 +Function: "sin": +ildouble: 1 +ldouble: 1 + Function: "sin_downward": +double: 1 float: 1 +idouble: 1 ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sin_tonearest": ildouble: 1 ldouble: 1 Function: "sin_towardzero": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 Function: "sin_upward": +double: 1 float: 1 +idouble: 1 ifloat: 1 -ildouble: 1 -ldouble: 1 +ildouble: 2 +ldouble: 2 Function: "sincos": double: 1 @@ -8359,18 +12489,26 @@ ildouble: 1 ldouble: 1 Function: "sinh_downward": +double: 1 float: 1 +idouble: 1 
ifloat: 1 ildouble: 2 ldouble: 2 Function: "sinh_towardzero": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 2 ldouble: 2 Function: "sinh_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 ildouble: 1 ldouble: 1 @@ -8381,8 +12519,10 @@ ildouble: 1 ldouble: 1 Function: "tan_downward": -float: 1 -ifloat: 1 +double: 1 +float: 2 +idouble: 1 +ifloat: 2 ildouble: 1 ldouble: 1 @@ -8391,13 +12531,17 @@ ildouble: 1 ldouble: 1 Function: "tan_towardzero": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 Function: "tan_upward": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 2 ldouble: 2 @@ -8428,9 +12572,9 @@ ldouble: 2 Function: "yn": double: 3 -float: 2 +float: 3 idouble: 3 -ifloat: 2 +ifloat: 3 ildouble: 4 ldouble: 4 diff --git a/sysdeps/x86_64/fpu/printf_fphex.c b/sysdeps/x86_64/fpu/printf_fphex.c index c85d1f79fb..be55f9cf6b 100644 --- a/sysdeps/x86_64/fpu/printf_fphex.c +++ b/sysdeps/x86_64/fpu/printf_fphex.c @@ -25,10 +25,11 @@ do { \ /* The "strange" 80 bit format on ix86 and m68k has an explicit \ leading digit in the 64 bit mantissa. */ \ unsigned long long int num; \ + union ieee854_long_double u; \ + u.d = fpnum.ldbl; \ \ - \ - num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \ - | fpnum.ldbl.ieee.mantissa1); \ + num = (((unsigned long long int) u.ieee.mantissa0) << 32 \ + | u.ieee.mantissa1); \ \ zero_mantissa = num == 0; \ \ @@ -61,7 +62,7 @@ do { \ \ /* We have 3 bits from the mantissa in the leading nibble. \ Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. 
*/ \ - exponent = fpnum.ldbl.ieee.exponent; \ + exponent = u.ieee.exponent; \ \ if (exponent == 0) \ { \ diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S index 6c69f4b442..9b1de89d98 100644 --- a/sysdeps/x86_64/memset.S +++ b/sysdeps/x86_64/memset.S @@ -19,10 +19,6 @@ #include <sysdep.h> -#ifndef ALIGN -# define ALIGN(n) .p2align n -#endif - .text #if !defined NOT_IN_libc ENTRY(__bzero) @@ -71,12 +67,12 @@ L(entry_from_bzero): L(return): rep ret - ALIGN (4) + .p2align 4 L(between_32_64_bytes): movdqu %xmm8, 16(%rdi) movdqu %xmm8, -32(%rdi,%rdx) ret - ALIGN (4) + .p2align 4 L(loop_start): leaq 64(%rdi), %rcx movdqu %xmm8, (%rdi) @@ -92,7 +88,7 @@ L(loop_start): andq $-64, %rdx cmpq %rdx, %rcx je L(return) - ALIGN (4) + .p2align 4 L(loop): movdqa %xmm8, (%rcx) movdqa %xmm8, 16(%rcx) diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 203d16eed3..57a3c13e8a 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -6,25 +6,24 @@ endif ifeq ($(subdir),string) -sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ - strend-sse4 memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned mempcpy-ssse3 \ +sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ + strcmp-sse2-unaligned strncmp-ssse3 \ + memcmp-sse4 memcpy-ssse3 \ + memcpy-sse2-unaligned mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \ - memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \ + memmove-ssse3-back strcasecmp_l-ssse3 \ strncase_l-ssse3 strcat-ssse3 strncat-ssse3\ strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \ strcpy-sse2-unaligned strncpy-sse2-unaligned \ stpcpy-sse2-unaligned stpncpy-sse2-unaligned \ strcat-sse2-unaligned strncat-sse2-unaligned \ - strrchr-sse2-no-bsf strchr-sse2-no-bsf memcmp-ssse3 + strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned ifeq (yes,$(config-cflags-sse4)) -sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c 
varshift +sysdep_routines += strcspn-c strpbrk-c strspn-c varshift CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 CFLAGS-strspn-c.c += -msse4 -CFLAGS-strstr.c += -msse4 -CFLAGS-strcasestr.c += -msse4 -CFLAGS-strcasestr-nonascii.c += -msse4 endif endif diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index d0992e113f..3344889ce3 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -98,8 +98,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strcasestr.c. */ IFUNC_IMPL (i, name, strcasestr, - IFUNC_IMPL_ADD (array, i, strcasestr, HAS_SSE4_2, - __strcasestr_sse42) IFUNC_IMPL_ADD (array, i, strcasestr, 1, __strcasestr_sse2)) /* Support sysdeps/x86_64/multiarch/strcat.S. */ @@ -110,7 +108,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strchr.S. */ IFUNC_IMPL (i, name, strchr, - IFUNC_IMPL_ADD (array, i, strchr, HAS_SSE4_2, __strchr_sse42) IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf) IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2)) @@ -118,6 +115,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcmp, IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSE4_2, __strcmp_sse42) IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSSE3, __strcmp_ssse3) + IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned) IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2)) /* Support sysdeps/x86_64/multiarch/strcpy.S. */ @@ -176,21 +174,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strpbrk_sse42) IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) - /* Support sysdeps/x86_64/multiarch/strrchr.S. 
*/ - IFUNC_IMPL (i, name, strrchr, - IFUNC_IMPL_ADD (array, i, strrchr, HAS_SSE4_2, - __strrchr_sse42) - IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2_no_bsf) - IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) /* Support sysdeps/x86_64/multiarch/strspn.S. */ IFUNC_IMPL (i, name, strspn, IFUNC_IMPL_ADD (array, i, strspn, HAS_SSE4_2, __strspn_sse42) IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) - /* Support sysdeps/x86_64/multiarch/strstr-c.c. */ + /* Support sysdeps/x86_64/multiarch/strstr.c. */ IFUNC_IMPL (i, name, strstr, - IFUNC_IMPL_ADD (array, i, strstr, HAS_SSE4_2, __strstr_sse42) + IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned) IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2)) /* Support sysdeps/x86_64/multiarch/wcscpy.S. */ diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S index 1ed4200f4c..d7b147e5ce 100644 --- a/sysdeps/x86_64/multiarch/memcmp-sse4.S +++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S @@ -25,10 +25,6 @@ # define MEMCMP __memcmp_sse4_1 # endif -# ifndef ALIGN -# define ALIGN(n) .p2align n -# endif - # define JMPTBL(I, B) (I - B) # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ @@ -60,7 +56,7 @@ ENTRY (MEMCMP) BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(firstbyte): movzbl (%rdi), %eax movzbl (%rsi), %ecx @@ -68,7 +64,7 @@ L(firstbyte): ret # endif - ALIGN (4) + .p2align 4 L(79bytesormore): movdqu (%rsi), %xmm1 movdqu (%rdi), %xmm2 @@ -316,7 +312,7 @@ L(less32bytesin256): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(512bytesormore): # ifdef DATA_CACHE_SIZE_HALF mov $DATA_CACHE_SIZE_HALF, %R8_LP @@ -329,7 +325,7 @@ L(512bytesormore): cmp %r8, %rdx ja L(L2_L3_cache_unaglined) sub $64, %rdx - ALIGN (4) + .p2align 4 L(64bytesormore_loop): movdqu (%rdi), %xmm2 pxor (%rsi), %xmm2 @@ -361,7 +357,7 @@ L(64bytesormore_loop): L(L2_L3_cache_unaglined): sub $64, %rdx - 
ALIGN (4) + .p2align 4 L(L2_L3_unaligned_128bytes_loop): prefetchnta 0x1c0(%rdi) prefetchnta 0x1c0(%rsi) @@ -396,7 +392,7 @@ L(L2_L3_unaligned_128bytes_loop): /* * This case is for machines which are sensitive for unaligned instructions. */ - ALIGN (4) + .p2align 4 L(2aligned): cmp $128, %rdx ja L(128bytesormorein2aligned) @@ -444,7 +440,7 @@ L(less32bytesin64in2alinged): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(128bytesormorein2aligned): cmp $512, %rdx ja L(512bytesormorein2aligned) @@ -519,7 +515,7 @@ L(less32bytesin128in2aligned): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(256bytesormorein2aligned): sub $256, %rdx @@ -632,7 +628,7 @@ L(less32bytesin256in2alinged): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(512bytesormorein2aligned): # ifdef DATA_CACHE_SIZE_HALF mov $DATA_CACHE_SIZE_HALF, %R8_LP @@ -646,7 +642,7 @@ L(512bytesormorein2aligned): ja L(L2_L3_cache_aglined) sub $64, %rdx - ALIGN (4) + .p2align 4 L(64bytesormore_loopin2aligned): movdqa (%rdi), %xmm2 pxor (%rsi), %xmm2 @@ -678,7 +674,7 @@ L(64bytesormore_loopin2aligned): L(L2_L3_cache_aglined): sub $64, %rdx - ALIGN (4) + .p2align 4 L(L2_L3_aligned_128bytes_loop): prefetchnta 0x1c0(%rdi) prefetchnta 0x1c0(%rsi) @@ -711,7 +707,7 @@ L(L2_L3_aligned_128bytes_loop): BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(64bytesormore_loop_end): add $16, %rdi add $16, %rsi @@ -806,7 +802,7 @@ L(8bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(12bytes): mov -12(%rdi), %rax mov -12(%rsi), %rcx @@ -827,7 +823,7 @@ L(0bytes): # ifndef USE_AS_WMEMCMP /* unreal case for wmemcmp */ - ALIGN (4) + .p2align 4 L(65bytes): movdqu -65(%rdi), %xmm1 movdqu -65(%rsi), %xmm2 @@ -864,7 +860,7 @@ L(9bytes): sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(13bytes): mov -13(%rdi), %rax mov -13(%rsi), %rcx @@ -877,7 +873,7 @@ L(13bytes): xor %eax, %eax ret - 
ALIGN (4) + .p2align 4 L(5bytes): mov -5(%rdi), %eax mov -5(%rsi), %ecx @@ -888,7 +884,7 @@ L(5bytes): sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(66bytes): movdqu -66(%rdi), %xmm1 movdqu -66(%rsi), %xmm2 @@ -929,7 +925,7 @@ L(10bytes): sub %ecx, %eax ret - ALIGN (4) + .p2align 4 L(14bytes): mov -14(%rdi), %rax mov -14(%rsi), %rcx @@ -942,7 +938,7 @@ L(14bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(6bytes): mov -6(%rdi), %eax mov -6(%rsi), %ecx @@ -958,7 +954,7 @@ L(2bytes): sub %ecx, %eax ret - ALIGN (4) + .p2align 4 L(67bytes): movdqu -67(%rdi), %xmm2 movdqu -67(%rsi), %xmm1 @@ -997,7 +993,7 @@ L(11bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(15bytes): mov -15(%rdi), %rax mov -15(%rsi), %rcx @@ -1010,7 +1006,7 @@ L(15bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(7bytes): mov -7(%rdi), %eax mov -7(%rsi), %ecx @@ -1023,7 +1019,7 @@ L(7bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(3bytes): movzwl -3(%rdi), %eax movzwl -3(%rsi), %ecx @@ -1036,7 +1032,7 @@ L(1bytes): ret # endif - ALIGN (4) + .p2align 4 L(68bytes): movdqu -68(%rdi), %xmm2 movdqu -68(%rsi), %xmm1 @@ -1079,7 +1075,7 @@ L(20bytes): # ifndef USE_AS_WMEMCMP /* unreal cases for wmemcmp */ - ALIGN (4) + .p2align 4 L(69bytes): movdqu -69(%rsi), %xmm1 movdqu -69(%rdi), %xmm2 @@ -1115,7 +1111,7 @@ L(21bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(70bytes): movdqu -70(%rsi), %xmm1 movdqu -70(%rdi), %xmm2 @@ -1151,7 +1147,7 @@ L(22bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(71bytes): movdqu -71(%rsi), %xmm1 movdqu -71(%rdi), %xmm2 @@ -1188,7 +1184,7 @@ L(23bytes): ret # endif - ALIGN (4) + .p2align 4 L(72bytes): movdqu -72(%rsi), %xmm1 movdqu -72(%rdi), %xmm2 @@ -1227,7 +1223,7 @@ L(24bytes): # ifndef USE_AS_WMEMCMP /* unreal cases for wmemcmp */ - ALIGN (4) + .p2align 4 L(73bytes): movdqu -73(%rsi), %xmm1 movdqu -73(%rdi), %xmm2 @@ -1265,7 +1261,7 @@ L(25bytes): sub %ecx, %eax ret - ALIGN (4) + .p2align 4 L(74bytes): movdqu -74(%rsi), %xmm1 movdqu -74(%rdi), 
%xmm2 @@ -1302,7 +1298,7 @@ L(26bytes): movzwl -2(%rsi), %ecx jmp L(diffin2bytes) - ALIGN (4) + .p2align 4 L(75bytes): movdqu -75(%rsi), %xmm1 movdqu -75(%rdi), %xmm2 @@ -1342,7 +1338,7 @@ L(27bytes): xor %eax, %eax ret # endif - ALIGN (4) + .p2align 4 L(76bytes): movdqu -76(%rsi), %xmm1 movdqu -76(%rdi), %xmm2 @@ -1388,7 +1384,7 @@ L(28bytes): # ifndef USE_AS_WMEMCMP /* unreal cases for wmemcmp */ - ALIGN (4) + .p2align 4 L(77bytes): movdqu -77(%rsi), %xmm1 movdqu -77(%rdi), %xmm2 @@ -1430,7 +1426,7 @@ L(29bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(78bytes): movdqu -78(%rsi), %xmm1 movdqu -78(%rdi), %xmm2 @@ -1470,7 +1466,7 @@ L(30bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(79bytes): movdqu -79(%rsi), %xmm1 movdqu -79(%rdi), %xmm2 @@ -1510,7 +1506,7 @@ L(31bytes): xor %eax, %eax ret # endif - ALIGN (4) + .p2align 4 L(64bytes): movdqu -64(%rdi), %xmm2 movdqu -64(%rsi), %xmm1 @@ -1548,7 +1544,7 @@ L(32bytes): /* * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block. */ - ALIGN (3) + .p2align 3 L(less16bytes): movsbq %dl, %rdx mov (%rsi, %rdx), %rcx @@ -1585,7 +1581,7 @@ L(diffin2bytes): sub %ecx, %eax ret - ALIGN (4) + .p2align 4 L(end): and $0xff, %eax and $0xff, %ecx @@ -1599,7 +1595,7 @@ L(end): neg %eax ret - ALIGN (4) + .p2align 4 L(nequal_bigger): ret @@ -1611,7 +1607,7 @@ L(unreal_case): END (MEMCMP) .section .rodata.sse4.1,"a",@progbits - ALIGN (3) + .p2align 3 # ifndef USE_AS_WMEMCMP L(table_64bytes): .int JMPTBL (L(0bytes), L(table_64bytes)) diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S index e319df926e..e04f918dff 100644 --- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S +++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S @@ -25,10 +25,6 @@ # define MEMCMP __memcmp_ssse3 # endif -# ifndef ALIGN -# define ALIGN(n) .p2align n -# endif - /* Warning! wmemcmp has to use SIGNED comparison for elements. memcmp has to use UNSIGNED comparison for elemnts. 
@@ -50,7 +46,7 @@ ENTRY (MEMCMP) add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 /* ECX >= 32. */ L(48bytesormore): movdqu (%rdi), %xmm3 @@ -90,7 +86,7 @@ L(48bytesormore): je L(shr_6) jmp L(shr_7) - ALIGN (2) + .p2align 2 L(next_unaligned_table): cmp $8, %edx je L(shr_8) @@ -117,7 +113,7 @@ L(next_unaligned_table): jmp L(shr_12) # endif - ALIGN (4) + .p2align 4 L(shr_0): cmp $80, %rcx lea -48(%rcx), %rcx @@ -137,7 +133,7 @@ L(shr_0): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_0_gobble): movdqa (%rsi), %xmm0 xor %eax, %eax @@ -180,7 +176,7 @@ L(next): # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(shr_1): cmp $80, %rcx lea -48(%rcx), %rcx @@ -207,7 +203,7 @@ L(shr_1): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_1_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -258,7 +254,7 @@ L(shr_1_gobble_next): jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_2): cmp $80, %rcx lea -48(%rcx), %rcx @@ -285,7 +281,7 @@ L(shr_2): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_2_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -335,7 +331,7 @@ L(shr_2_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_3): cmp $80, %rcx lea -48(%rcx), %rcx @@ -362,7 +358,7 @@ L(shr_3): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_3_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -414,7 +410,7 @@ L(shr_3_gobble_next): # endif - ALIGN (4) + .p2align 4 L(shr_4): cmp $80, %rcx lea -48(%rcx), %rcx @@ -441,7 +437,7 @@ L(shr_4): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_4_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -493,7 +489,7 @@ L(shr_4_gobble_next): # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(shr_5): cmp $80, %rcx lea -48(%rcx), %rcx @@ -520,7 +516,7 @@ L(shr_5): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_5_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -570,7 +566,7 @@ L(shr_5_gobble_next): add %rcx, %rdi jmp 
L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_6): cmp $80, %rcx lea -48(%rcx), %rcx @@ -597,7 +593,7 @@ L(shr_6): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_6_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -647,7 +643,7 @@ L(shr_6_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_7): cmp $80, %rcx lea -48(%rcx), %rcx @@ -674,7 +670,7 @@ L(shr_7): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_7_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -726,7 +722,7 @@ L(shr_7_gobble_next): # endif - ALIGN (4) + .p2align 4 L(shr_8): cmp $80, %rcx lea -48(%rcx), %rcx @@ -753,7 +749,7 @@ L(shr_8): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_8_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -805,7 +801,7 @@ L(shr_8_gobble_next): # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(shr_9): cmp $80, %rcx lea -48(%rcx), %rcx @@ -832,7 +828,7 @@ L(shr_9): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_9_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -882,7 +878,7 @@ L(shr_9_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_10): cmp $80, %rcx lea -48(%rcx), %rcx @@ -909,7 +905,7 @@ L(shr_10): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_10_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -959,7 +955,7 @@ L(shr_10_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_11): cmp $80, %rcx lea -48(%rcx), %rcx @@ -986,7 +982,7 @@ L(shr_11): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_11_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1038,7 +1034,7 @@ L(shr_11_gobble_next): # endif - ALIGN (4) + .p2align 4 L(shr_12): cmp $80, %rcx lea -48(%rcx), %rcx @@ -1065,7 +1061,7 @@ L(shr_12): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_12_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1117,7 +1113,7 @@ L(shr_12_gobble_next): # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 
4 L(shr_13): cmp $80, %rcx lea -48(%rcx), %rcx @@ -1144,7 +1140,7 @@ L(shr_13): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_13_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1194,7 +1190,7 @@ L(shr_13_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_14): cmp $80, %rcx lea -48(%rcx), %rcx @@ -1221,7 +1217,7 @@ L(shr_14): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_14_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1271,7 +1267,7 @@ L(shr_14_gobble_next): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_15): cmp $80, %rcx lea -48(%rcx), %rcx @@ -1298,7 +1294,7 @@ L(shr_15): add %rcx, %rdi jmp L(less48bytes) - ALIGN (4) + .p2align 4 L(shr_15_gobble): sub $32, %rcx movdqa 16(%rsi), %xmm0 @@ -1348,7 +1344,7 @@ L(shr_15_gobble_next): add %rcx, %rdi jmp L(less48bytes) # endif - ALIGN (4) + .p2align 4 L(exit): pmovmskb %xmm1, %r8d sub $0xffff, %r8d @@ -1389,56 +1385,56 @@ L(less16bytes): sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte16): movzbl -16(%rdi), %eax movzbl -16(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte17): movzbl -15(%rdi), %eax movzbl -15(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte18): movzbl -14(%rdi), %eax movzbl -14(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte19): movzbl -13(%rdi), %eax movzbl -13(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte20): movzbl -12(%rdi), %eax movzbl -12(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte21): movzbl -11(%rdi), %eax movzbl -11(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(Byte22): movzbl -10(%rdi), %eax movzbl -10(%rsi), %edx sub %edx, %eax ret - ALIGN (4) + .p2align 4 L(next_24_bytes): lea 8(%rdi), %rdi lea 8(%rsi), %rsi @@ -1479,14 +1475,14 @@ L(next_24_bytes): jne L(find_diff) ret - ALIGN (4) + .p2align 4 L(second_double_word): mov -12(%rdi), %eax cmp -12(%rsi), %eax jne L(find_diff) ret - ALIGN (4) + .p2align 4 
L(next_two_double_words): and $15, %dh jz L(fourth_double_word) @@ -1495,7 +1491,7 @@ L(next_two_double_words): jne L(find_diff) ret - ALIGN (4) + .p2align 4 L(fourth_double_word): mov -4(%rdi), %eax cmp -4(%rsi), %eax @@ -1503,7 +1499,7 @@ L(fourth_double_word): ret # endif - ALIGN (4) + .p2align 4 L(less48bytes): cmp $8, %ecx jae L(more8bytes) @@ -1527,7 +1523,7 @@ L(less48bytes): jmp L(4bytes) # endif - ALIGN (4) + .p2align 4 L(more8bytes): cmp $16, %ecx jae L(more16bytes) @@ -1551,7 +1547,7 @@ L(more8bytes): jmp L(12bytes) # endif - ALIGN (4) + .p2align 4 L(more16bytes): cmp $24, %ecx jae L(more24bytes) @@ -1575,7 +1571,7 @@ L(more16bytes): jmp L(20bytes) # endif - ALIGN (4) + .p2align 4 L(more24bytes): cmp $32, %ecx jae L(more32bytes) @@ -1599,7 +1595,7 @@ L(more24bytes): jmp L(28bytes) # endif - ALIGN (4) + .p2align 4 L(more32bytes): cmp $40, %ecx jae L(more40bytes) @@ -1623,7 +1619,7 @@ L(more32bytes): jmp L(36bytes) # endif - ALIGN (4) + .p2align 4 L(more40bytes): cmp $40, %ecx je L(40bytes) @@ -1642,7 +1638,7 @@ L(more40bytes): je L(46bytes) jmp L(47bytes) - ALIGN (4) + .p2align 4 L(44bytes): movl -44(%rdi), %eax movl -44(%rsi), %ecx @@ -1702,7 +1698,7 @@ L(0bytes): xor %eax, %eax ret # else - ALIGN (4) + .p2align 4 L(44bytes): movl -44(%rdi), %eax cmp -44(%rsi), %eax @@ -1753,7 +1749,7 @@ L(0bytes): # endif # ifndef USE_AS_WMEMCMP - ALIGN (4) + .p2align 4 L(45bytes): movl -45(%rdi), %eax movl -45(%rsi), %ecx @@ -1816,7 +1812,7 @@ L(1bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(46bytes): movl -46(%rdi), %eax movl -46(%rsi), %ecx @@ -1882,7 +1878,7 @@ L(2bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(47bytes): movl -47(%rdi), %eax movl -47(%rsi), %ecx @@ -1951,7 +1947,7 @@ L(3bytes): xor %eax, %eax ret - ALIGN (4) + .p2align 4 L(find_diff): cmpb %cl, %al jne L(set) @@ -1973,19 +1969,19 @@ L(set): # else /* for wmemcmp */ - ALIGN (4) + .p2align 4 L(find_diff): mov $1, %eax jg L(find_diff_bigger) neg %eax ret - ALIGN (4) + .p2align 4 
L(find_diff_bigger): ret # endif - ALIGN (4) + .p2align 4 L(equal): xor %eax, %eax ret diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S index efdfea238f..df6578ebc9 100644 --- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S @@ -20,10 +20,6 @@ #include "asm-syntax.h" -#ifndef ALIGN -# define ALIGN(n) .p2align n -#endif - ENTRY(__memcpy_sse2_unaligned) movq %rsi, %rax @@ -44,7 +40,7 @@ L(return): movq %rdi, %rax ret .p2align 4,,10 - ALIGN(4) + .p2align 4 .L31: movdqu 16(%rsi), %xmm8 cmpq $64, %rdx @@ -77,7 +73,7 @@ L(return): leaq 32(%r10), %r8 leaq 48(%r10), %rax .p2align 4,,10 - ALIGN(4) + .p2align 4 L(loop): movdqu (%rcx,%r10), %xmm8 movdqa %xmm8, (%rcx) @@ -151,7 +147,7 @@ L(less_16): .L3: leaq -1(%rdx), %rax .p2align 4,,10 - ALIGN(4) + .p2align 4 .L11: movzbl (%rsi,%rax), %edx movb %dl, (%rdi,%rax) diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S index fc9fcef27d..0eb7d9b758 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S @@ -31,10 +31,6 @@ # define MEMCPY_CHK __memcpy_chk_ssse3_back #endif -#ifndef ALIGN -# define ALIGN(n) .p2align n -#endif - #define JMPTBL(I, B) I - B /* Branch to an entry in a jump table. 
TABLE is a jump table with @@ -87,7 +83,7 @@ L(bk_write): BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) #endif - ALIGN (4) + .p2align 4 L(144bytesormore): #ifndef USE_AS_MEMMOVE @@ -119,7 +115,7 @@ L(144bytesormore): jmp *%r9 ud2 - ALIGN (4) + .p2align 4 L(copy_backward): #ifdef DATA_CACHE_SIZE mov $DATA_CACHE_SIZE, %RCX_LP @@ -149,7 +145,7 @@ L(copy_backward): jmp *%r9 ud2 - ALIGN (4) + .p2align 4 L(shl_0): mov %rdx, %r9 @@ -162,7 +158,7 @@ L(shl_0): #endif jae L(gobble_mem_fwd) sub $0x80, %rdx - ALIGN (4) + .p2align 4 L(shl_0_loop): movdqa (%rsi), %xmm1 movdqa %xmm1, (%rdi) @@ -190,7 +186,7 @@ L(shl_0_loop): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_0_bwd): sub $0x80, %rdx L(copy_backward_loop): @@ -221,7 +217,7 @@ L(copy_backward_loop): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_1): sub $0x80, %rdx movaps -0x01(%rsi), %xmm1 @@ -258,7 +254,7 @@ L(shl_1): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_1_bwd): movaps -0x01(%rsi), %xmm1 @@ -304,7 +300,7 @@ L(shl_1_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_2): sub $0x80, %rdx movaps -0x02(%rsi), %xmm1 @@ -341,7 +337,7 @@ L(shl_2): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_2_bwd): movaps -0x02(%rsi), %xmm1 @@ -387,7 +383,7 @@ L(shl_2_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_3): sub $0x80, %rdx movaps -0x03(%rsi), %xmm1 @@ -424,7 +420,7 @@ L(shl_3): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_3_bwd): movaps -0x03(%rsi), %xmm1 @@ -470,7 +466,7 @@ L(shl_3_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_4): sub $0x80, %rdx 
movaps -0x04(%rsi), %xmm1 @@ -507,7 +503,7 @@ L(shl_4): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_4_bwd): movaps -0x04(%rsi), %xmm1 @@ -553,7 +549,7 @@ L(shl_4_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_5): sub $0x80, %rdx movaps -0x05(%rsi), %xmm1 @@ -590,7 +586,7 @@ L(shl_5): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_5_bwd): movaps -0x05(%rsi), %xmm1 @@ -636,7 +632,7 @@ L(shl_5_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_6): sub $0x80, %rdx movaps -0x06(%rsi), %xmm1 @@ -673,7 +669,7 @@ L(shl_6): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_6_bwd): movaps -0x06(%rsi), %xmm1 @@ -719,7 +715,7 @@ L(shl_6_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_7): sub $0x80, %rdx movaps -0x07(%rsi), %xmm1 @@ -756,7 +752,7 @@ L(shl_7): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_7_bwd): movaps -0x07(%rsi), %xmm1 @@ -802,7 +798,7 @@ L(shl_7_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_8): sub $0x80, %rdx movaps -0x08(%rsi), %xmm1 @@ -839,7 +835,7 @@ L(shl_8): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_8_bwd): movaps -0x08(%rsi), %xmm1 @@ -886,7 +882,7 @@ L(shl_8_end_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_9): sub $0x80, %rdx movaps -0x09(%rsi), %xmm1 @@ -923,7 +919,7 @@ L(shl_9): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_9_bwd): movaps -0x09(%rsi), %xmm1 @@ -969,7 +965,7 @@ L(shl_9_bwd): sub %rdx, %rsi 
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_10): sub $0x80, %rdx movaps -0x0a(%rsi), %xmm1 @@ -1006,7 +1002,7 @@ L(shl_10): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_10_bwd): movaps -0x0a(%rsi), %xmm1 @@ -1052,7 +1048,7 @@ L(shl_10_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_11): sub $0x80, %rdx movaps -0x0b(%rsi), %xmm1 @@ -1089,7 +1085,7 @@ L(shl_11): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_11_bwd): movaps -0x0b(%rsi), %xmm1 @@ -1135,7 +1131,7 @@ L(shl_11_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_12): sub $0x80, %rdx movdqa -0x0c(%rsi), %xmm1 @@ -1173,7 +1169,7 @@ L(shl_12): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_12_bwd): movaps -0x0c(%rsi), %xmm1 @@ -1219,7 +1215,7 @@ L(shl_12_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_13): sub $0x80, %rdx movaps -0x0d(%rsi), %xmm1 @@ -1256,7 +1252,7 @@ L(shl_13): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_13_bwd): movaps -0x0d(%rsi), %xmm1 @@ -1302,7 +1298,7 @@ L(shl_13_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_14): sub $0x80, %rdx movaps -0x0e(%rsi), %xmm1 @@ -1339,7 +1335,7 @@ L(shl_14): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_14_bwd): movaps -0x0e(%rsi), %xmm1 @@ -1385,7 +1381,7 @@ L(shl_14_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_15): sub $0x80, %rdx movaps -0x0f(%rsi), %xmm1 @@ -1422,7 +1418,7 @@ L(shl_15): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY 
(L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_15_bwd): movaps -0x0f(%rsi), %xmm1 @@ -1468,7 +1464,7 @@ L(shl_15_bwd): sub %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(gobble_mem_fwd): movdqu (%rsi), %xmm1 movdqu %xmm0, (%r8) @@ -1570,7 +1566,7 @@ L(gobble_mem_fwd_end): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4) - ALIGN (4) + .p2align 4 L(gobble_mem_bwd): add %rdx, %rsi add %rdx, %rdi @@ -2833,7 +2829,7 @@ L(bwd_write_1bytes): END (MEMCPY) .section .rodata.ssse3,"a",@progbits - ALIGN (3) + .p2align 3 L(table_144_bytes_bwd): .int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd)) .int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd)) @@ -2980,7 +2976,7 @@ L(table_144_bytes_bwd): .int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd)) .int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd)) - ALIGN (3) + .p2align 3 L(table_144_bytes_fwd): .int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd)) .int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd)) @@ -3127,7 +3123,7 @@ L(table_144_bytes_fwd): .int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd)) .int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd)) - ALIGN (3) + .p2align 3 L(shl_table_fwd): .int JMPTBL (L(shl_0), L(shl_table_fwd)) .int JMPTBL (L(shl_1), L(shl_table_fwd)) @@ -3146,7 +3142,7 @@ L(shl_table_fwd): .int JMPTBL (L(shl_14), L(shl_table_fwd)) .int JMPTBL (L(shl_15), L(shl_table_fwd)) - ALIGN (3) + .p2align 3 L(shl_table_bwd): .int JMPTBL (L(shl_0_bwd), L(shl_table_bwd)) .int JMPTBL (L(shl_1_bwd), L(shl_table_bwd)) diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S index 9642ceecd9..0cedab2447 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S @@ -31,10 +31,6 @@ # define MEMCPY_CHK __memcpy_chk_ssse3 #endif -#ifndef ALIGN -# define ALIGN(n) .p2align n -#endif - #define JMPTBL(I, B) I - B /* 
Branch to an entry in a jump table. TABLE is a jump table with @@ -80,7 +76,7 @@ L(copy_forward): jmp *%r9 ud2 - ALIGN (4) + .p2align 4 L(80bytesormore): #ifndef USE_AS_MEMMOVE cmp %dil, %sil @@ -113,7 +109,7 @@ L(80bytesormore): #endif BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %r9, 4) - ALIGN (4) + .p2align 4 L(copy_backward): movdqu -16(%rsi, %rdx), %xmm0 add %rdx, %rsi @@ -144,7 +140,7 @@ L(copy_backward): #endif BRANCH_TO_JMPTBL_ENTRY (L(shl_table_bwd), %r9, 4) - ALIGN (4) + .p2align 4 L(shl_0): sub $16, %rdx movdqa (%rsi), %xmm1 @@ -172,7 +168,7 @@ L(shl_0_less_64bytes): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_0_gobble): #ifdef DATA_CACHE_SIZE_HALF cmp $DATA_CACHE_SIZE_HALF, %RDX_LP @@ -228,7 +224,7 @@ L(shl_0_cache_less_64bytes): add %rdx, %rdi BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_0_gobble_mem_loop): prefetcht0 0x1c0(%rsi) prefetcht0 0x280(%rsi) @@ -287,7 +283,7 @@ L(shl_0_mem_less_32bytes): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_0_bwd): sub $16, %rdx movdqa -0x10(%rsi), %xmm1 @@ -313,7 +309,7 @@ L(shl_0_bwd): L(shl_0_less_64bytes_bwd): BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_0_gobble_bwd): #ifdef DATA_CACHE_SIZE_HALF cmp $DATA_CACHE_SIZE_HALF, %RDX_LP @@ -367,7 +363,7 @@ L(shl_0_gobble_bwd_loop): L(shl_0_gobble_bwd_less_64bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_0_gobble_mem_bwd_loop): prefetcht0 -0x1c0(%rsi) prefetcht0 -0x280(%rsi) @@ -423,7 +419,7 @@ L(shl_0_mem_bwd_less_64bytes): L(shl_0_mem_bwd_less_32bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_1): lea (L(shl_1_loop_L1)-L(shl_1))(%r9), %r9 cmp %rcx, %rdx @@ -466,7 +462,7 @@ L(shl_1_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 
L(shl_1_bwd): lea (L(shl_1_bwd_loop_L1)-L(shl_1_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -508,7 +504,7 @@ L(shl_1_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_2): lea (L(shl_2_loop_L1)-L(shl_2))(%r9), %r9 cmp %rcx, %rdx @@ -551,7 +547,7 @@ L(shl_2_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_2_bwd): lea (L(shl_2_bwd_loop_L1)-L(shl_2_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -593,7 +589,7 @@ L(shl_2_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_3): lea (L(shl_3_loop_L1)-L(shl_3))(%r9), %r9 cmp %rcx, %rdx @@ -636,7 +632,7 @@ L(shl_3_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_3_bwd): lea (L(shl_3_bwd_loop_L1)-L(shl_3_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -678,7 +674,7 @@ L(shl_3_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_4): lea (L(shl_4_loop_L1)-L(shl_4))(%r9), %r9 cmp %rcx, %rdx @@ -721,7 +717,7 @@ L(shl_4_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_4_bwd): lea (L(shl_4_bwd_loop_L1)-L(shl_4_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -763,7 +759,7 @@ L(shl_4_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_5): lea (L(shl_5_loop_L1)-L(shl_5))(%r9), %r9 cmp %rcx, %rdx @@ -806,7 +802,7 @@ L(shl_5_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_5_bwd): lea (L(shl_5_bwd_loop_L1)-L(shl_5_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -848,7 +844,7 @@ L(shl_5_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_6): lea (L(shl_6_loop_L1)-L(shl_6))(%r9), %r9 cmp %rcx, %rdx @@ -891,7 +887,7 @@ L(shl_6_end): add %rdx, %rsi 
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_6_bwd): lea (L(shl_6_bwd_loop_L1)-L(shl_6_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -933,7 +929,7 @@ L(shl_6_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_7): lea (L(shl_7_loop_L1)-L(shl_7))(%r9), %r9 cmp %rcx, %rdx @@ -976,7 +972,7 @@ L(shl_7_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_7_bwd): lea (L(shl_7_bwd_loop_L1)-L(shl_7_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -1018,7 +1014,7 @@ L(shl_7_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_8): lea (L(shl_8_loop_L1)-L(shl_8))(%r9), %r9 cmp %rcx, %rdx @@ -1051,7 +1047,7 @@ L(shl_8_loop_L1): movaps %xmm5, -0x10(%rdi) jmp *%r9 ud2 - ALIGN (4) + .p2align 4 L(shl_8_end): lea 64(%rdx), %rdx movaps %xmm4, -0x20(%rdi) @@ -1061,7 +1057,7 @@ L(shl_8_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_8_bwd): lea (L(shl_8_bwd_loop_L1)-L(shl_8_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -1103,7 +1099,7 @@ L(shl_8_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_9): lea (L(shl_9_loop_L1)-L(shl_9))(%r9), %r9 cmp %rcx, %rdx @@ -1146,7 +1142,7 @@ L(shl_9_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_9_bwd): lea (L(shl_9_bwd_loop_L1)-L(shl_9_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -1188,7 +1184,7 @@ L(shl_9_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_10): lea (L(shl_10_loop_L1)-L(shl_10))(%r9), %r9 cmp %rcx, %rdx @@ -1231,7 +1227,7 @@ L(shl_10_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_10_bwd): lea (L(shl_10_bwd_loop_L1)-L(shl_10_bwd))(%r9), %r9 cmp %rcx, %rdx 
@@ -1273,7 +1269,7 @@ L(shl_10_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_11): lea (L(shl_11_loop_L1)-L(shl_11))(%r9), %r9 cmp %rcx, %rdx @@ -1316,7 +1312,7 @@ L(shl_11_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_11_bwd): lea (L(shl_11_bwd_loop_L1)-L(shl_11_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -1358,7 +1354,7 @@ L(shl_11_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_12): lea (L(shl_12_loop_L1)-L(shl_12))(%r9), %r9 cmp %rcx, %rdx @@ -1401,7 +1397,7 @@ L(shl_12_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_12_bwd): lea (L(shl_12_bwd_loop_L1)-L(shl_12_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -1443,7 +1439,7 @@ L(shl_12_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_13): lea (L(shl_13_loop_L1)-L(shl_13))(%r9), %r9 cmp %rcx, %rdx @@ -1486,7 +1482,7 @@ L(shl_13_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_13_bwd): lea (L(shl_13_bwd_loop_L1)-L(shl_13_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -1528,7 +1524,7 @@ L(shl_13_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_14): lea (L(shl_14_loop_L1)-L(shl_14))(%r9), %r9 cmp %rcx, %rdx @@ -1571,7 +1567,7 @@ L(shl_14_end): add %rdx, %rsi BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_14_bwd): lea (L(shl_14_bwd_loop_L1)-L(shl_14_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -1613,7 +1609,7 @@ L(shl_14_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_15): lea (L(shl_15_loop_L1)-L(shl_15))(%r9), %r9 cmp %rcx, %rdx @@ -1656,7 +1652,7 @@ L(shl_15_end): add %rdx, %rsi 
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(shl_15_bwd): lea (L(shl_15_bwd_loop_L1)-L(shl_15_bwd))(%r9), %r9 cmp %rcx, %rdx @@ -1698,7 +1694,7 @@ L(shl_15_bwd_end): movdqu %xmm0, (%r8) BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4) - ALIGN (4) + .p2align 4 L(write_72bytes): movdqu -72(%rsi), %xmm0 movdqu -56(%rsi), %xmm1 @@ -1716,7 +1712,7 @@ L(write_72bytes): mov %rcx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_64bytes): movdqu -64(%rsi), %xmm0 mov -48(%rsi), %rcx @@ -1734,7 +1730,7 @@ L(write_64bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_56bytes): movdqu -56(%rsi), %xmm0 mov -40(%rsi), %r8 @@ -1750,7 +1746,7 @@ L(write_56bytes): mov %rcx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_48bytes): mov -48(%rsi), %rcx mov -40(%rsi), %r8 @@ -1766,7 +1762,7 @@ L(write_48bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_40bytes): mov -40(%rsi), %r8 mov -32(%rsi), %r9 @@ -1780,7 +1776,7 @@ L(write_40bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_32bytes): mov -32(%rsi), %r9 mov -24(%rsi), %r10 @@ -1792,7 +1788,7 @@ L(write_32bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_24bytes): mov -24(%rsi), %r10 mov -16(%rsi), %r11 @@ -1802,7 +1798,7 @@ L(write_24bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_16bytes): mov -16(%rsi), %r11 mov -8(%rsi), %rdx @@ -1810,14 +1806,14 @@ L(write_16bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_8bytes): mov -8(%rsi), %rdx mov %rdx, -8(%rdi) L(write_0bytes): ret - ALIGN (4) + .p2align 4 L(write_73bytes): movdqu -73(%rsi), %xmm0 movdqu -57(%rsi), %xmm1 @@ -1837,7 +1833,7 @@ L(write_73bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_65bytes): movdqu -65(%rsi), %xmm0 movdqu -49(%rsi), %xmm1 @@ -1855,7 +1851,7 @@ L(write_65bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_57bytes): movdqu -57(%rsi), %xmm0 mov -41(%rsi), %r8 @@ -1873,7 +1869,7 @@ L(write_57bytes): mov 
%edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_49bytes): movdqu -49(%rsi), %xmm0 mov -33(%rsi), %r9 @@ -1889,7 +1885,7 @@ L(write_49bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_41bytes): mov -41(%rsi), %r8 mov -33(%rsi), %r9 @@ -1905,7 +1901,7 @@ L(write_41bytes): mov %dl, -1(%rdi) ret - ALIGN (4) + .p2align 4 L(write_33bytes): mov -33(%rsi), %r9 mov -25(%rsi), %r10 @@ -1919,7 +1915,7 @@ L(write_33bytes): mov %dl, -1(%rdi) ret - ALIGN (4) + .p2align 4 L(write_25bytes): mov -25(%rsi), %r10 mov -17(%rsi), %r11 @@ -1931,7 +1927,7 @@ L(write_25bytes): mov %dl, -1(%rdi) ret - ALIGN (4) + .p2align 4 L(write_17bytes): mov -17(%rsi), %r11 mov -9(%rsi), %rcx @@ -1941,7 +1937,7 @@ L(write_17bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_9bytes): mov -9(%rsi), %rcx mov -4(%rsi), %edx @@ -1949,13 +1945,13 @@ L(write_9bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_1bytes): mov -1(%rsi), %dl mov %dl, -1(%rdi) ret - ALIGN (4) + .p2align 4 L(write_74bytes): movdqu -74(%rsi), %xmm0 movdqu -58(%rsi), %xmm1 @@ -1975,7 +1971,7 @@ L(write_74bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_66bytes): movdqu -66(%rsi), %xmm0 movdqu -50(%rsi), %xmm1 @@ -1995,7 +1991,7 @@ L(write_66bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_58bytes): movdqu -58(%rsi), %xmm1 mov -42(%rsi), %r8 @@ -2013,7 +2009,7 @@ L(write_58bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_50bytes): movdqu -50(%rsi), %xmm0 mov -34(%rsi), %r9 @@ -2029,7 +2025,7 @@ L(write_50bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_42bytes): mov -42(%rsi), %r8 mov -34(%rsi), %r9 @@ -2045,7 +2041,7 @@ L(write_42bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_34bytes): mov -34(%rsi), %r9 mov -26(%rsi), %r10 @@ -2059,7 +2055,7 @@ L(write_34bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_26bytes): mov -26(%rsi), %r10 mov -18(%rsi), %r11 @@ -2071,7 +2067,7 @@ L(write_26bytes): mov %edx, 
-4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_18bytes): mov -18(%rsi), %r11 mov -10(%rsi), %rcx @@ -2081,7 +2077,7 @@ L(write_18bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_10bytes): mov -10(%rsi), %rcx mov -4(%rsi), %edx @@ -2089,13 +2085,13 @@ L(write_10bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_2bytes): mov -2(%rsi), %dx mov %dx, -2(%rdi) ret - ALIGN (4) + .p2align 4 L(write_75bytes): movdqu -75(%rsi), %xmm0 movdqu -59(%rsi), %xmm1 @@ -2115,7 +2111,7 @@ L(write_75bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_67bytes): movdqu -67(%rsi), %xmm0 movdqu -59(%rsi), %xmm1 @@ -2135,7 +2131,7 @@ L(write_67bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_59bytes): movdqu -59(%rsi), %xmm0 mov -43(%rsi), %r8 @@ -2153,7 +2149,7 @@ L(write_59bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_51bytes): movdqu -51(%rsi), %xmm0 mov -35(%rsi), %r9 @@ -2169,7 +2165,7 @@ L(write_51bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_43bytes): mov -43(%rsi), %r8 mov -35(%rsi), %r9 @@ -2185,7 +2181,7 @@ L(write_43bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_35bytes): mov -35(%rsi), %r9 mov -27(%rsi), %r10 @@ -2199,7 +2195,7 @@ L(write_35bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_27bytes): mov -27(%rsi), %r10 mov -19(%rsi), %r11 @@ -2211,7 +2207,7 @@ L(write_27bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_19bytes): mov -19(%rsi), %r11 mov -11(%rsi), %rcx @@ -2221,7 +2217,7 @@ L(write_19bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_11bytes): mov -11(%rsi), %rcx mov -4(%rsi), %edx @@ -2229,7 +2225,7 @@ L(write_11bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_3bytes): mov -3(%rsi), %dx mov -2(%rsi), %cx @@ -2237,7 +2233,7 @@ L(write_3bytes): mov %cx, -2(%rdi) ret - ALIGN (4) + .p2align 4 L(write_76bytes): movdqu -76(%rsi), %xmm0 movdqu -60(%rsi), %xmm1 @@ -2257,7 +2253,7 @@ L(write_76bytes): mov %edx, 
-4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_68bytes): movdqu -68(%rsi), %xmm0 movdqu -52(%rsi), %xmm1 @@ -2275,7 +2271,7 @@ L(write_68bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_60bytes): movdqu -60(%rsi), %xmm0 mov -44(%rsi), %r8 @@ -2293,7 +2289,7 @@ L(write_60bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_52bytes): movdqu -52(%rsi), %xmm0 mov -36(%rsi), %r9 @@ -2309,7 +2305,7 @@ L(write_52bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_44bytes): mov -44(%rsi), %r8 mov -36(%rsi), %r9 @@ -2325,7 +2321,7 @@ L(write_44bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_36bytes): mov -36(%rsi), %r9 mov -28(%rsi), %r10 @@ -2339,7 +2335,7 @@ L(write_36bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_28bytes): mov -28(%rsi), %r10 mov -20(%rsi), %r11 @@ -2351,7 +2347,7 @@ L(write_28bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_20bytes): mov -20(%rsi), %r11 mov -12(%rsi), %rcx @@ -2361,7 +2357,7 @@ L(write_20bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_12bytes): mov -12(%rsi), %rcx mov -4(%rsi), %edx @@ -2369,13 +2365,13 @@ L(write_12bytes): mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_4bytes): mov -4(%rsi), %edx mov %edx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_77bytes): movdqu -77(%rsi), %xmm0 movdqu -61(%rsi), %xmm1 @@ -2395,7 +2391,7 @@ L(write_77bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_69bytes): movdqu -69(%rsi), %xmm0 movdqu -53(%rsi), %xmm1 @@ -2413,7 +2409,7 @@ L(write_69bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_61bytes): movdqu -61(%rsi), %xmm0 mov -45(%rsi), %r8 @@ -2431,7 +2427,7 @@ L(write_61bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_53bytes): movdqu -53(%rsi), %xmm0 mov -45(%rsi), %r8 @@ -2448,7 +2444,7 @@ L(write_53bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_45bytes): mov -45(%rsi), %r8 mov -37(%rsi), %r9 @@ -2464,7 +2460,7 @@ 
L(write_45bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_37bytes): mov -37(%rsi), %r9 mov -29(%rsi), %r10 @@ -2478,7 +2474,7 @@ L(write_37bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_29bytes): mov -29(%rsi), %r10 mov -21(%rsi), %r11 @@ -2490,7 +2486,7 @@ L(write_29bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_21bytes): mov -21(%rsi), %r11 mov -13(%rsi), %rcx @@ -2500,7 +2496,7 @@ L(write_21bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_13bytes): mov -13(%rsi), %rcx mov -8(%rsi), %rdx @@ -2508,7 +2504,7 @@ L(write_13bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_5bytes): mov -5(%rsi), %edx mov -4(%rsi), %ecx @@ -2516,7 +2512,7 @@ L(write_5bytes): mov %ecx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_78bytes): movdqu -78(%rsi), %xmm0 movdqu -62(%rsi), %xmm1 @@ -2536,7 +2532,7 @@ L(write_78bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_70bytes): movdqu -70(%rsi), %xmm0 movdqu -54(%rsi), %xmm1 @@ -2554,7 +2550,7 @@ L(write_70bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_62bytes): movdqu -62(%rsi), %xmm0 mov -46(%rsi), %r8 @@ -2572,7 +2568,7 @@ L(write_62bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_54bytes): movdqu -54(%rsi), %xmm0 mov -38(%rsi), %r9 @@ -2588,7 +2584,7 @@ L(write_54bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_46bytes): mov -46(%rsi), %r8 mov -38(%rsi), %r9 @@ -2604,7 +2600,7 @@ L(write_46bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_38bytes): mov -38(%rsi), %r9 mov -30(%rsi), %r10 @@ -2618,7 +2614,7 @@ L(write_38bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_30bytes): mov -30(%rsi), %r10 mov -22(%rsi), %r11 @@ -2630,7 +2626,7 @@ L(write_30bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_22bytes): mov -22(%rsi), %r11 mov -14(%rsi), %rcx @@ -2640,7 +2636,7 @@ L(write_22bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_14bytes): mov 
-14(%rsi), %rcx mov -8(%rsi), %rdx @@ -2648,7 +2644,7 @@ L(write_14bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_6bytes): mov -6(%rsi), %edx mov -4(%rsi), %ecx @@ -2656,7 +2652,7 @@ L(write_6bytes): mov %ecx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(write_79bytes): movdqu -79(%rsi), %xmm0 movdqu -63(%rsi), %xmm1 @@ -2676,7 +2672,7 @@ L(write_79bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_71bytes): movdqu -71(%rsi), %xmm0 movdqu -55(%rsi), %xmm1 @@ -2694,7 +2690,7 @@ L(write_71bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_63bytes): movdqu -63(%rsi), %xmm0 mov -47(%rsi), %r8 @@ -2712,7 +2708,7 @@ L(write_63bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_55bytes): movdqu -55(%rsi), %xmm0 mov -39(%rsi), %r9 @@ -2728,7 +2724,7 @@ L(write_55bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_47bytes): mov -47(%rsi), %r8 mov -39(%rsi), %r9 @@ -2744,7 +2740,7 @@ L(write_47bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_39bytes): mov -39(%rsi), %r9 mov -31(%rsi), %r10 @@ -2758,7 +2754,7 @@ L(write_39bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_31bytes): mov -31(%rsi), %r10 mov -23(%rsi), %r11 @@ -2770,7 +2766,7 @@ L(write_31bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_23bytes): mov -23(%rsi), %r11 mov -15(%rsi), %rcx @@ -2780,7 +2776,7 @@ L(write_23bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_15bytes): mov -15(%rsi), %rcx mov -8(%rsi), %rdx @@ -2788,7 +2784,7 @@ L(write_15bytes): mov %rdx, -8(%rdi) ret - ALIGN (4) + .p2align 4 L(write_7bytes): mov -7(%rsi), %edx mov -4(%rsi), %ecx @@ -2796,7 +2792,7 @@ L(write_7bytes): mov %ecx, -4(%rdi) ret - ALIGN (4) + .p2align 4 L(large_page_fwd): movdqu (%rsi), %xmm1 lea 16(%rsi), %rsi @@ -2859,7 +2855,7 @@ L(large_page_less_64bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) #ifdef USE_AS_MEMMOVE - ALIGN (4) + .p2align 4 L(ll_cache_copy_fwd_start): prefetcht0 
0x1c0(%rsi) prefetcht0 0x200(%rsi) @@ -2906,7 +2902,7 @@ L(large_page_ll_less_fwd_64bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) #endif - ALIGN (4) + .p2align 4 L(large_page_bwd): movdqu -0x10(%rsi), %xmm1 lea -16(%rsi), %rsi @@ -2966,7 +2962,7 @@ L(large_page_less_bwd_64bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) #ifdef USE_AS_MEMMOVE - ALIGN (4) + .p2align 4 L(ll_cache_copy_bwd_start): prefetcht0 -0x1c0(%rsi) prefetcht0 -0x200(%rsi) @@ -3014,7 +3010,7 @@ L(large_page_ll_less_bwd_64bytes): END (MEMCPY) .section .rodata.ssse3,"a",@progbits - ALIGN (3) + .p2align 3 L(table_less_80bytes): .int JMPTBL (L(write_0bytes), L(table_less_80bytes)) .int JMPTBL (L(write_1bytes), L(table_less_80bytes)) @@ -3097,7 +3093,7 @@ L(table_less_80bytes): .int JMPTBL (L(write_78bytes), L(table_less_80bytes)) .int JMPTBL (L(write_79bytes), L(table_less_80bytes)) - ALIGN (3) + .p2align 3 L(shl_table): .int JMPTBL (L(shl_0), L(shl_table)) .int JMPTBL (L(shl_1), L(shl_table)) @@ -3116,7 +3112,7 @@ L(shl_table): .int JMPTBL (L(shl_14), L(shl_table)) .int JMPTBL (L(shl_15), L(shl_table)) - ALIGN (3) + .p2align 3 L(shl_table_bwd): .int JMPTBL (L(shl_0_bwd), L(shl_table_bwd)) .int JMPTBL (L(shl_1_bwd), L(shl_table_bwd)) diff --git a/sysdeps/x86_64/multiarch/strcasestr-c.c b/sysdeps/x86_64/multiarch/strcasestr-c.c deleted file mode 100644 index c13a4c44f3..0000000000 --- a/sysdeps/x86_64/multiarch/strcasestr-c.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Multiple versions of strcasestr - All versions must be listed in ifunc-impl-list.c. */ - -#include "init-arch.h" - -#define STRCASESTR __strcasestr_sse2 - -#include "string/strcasestr.c" - -extern char *__strcasestr_sse42 (const char *, const char *) attribute_hidden; -extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden; - -#if 1 -libc_ifunc (__strcasestr, - HAS_SSE4_2 ? __strcasestr_sse42 : __strcasestr_sse2); -#else -libc_ifunc (__strcasestr, - 0 ? 
__strcasestr_sse42 : __strcasestr_sse2); -#endif diff --git a/sysdeps/x86_64/multiarch/strcasestr-nonascii.c b/sysdeps/x86_64/multiarch/strcasestr-nonascii.c deleted file mode 100644 index 032a6420d6..0000000000 --- a/sysdeps/x86_64/multiarch/strcasestr-nonascii.c +++ /dev/null @@ -1,50 +0,0 @@ -/* strstr with SSE4.2 intrinsics - Copyright (C) 2010-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <ctype.h> -#include <xmmintrin.h> - - -/* Similar to __m128i_strloadu. Convert to lower case for none-POSIX/C - locale. 
*/ -static __m128i -__m128i_strloadu_tolower (const unsigned char *p) -{ - union - { - char b[16]; - __m128i x; - } u; - - for (int i = 0; i < 16; ++i) - if (p[i] == 0) - { - u.b[i] = 0; - break; - } - else - u.b[i] = tolower (p[i]); - - return u.x; -} - - -#define STRCASESTR_NONASCII -#define USE_AS_STRCASESTR -#define STRSTR_SSE42 __strcasestr_sse42_nonascii -#include "strstr.c" diff --git a/sysdeps/x86_64/multiarch/strcasestr.c b/sysdeps/x86_64/multiarch/strcasestr.c index d1cfb3b264..834e656a2c 100644 --- a/sysdeps/x86_64/multiarch/strcasestr.c +++ b/sysdeps/x86_64/multiarch/strcasestr.c @@ -1,7 +1,13 @@ -extern char *__strcasestr_sse42_nonascii (const unsigned char *s1, - const unsigned char *s2) - attribute_hidden; +/* Multiple versions of strcasestr + All versions must be listed in ifunc-impl-list.c. */ -#define USE_AS_STRCASESTR -#define STRSTR_SSE42 __strcasestr_sse42 -#include "strstr.c" +#include "init-arch.h" + +#define STRCASESTR __strcasestr_sse2 + +#include "string/strcasestr.c" + +extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden; + +libc_ifunc (__strcasestr, + __strcasestr_sse2); diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S index f170238b55..3f0b2c5f5a 100644 --- a/sysdeps/x86_64/multiarch/strchr.S +++ b/sysdeps/x86_64/multiarch/strchr.S @@ -29,12 +29,6 @@ ENTRY(strchr) jne 1f call __init_cpu_features 1: leaq __strchr_sse2(%rip), %rax - testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip) - jnz 2f - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) - jz 2f - leaq __strchr_sse42(%rip), %rax - ret 2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) jz 3f leaq __strchr_sse2_no_bsf(%rip), %rax @@ -42,127 +36,6 @@ ENTRY(strchr) END(strchr) -/* - This implementation uses SSE4 instructions to compare up to 16 bytes - at a time looking for the first occurrence of the character c in the - string s: - - char *strchr (const char *s, int c); 
- - We use 0xa: - _SIDD_SBYTE_OPS - | _SIDD_CMP_EQUAL_EACH - | _SIDD_LEAST_SIGNIFICANT - on pcmpistri to compare xmm/mem128 - - 0 1 2 3 4 5 6 7 8 9 A B C D E F - X X X X X X X X X X X X X X X X - - against xmm - - 0 1 2 3 4 5 6 7 8 9 A B C D E F - C C C C C C C C C C C C C C C C - - to find out if the first 16byte data element has a byte C and the - offset of the first byte. There are 3 cases: - - 1. The first 16byte data element has the byte C at the offset X. - 2. The first 16byte data element has EOS and doesn't have the byte C. - 3. The first 16byte data element is valid and doesn't have the byte C. - - Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases: - - case ECX CFlag ZFlag SFlag - 1 X 1 0/1 0 - 2 16 0 1 0 - 3 16 0 0 0 - - We exit from the loop for cases 1 and 2 with jbe which branches - when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset - X for case 1. */ - - .section .text.sse4.2,"ax",@progbits - .align 16 - .type __strchr_sse42, @function - .globl __strchr_sse42 - .hidden __strchr_sse42 -__strchr_sse42: - cfi_startproc - CALL_MCOUNT - testb %sil, %sil - je __strend_sse4 - pxor %xmm2, %xmm2 - movd %esi, %xmm1 - movl %edi, %ecx - pshufb %xmm2, %xmm1 - andl $15, %ecx - movq %rdi, %r8 - je L(aligned_start) - -/* Handle unaligned string. */ - andq $-16, %r8 - movdqa (%r8), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %esi - /* Remove the leading bytes. */ - sarl %cl, %edx - sarl %cl, %esi - testl %esi, %esi - je L(unaligned_no_match) - /* Check which byte is a match. */ - bsfl %esi, %eax - /* Is there a NULL? */ - testl %edx, %edx - je L(unaligned_match) - bsfl %edx, %esi - cmpl %esi, %eax - /* Return NULL if NULL comes first. */ - ja L(return_null) -L(unaligned_match): - addq %rdi, %rax - ret - - .p2align 4 -L(unaligned_no_match): - testl %edx, %edx - jne L(return_null) - -/* Loop start on aligned string. 
*/ -L(loop): - addq $16, %r8 -L(aligned_start): - pcmpistri $0x2, (%r8), %xmm1 - jbe L(wrap) - addq $16, %r8 - pcmpistri $0x2, (%r8), %xmm1 - jbe L(wrap) - addq $16, %r8 - pcmpistri $0x2, (%r8), %xmm1 - jbe L(wrap) - addq $16, %r8 - pcmpistri $0x2, (%r8), %xmm1 - jbe L(wrap) - jmp L(loop) -L(wrap): - jc L(loop_exit) - -/* Return NULL. */ -L(return_null): - xorl %eax, %eax - ret - -/* Loop exit. */ - .p2align 4 -L(loop_exit): - leaq (%r8,%rcx), %rax - ret - cfi_endproc - .size __strchr_sse42, .-__strchr_sse42 - # undef ENTRY # define ENTRY(name) \ diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S new file mode 100644 index 0000000000..4a8e57a243 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S @@ -0,0 +1,209 @@ +/* strcmp with unaligned loads + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "sysdep.h" + +ENTRY ( __strcmp_sse2_unaligned) + movl %edi, %eax + xorl %edx, %edx + pxor %xmm7, %xmm7 + orl %esi, %eax + andl $4095, %eax + cmpl $4032, %eax + jg L(cross_page) + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm0 + pcmpeqb %xmm1, %xmm0 + pminub %xmm1, %xmm0 + pxor %xmm1, %xmm1 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %eax + testq %rax, %rax + je L(next_48_bytes) +L(return): + bsfq %rax, %rdx + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax + ret + + .p2align 4 +L(next_48_bytes): + movdqu 16(%rdi), %xmm6 + movdqu 16(%rsi), %xmm3 + movdqu 32(%rdi), %xmm5 + pcmpeqb %xmm6, %xmm3 + movdqu 32(%rsi), %xmm2 + pminub %xmm6, %xmm3 + pcmpeqb %xmm1, %xmm3 + movdqu 48(%rdi), %xmm4 + pcmpeqb %xmm5, %xmm2 + pmovmskb %xmm3, %edx + movdqu 48(%rsi), %xmm0 + pminub %xmm5, %xmm2 + pcmpeqb %xmm1, %xmm2 + pcmpeqb %xmm4, %xmm0 + pmovmskb %xmm2, %eax + salq $16, %rdx + pminub %xmm4, %xmm0 + pcmpeqb %xmm1, %xmm0 + salq $32, %rax + orq %rdx, %rax + pmovmskb %xmm0, %ecx + movq %rcx, %rdx + salq $48, %rdx + orq %rdx, %rax + jne L(return) +L(main_loop_header): + leaq 64(%rdi), %rdx + movl $4096, %ecx + pxor %xmm9, %xmm9 + andq $-64, %rdx + subq %rdi, %rdx + leaq (%rdi, %rdx), %rax + addq %rsi, %rdx + movq %rdx, %rsi + andl $4095, %esi + subq %rsi, %rcx + shrq $6, %rcx + movq %rcx, %rsi + jmp L(loop_start) + + .p2align 4 +L(loop): + addq $64, %rax + addq $64, %rdx +L(loop_start): + testq %rsi, %rsi + leaq -1(%rsi), %rsi + je L(loop_cross_page) +L(back_to_loop): + movdqu (%rdx), %xmm0 + movdqu 16(%rdx), %xmm1 + movdqa (%rax), %xmm2 + movdqa 16(%rax), %xmm3 + pcmpeqb %xmm2, %xmm0 + movdqu 32(%rdx), %xmm5 + pcmpeqb %xmm3, %xmm1 + pminub %xmm2, %xmm0 + movdqu 48(%rdx), %xmm6 + pminub %xmm3, %xmm1 + movdqa 32(%rax), %xmm2 + pminub %xmm1, %xmm0 + movdqa 48(%rax), %xmm3 + pcmpeqb %xmm2, %xmm5 + pcmpeqb %xmm3, %xmm6 + pminub %xmm2, %xmm5 + pminub %xmm3, %xmm6 + pminub %xmm5, %xmm0 + pminub %xmm6, %xmm0 + pcmpeqb %xmm7, %xmm0 + pmovmskb %xmm0, %ecx + 
testl %ecx, %ecx + je L(loop) + pcmpeqb %xmm7, %xmm5 + movdqu (%rdx), %xmm0 + pcmpeqb %xmm7, %xmm1 + movdqa (%rax), %xmm2 + pcmpeqb %xmm2, %xmm0 + pminub %xmm2, %xmm0 + pcmpeqb %xmm7, %xmm6 + pcmpeqb %xmm7, %xmm0 + pmovmskb %xmm1, %ecx + pmovmskb %xmm5, %r8d + pmovmskb %xmm0, %edi + salq $16, %rcx + salq $32, %r8 + pmovmskb %xmm6, %esi + orq %r8, %rcx + orq %rdi, %rcx + salq $48, %rsi + orq %rsi, %rcx + bsfq %rcx, %rcx + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax + ret + + .p2align 4 +L(loop_cross_page): + xor %r10, %r10 + movq %rdx, %r9 + and $63, %r9 + subq %r9, %r10 + + movdqa (%rdx, %r10), %xmm0 + movdqa 16(%rdx, %r10), %xmm1 + movdqu (%rax, %r10), %xmm2 + movdqu 16(%rax, %r10), %xmm3 + pcmpeqb %xmm2, %xmm0 + movdqa 32(%rdx, %r10), %xmm5 + pcmpeqb %xmm3, %xmm1 + pminub %xmm2, %xmm0 + movdqa 48(%rdx, %r10), %xmm6 + pminub %xmm3, %xmm1 + movdqu 32(%rax, %r10), %xmm2 + movdqu 48(%rax, %r10), %xmm3 + pcmpeqb %xmm2, %xmm5 + pcmpeqb %xmm3, %xmm6 + pminub %xmm2, %xmm5 + pminub %xmm3, %xmm6 + + pcmpeqb %xmm7, %xmm0 + pcmpeqb %xmm7, %xmm1 + pcmpeqb %xmm7, %xmm5 + pcmpeqb %xmm7, %xmm6 + + pmovmskb %xmm1, %ecx + pmovmskb %xmm5, %r8d + pmovmskb %xmm0, %edi + salq $16, %rcx + salq $32, %r8 + pmovmskb %xmm6, %esi + orq %r8, %rdi + orq %rcx, %rdi + salq $48, %rsi + orq %rsi, %rdi + movq %r9, %rcx + movq $63, %rsi + shrq %cl, %rdi + test %rdi, %rdi + je L(back_to_loop) + bsfq %rdi, %rcx + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax + ret + + .p2align 4 +L(cross_page_loop): + cmpb %cl, %al + jne L(different) + addq $1, %rdx + cmpq $64, %rdx + je L(main_loop_header) +L(cross_page): + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %ecx + testb %al, %al + jne L(cross_page_loop) + xorl %eax, %eax +L(different): + subl %ecx, %eax + ret +END (__strcmp_sse2_unaligned) diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S index 1d4d711838..c5dcd1aa5e 100644 --- a/sysdeps/x86_64/multiarch/strcmp.S 
+++ b/sysdeps/x86_64/multiarch/strcmp.S @@ -66,6 +66,7 @@ # define STRCMP_SSE2 __strncasecmp_l_sse2 # define __GI_STRCMP __GI___strncasecmp_l #else +# define USE_AS_STRCMP # define UPDATE_STRNCMP_COUNTER # ifndef STRCMP # define STRCMP strcmp @@ -88,11 +89,17 @@ ENTRY(STRCMP) jne 1f call __init_cpu_features 1: +#ifdef USE_AS_STRCMP + leaq __strcmp_sse2_unaligned(%rip), %rax + testl $bit_Fast_Unaligned_Load, __cpu_features+CPUID_OFFSET+index_Fast_Unaligned_Load(%rip) + jnz 3f +#else testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip) jnz 2f leaq STRCMP_SSE42(%rip), %rax testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) jnz 3f +#endif 2: leaq STRCMP_SSSE3(%rip), %rax testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) jnz 3f diff --git a/sysdeps/x86_64/multiarch/strend-sse4.S b/sysdeps/x86_64/multiarch/strend-sse4.S deleted file mode 100644 index c5a7ae28a6..0000000000 --- a/sysdeps/x86_64/multiarch/strend-sse4.S +++ /dev/null @@ -1,48 +0,0 @@ -/* Return the pointer to the end of string, using SSE4.2 - Copyright (C) 2009-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include "asm-syntax.h" - - .section .text.sse4.2,"ax",@progbits -ENTRY (__strend_sse4) - pxor %xmm2, %xmm2 - movq %rdi, %rcx - andq $~15, %rdi - movdqa %xmm2, %xmm1 - pcmpeqb (%rdi), %xmm2 - orl $0xffffffff, %esi - subq %rdi, %rcx - shll %cl, %esi - pmovmskb %xmm2, %edx - andl %esi, %edx - jnz 1f - -2: pcmpistri $0x08, 16(%rdi), %xmm1 - leaq 16(%rdi), %rdi - jnz 2b - - leaq (%rdi,%rcx), %rax - ret - -1: bsfl %edx, %eax - addq %rdi, %rax - ret - -END (__strend_sse4) diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S deleted file mode 100644 index fcef610dbc..0000000000 --- a/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S +++ /dev/null @@ -1,555 +0,0 @@ -/* strrchr with SSE2 without bsf and bsr - Copyright (C) 2011-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if defined SHARED && !defined NOT_IN_libc - -# include <sysdep.h> -# include "asm-syntax.h" - - atom_text_section -ENTRY (__strrchr_sse2_no_bsf) - - movd %rsi, %xmm1 - pxor %xmm2, %xmm2 - mov %rdi, %rcx - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $63, %rcx - cmp $48, %rcx - pshufd $0, %xmm1, %xmm1 - ja L(crosscache) - -/* unaligned string. 
*/ - movdqu (%rdi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %rcx - /* Check if there is a match. */ - pmovmskb %xmm0, %rax - add $16, %rdi - - test %rax, %rax - jnz L(unaligned_match1) - - test %rcx, %rcx - jnz L(return_null) - - and $-16, %rdi - xor %r8, %r8 - jmp L(loop) - - .p2align 4 -L(unaligned_match1): - test %rcx, %rcx - jnz L(prolog_find_zero_1) - - mov %rax, %r8 - mov %rdi, %rsi - and $-16, %rdi - jmp L(loop) - - .p2align 4 -L(crosscache): -/* Hancle unaligned string. */ - and $15, %rcx - and $-16, %rdi - pxor %xmm3, %xmm3 - movdqa (%rdi), %xmm0 - pcmpeqb %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm3, %rdx - /* Check if there is a match. */ - pmovmskb %xmm0, %rax - /* Remove the leading bytes. */ - shr %cl, %rdx - shr %cl, %rax - add $16, %rdi - - test %rax, %rax - jnz L(unaligned_match) - - test %rdx, %rdx - jnz L(return_null) - - xor %r8, %r8 - jmp L(loop) - - .p2align 4 -L(unaligned_match): - test %rdx, %rdx - jnz L(prolog_find_zero) - - mov %rax, %r8 - lea (%rdi, %rcx), %rsi - -/* Loop start on aligned string. 
*/ - .p2align 4 -L(loop): - movdqa (%rdi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %rdi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %rcx - pmovmskb %xmm0, %rax - or %rax, %rcx - jnz L(matches) - - movdqa (%rdi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %rdi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %rcx - pmovmskb %xmm0, %rax - or %rax, %rcx - jnz L(matches) - - movdqa (%rdi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %rdi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %rcx - pmovmskb %xmm0, %rax - or %rax, %rcx - jnz L(matches) - - movdqa (%rdi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %rdi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %rcx - pmovmskb %xmm0, %rax - or %rax, %rcx - jz L(loop) - -L(matches): - test %rax, %rax - jnz L(match) -L(return_value): - test %r8, %r8 - jz L(return_null) - mov %r8, %rax - mov %rsi, %rdi - jmp L(match_exit) - - .p2align 4 -L(match): - pmovmskb %xmm2, %rcx - test %rcx, %rcx - jnz L(find_zero) - mov %rax, %r8 - mov %rdi, %rsi - jmp L(loop) - - .p2align 4 -L(find_zero): - test %cl, %cl - jz L(find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(find_zero_8) - test $0x01, %cl - jnz L(FindZeroExit1) - test $0x02, %cl - jnz L(FindZeroExit2) - test $0x04, %cl - jnz L(FindZeroExit3) - and $1 << 4 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(find_zero_8): - test $0x10, %cl - jnz L(FindZeroExit5) - test $0x20, %cl - jnz L(FindZeroExit6) - test $0x40, %cl - jnz L(FindZeroExit7) - and $1 << 8 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(find_zero_high_8) - test $0x01, %ch - jnz L(FindZeroExit9) - test $0x02, %ch - jnz L(FindZeroExit10) - test $0x04, %ch - jnz L(FindZeroExit11) - and $1 << 12 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(find_zero_high_8): - test $0x10, %ch - jnz L(FindZeroExit13) - test $0x20, %ch - jnz L(FindZeroExit14) - test $0x40, %ch - jnz L(FindZeroExit15) - and $1 << 16 - 1, %rax - jz L(return_value) - jmp 
L(match_exit) - - .p2align 4 -L(FindZeroExit1): - and $1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit2): - and $1 << 2 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit3): - and $1 << 3 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit5): - and $1 << 5 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit6): - and $1 << 6 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit7): - and $1 << 7 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit9): - and $1 << 9 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit10): - and $1 << 10 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit11): - and $1 << 11 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit13): - and $1 << 13 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit14): - and $1 << 14 - 1, %rax - jz L(return_value) - jmp L(match_exit) - - .p2align 4 -L(FindZeroExit15): - and $1 << 15 - 1, %rax - jz L(return_value) - - .p2align 4 -L(match_exit): - test %ah, %ah - jnz L(match_exit_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(match_exit_8) - test $0x08, %al - jnz L(Exit4) - test $0x04, %al - jnz L(Exit3) - test $0x02, %al - jnz L(Exit2) - lea -16(%rdi), %rax - ret - - .p2align 4 -L(match_exit_8): - test $0x80, %al - jnz L(Exit8) - test $0x40, %al - jnz L(Exit7) - test $0x20, %al - jnz L(Exit6) - lea -12(%rdi), %rax - ret - - .p2align 4 -L(match_exit_high): - mov %ah, %dh - and $15 << 4, %dh - jnz L(match_exit_high_8) - test $0x08, %ah - jnz L(Exit12) - test $0x04, %ah - jnz L(Exit11) - test $0x02, %ah - jnz L(Exit10) - lea -8(%rdi), %rax - ret - - .p2align 4 -L(match_exit_high_8): - test $0x80, %ah - jnz L(Exit16) - test $0x40, %ah - jnz L(Exit15) - test $0x20, %ah - jnz L(Exit14) - lea -4(%rdi), %rax - ret - - 
.p2align 4 -L(Exit2): - lea -15(%rdi), %rax - ret - - .p2align 4 -L(Exit3): - lea -14(%rdi), %rax - ret - - .p2align 4 -L(Exit4): - lea -13(%rdi), %rax - ret - - .p2align 4 -L(Exit6): - lea -11(%rdi), %rax - ret - - .p2align 4 -L(Exit7): - lea -10(%rdi), %rax - ret - - .p2align 4 -L(Exit8): - lea -9(%rdi), %rax - ret - - .p2align 4 -L(Exit10): - lea -7(%rdi), %rax - ret - - .p2align 4 -L(Exit11): - lea -6(%rdi), %rax - ret - - .p2align 4 -L(Exit12): - lea -5(%rdi), %rax - ret - - .p2align 4 -L(Exit14): - lea -3(%rdi), %rax - ret - - .p2align 4 -L(Exit15): - lea -2(%rdi), %rax - ret - - .p2align 4 -L(Exit16): - lea -1(%rdi), %rax - ret - -/* Return NULL. */ - .p2align 4 -L(return_null): - xor %rax, %rax - ret - - .p2align 4 -L(prolog_find_zero): - add %rcx, %rdi - mov %rdx, %rcx -L(prolog_find_zero_1): - test %cl, %cl - jz L(prolog_find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(prolog_find_zero_8) - test $0x01, %cl - jnz L(PrologFindZeroExit1) - test $0x02, %cl - jnz L(PrologFindZeroExit2) - test $0x04, %cl - jnz L(PrologFindZeroExit3) - and $1 << 4 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(prolog_find_zero_8): - test $0x10, %cl - jnz L(PrologFindZeroExit5) - test $0x20, %cl - jnz L(PrologFindZeroExit6) - test $0x40, %cl - jnz L(PrologFindZeroExit7) - and $1 << 8 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(prolog_find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(prolog_find_zero_high_8) - test $0x01, %ch - jnz L(PrologFindZeroExit9) - test $0x02, %ch - jnz L(PrologFindZeroExit10) - test $0x04, %ch - jnz L(PrologFindZeroExit11) - and $1 << 12 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(prolog_find_zero_high_8): - test $0x10, %ch - jnz L(PrologFindZeroExit13) - test $0x20, %ch - jnz L(PrologFindZeroExit14) - test $0x40, %ch - jnz L(PrologFindZeroExit15) - and $1 << 16 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit1): - and $1, %rax - 
jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit2): - and $1 << 2 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit3): - and $1 << 3 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit5): - and $1 << 5 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit6): - and $1 << 6 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit7): - and $1 << 7 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit9): - and $1 << 9 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit10): - and $1 << 10 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit11): - and $1 << 11 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit13): - and $1 << 13 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit14): - and $1 << 14 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - - .p2align 4 -L(PrologFindZeroExit15): - and $1 << 15 - 1, %rax - jnz L(match_exit) - xor %rax, %rax - ret - -END (__strrchr_sse2_no_bsf) -#endif diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S deleted file mode 100644 index 3f92a41ef9..0000000000 --- a/sysdeps/x86_64/multiarch/strrchr.S +++ /dev/null @@ -1,288 +0,0 @@ -/* Multiple versions of strrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - -/* Define multiple versions only for the definition in libc and for - the DSO. In static binaries we need strrchr before the initialization - happened. */ -#if defined SHARED && !defined NOT_IN_libc - .text -ENTRY(strrchr) - .type strrchr, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __strrchr_sse2(%rip), %rax - testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip) - jnz 2f - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) - jz 2f - leaq __strrchr_sse42(%rip), %rax - ret -2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) - jz 3f - leaq __strrchr_sse2_no_bsf(%rip), %rax -3: ret -END(strrchr) - -/* - This implementation uses SSE4 instructions to compare up to 16 bytes - at a time looking for the last occurrence of the character c in the - string s: - - char *strrchr (const char *s, int c); - - We use 0x4a: - _SIDD_SBYTE_OPS - | _SIDD_CMP_EQUAL_EACH - | _SIDD_MOST_SIGNIFICANT - on pcmpistri to compare xmm/mem128 - - 0 1 2 3 4 5 6 7 8 9 A B C D E F - X X X X X X X X X X X X X X X X - - against xmm - - 0 1 2 3 4 5 6 7 8 9 A B C D E F - C C C C C C C C C C C C C C C C - - to find out if the first 16byte data element has a byte C and the - last offset. There are 4 cases: - - 1. The first 16byte data element has EOS and has the byte C at the - last offset X. - 2. The first 16byte data element is valid and has the byte C at the - last offset X. - 3. 
The first 16byte data element has EOS and doesn't have the byte C. - 4. The first 16byte data element is valid and doesn't have the byte C. - - Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases: - - case ECX CFlag ZFlag SFlag - 1 X 1 1 0 - 2 X 1 0 0 - 3 16 0 1 0 - 4 16 0 0 0 - - We exit from the loop for cases 1 and 3 with jz which branches - when ZFlag is 1. If CFlag == 1, ECX has the offset X for case 1. */ - - - .section .text.sse4.2,"ax",@progbits - .align 16 - .type __strrchr_sse42, @function - .globl __strrchr_sse42 - .hidden __strrchr_sse42 -__strrchr_sse42: - cfi_startproc - CALL_MCOUNT - testb %sil, %sil - je __strend_sse4 - xor %eax,%eax /* RAX has the last occurrence of s. */ - movd %esi, %xmm1 - punpcklbw %xmm1, %xmm1 - movl %edi, %esi - punpcklbw %xmm1, %xmm1 - andl $15, %esi - pshufd $0, %xmm1, %xmm1 - movq %rdi, %r8 - je L(loop) - -/* Handle unaligned string using psrldq. */ - leaq L(psrldq_table)(%rip), %rdx - andq $-16, %r8 - movslq (%rdx,%rsi,4),%r9 - movdqa (%r8), %xmm0 - addq %rdx, %r9 - jmp *%r9 - -/* Handle unaligned string with offset 1 using psrldq. */ - .p2align 4 -L(psrldq_1): - psrldq $1, %xmm0 - - .p2align 4 -L(unaligned_pcmpistri): - pcmpistri $0x4a, %xmm1, %xmm0 - jnc L(unaligned_no_byte) - leaq (%rdi,%rcx), %rax -L(unaligned_no_byte): - /* Find the length of the unaligned string. */ - pcmpistri $0x3a, %xmm0, %xmm0 - movl $16, %edx - subl %esi, %edx - cmpl %ecx, %edx - /* Return RAX if the unaligned fragment to next 16B already - contain the NULL terminator. */ - jg L(exit) - addq $16, %r8 - -/* Loop start on aligned string. */ - .p2align 4 -L(loop): - pcmpistri $0x4a, (%r8), %xmm1 - jbe L(match_or_eos) - addq $16, %r8 - jmp L(loop) - .p2align 4 -L(match_or_eos): - je L(had_eos) -L(match_no_eos): - leaq (%r8,%rcx), %rax - addq $16, %r8 - jmp L(loop) - .p2align 4 -L(had_eos): - jnc L(exit) - leaq (%r8,%rcx), %rax - .p2align 4 -L(exit): - ret - -/* Handle unaligned string with offset 15 using psrldq. 
*/ - .p2align 4 -L(psrldq_15): - psrldq $15, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 14 using psrldq. */ - .p2align 4 -L(psrldq_14): - psrldq $14, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 13 using psrldq. */ - .p2align 4 -L(psrldq_13): - psrldq $13, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 12 using psrldq. */ - .p2align 4 -L(psrldq_12): - psrldq $12, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 11 using psrldq. */ - .p2align 4 -L(psrldq_11): - psrldq $11, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 10 using psrldq. */ - .p2align 4 -L(psrldq_10): - psrldq $10, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 9 using psrldq. */ - .p2align 4 -L(psrldq_9): - psrldq $9, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 8 using psrldq. */ - .p2align 4 -L(psrldq_8): - psrldq $8, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 7 using psrldq. */ - .p2align 4 -L(psrldq_7): - psrldq $7, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 6 using psrldq. */ - .p2align 4 -L(psrldq_6): - psrldq $6, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 5 using psrldq. */ - .p2align 4 -L(psrldq_5): - psrldq $5, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 4 using psrldq. */ - .p2align 4 -L(psrldq_4): - psrldq $4, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 3 using psrldq. */ - .p2align 4 -L(psrldq_3): - psrldq $3, %xmm0 - jmp L(unaligned_pcmpistri) - -/* Handle unaligned string with offset 2 using psrldq. 
*/ - .p2align 4 -L(psrldq_2): - psrldq $2, %xmm0 - jmp L(unaligned_pcmpistri) - - cfi_endproc - .size __strrchr_sse42, .-__strrchr_sse42 - - .section .rodata.sse4.2,"a",@progbits - .p2align 4 -L(psrldq_table): - .int L(loop) - L(psrldq_table) - .int L(psrldq_1) - L(psrldq_table) - .int L(psrldq_2) - L(psrldq_table) - .int L(psrldq_3) - L(psrldq_table) - .int L(psrldq_4) - L(psrldq_table) - .int L(psrldq_5) - L(psrldq_table) - .int L(psrldq_6) - L(psrldq_table) - .int L(psrldq_7) - L(psrldq_table) - .int L(psrldq_8) - L(psrldq_table) - .int L(psrldq_9) - L(psrldq_table) - .int L(psrldq_10) - L(psrldq_table) - .int L(psrldq_11) - L(psrldq_table) - .int L(psrldq_12) - L(psrldq_table) - .int L(psrldq_13) - L(psrldq_table) - .int L(psrldq_14) - L(psrldq_table) - .int L(psrldq_15) - L(psrldq_table) - - -# undef ENTRY -# define ENTRY(name) \ - .type __strrchr_sse2, @function; \ - .align 16; \ - .globl __strrchr_sse2; \ - .hidden __strrchr_sse2; \ - __strrchr_sse2: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strrchr_sse2, .-__strrchr_sse2 -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal strrchr calls through a PLT. - The speedup we get from using SSE4.2 instruction is likely eaten away - by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strrchr; __GI_strrchr = __strrchr_sse2 -#endif - -#include "../strrchr.S" diff --git a/sysdeps/x86_64/multiarch/strstr-c.c b/sysdeps/x86_64/multiarch/strstr-c.c deleted file mode 100644 index 42bbe48172..0000000000 --- a/sysdeps/x86_64/multiarch/strstr-c.c +++ /dev/null @@ -1,47 +0,0 @@ -/* Multiple versions of strstr. - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2012-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* Redefine strstr so that the compiler won't complain about the type - mismatch with the IFUNC selector in strong_alias, below. */ -#undef strstr -#define strstr __redirect_strstr -#include <string.h> -#undef strstr - -#define STRSTR __strstr_sse2 -#ifdef SHARED -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2); -#endif - -#include "string/strstr.c" - -extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden; -extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; - -#include "init-arch.h" - -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -extern __typeof (__redirect_strstr) __libc_strstr; -libc_ifunc (__libc_strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_sse2) - -#undef strstr -strong_alias (__libc_strstr, strstr) diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S new file mode 100644 index 0000000000..99bae2cc83 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S @@ -0,0 +1,374 @@ +/* strstr with unaligned loads + Copyright (C) 2009-2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__strstr_sse2_unaligned) + movzbl (%rsi), %eax + testb %al, %al + je L(empty) + movzbl 1(%rsi), %edx + testb %dl, %dl + je L(strchr) + movd %eax, %xmm1 + movd %edx, %xmm2 + movq %rdi, %rax + andl $4095, %eax + punpcklbw %xmm1, %xmm1 + cmpq $4031, %rax + punpcklbw %xmm2, %xmm2 + punpcklwd %xmm1, %xmm1 + punpcklwd %xmm2, %xmm2 + pshufd $0, %xmm1, %xmm1 + pshufd $0, %xmm2, %xmm2 + ja L(cross_page) + movdqu (%rdi), %xmm3 + pxor %xmm5, %xmm5 + movdqu 1(%rdi), %xmm4 + movdqa %xmm3, %xmm6 + pcmpeqb %xmm1, %xmm3 + pcmpeqb %xmm2, %xmm4 + movdqu 16(%rdi), %xmm0 + pcmpeqb %xmm5, %xmm6 + pminub %xmm4, %xmm3 + movdqa %xmm3, %xmm4 + movdqu 17(%rdi), %xmm3 + pcmpeqb %xmm0, %xmm5 + pcmpeqb %xmm2, %xmm3 + por %xmm6, %xmm4 + pcmpeqb %xmm1, %xmm0 + pminub %xmm3, %xmm0 + por %xmm5, %xmm0 + pmovmskb %xmm4, %r8d + pmovmskb %xmm0, %eax + salq $16, %rax + orq %rax, %r8 + je L(next_32_bytes) +L(next_pair_index): + bsf %r8, %rax + addq %rdi, %rax + cmpb $0, (%rax) + je L(zero1) + movzbl 2(%rsi), %edx + testb %dl, %dl + je L(found1) + cmpb 2(%rax), %dl + jne L(next_pair) + xorl %edx, %edx + jmp L(pair_loop_start) + + .p2align 4 +L(strchr): + movzbl %al, %esi + jmp __strchr_sse2 + + .p2align 4 +L(pair_loop): + addq $1, %rdx + cmpb 2(%rax,%rdx), %cl + jne L(next_pair) +L(pair_loop_start): + movzbl 
3(%rsi,%rdx), %ecx + testb %cl, %cl + jne L(pair_loop) +L(found1): + ret +L(zero1): + xorl %eax, %eax + ret + + .p2align 4 +L(next_pair): + leaq -1(%r8), %rax + andq %rax, %r8 + jne L(next_pair_index) + + .p2align 4 +L(next_32_bytes): + movdqu 32(%rdi), %xmm3 + pxor %xmm5, %xmm5 + movdqu 33(%rdi), %xmm4 + movdqa %xmm3, %xmm6 + pcmpeqb %xmm1, %xmm3 + pcmpeqb %xmm2, %xmm4 + movdqu 48(%rdi), %xmm0 + pcmpeqb %xmm5, %xmm6 + pminub %xmm4, %xmm3 + movdqa %xmm3, %xmm4 + movdqu 49(%rdi), %xmm3 + pcmpeqb %xmm0, %xmm5 + pcmpeqb %xmm2, %xmm3 + por %xmm6, %xmm4 + pcmpeqb %xmm1, %xmm0 + pminub %xmm3, %xmm0 + por %xmm5, %xmm0 + pmovmskb %xmm4, %eax + salq $32, %rax + pmovmskb %xmm0, %r8d + salq $48, %r8 + orq %rax, %r8 + je L(loop_header) +L(next_pair2_index): + bsfq %r8, %rax + addq %rdi, %rax + cmpb $0, (%rax) + je L(zero2) + movzbl 2(%rsi), %edx + testb %dl, %dl + je L(found2) + cmpb 2(%rax), %dl + jne L(next_pair2) + xorl %edx, %edx + jmp L(pair_loop2_start) + + .p2align 4 +L(pair_loop2): + addq $1, %rdx + cmpb 2(%rax,%rdx), %cl + jne L(next_pair2) +L(pair_loop2_start): + movzbl 3(%rsi,%rdx), %ecx + testb %cl, %cl + jne L(pair_loop2) +L(found2): + ret + L(zero2): + xorl %eax, %eax + ret +L(empty): + mov %rdi, %rax + ret + + .p2align 4 +L(next_pair2): + leaq -1(%r8), %rax + andq %rax, %r8 + jne L(next_pair2_index) +L(loop_header): + movq $-512, %r11 + movq %rdi, %r9 + + pxor %xmm7, %xmm7 + andq $-64, %rdi + + .p2align 4 +L(loop): + movdqa 64(%rdi), %xmm3 + movdqu 63(%rdi), %xmm6 + movdqa %xmm3, %xmm0 + pxor %xmm2, %xmm3 + pxor %xmm1, %xmm6 + movdqa 80(%rdi), %xmm10 + por %xmm3, %xmm6 + pminub %xmm10, %xmm0 + movdqu 79(%rdi), %xmm3 + pxor %xmm2, %xmm10 + pxor %xmm1, %xmm3 + movdqa 96(%rdi), %xmm9 + por %xmm10, %xmm3 + pminub %xmm9, %xmm0 + pxor %xmm2, %xmm9 + movdqa 112(%rdi), %xmm8 + addq $64, %rdi + pminub %xmm6, %xmm3 + movdqu 31(%rdi), %xmm4 + pminub %xmm8, %xmm0 + pxor %xmm2, %xmm8 + pxor %xmm1, %xmm4 + por %xmm9, %xmm4 + pminub %xmm4, %xmm3 + movdqu 47(%rdi), %xmm5 + pxor 
%xmm1, %xmm5 + por %xmm8, %xmm5 + pminub %xmm5, %xmm3 + pminub %xmm3, %xmm0 + pcmpeqb %xmm7, %xmm0 + pmovmskb %xmm0, %eax + testl %eax, %eax + je L(loop) + pminub (%rdi), %xmm6 + pminub 32(%rdi),%xmm4 + pminub 48(%rdi),%xmm5 + pcmpeqb %xmm7, %xmm6 + pcmpeqb %xmm7, %xmm5 + pmovmskb %xmm6, %edx + movdqa 16(%rdi), %xmm8 + pcmpeqb %xmm7, %xmm4 + movdqu 15(%rdi), %xmm0 + pmovmskb %xmm5, %r8d + movdqa %xmm8, %xmm3 + pmovmskb %xmm4, %ecx + pcmpeqb %xmm1,%xmm0 + pcmpeqb %xmm2,%xmm3 + salq $32, %rcx + pcmpeqb %xmm7,%xmm8 + salq $48, %r8 + pminub %xmm0,%xmm3 + orq %rcx, %rdx + por %xmm3,%xmm8 + orq %rdx, %r8 + pmovmskb %xmm8, %eax + salq $16, %rax + orq %rax, %r8 + je L(loop) +L(next_pair_index3): + bsfq %r8, %rcx + addq %rdi, %rcx + cmpb $0, (%rcx) + je L(zero) + xorl %eax, %eax + movzbl 2(%rsi), %edx + testb %dl, %dl + je L(success3) + cmpb 1(%rcx), %dl + jne L(next_pair3) + jmp L(pair_loop_start3) + + .p2align 4 +L(pair_loop3): + addq $1, %rax + cmpb 1(%rcx,%rax), %dl + jne L(next_pair3) +L(pair_loop_start3): + movzbl 3(%rsi,%rax), %edx + testb %dl, %dl + jne L(pair_loop3) +L(success3): + lea -1(%rcx), %rax + ret + + .p2align 4 +L(next_pair3): + addq %rax, %r11 + movq %rdi, %rax + subq %r9, %rax + cmpq %r11, %rax + jl L(switch_strstr) + leaq -1(%r8), %rax + andq %rax, %r8 + jne L(next_pair_index3) + jmp L(loop) + + .p2align 4 +L(switch_strstr): + movq %rdi, %rdi + jmp __strstr_sse2 + + .p2align 4 +L(cross_page): + + movq %rdi, %rax + pxor %xmm0, %xmm0 + andq $-64, %rax + movdqa (%rax), %xmm3 + movdqu -1(%rax), %xmm4 + movdqa %xmm3, %xmm8 + movdqa 16(%rax), %xmm5 + pcmpeqb %xmm1, %xmm4 + pcmpeqb %xmm0, %xmm8 + pcmpeqb %xmm2, %xmm3 + movdqa %xmm5, %xmm7 + pminub %xmm4, %xmm3 + movdqu 15(%rax), %xmm4 + pcmpeqb %xmm0, %xmm7 + por %xmm3, %xmm8 + movdqa %xmm5, %xmm3 + movdqa 32(%rax), %xmm5 + pcmpeqb %xmm1, %xmm4 + pcmpeqb %xmm2, %xmm3 + movdqa %xmm5, %xmm6 + pmovmskb %xmm8, %ecx + pminub %xmm4, %xmm3 + movdqu 31(%rax), %xmm4 + por %xmm3, %xmm7 + movdqa %xmm5, %xmm3 + pcmpeqb 
%xmm0, %xmm6 + movdqa 48(%rax), %xmm5 + pcmpeqb %xmm1, %xmm4 + pmovmskb %xmm7, %r8d + pcmpeqb %xmm2, %xmm3 + pcmpeqb %xmm5, %xmm0 + pminub %xmm4, %xmm3 + movdqu 47(%rax), %xmm4 + por %xmm3, %xmm6 + movdqa %xmm5, %xmm3 + salq $16, %r8 + pcmpeqb %xmm1, %xmm4 + pcmpeqb %xmm2, %xmm3 + pmovmskb %xmm6, %r10d + pminub %xmm4, %xmm3 + por %xmm3, %xmm0 + salq $32, %r10 + orq %r10, %r8 + orq %rcx, %r8 + movl %edi, %ecx + pmovmskb %xmm0, %edx + subl %eax, %ecx + salq $48, %rdx + orq %rdx, %r8 + shrq %cl, %r8 + je L(loop_header) +L(next_pair_index4): + bsfq %r8, %rax + addq %rdi, %rax + cmpb $0, (%rax) + je L(zero) + + cmpq %rax,%rdi + je L(next_pair4) + + movzbl 2(%rsi), %edx + testb %dl, %dl + je L(found3) + cmpb 1(%rax), %dl + jne L(next_pair4) + xorl %edx, %edx + jmp L(pair_loop_start4) + + .p2align 4 +L(pair_loop4): + addq $1, %rdx + cmpb 1(%rax,%rdx), %cl + jne L(next_pair4) +L(pair_loop_start4): + movzbl 3(%rsi,%rdx), %ecx + testb %cl, %cl + jne L(pair_loop4) +L(found3): + subq $1, %rax + ret + + .p2align 4 +L(next_pair4): + leaq -1(%r8), %rax + andq %rax, %r8 + jne L(next_pair_index4) + jmp L(loop_header) + + .p2align 4 +L(found): + rep + ret + + .p2align 4 +L(zero): + xorl %eax, %eax + ret + + +END(__strstr_sse2_unaligned) diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c index cd63b68c01..fbff3a8ec0 100644 --- a/sysdeps/x86_64/multiarch/strstr.c +++ b/sysdeps/x86_64/multiarch/strstr.c @@ -1,6 +1,6 @@ -/* strstr with SSE4.2 intrinsics - Copyright (C) 2009-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. +/* Multiple versions of strstr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2012-2013 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,369 +17,31 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <nmmintrin.h> -#include "varshift.h" - -#ifndef STRSTR_SSE42 -# define STRSTR_SSE42 __strstr_sse42 -#endif - -#ifdef USE_AS_STRCASESTR -# include <ctype.h> -# include <locale/localeinfo.h> - -# define LOADBYTE(C) tolower (C) -# define CMPBYTE(C1, C2) (tolower (C1) == tolower (C2)) -#else -# define LOADBYTE(C) (C) -# define CMPBYTE(C1, C2) ((C1) == (C2)) -#endif - -/* We use 0xe ordered-compare: - _SIDD_SBYTE_OPS - | _SIDD_CMP_EQUAL_ORDER - | _SIDD_LEAST_SIGNIFICANT - on pcmpistri to do the scanning and string comparsion requirements of - sub-string match. In the scanning phase, we process Cflag and ECX - index to locate the first fragment match; once the first fragment - match position has been identified, we do comparison of subsequent - string fragments until we can conclude false or true match; whe - n concluding a false match, we may need to repeat scanning process - from next relevant offset in the target string. - - In the scanning phase we have 4 cases: - case ECX CFlag ZFlag SFlag - 1 16 0 0 0 - 2a 16 0 0 1 - 2b 16 0 1 0 - 2c 16 0 1 1 - - 1. No ordered-comparison match, both 16B fragments are valid, so - continue to next fragment. - 2. No ordered-comparison match, there is EOS in either fragment, - 2a. Zflg = 0, Sflg = 1, we continue - 2b. Zflg = 1, Sflg = 0, we conclude no match and return. - 2c. Zflg = 1, sflg = 1, lenth determine match or no match - - In the string comparison phase, the 1st fragment match is fixed up - to produce ECX = 0. Subsequent fragment compare of nonzero index - and no match conclude a false match. - - case ECX CFlag ZFlag SFlag - 3 X 1 0 0/1 - 4a 0 1 0 0 - 4b 0 1 0 1 - 4c 0 < X 1 0 0/1 - 5 16 0 1 0 - - 3. An initial ordered-comparison fragment match, we fix up to do - subsequent string comparison - 4a. Continuation of fragment comparison of a string compare. - 4b. EOS reached in the reference string, we conclude true match and - return - 4c. 
String compare failed if index is nonzero, we need to go back to - scanning - 5. failed string compare, go back to scanning - */ - -#if !(defined USE_AS_STRCASESTR && defined STRCASESTR_NONASCII) -/* Simple replacement of movdqu to address 4KB boundary cross issue. - If EOS occurs within less than 16B before 4KB boundary, we don't - cross to next page. */ -static __m128i -__m128i_strloadu (const unsigned char * p, __m128i zero) -{ - if (__builtin_expect ((int) ((size_t) p & 0xfff) > 0xff0, 0)) - { - size_t offset = ((size_t) p & (16 - 1)); - __m128i a = _mm_load_si128 ((__m128i *) (p - offset)); - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, zero)); - if ((bmsk >> offset) != 0) - return __m128i_shift_right (a, offset); - } - return _mm_loadu_si128 ((__m128i *) p); -} -#endif - -#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII - -/* Similar to __m128i_strloadu. Convert to lower case for POSIX/C - locale and other which have single-byte letters only in the ASCII - range. */ -static __m128i -__m128i_strloadu_tolower (const unsigned char *p, __m128i zero, __m128i uclow, - __m128i uchigh, __m128i lcqword) -{ - __m128i frag = __m128i_strloadu (p, zero); - - /* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'. */ - __m128i r2 = _mm_cmpgt_epi8 (uchigh, frag); - /* Compare if bytes are > 'A' - 1. */ - __m128i r1 = _mm_cmpgt_epi8 (frag, uclow); - /* Mask byte == ff if byte(r2) <= 'Z' and byte(r1) > 'A' - 1. */ - __m128i mask = _mm_and_si128 (r2, r1); - /* Apply lowercase bit 6 mask for above mask bytes == ff. */ - return _mm_or_si128 (frag, _mm_and_si128 (mask, lcqword)); -} - -#endif - -/* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP - algorithm) overlap for a fully populated 16B vector. - Input parameter: 1st 16Byte loaded from the reference string of a - strstr function. - We don't use KMP algorithm if reference string is less than 16B. 
*/ -static int -__inline__ __attribute__ ((__always_inline__,)) -KMP16Bovrlap (__m128i s2) -{ - __m128i b = _mm_unpacklo_epi8 (s2, s2); - __m128i a = _mm_unpacklo_epi8 (b, b); - a = _mm_shuffle_epi32 (a, 0); - b = _mm_srli_si128 (s2, sizeof (char)); - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (b, a)); - - /* _BitScanForward(&k1, bmsk); */ - int k1; - __asm ("bsfl %[bmsk], %[k1]" : [k1] "=r" (k1) : [bmsk] "r" (bmsk)); - if (!bmsk) - return 16; - else if (bmsk == 0x7fff) - return 1; - else if (!k1) - { - /* There are al least two distinct chars in s2. If byte 0 and 1 are - idential and the distinct value lies farther down, we can deduce - the next byte offset to restart full compare is least no earlier - than byte 3. */ - return 3; - } - else - { - /* Byte 1 is not degenerated to byte 0. */ - return k1 + 1; - } -} - -char * -__attribute__ ((section (".text.sse4.2"))) -STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2) -{ -#define p1 s1 - const unsigned char *p2 = s2; - -#ifndef STRCASESTR_NONASCII - if (__builtin_expect (p2[0] == '\0', 0)) - return (char *) p1; - - if (__builtin_expect (p1[0] == '\0', 0)) - return NULL; - - /* Check if p1 length is 1 byte long. */ - if (__builtin_expect (p1[1] == '\0', 0)) - return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? 
(char *) p1 : NULL; -#endif - -#ifdef USE_AS_STRCASESTR -# ifndef STRCASESTR_NONASCII - if (__builtin_expect (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE) - != 0, 0)) - return __strcasestr_sse42_nonascii (s1, s2); - - const __m128i uclow = _mm_set1_epi8 (0x40); - const __m128i uchigh = _mm_set1_epi8 (0x5b); - const __m128i lcqword = _mm_set1_epi8 (0x20); - const __m128i zero = _mm_setzero_si128 (); -# define strloadu(p) __m128i_strloadu_tolower (p, zero, uclow, uchigh, lcqword) -# else -# define strloadu __m128i_strloadu_tolower -# define zero _mm_setzero_si128 () -# endif -#else -# define strloadu(p) __m128i_strloadu (p, zero) - const __m128i zero = _mm_setzero_si128 (); +/* Redefine strstr so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +#undef strstr +#define strstr __redirect_strstr +#include <string.h> +#undef strstr + +#define STRSTR __strstr_sse2 +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2); #endif - /* p1 > 1 byte long. Load up to 16 bytes of fragment. */ - __m128i frag1 = strloadu (p1); - - __m128i frag2; - if (p2[1] != '\0') - /* p2 is > 1 byte long. */ - frag2 = strloadu (p2); - else - frag2 = _mm_insert_epi8 (zero, LOADBYTE (p2[0]), 0); - - /* Unsigned bytes, equal order, does frag2 has null? */ - int cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - int cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - int cmp = _mm_cmpistri (frag2, frag1, 0x0c); - int cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); - if (cmp_s & cmp_c) - { - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2, zero)); - int len; - __asm ("bsfl %[bmsk], %[len]" - : [len] "=r" (len) : [bmsk] "r" (bmsk)); - p1 += cmp; - if ((len + cmp) <= 16) - return (char *) p1; - - /* Load up to 16 bytes of fragment. 
*/ - frag1 = strloadu (p1); - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - if ((len + cmp) <= 16) - return (char *) p1 + cmp; - } - - if (cmp_s) - { - /* Adjust addr for 16B alginment in ensuing loop. */ - while (!cmp_z) - { - p1 += cmp; - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - /* Because s2 < 16 bytes and we adjusted p1 by non-zero cmp - once already, this time cmp will be zero and we can exit. */ - if ((!cmp) & cmp_c) - break; - } - - if (!cmp_c) - return NULL; - - /* Since s2 is less than 16 bytes, com_c is definitive - determination of full match. */ - return (char *) p1 + cmp; - } - - /* General case, s2 is at least 16 bytes or more. - First, the common case of false-match at first byte of p2. */ - const unsigned char *pt = NULL; - int kmp_fwd = 0; -re_trace: - while (!cmp_c) - { - /* frag1 has null. */ - if (cmp_z) - return NULL; - - /* frag 1 has no null, advance 16 bytes. */ - p1 += 16; - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - /* Unsigned bytes, equal order, is there a partial match? */ - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - } - - /* Next, handle initial positive match as first byte of p2. We have - a partial fragment match, make full determination until we reached - end of s2. */ - if (!cmp) - { - if (cmp_z) - return (char *) p1; - - pt = p1; - p1 += 16; - p2 += 16; - /* Load up to 16 bytes of fragment. */ - frag2 = strloadu (p2); - } - else - { - /* Adjust 16B alignment. */ - p1 += cmp; - pt = p1; - } - - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - - /* Unsigned bytes, equal order, does frag2 has null? 
*/ - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); - while (!(cmp | cmp_z | cmp_s)) - { - p1 += 16; - p2 += 16; - /* Load up to 16 bytes of fragment. */ - frag2 = strloadu (p2); - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); - /* Unsigned bytes, equal order, does frag2 has null? */ - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); - } - - /* Full determination yielded a false result, retrace s1 to next - starting position. - Zflg 1 0 1 0/1 - Sflg 0 1 1 0/1 - cmp na 0 0 >0 - action done done continue continue if s2 < s1 - false match retrace s1 else false - */ - - if (cmp_s & !cmp) - return (char *) pt; - if (cmp_z) - { - if (!cmp_s) - return NULL; - - /* Handle both zero and sign flag set and s1 is shorter in - length. */ - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag2)); - int bmsk1 = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag1)); - int len; - int len1; - __asm ("bsfl %[bmsk], %[len]" - : [len] "=r" (len) : [bmsk] "r" (bmsk)); - __asm ("bsfl %[bmsk1], %[len1]" - : [len1] "=r" (len1) : [bmsk1] "r" (bmsk1)); - if (len >= len1) - return NULL; - } - else if (!cmp) - return (char *) pt; - - /* Otherwise, we have to retrace and continue. Default of multiple - paths that need to retrace from next byte in s1. */ - p2 = s2; - frag2 = strloadu (p2); - - if (!kmp_fwd) - kmp_fwd = KMP16Bovrlap (frag2); +#include "string/strstr.c" - /* KMP algorithm predicted overlap needs to be corrected for - partial fragment compare. */ - p1 = pt + (kmp_fwd > cmp ? cmp : kmp_fwd); +extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden; +extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; - /* Since s2 is at least 16 bytes long, we're certain there is no - match. 
*/ - if (p1[0] == '\0') - return NULL; +#include "init-arch.h" - /* Load up to 16 bytes of fragment. */ - frag1 = strloadu (p1); +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__redirect_strstr) __libc_strstr; +libc_ifunc (__libc_strstr, HAS_FAST_UNALIGNED_LOAD ? __strstr_sse2_unaligned : __strstr_sse2) - /* Unsigned bytes, equal order, is there a partial match? */ - cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); - cmp = _mm_cmpistri (frag2, frag1, 0x0c); - cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c); - goto re_trace; -} +#undef strstr +strong_alias (__libc_strstr, strstr) diff --git a/sysdeps/x86_64/preconfigure b/sysdeps/x86_64/preconfigure index d5abba8824..c8f1e0e132 100644 --- a/sysdeps/x86_64/preconfigure +++ b/sysdeps/x86_64/preconfigure @@ -1,4 +1,4 @@ -# This file is generated from configure.in by Autoconf. DO NOT EDIT! +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! # Local preconfigure fragment for sysdeps/x86_64 test -n "$base_machine" || case "$machine" in diff --git a/sysdeps/x86_64/preconfigure.in b/sysdeps/x86_64/preconfigure.ac index 600700ea1a..600700ea1a 100644 --- a/sysdeps/x86_64/preconfigure.in +++ b/sysdeps/x86_64/preconfigure.ac diff --git a/sysdeps/x86_64/stackguard-macros.h b/sysdeps/x86_64/stackguard-macros.h index d7fedb3737..1948800cd0 100644 --- a/sysdeps/x86_64/stackguard-macros.h +++ b/sysdeps/x86_64/stackguard-macros.h @@ -4,3 +4,8 @@ ({ uintptr_t x; \ asm ("mov %%fs:%c1, %0" : "=r" (x) \ : "i" (offsetof (tcbhead_t, stack_guard))); x; }) + +#define POINTER_CHK_GUARD \ + ({ uintptr_t x; \ + asm ("mov %%fs:%c1, %0" : "=r" (x) \ + : "i" (offsetof (tcbhead_t, pointer_guard))); x; }) diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S index d89f1eba8f..7440500a67 100644 --- a/sysdeps/x86_64/strchr.S +++ b/sysdeps/x86_64/strchr.S @@ -19,51 +19,169 @@ #include <sysdep.h> - .text ENTRY (strchr) movd %esi, %xmm1 - movq %rdi, %rcx - punpcklbw 
%xmm1, %xmm1 - andq $~15, %rdi - pxor %xmm2, %xmm2 + movl %edi, %eax + andl $4095, %eax punpcklbw %xmm1, %xmm1 - orl $0xffffffff, %esi - movdqa (%rdi), %xmm0 + cmpl $4032, %eax + punpcklwd %xmm1, %xmm1 pshufd $0, %xmm1, %xmm1 - subq %rdi, %rcx - movdqa %xmm0, %xmm3 - leaq 16(%rdi), %rdi + jg L(cross_page) + movdqu (%rdi), %xmm0 + pxor %xmm3, %xmm3 + movdqa %xmm0, %xmm4 pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm3 - shl %cl, %esi - pmovmskb %xmm0, %edx - pmovmskb %xmm3, %ecx - andl %esi, %edx - andl %esi, %ecx - orl %edx, %ecx - jnz 1f + pcmpeqb %xmm3, %xmm4 + por %xmm4, %xmm0 + pmovmskb %xmm0, %eax + test %eax, %eax + je L(next_48_bytes) + bsf %eax, %eax +#ifdef AS_STRCHRNUL + leaq (%rdi,%rax), %rax +#else + movl $0, %edx + leaq (%rdi,%rax), %rax + cmpb %sil, (%rax) + cmovne %rdx, %rax +#endif + ret -2: movdqa (%rdi), %xmm0 - leaq 16(%rdi), %rdi - movdqa %xmm0, %xmm3 + .p2align 3 + L(next_48_bytes): + movdqu 16(%rdi), %xmm0 + movdqa %xmm0, %xmm4 pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm3 - pmovmskb %xmm0, %edx - pmovmskb %xmm3, %ecx - orl %edx, %ecx - jz 2b + pcmpeqb %xmm3, %xmm4 + por %xmm4, %xmm0 + pmovmskb %xmm0, %ecx + movdqu 32(%rdi), %xmm0 + movdqa %xmm0, %xmm4 + pcmpeqb %xmm1, %xmm0 + salq $16, %rcx + pcmpeqb %xmm3, %xmm4 + por %xmm4, %xmm0 + pmovmskb %xmm0, %eax + movdqu 48(%rdi), %xmm0 + pcmpeqb %xmm0, %xmm3 + salq $32, %rax + pcmpeqb %xmm1, %xmm0 + orq %rcx, %rax + por %xmm3, %xmm0 + pmovmskb %xmm0, %ecx + salq $48, %rcx + orq %rcx, %rax + testq %rax, %rax + jne L(return) +L(loop_start): + /* We use this alignment to force loop be aligned to 8 but not + 16 bytes. This gives better sheduling on AMD processors. 
*/ + .p2align 4 + pxor %xmm6, %xmm6 + andq $-64, %rdi + .p2align 3 +L(loop64): + addq $64, %rdi + movdqa (%rdi), %xmm5 + movdqa 16(%rdi), %xmm2 + movdqa 32(%rdi), %xmm3 + pxor %xmm1, %xmm5 + movdqa 48(%rdi), %xmm4 + pxor %xmm1, %xmm2 + pxor %xmm1, %xmm3 + pminub (%rdi), %xmm5 + pxor %xmm1, %xmm4 + pminub 16(%rdi), %xmm2 + pminub 32(%rdi), %xmm3 + pminub %xmm2, %xmm5 + pminub 48(%rdi), %xmm4 + pminub %xmm3, %xmm5 + pminub %xmm4, %xmm5 + pcmpeqb %xmm6, %xmm5 + pmovmskb %xmm5, %eax + + testl %eax, %eax + je L(loop64) + + movdqa (%rdi), %xmm5 + movdqa %xmm5, %xmm0 + pcmpeqb %xmm1, %xmm5 + pcmpeqb %xmm6, %xmm0 + por %xmm0, %xmm5 + pcmpeqb %xmm6, %xmm2 + pcmpeqb %xmm6, %xmm3 + pcmpeqb %xmm6, %xmm4 + + pmovmskb %xmm5, %ecx + pmovmskb %xmm2, %eax + salq $16, %rax + pmovmskb %xmm3, %r8d + pmovmskb %xmm4, %edx + salq $32, %r8 + orq %r8, %rax + orq %rcx, %rax + salq $48, %rdx + orq %rdx, %rax + .p2align 3 +L(return): + bsfq %rax, %rax +#ifdef AS_STRCHRNUL + leaq (%rdi,%rax), %rax +#else + movl $0, %edx + leaq (%rdi,%rax), %rax + cmpb %sil, (%rax) + cmovne %rdx, %rax +#endif + ret + .p2align 4 + +L(cross_page): + movq %rdi, %rdx + pxor %xmm2, %xmm2 + andq $-64, %rdx + movdqa %xmm1, %xmm0 + movdqa (%rdx), %xmm3 + movdqa %xmm3, %xmm4 + pcmpeqb %xmm1, %xmm3 + pcmpeqb %xmm2, %xmm4 + por %xmm4, %xmm3 + pmovmskb %xmm3, %r8d + movdqa 16(%rdx), %xmm3 + movdqa %xmm3, %xmm4 + pcmpeqb %xmm1, %xmm3 + pcmpeqb %xmm2, %xmm4 + por %xmm4, %xmm3 + pmovmskb %xmm3, %eax + movdqa 32(%rdx), %xmm3 + movdqa %xmm3, %xmm4 + pcmpeqb %xmm1, %xmm3 + salq $16, %rax + pcmpeqb %xmm2, %xmm4 + por %xmm4, %xmm3 + pmovmskb %xmm3, %r9d + movdqa 48(%rdx), %xmm3 + pcmpeqb %xmm3, %xmm2 + salq $32, %r9 + pcmpeqb %xmm3, %xmm0 + orq %r9, %rax + orq %r8, %rax + por %xmm2, %xmm0 + pmovmskb %xmm0, %ecx + salq $48, %rcx + orq %rcx, %rax + movl %edi, %ecx + subb %dl, %cl + shrq %cl, %rax + testq %rax, %rax + jne L(return) + jmp L(loop_start) -1: bsfl %edx, %edx - jz 4f - bsfl %ecx, %ecx - leaq -16(%rdi,%rdx), %rax - cmpl 
%edx, %ecx - je 5f -4: xorl %eax, %eax -5: ret END (strchr) +#ifndef AS_STRCHRNUL weak_alias (strchr, index) libc_hidden_builtin_def (strchr) - +#endif diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S index d8c345ba7d..bceeb61875 100644 --- a/sysdeps/x86_64/strchrnul.S +++ b/sysdeps/x86_64/strchrnul.S @@ -20,43 +20,8 @@ #include <sysdep.h> - - .text -ENTRY (__strchrnul) - movd %esi, %xmm1 - movq %rdi, %rcx - punpcklbw %xmm1, %xmm1 - andq $~15, %rdi - pxor %xmm2, %xmm2 - punpcklbw %xmm1, %xmm1 - orl $0xffffffff, %esi - movdqa (%rdi), %xmm0 - pshufd $0, %xmm1, %xmm1 - subq %rdi, %rcx - movdqa %xmm0, %xmm3 - leaq 16(%rdi), %rdi - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm3 - shl %cl, %esi - pmovmskb %xmm0, %edx - pmovmskb %xmm3, %ecx - orl %edx, %ecx - andl %esi, %ecx - jnz 1f - -2: movdqa (%rdi), %xmm0 - leaq 16(%rdi), %rdi - movdqa %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm3 - pmovmskb %xmm0, %edx - pmovmskb %xmm3, %ecx - orl %edx, %ecx - jz 2b - -1: bsfl %ecx, %edx - leaq -16(%rdi,%rdx), %rax - ret -END (__strchrnul) +#define strchr __strchrnul +#define AS_STRCHRNUL +#include "strchr.S" weak_alias (__strchrnul, strchrnul) diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S index e413b07438..2a07ff75ac 100644 --- a/sysdeps/x86_64/strrchr.S +++ b/sysdeps/x86_64/strrchr.S @@ -1,6 +1,5 @@ /* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR. - For AMD x86-64. - Copyright (C) 2009-2013 Free Software Foundation, Inc. + Copyright (C) 2013 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,63 +16,212 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <sysdep.h> +#include <sysdep.h> .text ENTRY (strrchr) movd %esi, %xmm1 - movq %rdi, %rcx - punpcklbw %xmm1, %xmm1 - andq $~15, %rdi - pxor %xmm2, %xmm2 - punpcklbw %xmm1, %xmm1 - orl $0xffffffff, %esi - movdqa (%rdi), %xmm0 + movq %rdi, %rax + andl $4095, %eax + punpcklbw %xmm1, %xmm1 + cmpq $4032, %rax + punpcklwd %xmm1, %xmm1 pshufd $0, %xmm1, %xmm1 - subq %rdi, %rcx + ja L(cross_page) + movdqu (%rdi), %xmm0 + pxor %xmm2, %xmm2 movdqa %xmm0, %xmm3 - leaq 16(%rdi), %rdi pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm3 - shl %cl, %esi - pmovmskb %xmm0, %edx - pmovmskb %xmm3, %ecx - andl %esi, %edx - andl %esi, %ecx - xorl %eax, %eax - movl %edx, %esi - orl %ecx, %esi - jnz 1f + pmovmskb %xmm0, %ecx + pmovmskb %xmm3, %edx + testq %rdx, %rdx + je L(next_48_bytes) + leaq -1(%rdx), %rax + xorq %rdx, %rax + andq %rcx, %rax + je L(exit) + bsrq %rax, %rax + addq %rdi, %rax + ret -2: movdqa (%rdi), %xmm0 - leaq 16(%rdi), %rdi - movdqa %xmm0, %xmm3 + .p2align 4 +L(next_48_bytes): + movdqu 16(%rdi), %xmm4 + movdqa %xmm4, %xmm5 + movdqu 32(%rdi), %xmm3 + pcmpeqb %xmm1, %xmm4 + pcmpeqb %xmm2, %xmm5 + movdqu 48(%rdi), %xmm0 + pmovmskb %xmm5, %edx + movdqa %xmm3, %xmm5 + pcmpeqb %xmm1, %xmm3 + pcmpeqb %xmm2, %xmm5 + pcmpeqb %xmm0, %xmm2 + salq $16, %rdx + pmovmskb %xmm3, %r8d + pmovmskb %xmm5, %eax + pmovmskb %xmm2, %esi + salq $32, %r8 + salq $32, %rax pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm3 - pmovmskb %xmm0, %edx - pmovmskb %xmm3, %ecx - movl %edx, %esi - orl %ecx, %esi - jz 2b + orq %rdx, %rax + movq %rsi, %rdx + pmovmskb %xmm4, %esi + salq $48, %rdx + salq $16, %rsi + orq %r8, %rsi + orq %rcx, %rsi + pmovmskb %xmm0, %ecx + salq $48, %rcx + orq %rcx, %rsi + orq %rdx, %rax + je L(loop_header2) + leaq -1(%rax), %rcx + xorq %rax, %rcx + andq %rcx, %rsi + je L(exit) + bsrq %rsi, %rsi + leaq (%rdi,%rsi), %rax + ret -1: bsfl %ecx, %r9d - movl $0xffffffff, %r8d - movl $31, %ecx - jnz 5f + .p2align 4 +L(loop_header2): + testq %rsi, %rsi + movq %rdi, %rcx + je L(no_c_found) 
+L(loop_header): + addq $64, %rdi + pxor %xmm7, %xmm7 + andq $-64, %rdi + jmp L(loop_entry) + + .p2align 4 +L(loop64): + testq %rdx, %rdx + cmovne %rdx, %rsi + cmovne %rdi, %rcx + addq $64, %rdi +L(loop_entry): + movdqa 32(%rdi), %xmm3 + pxor %xmm6, %xmm6 + movdqa 48(%rdi), %xmm2 + movdqa %xmm3, %xmm0 + movdqa 16(%rdi), %xmm4 + pminub %xmm2, %xmm0 + movdqa (%rdi), %xmm5 + pminub %xmm4, %xmm0 + pminub %xmm5, %xmm0 + pcmpeqb %xmm7, %xmm0 + pmovmskb %xmm0, %eax + movdqa %xmm5, %xmm0 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %r9d + movdqa %xmm4, %xmm0 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %edx + movdqa %xmm3, %xmm0 + pcmpeqb %xmm1, %xmm0 + salq $16, %rdx + pmovmskb %xmm0, %r10d + movdqa %xmm2, %xmm0 + pcmpeqb %xmm1, %xmm0 + salq $32, %r10 + orq %r10, %rdx + pmovmskb %xmm0, %r8d + orq %r9, %rdx + salq $48, %r8 + orq %r8, %rdx + testl %eax, %eax + je L(loop64) + pcmpeqb %xmm6, %xmm4 + pcmpeqb %xmm6, %xmm3 + pcmpeqb %xmm6, %xmm5 + pmovmskb %xmm4, %eax + pmovmskb %xmm3, %r10d + pcmpeqb %xmm6, %xmm2 + pmovmskb %xmm5, %r9d + salq $32, %r10 + salq $16, %rax + pmovmskb %xmm2, %r8d + orq %r10, %rax + orq %r9, %rax + salq $48, %r8 + orq %r8, %rax + leaq -1(%rax), %r8 + xorq %rax, %r8 + andq %r8, %rdx + cmovne %rdi, %rcx + cmovne %rdx, %rsi + bsrq %rsi, %rsi + leaq (%rcx,%rsi), %rax + ret - bsrl %edx, %edx - jz 2b - leaq -16(%rdi,%rdx), %rax - jmp 2b + .p2align 4 +L(no_c_found): + movl $1, %esi + xorl %ecx, %ecx + jmp L(loop_header) + + .p2align 4 +L(exit): + xorl %eax, %eax + ret -5: subl %r9d, %ecx - shrl %cl, %r8d - andl %r8d, %edx - bsrl %edx, %edx - jz 4f - leaq -16(%rdi,%rdx), %rax -4: ret + .p2align 4 +L(cross_page): + movq %rdi, %rax + pxor %xmm0, %xmm0 + andq $-64, %rax + movdqu (%rax), %xmm5 + movdqa %xmm5, %xmm6 + movdqu 16(%rax), %xmm4 + pcmpeqb %xmm1, %xmm5 + pcmpeqb %xmm0, %xmm6 + movdqu 32(%rax), %xmm3 + pmovmskb %xmm6, %esi + movdqa %xmm4, %xmm6 + movdqu 48(%rax), %xmm2 + pcmpeqb %xmm1, %xmm4 + pcmpeqb %xmm0, %xmm6 + pmovmskb %xmm6, %edx + movdqa %xmm3, %xmm6 + 
pcmpeqb %xmm1, %xmm3 + pcmpeqb %xmm0, %xmm6 + pcmpeqb %xmm2, %xmm0 + salq $16, %rdx + pmovmskb %xmm3, %r9d + pmovmskb %xmm6, %r8d + pmovmskb %xmm0, %ecx + salq $32, %r9 + salq $32, %r8 + pcmpeqb %xmm1, %xmm2 + orq %r8, %rdx + salq $48, %rcx + pmovmskb %xmm5, %r8d + orq %rsi, %rdx + pmovmskb %xmm4, %esi + orq %rcx, %rdx + pmovmskb %xmm2, %ecx + salq $16, %rsi + salq $48, %rcx + orq %r9, %rsi + orq %r8, %rsi + orq %rcx, %rsi + movl %edi, %ecx + subl %eax, %ecx + shrq %cl, %rdx + shrq %cl, %rsi + testq %rdx, %rdx + je L(loop_header2) + leaq -1(%rdx), %rax + xorq %rdx, %rax + andq %rax, %rsi + je L(exit) + bsrq %rsi, %rax + addq %rdi, %rax + ret END (strrchr) weak_alias (strrchr, rindex) |