summary | refs | log | tree | commit | diff
path: root/sysdeps
diff options
context:
space:
mode:
author: Andrew Senkevich <andrew.senkevich@intel.com> 2015-06-23 19:21:50 +0300
committer: Andrew Senkevich <andrew.senkevich@intel.com> 2015-06-23 19:21:50 +0300
commit: 5872b8352a8b6c0aa49c4e9f82bbda32becc5f02 (patch)
tree: 87f9545391602848e21061e6d3d7a808672beef7 /sysdeps
parent: 718d34a309493f8697ff9a8fefcbacbba12a2ccd (diff)
Combination of data tables for x86_64 vector functions sin, cos and sincos.
* sysdeps/x86_64/fpu/Makefile (libmvec-support): Fixed files list.
* sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S: Renamed variable and included header.
* sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_trig_data.S: New file.
* sysdeps/x86_64/fpu/svml_d_trig_data.h: Likewise.
* sysdeps/x86_64/fpu/svml_d_cos2_core.S: Removed unneeded include.
* sysdeps/x86_64/fpu/svml_d_cos4_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_cos8_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_cos_data.S: Removed file.
* sysdeps/x86_64/fpu/svml_d_cos_data.h: Likewise.
* sysdeps/x86_64/fpu/svml_d_sin_data.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sin_data.h: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos_data.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos_data.h: Likewise.
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/x86_64/fpu/Makefile 6
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S 4
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S 4
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S 6
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S 30
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S 18
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S 46
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S 4
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S 4
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S 6
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_cos2_core.S 1
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_cos4_core.S 1
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_cos8_core.S 1
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_cos_data.h 48
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_sin_data.S 82
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_sin_data.h 53
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_sincos_data.S 111
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_sincos_data.h 57
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_trig_data.S (renamed from sysdeps/x86_64/fpu/svml_d_cos_data.S) 58
-rw-r--r-- sysdeps/x86_64/fpu/svml_d_trig_data.h 72
20 files changed, 173 insertions, 439 deletions
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index c6912cbadd..c377100d1a 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -1,14 +1,14 @@
ifeq ($(subdir),mathvec)
libmvec-support += svml_d_cos2_core svml_d_cos4_core_avx \
svml_d_cos4_core svml_d_cos8_core \
- svml_d_cos_data svml_d_sin2_core svml_d_sin4_core_avx \
- svml_d_sin4_core svml_d_sin8_core svml_d_sin_data \
+ svml_d_sin2_core svml_d_sin4_core_avx \
+ svml_d_sin4_core svml_d_sin8_core svml_d_trig_data \
svml_s_cosf4_core svml_s_cosf8_core_avx \
svml_s_cosf8_core svml_s_cosf16_core svml_s_cosf_data \
svml_s_sinf4_core svml_s_sinf8_core_avx \
svml_s_sinf8_core svml_s_sinf16_core svml_s_sinf_data \
svml_d_sincos2_core svml_d_sincos4_core_avx \
- svml_d_sincos4_core svml_d_sincos8_core svml_d_sincos_data \
+ svml_d_sincos4_core svml_d_sincos8_core \
svml_d_log2_core svml_d_log4_core_avx svml_d_log4_core \
svml_d_log8_core svml_d_log_data svml_s_logf4_core \
svml_s_logf8_core_avx svml_s_logf8_core svml_s_logf16_core \
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
index 11348a37c5..4420edcae0 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_cos_data.h"
+#include "svml_d_trig_data.h"
.text
ENTRY (_ZGVbN2v_cos_sse4)
@@ -41,7 +41,7 @@ ENTRY (_ZGVbN2v_cos_sse4)
andq $-64, %rsp
subq $320, %rsp
movaps %xmm0, %xmm3
- movq __svml_dcos_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
movups __dHalfPI(%rax), %xmm2
/* ARGUMENT RANGE REDUCTION:
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
index f192ba022e..9a776e7df7 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_cos_data.h"
+#include "svml_d_trig_data.h"
.text
ENTRY (_ZGVdN4v_cos_avx2)
@@ -41,7 +41,7 @@ ENTRY (_ZGVdN4v_cos_avx2)
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $448, %rsp
- movq __svml_dcos_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
vmovapd %ymm0, %ymm1
vmovupd __dInvPI(%rax), %ymm4
vmovupd __dRShifter(%rax), %ymm5
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
index 14695ec3c9..b376155210 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_cos_data.h"
+#include "svml_d_trig_data.h"
#include "svml_d_wrapper_impl.h"
.text
@@ -45,7 +45,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $1280, %rsp
- movq __svml_dcos_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
/* R = X - N*Pi1 */
vmovaps %zmm0, %zmm7
@@ -259,7 +259,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $1280, %rsp
- movq __svml_dcos_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
/* R = X - N*Pi1 */
vmovaps %zmm0, %zmm8
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
index 4b4d8be272..3a1ccbf139 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_sin_data.h"
+#include "svml_d_trig_data.h"
.text
ENTRY (_ZGVbN2v_sin_sse4)
@@ -41,11 +41,11 @@ ENTRY (_ZGVbN2v_sin_sse4)
andq $-64, %rsp
subq $320, %rsp
movaps %xmm0, %xmm5
- movq __svml_dsin_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
movups __dAbsMask(%rax), %xmm3
/*
- * ARGUMENT RANGE REDUCTION:
- * X' = |X|
+ ARGUMENT RANGE REDUCTION:
+ X' = |X|
*/
movaps %xmm3, %xmm4
@@ -90,31 +90,31 @@ ENTRY (_ZGVbN2v_sin_sse4)
subpd %xmm1, %xmm0
/*
- * POLYNOMIAL APPROXIMATION:
- * R2 = R*R
+ POLYNOMIAL APPROXIMATION:
+ R2 = R*R
*/
movaps %xmm0, %xmm1
mulpd %xmm0, %xmm1
/* R = R^SignRes : update sign of reduced argument */
xorps %xmm2, %xmm0
- movups __dC7(%rax), %xmm2
+ movups __dC7_sin(%rax), %xmm2
mulpd %xmm1, %xmm2
- addpd __dC6(%rax), %xmm2
+ addpd __dC6_sin(%rax), %xmm2
mulpd %xmm1, %xmm2
- addpd __dC5(%rax), %xmm2
+ addpd __dC5_sin(%rax), %xmm2
mulpd %xmm1, %xmm2
- addpd __dC4(%rax), %xmm2
+ addpd __dC4_sin(%rax), %xmm2
/* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
mulpd %xmm1, %xmm2
- addpd __dC3(%rax), %xmm2
+ addpd __dC3_sin(%rax), %xmm2
/* Poly = R2*(C1+R2*(C2+R2*Poly)) */
mulpd %xmm1, %xmm2
- addpd __dC2(%rax), %xmm2
+ addpd __dC2_sin(%rax), %xmm2
mulpd %xmm1, %xmm2
- addpd __dC1(%rax), %xmm2
+ addpd __dC1_sin(%rax), %xmm2
mulpd %xmm2, %xmm1
/* Poly = Poly*R + R */
@@ -122,8 +122,8 @@ ENTRY (_ZGVbN2v_sin_sse4)
addpd %xmm1, %xmm0
/*
- * RECONSTRUCTION:
- * Final sign setting: Res = Poly^SignX
+ RECONSTRUCTION:
+ Final sign setting: Res = Poly^SignX
*/
xorps %xmm3, %xmm0
testl %ecx, %ecx
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
index e7e60d48c5..6bf8b32b4f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_sin_data.h"
+#include "svml_d_trig_data.h"
.text
ENTRY (_ZGVdN4v_sin_avx2)
@@ -40,7 +40,7 @@ ENTRY (_ZGVdN4v_sin_avx2)
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $448, %rsp
- movq __svml_dsin_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
vmovdqa %ymm0, %ymm4
vmovupd __dAbsMask(%rax), %ymm2
vmovupd __dInvPI(%rax), %ymm6
@@ -80,17 +80,17 @@ ENTRY (_ZGVdN4v_sin_avx2)
/* R = R^SignRes : update sign of reduced argument */
vxorpd %ymm5, %ymm1, %ymm6
- vmovupd __dC7(%rax), %ymm1
- vfmadd213pd __dC6(%rax), %ymm0, %ymm1
- vfmadd213pd __dC5(%rax), %ymm0, %ymm1
- vfmadd213pd __dC4(%rax), %ymm0, %ymm1
+ vmovupd __dC7_sin(%rax), %ymm1
+ vfmadd213pd __dC6_sin(%rax), %ymm0, %ymm1
+ vfmadd213pd __dC5_sin(%rax), %ymm0, %ymm1
+ vfmadd213pd __dC4_sin(%rax), %ymm0, %ymm1
/* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
- vfmadd213pd __dC3(%rax), %ymm0, %ymm1
+ vfmadd213pd __dC3_sin(%rax), %ymm0, %ymm1
/* Poly = R2*(C1+R2*(C2+R2*Poly)) */
- vfmadd213pd __dC2(%rax), %ymm0, %ymm1
- vfmadd213pd __dC1(%rax), %ymm0, %ymm1
+ vfmadd213pd __dC2_sin(%rax), %ymm0, %ymm1
+ vfmadd213pd __dC1_sin(%rax), %ymm0, %ymm1
/* SignX - sign bit of X */
vandnpd %ymm4, %ymm2, %ymm7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
index c01ad1f8e0..422f6e8b0f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_sin_data.h"
+#include "svml_d_trig_data.h"
#include "svml_d_wrapper_impl.h"
.text
@@ -45,18 +45,18 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $1280, %rsp
- movq __svml_dsin_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
movq $-1, %rdx
vmovups __dAbsMask(%rax), %zmm6
vmovups __dInvPI(%rax), %zmm1
/*
- * ARGUMENT RANGE REDUCTION:
- * X' = |X|
+ ARGUMENT RANGE REDUCTION:
+ X' = |X|
*/
vpandq %zmm6, %zmm0, %zmm12
vmovups __dPI1_FMA(%rax), %zmm2
- vmovups __dC7(%rax), %zmm7
+ vmovups __dC7_sin(%rax), %zmm7
/* SignX - sign bit of X */
vpandnq %zmm0, %zmm6, %zmm11
@@ -86,31 +86,31 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
vfnmadd132pd __dPI3_FMA(%rax), %zmm3, %zmm4
/*
- * POLYNOMIAL APPROXIMATION:
- * R2 = R*R
+ POLYNOMIAL APPROXIMATION:
+ R2 = R*R
*/
vmulpd %zmm4, %zmm4, %zmm8
/* R = R^SignRes : update sign of reduced argument */
vpxorq %zmm5, %zmm4, %zmm9
- vfmadd213pd __dC6(%rax), %zmm8, %zmm7
- vfmadd213pd __dC5(%rax), %zmm8, %zmm7
- vfmadd213pd __dC4(%rax), %zmm8, %zmm7
+ vfmadd213pd __dC6_sin(%rax), %zmm8, %zmm7
+ vfmadd213pd __dC5_sin(%rax), %zmm8, %zmm7
+ vfmadd213pd __dC4_sin(%rax), %zmm8, %zmm7
/* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
- vfmadd213pd __dC3(%rax), %zmm8, %zmm7
+ vfmadd213pd __dC3_sin(%rax), %zmm8, %zmm7
/* Poly = R2*(C1+R2*(C2+R2*Poly)) */
- vfmadd213pd __dC2(%rax), %zmm8, %zmm7
- vfmadd213pd __dC1(%rax), %zmm8, %zmm7
+ vfmadd213pd __dC2_sin(%rax), %zmm8, %zmm7
+ vfmadd213pd __dC1_sin(%rax), %zmm8, %zmm7
vmulpd %zmm8, %zmm7, %zmm10
/* Poly = Poly*R + R */
vfmadd213pd %zmm9, %zmm9, %zmm10
/*
- * RECONSTRUCTION:
- * Final sign setting: Res = Poly^SignX
+ RECONSTRUCTION:
+ Final sign setting: Res = Poly^SignX
*/
vpxorq %zmm11, %zmm10, %zmm1
testl %ecx, %ecx
@@ -260,13 +260,13 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $1280, %rsp
- movq __svml_dsin_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
vpbroadcastq .L_2il0floatpacket.14(%rip), %zmm14
vmovups __dAbsMask(%rax), %zmm7
vmovups __dInvPI(%rax), %zmm2
vmovups __dRShifter(%rax), %zmm1
vmovups __dPI1_FMA(%rax), %zmm3
- vmovups __dC7(%rax), %zmm8
+ vmovups __dC7_sin(%rax), %zmm8
/*
ARGUMENT RANGE REDUCTION:
@@ -305,16 +305,16 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
/* R = R^SignRes : update sign of reduced argument */
vxorpd %zmm6, %zmm5, %zmm10
- vfmadd213pd __dC6(%rax), %zmm9, %zmm8
- vfmadd213pd __dC5(%rax), %zmm9, %zmm8
- vfmadd213pd __dC4(%rax), %zmm9, %zmm8
+ vfmadd213pd __dC6_sin(%rax), %zmm9, %zmm8
+ vfmadd213pd __dC5_sin(%rax), %zmm9, %zmm8
+ vfmadd213pd __dC4_sin(%rax), %zmm9, %zmm8
/* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
- vfmadd213pd __dC3(%rax), %zmm9, %zmm8
+ vfmadd213pd __dC3_sin(%rax), %zmm9, %zmm8
/* Poly = R2*(C1+R2*(C2+R2*Poly)) */
- vfmadd213pd __dC2(%rax), %zmm9, %zmm8
- vfmadd213pd __dC1(%rax), %zmm9, %zmm8
+ vfmadd213pd __dC2_sin(%rax), %zmm9, %zmm8
+ vfmadd213pd __dC1_sin(%rax), %zmm9, %zmm8
vmulpd %zmm9, %zmm8, %zmm11
/* Poly = Poly*R + R */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
index 0b37c7cdf0..b504d1d732 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_sincos_data.h"
+#include "svml_d_trig_data.h"
.text
ENTRY (_ZGVbN2vvv_sincos_sse4)
@@ -43,7 +43,7 @@ ENTRY (_ZGVbN2vvv_sincos_sse4)
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $320, %rsp
- movq __svml_dsincos_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
movups %xmm11, 160(%rsp)
movups %xmm12, 144(%rsp)
movups __dSignMask(%rax), %xmm11
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
index ec1ccc6357..dca5604111 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_sincos_data.h"
+#include "svml_d_trig_data.h"
.text
ENTRY (_ZGVdN4vvv_sincos_avx2)
@@ -43,7 +43,7 @@ ENTRY (_ZGVdN4vvv_sincos_avx2)
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $448, %rsp
- movq __svml_dsincos_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
vmovups %ymm14, 288(%rsp)
vmovups %ymm8, 352(%rsp)
vmovupd __dSignMask(%rax), %ymm6
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
index fcbf3935f1..e8388325f7 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
@@ -17,7 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_sincos_data.h"
+#include "svml_d_trig_data.h"
#include "svml_d_wrapper_impl.h"
/*
@@ -47,7 +47,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $1344, %rsp
- movq __svml_dsincos_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
vmovaps %zmm0, %zmm4
movq $-1, %rdx
vmovups __dSignMask(%rax), %zmm12
@@ -317,7 +317,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $1344, %rsp
- movq __svml_dsincos_data@GOTPCREL(%rip), %rax
+ movq __svml_d_trig_data@GOTPCREL(%rip), %rax
vmovaps %zmm0, %zmm8
vmovups __dSignMask(%rax), %zmm4
vmovups __dInvPI(%rax), %zmm9
diff --git a/sysdeps/x86_64/fpu/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/svml_d_cos2_core.S
index a1c5bee935..a26beca4a1 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos2_core.S
@@ -17,7 +17,6 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_cos_data.h"
#include "svml_d_wrapper_impl.h"
.text
diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
index a505b44cc2..35996b7318 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
@@ -17,7 +17,6 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_cos_data.h"
#include "svml_d_wrapper_impl.h"
.text
diff --git a/sysdeps/x86_64/fpu/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/svml_d_cos8_core.S
index c7507dbef0..1ba10e8c9b 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos8_core.S
@@ -17,7 +17,6 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include "svml_d_cos_data.h"
#include "svml_d_wrapper_impl.h"
.text
diff --git a/sysdeps/x86_64/fpu/svml_d_cos_data.h b/sysdeps/x86_64/fpu/svml_d_cos_data.h
deleted file mode 100644
index 4d28e6eda5..0000000000
--- a/sysdeps/x86_64/fpu/svml_d_cos_data.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Offsets for data table for vectorized cos.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef D_COS_DATA_H
-#define D_COS_DATA_H
-
-#define __dAbsMask 0
-#define __dRangeVal 64
-#define __dHalfPI 128
-#define __dInvPI 192
-#define __dRShifter 256
-#define __dOneHalf 320
-#define __dPI1 384
-#define __dPI2 448
-#define __dPI3 512
-#define __dPI4 576
-#define __dPI1_FMA 640
-#define __dPI2_FMA 704
-#define __dPI3_FMA 768
-#define __dC1 832
-#define __dC2 896
-#define __dC3 960
-#define __dC4 1024
-#define __dC5 1088
-#define __dC6 1152
-#define __dC7 1216
-#define __dAbsMask_la 1280
-#define __dInvPI_la 1344
-#define __dRShifter_la 1408
-#define __dRShifterm5_la 1472
-#define __dRXmax_la 1536
-
-#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_sin_data.S b/sysdeps/x86_64/fpu/svml_d_sin_data.S
deleted file mode 100644
index e5e1ff721c..0000000000
--- a/sysdeps/x86_64/fpu/svml_d_sin_data.S
+++ /dev/null
@@ -1,82 +0,0 @@
-/* Data for vectorized sin.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include "svml_d_sin_data.h"
-
- .section .rodata, "a"
- .align 64
-
-/* Data table for vector implementations of function sin.
- The table may contain polynomial, reduction, lookup coefficients and other macro_names
- obtained through different methods of research and experimental work. */
-
- .globl __svml_dsin_data
-__svml_dsin_data:
-
-/* General purpose constants:
- absolute value mask */
-double_vector __dAbsMask 0x7fffffffffffffff
-
-/* working range threshold */
-double_vector __dRangeVal 0x4170000000000000
-
-/* 1/PI */
-double_vector __dInvPI 0x3fd45f306dc9c883
-
-/* right-shifter constant */
-double_vector __dRShifter 0x4338000000000000
-
-/* 0.0 */
-double_vector __dZero 0x0000000000000000
-
-/* -0.0 */
-double_vector __lNZero 0x8000000000000000
-
-/* Range reduction PI-based constants:
- PI high part */
-double_vector __dPI1 0x400921fb40000000
-
-/* PI mid part 1 */
-double_vector __dPI2 0x3e84442d00000000
-
-/* PI mid part 2 */
-double_vector __dPI3 0x3d08469880000000
-
-/* PI low part */
-double_vector __dPI4 0x3b88cc51701b839a
-
-/* Range reduction PI-based constants if FMA available:
- PI high part (FMA available) */
-double_vector __dPI1_FMA 0x400921fb54442d18
-
-/* PI mid part (FMA available) */
-double_vector __dPI2_FMA 0x3ca1a62633145c06
-
-/* PI low part (FMA available) */
-double_vector __dPI3_FMA 0x395c1cd129024e09
-
-/* Polynomial coefficients (relative error 2^(-52.115)): */
-double_vector __dC1 0xbfc55555555554a8
-double_vector __dC2 0x3f8111111110a573
-double_vector __dC3 0xbf2a01a019a659dd
-double_vector __dC4 0x3ec71de3806add1a
-double_vector __dC5 0xbe5ae6355aaa4a53
-double_vector __dC6 0x3de60e6bee01d83e
-double_vector __dC7 0xbd69f1517e9f65f0
- .type __svml_dsin_data,@object
- .size __svml_dsin_data,.-__svml_dsin_data
diff --git a/sysdeps/x86_64/fpu/svml_d_sin_data.h b/sysdeps/x86_64/fpu/svml_d_sin_data.h
deleted file mode 100644
index 76ab508c15..0000000000
--- a/sysdeps/x86_64/fpu/svml_d_sin_data.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Offsets for data table for vectorized sin.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef D_SIN_DATA_H
-#define D_SIN_DATA_H
-
-/* Offsets for data table */
-#define __dAbsMask 0
-#define __dRangeVal 64
-#define __dInvPI 128
-#define __dRShifter 192
-#define __dZero 256
-#define __lNZero 320
-#define __dPI1 384
-#define __dPI2 448
-#define __dPI3 512
-#define __dPI4 576
-#define __dPI1_FMA 640
-#define __dPI2_FMA 704
-#define __dPI3_FMA 768
-#define __dC1 832
-#define __dC2 896
-#define __dC3 960
-#define __dC4 1024
-#define __dC5 1088
-#define __dC6 1152
-#define __dC7 1216
-
-.macro double_vector offset value
-.if .-__svml_dsin_data != \offset
-.err
-.endif
-.rept 8
-.quad \value
-.endr
-.endm
-
-#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos_data.S b/sysdeps/x86_64/fpu/svml_d_sincos_data.S
deleted file mode 100644
index 6749ba62fd..0000000000
--- a/sysdeps/x86_64/fpu/svml_d_sincos_data.S
+++ /dev/null
@@ -1,111 +0,0 @@
-/* Data for function sincos.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include "svml_d_sincos_data.h"
-
- .section .rodata, "a"
- .align 64
-
-/* Data table for vector implementations of function sincos.
- The table may contain polynomial, reduction, lookup coefficients and
- other coefficients obtained through different methods of research and
- experimental work. */
-
- .globl __svml_dsincos_data
-__svml_dsincos_data:
-
-/* General purpose constants:
- dSignMask */
-double_vector __dSignMask 0x8000000000000000
-
-/* dAbsMask */
-double_vector __dAbsMask 0x7fffffffffffffff
-
-/* lRangeVal */
-double_vector __dRangeVal 0x4160000000000000
-
-/* HalfPI */
-double_vector __dHalfPI 0x3ff921fb54442d18
-
-/* InvPI */
-double_vector __dInvPI 0x3fd45f306dc9c883
-
-/* dRShifter */
-double_vector __dRShifter 0x4338000000000000
-
-/* dOneHalf */
-double_vector __dOneHalf 0x3fe0000000000000
-
-/* Range reduction PI-based constants:
- PI1 */
-double_vector __dPI1 0x400921fb40000000
-
-/* PI2 */
-double_vector __dPI2 0x3e84442d00000000
-
-/* PI3 */
-double_vector __dPI3 0x3d08469880000000
-
-/* PI4 */
-double_vector __dPI4 0x3b88cc51701b839a
-
-/* Range reduction PI-based constants if FMA available:
- PI1_FMA */
-double_vector __dPI1_FMA 0x400921fb54442d18
-
-/* PI2_FMA */
-double_vector __dPI2_FMA 0x3ca1a62633145c06
-
-/* PI3_FMA */
-double_vector __dPI3_FMA 0x395c1cd129024e09
-
-/* HalfPI1 */
-double_vector __dHalfPI1 0x3ff921fc00000000
-
-/* HalfPI2 */
-double_vector __dHalfPI2 0xbea5777a00000000
-
-/* HalfPI3 */
-double_vector __dHalfPI3 0xbd473dcc00000000
-
-/* HalfPI4 */
-double_vector __dHalfPI4 0x3bf898cc51701b84
-
-/* Polynomial coefficients (relative error 2^(-52.115)):
- C1 */
-double_vector __dC1 0xbfc55555555554a7
-
-/* C2 */
-double_vector __dC2 0x3f8111111110a4a8
-
-/* C3 */
-double_vector __dC3 0xbf2a01a019a5b86d
-
-/* C4 */
-double_vector __dC4 0x3ec71de38030fea0
-
-/* C5 */
-double_vector __dC5 0xbe5ae63546002231
-
-/* C6 */
-double_vector __dC6 0x3de60e6857a2f220
-
-/* C7 */
-double_vector __dC7 0xbd69f0d60811aac8
- .type __svml_dsincos_data,@object
- .size __svml_dsincos_data,.-__svml_dsincos_data
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos_data.h b/sysdeps/x86_64/fpu/svml_d_sincos_data.h
deleted file mode 100644
index cc316dc329..0000000000
--- a/sysdeps/x86_64/fpu/svml_d_sincos_data.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Offsets for data table for function sincos.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef D_SINCOS_DATA_H
-#define D_SINCOS_DATA_H
-
-#define __dSignMask 0
-#define __dAbsMask 64
-#define __dRangeVal 128
-#define __dHalfPI 192
-#define __dInvPI 256
-#define __dRShifter 320
-#define __dOneHalf 384
-#define __dPI1 448
-#define __dPI2 512
-#define __dPI3 576
-#define __dPI4 640
-#define __dPI1_FMA 704
-#define __dPI2_FMA 768
-#define __dPI3_FMA 832
-#define __dHalfPI1 896
-#define __dHalfPI2 960
-#define __dHalfPI3 1024
-#define __dHalfPI4 1088
-#define __dC1 1152
-#define __dC2 1216
-#define __dC3 1280
-#define __dC4 1344
-#define __dC5 1408
-#define __dC6 1472
-#define __dC7 1536
-
-.macro double_vector offset value
-.if .-__svml_dsincos_data != \offset
-.err
-.endif
-.rept 8
-.quad \value
-.endr
-.endm
-
-#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_cos_data.S b/sysdeps/x86_64/fpu/svml_d_trig_data.S
index c9bfd63840..d3b30598cc 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_trig_data.S
@@ -1,4 +1,4 @@
-/* Data for vectorized cos.
+/* Data for vectorized sin, cos, sincos.
Copyright (C) 2014-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,27 +16,18 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include "svml_d_cos_data.h"
-
-.macro double_vector offset value
-.if .-__svml_dcos_data != \offset
-.err
-.endif
-.rept 8
-.quad \value
-.endr
-.endm
+#include "svml_d_trig_data.h"
.section .rodata, "a"
.align 64
-/* Data table for vector implementations of function cos.
+/* Data table for vector implementations.
The table may contain polynomial, reduction, lookup
coefficients and other constants obtained through different
methods of research and experimental work.
*/
- .globl __svml_dcos_data
-__svml_dcos_data:
+ .globl __svml_d_trig_data
+__svml_d_trig_data:
/* General purpose constants:
absolute value mask
@@ -46,6 +37,9 @@ double_vector __dAbsMask 0x7fffffffffffffff
/* working range threshold */
double_vector __dRangeVal 0x4160000000000000
+/* working range threshold for sin */
+double_vector __dRangeVal_sin 0x4170000000000000
+
/* PI/2 */
double_vector __dHalfPI 0x3ff921fb54442d18
@@ -55,6 +49,12 @@ double_vector __dInvPI 0x3fd45f306dc9c883
/* right-shifter constant */
double_vector __dRShifter 0x4338000000000000
+/* 0.0 */
+double_vector __dZero 0x0000000000000000
+
+/* -0.0 */
+double_vector __lNZero 0x8000000000000000
+
/* 0.5 */
double_vector __dOneHalf 0x3fe0000000000000
@@ -83,6 +83,18 @@ double_vector __dPI2_FMA 0x3ca1a62633145c06
/* PI low part (FMA available) */
double_vector __dPI3_FMA 0x395c1cd129024e09
+/* HalfPI1 */
+double_vector __dHalfPI1 0x3ff921fc00000000
+
+/* HalfPI2 */
+double_vector __dHalfPI2 0xbea5777a00000000
+
+/* HalfPI3 */
+double_vector __dHalfPI3 0xbd473dcc00000000
+
+/* HalfPI4 */
+double_vector __dHalfPI4 0x3bf898cc51701b84
+
/* Polynomial coefficients (relative error 2^(-52.115)): */
double_vector __dC1 0xbfc55555555554a7
double_vector __dC2 0x3f8111111110a4a8
@@ -92,15 +104,19 @@ double_vector __dC5 0xbe5ae63546002231
double_vector __dC6 0x3de60e6857a2f220
double_vector __dC7 0xbd69f0d60811aac8
+/* Polynomial coefficients for sin (relative error 2^(-52.115)): */
+double_vector __dC1_sin 0xbfc55555555554a8
+double_vector __dC2_sin 0x3f8111111110a573
+double_vector __dC3_sin 0xbf2a01a019a659dd
+double_vector __dC4_sin 0x3ec71de3806add1a
+double_vector __dC5_sin 0xbe5ae6355aaa4a53
+double_vector __dC6_sin 0x3de60e6bee01d83e
+double_vector __dC7_sin 0xbd69f1517e9f65f0
+
/*
Additional constants:
absolute value mask
*/
-double_vector __dAbsMask_la 0x7fffffffffffffff
-
-/* 1/PI */
-double_vector __dInvPI_la 0x3fd45f306dc9c883
-
/* right-shifer for low accuracy version */
double_vector __dRShifter_la 0x4330000000000000
@@ -110,5 +126,5 @@ double_vector __dRShifterm5_la 0x432fffffffffffff
/* right-shifer with low mask for low accuracy version */
double_vector __dRXmax_la 0x43300000007ffffe
- .type __svml_dcos_data,@object
- .size __svml_dcos_data,.-__svml_dcos_data
+ .type __svml_d_trig_data,@object
+ .size __svml_d_trig_data,.-__svml_d_trig_data
diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.h b/sysdeps/x86_64/fpu/svml_d_trig_data.h
new file mode 100644
index 0000000000..1395337c7e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_trig_data.h
@@ -0,0 +1,72 @@
+/* Offsets for data table for vectorized sin, cos, sincos.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef D_TRIG_DATA_H
+#define D_TRIG_DATA_H
+
+#define __dAbsMask 0
+#define __dRangeVal 64
+#define __dRangeVal_sin 64*2
+#define __dHalfPI 64*3
+#define __dInvPI 64*4
+#define __dRShifter 64*5
+#define __dZero 64*6
+#define __lNZero 64*7
+#define __dOneHalf 64*8
+#define __dPI1 64*9
+#define __dPI2 64*10
+#define __dPI3 64*11
+#define __dPI4 64*12
+#define __dPI1_FMA 64*13
+#define __dPI2_FMA 64*14
+#define __dPI3_FMA 64*15
+#define __dHalfPI1 64*16
+#define __dHalfPI2 64*17
+#define __dHalfPI3 64*18
+#define __dHalfPI4 64*19
+#define __dC1 64*20
+#define __dC2 64*21
+#define __dC3 64*22
+#define __dC4 64*23
+#define __dC5 64*24
+#define __dC6 64*25
+#define __dC7 64*26
+#define __dC1_sin 64*27
+#define __dC2_sin 64*28
+#define __dC3_sin 64*29
+#define __dC4_sin 64*30
+#define __dC5_sin 64*31
+#define __dC6_sin 64*32
+#define __dC7_sin 64*33
+#define __dRShifter_la 64*34
+#define __dRShifterm5_la 64*35
+#define __dRXmax_la 64*36
+#define __dAbsMask_la __dAbsMask
+#define __dInvPI_la __dInvPI
+#define __dSignMask __lNZero
+
+.macro double_vector offset value
+.if .-__svml_d_trig_data != \offset
+.err
+.endif
+.rept 8
+.quad \value
+.endr
+.endm
+
+#endif