Diffstat (limited to 'sysdeps/ieee754')
-rw-r--r--  sysdeps/ieee754/bits/nan.h              |  10
-rw-r--r--  sysdeps/ieee754/dbl-64/branred.c        |   6
-rw-r--r--  sysdeps/ieee754/dbl-64/dosincos.c       |   6
-rw-r--r--  sysdeps/ieee754/dbl-64/e_asin.c         |   9
-rw-r--r--  sysdeps/ieee754/dbl-64/e_atan2.c        | 813
-rw-r--r--  sysdeps/ieee754/dbl-64/e_exp.c          |   6
-rw-r--r--  sysdeps/ieee754/dbl-64/e_j0.c           |   6
-rw-r--r--  sysdeps/ieee754/dbl-64/e_j1.c           |   6
-rw-r--r--  sysdeps/ieee754/dbl-64/e_log.c          |   3
-rw-r--r--  sysdeps/ieee754/dbl-64/e_pow.c          |  18
-rw-r--r--  sysdeps/ieee754/dbl-64/e_remainder.c    |   6
-rw-r--r--  sysdeps/ieee754/dbl-64/mpa-arch.h       |  47
-rw-r--r--  sysdeps/ieee754/dbl-64/mpa.c            | 517
-rw-r--r--  sysdeps/ieee754/dbl-64/mpa.h            |  28
-rw-r--r--  sysdeps/ieee754/dbl-64/mpatan.c         | 107
-rw-r--r--  sysdeps/ieee754/dbl-64/mpatan2.c        |  45
-rw-r--r--  sysdeps/ieee754/dbl-64/mpexp.c          |  57
-rw-r--r--  sysdeps/ieee754/dbl-64/mplog.c          |  44
-rw-r--r--  sysdeps/ieee754/dbl-64/mpsqrt.c         |  78
-rw-r--r--  sysdeps/ieee754/dbl-64/mptan.c          |  28
-rw-r--r--  sysdeps/ieee754/dbl-64/s_atan.c         |   9
-rw-r--r--  sysdeps/ieee754/dbl-64/s_sin.c          |  10
-rw-r--r--  sysdeps/ieee754/dbl-64/s_tan.c          |   3
-rw-r--r--  sysdeps/ieee754/dbl-64/sincos32.c       |  28
-rw-r--r--  sysdeps/ieee754/dbl-64/slowexp.c        |  64
-rw-r--r--  sysdeps/ieee754/dbl-64/slowpow.c        |  96
-rw-r--r--  sysdeps/ieee754/dbl-64/x2y2m1.c         |   2
-rw-r--r--  sysdeps/ieee754/ldbl-128/e_j0l.c        |  75
-rw-r--r--  sysdeps/ieee754/ldbl-128/e_j1l.c        |  76
-rw-r--r--  sysdeps/ieee754/ldbl-128ibm/e_acoshl.c  |   2
-rw-r--r--  sysdeps/ieee754/ldbl-128ibm/e_expl.c    |   4
-rw-r--r--  sysdeps/ieee754/ldbl-128ibm/e_logl.c    |  13
-rw-r--r--  sysdeps/ieee754/ldbl-128ibm/math_ldbl.h |  10
-rw-r--r--  sysdeps/ieee754/ldbl-128ibm/w_expl.c    |  26
-rw-r--r--  sysdeps/ieee754/ldbl-96/e_j1l.c         |   2
35 files changed, 1367 insertions(+), 893 deletions(-)
diff --git a/sysdeps/ieee754/bits/nan.h b/sysdeps/ieee754/bits/nan.h
index d3ab38ba72..41f47ba097 100644
--- a/sysdeps/ieee754/bits/nan.h
+++ b/sysdeps/ieee754/bits/nan.h
@@ -39,14 +39,14 @@
# include <endian.h>
# if __BYTE_ORDER == __BIG_ENDIAN
-# define __nan_bytes { 0x7f, 0xc0, 0, 0 }
+# define __qnan_bytes { 0x7f, 0xc0, 0, 0 }
# endif
# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define __nan_bytes { 0, 0, 0xc0, 0x7f }
+# define __qnan_bytes { 0, 0, 0xc0, 0x7f }
# endif
-static union { unsigned char __c[4]; float __d; } __nan_union
- __attribute_used__ = { __nan_bytes };
-# define NAN (__nan_union.__d)
+static union { unsigned char __c[4]; float __d; } __qnan_union
+ __attribute__ ((__unused__)) = { __qnan_bytes };
+# define NAN (__qnan_union.__d)
#endif /* GCC. */
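The renamed __qnan_bytes arrays spell out a single-precision quiet NaN: exponent field all ones, most significant mantissa bit set, sign clear. A minimal standalone sketch of the same byte pattern, assuming a little-endian host (illustration only, not the installed header):

#include <math.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* Byte image of a float quiet NaN on a little-endian machine:
     0x7fc00000 stored least significant byte first.  */
  static const unsigned char qnan_bytes[4] = { 0, 0, 0xc0, 0x7f };
  float f;
  memcpy (&f, qnan_bytes, sizeof f);
  printf ("isnan (f) = %d\n", isnan (f));   /* prints 1 */
  return 0;
}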
diff --git a/sysdeps/ieee754/dbl-64/branred.c b/sysdeps/ieee754/dbl-64/branred.c
index ecceeca634..524d091dc3 100644
--- a/sysdeps/ieee754/dbl-64/branred.c
+++ b/sysdeps/ieee754/dbl-64/branred.c
@@ -53,13 +53,7 @@ SECTION
__branred(double x, double *a, double *aa)
{
int i,k;
-#if 0
- int n;
-#endif
mynumber u,gor;
-#if 0
- mynumber v;
-#endif
double r[6],s,t,sum,b,bb,sum1,sum2,b1,bb1,b2,bb2,x1,x2,t1,t2;
x*=tm600.x;
diff --git a/sysdeps/ieee754/dbl-64/dosincos.c b/sysdeps/ieee754/dbl-64/dosincos.c
index bbef186ad1..00726285a9 100644
--- a/sysdeps/ieee754/dbl-64/dosincos.c
+++ b/sysdeps/ieee754/dbl-64/dosincos.c
@@ -63,9 +63,6 @@ __dubsin(double x, double dx, double v[]) {
#ifndef DLA_FMS
double p,hx,tx,hy,ty,q;
#endif
-#if 0
- double xx,y,yy,z,zz;
-#endif
mynumber u;
int4 k;
@@ -119,9 +116,6 @@ __dubcos(double x, double dx, double v[]) {
#ifndef DLA_FMS
double p,hx,tx,hy,ty,q;
#endif
-#if 0
- double xx,y,yy,z,zz;
-#endif
mynumber u;
int4 k;
u.x=x+big.x;
diff --git a/sysdeps/ieee754/dbl-64/e_asin.c b/sysdeps/ieee754/dbl-64/e_asin.c
index 5585ad1d0d..27cd2a10d3 100644
--- a/sysdeps/ieee754/dbl-64/e_asin.c
+++ b/sysdeps/ieee754/dbl-64/e_asin.c
@@ -62,9 +62,6 @@ __ieee754_asin(double x){
double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2];
mynumber u,v;
int4 k,m,n;
-#if 0
- int4 nn;
-#endif
u.x = x;
m = u.i[HIGH_HALF];
@@ -344,14 +341,8 @@ SECTION
__ieee754_acos(double x)
{
double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2],eps;
-#if 0
- double fc;
-#endif
mynumber u,v;
int4 k,m,n;
-#if 0
- int4 nn;
-#endif
u.x = x;
m = u.i[HIGH_HALF];
k = 0x7fffffff&m;
diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c
index ee3215ed3f..bfe0b3b632 100644
--- a/sysdeps/ieee754/dbl-64/e_atan2.c
+++ b/sysdeps/ieee754/dbl-64/e_atan2.c
@@ -53,105 +53,171 @@
/* Assumption: Machine arithmetic operations are performed in */
/* round to nearest mode of IEEE 754 standard. */
/************************************************************************/
-static double atan2Mp(double ,double ,const int[]);
+static double atan2Mp (double, double, const int[]);
/* Fix the sign and return after stage 1 or stage 2 */
-static double signArctan2(double y,double z)
+static double
+signArctan2 (double y, double z)
{
- return __copysign(z, y);
+ return __copysign (z, y);
}
-static double normalized(double ,double,double ,double);
-void __mpatan2(mp_no *,mp_no *,mp_no *,int);
+
+static double normalized (double, double, double, double);
+void __mpatan2 (mp_no *, mp_no *, mp_no *, int);
double
SECTION
-__ieee754_atan2(double y,double x) {
-
- int i,de,ux,dx,uy,dy;
-#if 0
- int p;
-#endif
- static const int pr[MM]={6,8,10,20,32};
- double ax,ay,u,du,u9,ua,v,vv,dv,t1,t2,t3,t7,t8,
- z,zz,cor,s1,ss1,s2,ss2;
+__ieee754_atan2 (double y, double x)
+{
+ int i, de, ux, dx, uy, dy;
+ static const int pr[MM] = { 6, 8, 10, 20, 32 };
+ double ax, ay, u, du, u9, ua, v, vv, dv, t1, t2, t3, t7, t8,
+ z, zz, cor, s1, ss1, s2, ss2;
#ifndef DLA_FMS
- double t4,t5,t6;
-#endif
-#if 0
- double z1,z2;
+ double t4, t5, t6;
#endif
number num;
-#if 0
- mp_no mperr,mpt1,mpx,mpy,mpz,mpz1,mpz2;
-#endif
- static const int ep= 59768832, /* 57*16**5 */
- em=-59768832; /* -57*16**5 */
+ static const int ep = 59768832, /* 57*16**5 */
+ em = -59768832; /* -57*16**5 */
/* x=NaN or y=NaN */
- num.d = x; ux = num.i[HIGH_HALF]; dx = num.i[LOW_HALF];
- if ((ux&0x7ff00000) ==0x7ff00000) {
- if (((ux&0x000fffff)|dx)!=0x00000000) return x+x; }
- num.d = y; uy = num.i[HIGH_HALF]; dy = num.i[LOW_HALF];
- if ((uy&0x7ff00000) ==0x7ff00000) {
- if (((uy&0x000fffff)|dy)!=0x00000000) return y+y; }
+ num.d = x;
+ ux = num.i[HIGH_HALF];
+ dx = num.i[LOW_HALF];
+ if ((ux & 0x7ff00000) == 0x7ff00000)
+ {
+ if (((ux & 0x000fffff) | dx) != 0x00000000)
+ return x + x;
+ }
+ num.d = y;
+ uy = num.i[HIGH_HALF];
+ dy = num.i[LOW_HALF];
+ if ((uy & 0x7ff00000) == 0x7ff00000)
+ {
+ if (((uy & 0x000fffff) | dy) != 0x00000000)
+ return y + y;
+ }
/* y=+-0 */
- if (uy==0x00000000) {
- if (dy==0x00000000) {
- if ((ux&0x80000000)==0x00000000) return ZERO;
- else return opi.d; } }
- else if (uy==0x80000000) {
- if (dy==0x00000000) {
- if ((ux&0x80000000)==0x00000000) return MZERO;
- else return mopi.d;} }
+ if (uy == 0x00000000)
+ {
+ if (dy == 0x00000000)
+ {
+ if ((ux & 0x80000000) == 0x00000000)
+ return ZERO;
+ else
+ return opi.d;
+ }
+ }
+ else if (uy == 0x80000000)
+ {
+ if (dy == 0x00000000)
+ {
+ if ((ux & 0x80000000) == 0x00000000)
+ return MZERO;
+ else
+ return mopi.d;
+ }
+ }
/* x=+-0 */
- if (x==ZERO) {
- if ((uy&0x80000000)==0x00000000) return hpi.d;
- else return mhpi.d; }
+ if (x == ZERO)
+ {
+ if ((uy & 0x80000000) == 0x00000000)
+ return hpi.d;
+ else
+ return mhpi.d;
+ }
/* x=+-INF */
- if (ux==0x7ff00000) {
- if (dx==0x00000000) {
- if (uy==0x7ff00000) {
- if (dy==0x00000000) return qpi.d; }
- else if (uy==0xfff00000) {
- if (dy==0x00000000) return mqpi.d; }
- else {
- if ((uy&0x80000000)==0x00000000) return ZERO;
- else return MZERO; }
+ if (ux == 0x7ff00000)
+ {
+ if (dx == 0x00000000)
+ {
+ if (uy == 0x7ff00000)
+ {
+ if (dy == 0x00000000)
+ return qpi.d;
+ }
+ else if (uy == 0xfff00000)
+ {
+ if (dy == 0x00000000)
+ return mqpi.d;
+ }
+ else
+ {
+ if ((uy & 0x80000000) == 0x00000000)
+ return ZERO;
+ else
+ return MZERO;
+ }
+ }
}
- }
- else if (ux==0xfff00000) {
- if (dx==0x00000000) {
- if (uy==0x7ff00000) {
- if (dy==0x00000000) return tqpi.d; }
- else if (uy==0xfff00000) {
- if (dy==0x00000000) return mtqpi.d; }
- else {
- if ((uy&0x80000000)==0x00000000) return opi.d;
- else return mopi.d; }
+ else if (ux == 0xfff00000)
+ {
+ if (dx == 0x00000000)
+ {
+ if (uy == 0x7ff00000)
+ {
+ if (dy == 0x00000000)
+ return tqpi.d;
+ }
+ else if (uy == 0xfff00000)
+ {
+ if (dy == 0x00000000)
+ return mtqpi.d;
+ }
+ else
+ {
+ if ((uy & 0x80000000) == 0x00000000)
+ return opi.d;
+ else
+ return mopi.d;
+ }
+ }
}
- }
/* y=+-INF */
- if (uy==0x7ff00000) {
- if (dy==0x00000000) return hpi.d; }
- else if (uy==0xfff00000) {
- if (dy==0x00000000) return mhpi.d; }
+ if (uy == 0x7ff00000)
+ {
+ if (dy == 0x00000000)
+ return hpi.d;
+ }
+ else if (uy == 0xfff00000)
+ {
+ if (dy == 0x00000000)
+ return mhpi.d;
+ }
/* either x/y or y/x is very close to zero */
- ax = (x<ZERO) ? -x : x; ay = (y<ZERO) ? -y : y;
+ ax = (x < ZERO) ? -x : x;
+ ay = (y < ZERO) ? -y : y;
de = (uy & 0x7ff00000) - (ux & 0x7ff00000);
- if (de>=ep) { return ((y>ZERO) ? hpi.d : mhpi.d); }
- else if (de<=em) {
- if (x>ZERO) {
- if ((z=ay/ax)<TWOM1022) return normalized(ax,ay,y,z);
- else return signArctan2(y,z); }
- else { return ((y>ZERO) ? opi.d : mopi.d); } }
+ if (de >= ep)
+ {
+ return ((y > ZERO) ? hpi.d : mhpi.d);
+ }
+ else if (de <= em)
+ {
+ if (x > ZERO)
+ {
+ if ((z = ay / ax) < TWOM1022)
+ return normalized (ax, ay, y, z);
+ else
+ return signArctan2 (y, z);
+ }
+ else
+ {
+ return ((y > ZERO) ? opi.d : mopi.d);
+ }
+ }
/* if either x or y is extremely close to zero, scale abs(x), abs(y). */
- if (ax<twom500.d || ay<twom500.d) { ax*=two500.d; ay*=two500.d; }
+ if (ax < twom500.d || ay < twom500.d)
+ {
+ ax *= two500.d;
+ ay *= two500.d;
+ }
/* Likewise for large x and y. */
if (ax > two500.d || ay > two500.d)
@@ -161,268 +227,377 @@ __ieee754_atan2(double y,double x) {
}
/* x,y which are neither special nor extreme */
- if (ay<ax) {
- u=ay/ax;
- EMULV(ax,u,v,vv,t1,t2,t3,t4,t5)
- du=((ay-v)-vv)/ax; }
- else {
- u=ax/ay;
- EMULV(ay,u,v,vv,t1,t2,t3,t4,t5)
- du=((ax-v)-vv)/ay; }
-
- if (x>ZERO) {
-
- /* (i) x>0, abs(y)< abs(x): atan(ay/ax) */
- if (ay<ax) {
- if (u<inv16.d) {
- v=u*u; zz=du+u*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
- if ((z=u+(zz-u1.d*u)) == u+(zz+u1.d*u)) return signArctan2(y,z);
-
- MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
- s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- MUL2(u,du,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(u,du,s2,ss2,s1,ss1,t1,t2)
- if ((z=s1+(ss1-u5.d*s1)) == s1+(ss1+u5.d*s1)) return signArctan2(y,z);
- return atan2Mp(x,y,pr);
- }
- else {
- i=(TWO52+TWO8*u)-TWO52; i-=16;
- t3=u-cij[i][0].d;
- EADD(t3,du,v,dv)
- t1=cij[i][1].d; t2=cij[i][2].d;
- zz=v*t2+(dv*t2+v*v*(cij[i][3].d+v*(cij[i][4].d+
- v*(cij[i][5].d+v* cij[i][6].d))));
- if (i<112) {
- if (i<48) u9=u91.d; /* u < 1/4 */
- else u9=u92.d; } /* 1/4 <= u < 1/2 */
- else {
- if (i<176) u9=u93.d; /* 1/2 <= u < 3/4 */
- else u9=u94.d; } /* 3/4 <= u <= 1 */
- if ((z=t1+(zz-u9*t1)) == t1+(zz+u9*t1)) return signArctan2(y,z);
-
- t1=u-hij[i][0].d;
- EADD(t1,du,v,vv)
- s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
- v*(hij[i][14].d+v* hij[i][15].d))));
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
- if ((z=s2+(ss2-ub.d*s2)) == s2+(ss2+ub.d*s2)) return signArctan2(y,z);
- return atan2Mp(x,y,pr);
- }
+ if (ay < ax)
+ {
+ u = ay / ax;
+ EMULV (ax, u, v, vv, t1, t2, t3, t4, t5);
+ du = ((ay - v) - vv) / ax;
+ }
+ else
+ {
+ u = ax / ay;
+ EMULV (ay, u, v, vv, t1, t2, t3, t4, t5);
+ du = ((ax - v) - vv) / ay;
}
- /* (ii) x>0, abs(x)<=abs(y): pi/2-atan(ax/ay) */
- else {
- if (u<inv16.d) {
- v=u*u;
- zz=u*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
- ESUB(hpi.d,u,t2,cor)
- t3=((hpi1.d+cor)-du)-zz;
- if ((z=t2+(t3-u2.d)) == t2+(t3+u2.d)) return signArctan2(y,z);
-
- MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
- s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- MUL2(u,du,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(u,du,s2,ss2,s1,ss1,t1,t2)
- SUB2(hpi.d,hpi1.d,s1,ss1,s2,ss2,t1,t2)
- if ((z=s2+(ss2-u6.d)) == s2+(ss2+u6.d)) return signArctan2(y,z);
- return atan2Mp(x,y,pr);
- }
- else {
- i=(TWO52+TWO8*u)-TWO52; i-=16;
- v=(u-cij[i][0].d)+du;
- zz=hpi1.d-v*(cij[i][2].d+v*(cij[i][3].d+v*(cij[i][4].d+
- v*(cij[i][5].d+v* cij[i][6].d))));
- t1=hpi.d-cij[i][1].d;
- if (i<112) ua=ua1.d; /* w < 1/2 */
- else ua=ua2.d; /* w >= 1/2 */
- if ((z=t1+(zz-ua)) == t1+(zz+ua)) return signArctan2(y,z);
-
- t1=u-hij[i][0].d;
- EADD(t1,du,v,vv)
- s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
- v*(hij[i][14].d+v* hij[i][15].d))));
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
- SUB2(hpi.d,hpi1.d,s2,ss2,s1,ss1,t1,t2)
- if ((z=s1+(ss1-uc.d)) == s1+(ss1+uc.d)) return signArctan2(y,z);
- return atan2Mp(x,y,pr);
- }
+ if (x > ZERO)
+ {
+ /* (i) x>0, abs(y)< abs(x): atan(ay/ax) */
+ if (ay < ax)
+ {
+ if (u < inv16.d)
+ {
+ v = u * u;
+
+ zz = du + u * v * (d3.d
+ + v * (d5.d
+ + v * (d7.d
+ + v * (d9.d
+ + v * (d11.d
+ + v * d13.d)))));
+
+ if ((z = u + (zz - u1.d * u)) == u + (zz + u1.d * u))
+ return signArctan2 (y, z);
+
+ MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
+ s1 = v * (f11.d + v * (f13.d
+ + v * (f15.d + v * (f17.d + v * f19.d))));
+ ADD2 (f9.d, ff9.d, s1, ZERO, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
+
+ if ((z = s1 + (ss1 - u5.d * s1)) == s1 + (ss1 + u5.d * s1))
+ return signArctan2 (y, z);
+
+ return atan2Mp (x, y, pr);
+ }
+
+ i = (TWO52 + TWO8 * u) - TWO52;
+ i -= 16;
+ t3 = u - cij[i][0].d;
+ EADD (t3, du, v, dv);
+ t1 = cij[i][1].d;
+ t2 = cij[i][2].d;
+ zz = v * t2 + (dv * t2
+ + v * v * (cij[i][3].d
+ + v * (cij[i][4].d
+ + v * (cij[i][5].d
+ + v * cij[i][6].d))));
+ if (i < 112)
+ {
+ if (i < 48)
+ u9 = u91.d; /* u < 1/4 */
+ else
+ u9 = u92.d;
+ } /* 1/4 <= u < 1/2 */
+ else
+ {
+ if (i < 176)
+ u9 = u93.d; /* 1/2 <= u < 3/4 */
+ else
+ u9 = u94.d;
+ } /* 3/4 <= u <= 1 */
+ if ((z = t1 + (zz - u9 * t1)) == t1 + (zz + u9 * t1))
+ return signArctan2 (y, z);
+
+ t1 = u - hij[i][0].d;
+ EADD (t1, du, v, vv);
+ s1 = v * (hij[i][11].d
+ + v * (hij[i][12].d
+ + v * (hij[i][13].d
+ + v * (hij[i][14].d
+ + v * hij[i][15].d))));
+ ADD2 (hij[i][9].d, hij[i][10].d, s1, ZERO, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
+
+ if ((z = s2 + (ss2 - ub.d * s2)) == s2 + (ss2 + ub.d * s2))
+ return signArctan2 (y, z);
+ return atan2Mp (x, y, pr);
+ }
+
+ /* (ii) x>0, abs(x)<=abs(y): pi/2-atan(ax/ay) */
+ if (u < inv16.d)
+ {
+ v = u * u;
+ zz = u * v * (d3.d
+ + v * (d5.d
+ + v * (d7.d
+ + v * (d9.d
+ + v * (d11.d
+ + v * d13.d)))));
+ ESUB (hpi.d, u, t2, cor);
+ t3 = ((hpi1.d + cor) - du) - zz;
+ if ((z = t2 + (t3 - u2.d)) == t2 + (t3 + u2.d))
+ return signArctan2 (y, z);
+
+ MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
+ s1 = v * (f11.d
+ + v * (f13.d
+ + v * (f15.d + v * (f17.d + v * f19.d))));
+ ADD2 (f9.d, ff9.d, s1, ZERO, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
+ SUB2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2);
+
+ if ((z = s2 + (ss2 - u6.d)) == s2 + (ss2 + u6.d))
+ return signArctan2 (y, z);
+ return atan2Mp (x, y, pr);
+ }
+
+ i = (TWO52 + TWO8 * u) - TWO52;
+ i -= 16;
+ v = (u - cij[i][0].d) + du;
+
+ zz = hpi1.d - v * (cij[i][2].d
+ + v * (cij[i][3].d
+ + v * (cij[i][4].d
+ + v * (cij[i][5].d
+ + v * cij[i][6].d))));
+ t1 = hpi.d - cij[i][1].d;
+ if (i < 112)
+ ua = ua1.d; /* w < 1/2 */
+ else
+ ua = ua2.d; /* w >= 1/2 */
+ if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
+ return signArctan2 (y, z);
+
+ t1 = u - hij[i][0].d;
+ EADD (t1, du, v, vv);
+
+ s1 = v * (hij[i][11].d
+ + v * (hij[i][12].d
+ + v * (hij[i][13].d
+ + v * (hij[i][14].d
+ + v * hij[i][15].d))));
+
+ ADD2 (hij[i][9].d, hij[i][10].d, s1, ZERO, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
+ SUB2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2);
+
+ if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
+ return signArctan2 (y, z);
+ return atan2Mp (x, y, pr);
}
- }
- else {
-
- /* (iii) x<0, abs(x)< abs(y): pi/2+atan(ax/ay) */
- if (ax<ay) {
- if (u<inv16.d) {
- v=u*u;
- zz=u*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
- EADD(hpi.d,u,t2,cor)
- t3=((hpi1.d+cor)+du)+zz;
- if ((z=t2+(t3-u3.d)) == t2+(t3+u3.d)) return signArctan2(y,z);
-
- MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
- s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- MUL2(u,du,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(u,du,s2,ss2,s1,ss1,t1,t2)
- ADD2(hpi.d,hpi1.d,s1,ss1,s2,ss2,t1,t2)
- if ((z=s2+(ss2-u7.d)) == s2+(ss2+u7.d)) return signArctan2(y,z);
- return atan2Mp(x,y,pr);
- }
- else {
- i=(TWO52+TWO8*u)-TWO52; i-=16;
- v=(u-cij[i][0].d)+du;
- zz=hpi1.d+v*(cij[i][2].d+v*(cij[i][3].d+v*(cij[i][4].d+
- v*(cij[i][5].d+v* cij[i][6].d))));
- t1=hpi.d+cij[i][1].d;
- if (i<112) ua=ua1.d; /* w < 1/2 */
- else ua=ua2.d; /* w >= 1/2 */
- if ((z=t1+(zz-ua)) == t1+(zz+ua)) return signArctan2(y,z);
-
- t1=u-hij[i][0].d;
- EADD(t1,du,v,vv)
- s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
- v*(hij[i][14].d+v* hij[i][15].d))));
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
- ADD2(hpi.d,hpi1.d,s2,ss2,s1,ss1,t1,t2)
- if ((z=s1+(ss1-uc.d)) == s1+(ss1+uc.d)) return signArctan2(y,z);
- return atan2Mp(x,y,pr);
- }
+
+ /* (iii) x<0, abs(x)< abs(y): pi/2+atan(ax/ay) */
+ if (ax < ay)
+ {
+ if (u < inv16.d)
+ {
+ v = u * u;
+ zz = u * v * (d3.d
+ + v * (d5.d
+ + v * (d7.d
+ + v * (d9.d
+ + v * (d11.d + v * d13.d)))));
+ EADD (hpi.d, u, t2, cor);
+ t3 = ((hpi1.d + cor) + du) + zz;
+ if ((z = t2 + (t3 - u3.d)) == t2 + (t3 + u3.d))
+ return signArctan2 (y, z);
+
+ MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
+ s1 = v * (f11.d
+ + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d))));
+ ADD2 (f9.d, ff9.d, s1, ZERO, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
+ ADD2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2);
+
+ if ((z = s2 + (ss2 - u7.d)) == s2 + (ss2 + u7.d))
+ return signArctan2 (y, z);
+ return atan2Mp (x, y, pr);
+ }
+
+ i = (TWO52 + TWO8 * u) - TWO52;
+ i -= 16;
+ v = (u - cij[i][0].d) + du;
+ zz = hpi1.d + v * (cij[i][2].d
+ + v * (cij[i][3].d
+ + v * (cij[i][4].d
+ + v * (cij[i][5].d
+ + v * cij[i][6].d))));
+ t1 = hpi.d + cij[i][1].d;
+ if (i < 112)
+ ua = ua1.d; /* w < 1/2 */
+ else
+ ua = ua2.d; /* w >= 1/2 */
+ if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
+ return signArctan2 (y, z);
+
+ t1 = u - hij[i][0].d;
+ EADD (t1, du, v, vv);
+ s1 = v * (hij[i][11].d
+ + v * (hij[i][12].d
+ + v * (hij[i][13].d
+ + v * (hij[i][14].d
+ + v * hij[i][15].d))));
+ ADD2 (hij[i][9].d, hij[i][10].d, s1, ZERO, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
+ ADD2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2);
+
+ if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
+ return signArctan2 (y, z);
+ return atan2Mp (x, y, pr);
}
- /* (iv) x<0, abs(y)<=abs(x): pi-atan(ax/ay) */
- else {
- if (u<inv16.d) {
- v=u*u;
- zz=u*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
- ESUB(opi.d,u,t2,cor)
- t3=((opi1.d+cor)-du)-zz;
- if ((z=t2+(t3-u4.d)) == t2+(t3+u4.d)) return signArctan2(y,z);
-
- MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
- s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
- ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- MUL2(u,du,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(u,du,s2,ss2,s1,ss1,t1,t2)
- SUB2(opi.d,opi1.d,s1,ss1,s2,ss2,t1,t2)
- if ((z=s2+(ss2-u8.d)) == s2+(ss2+u8.d)) return signArctan2(y,z);
- return atan2Mp(x,y,pr);
- }
- else {
- i=(TWO52+TWO8*u)-TWO52; i-=16;
- v=(u-cij[i][0].d)+du;
- zz=opi1.d-v*(cij[i][2].d+v*(cij[i][3].d+v*(cij[i][4].d+
- v*(cij[i][5].d+v* cij[i][6].d))));
- t1=opi.d-cij[i][1].d;
- if (i<112) ua=ua1.d; /* w < 1/2 */
- else ua=ua2.d; /* w >= 1/2 */
- if ((z=t1+(zz-ua)) == t1+(zz+ua)) return signArctan2(y,z);
-
- t1=u-hij[i][0].d;
- EADD(t1,du,v,vv)
- s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
- v*(hij[i][14].d+v* hij[i][15].d))));
- ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
- MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
- ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
- SUB2(opi.d,opi1.d,s2,ss2,s1,ss1,t1,t2)
- if ((z=s1+(ss1-uc.d)) == s1+(ss1+uc.d)) return signArctan2(y,z);
- return atan2Mp(x,y,pr);
- }
+ /* (iv) x<0, abs(y)<=abs(x): pi-atan(ax/ay) */
+ if (u < inv16.d)
+ {
+ v = u * u;
+ zz = u * v * (d3.d
+ + v * (d5.d
+ + v * (d7.d
+ + v * (d9.d + v * (d11.d + v * d13.d)))));
+ ESUB (opi.d, u, t2, cor);
+ t3 = ((opi1.d + cor) - du) - zz;
+ if ((z = t2 + (t3 - u4.d)) == t2 + (t3 + u4.d))
+ return signArctan2 (y, z);
+
+ MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
+ s1 = v * (f11.d + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d))));
+ ADD2 (f9.d, ff9.d, s1, ZERO, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
+ SUB2 (opi.d, opi1.d, s1, ss1, s2, ss2, t1, t2);
+
+ if ((z = s2 + (ss2 - u8.d)) == s2 + (ss2 + u8.d))
+ return signArctan2 (y, z);
+ return atan2Mp (x, y, pr);
}
- }
+
+ i = (TWO52 + TWO8 * u) - TWO52;
+ i -= 16;
+ v = (u - cij[i][0].d) + du;
+ zz = opi1.d - v * (cij[i][2].d
+ + v * (cij[i][3].d
+ + v * (cij[i][4].d
+ + v * (cij[i][5].d + v * cij[i][6].d))));
+ t1 = opi.d - cij[i][1].d;
+ if (i < 112)
+ ua = ua1.d; /* w < 1/2 */
+ else
+ ua = ua2.d; /* w >= 1/2 */
+ if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
+ return signArctan2 (y, z);
+
+ t1 = u - hij[i][0].d;
+
+ EADD (t1, du, v, vv);
+
+ s1 = v * (hij[i][11].d
+ + v * (hij[i][12].d
+ + v * (hij[i][13].d
+ + v * (hij[i][14].d + v * hij[i][15].d))));
+
+ ADD2 (hij[i][9].d, hij[i][10].d, s1, ZERO, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
+ MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+ ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
+ SUB2 (opi.d, opi1.d, s2, ss2, s1, ss1, t1, t2);
+
+ if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
+ return signArctan2 (y, z);
+ return atan2Mp (x, y, pr);
}
+
#ifndef __ieee754_atan2
strong_alias (__ieee754_atan2, __atan2_finite)
#endif
- /* Treat the Denormalized case */
+/* Treat the Denormalized case */
static double
SECTION
-normalized(double ax,double ay,double y, double z)
- { int p;
- mp_no mpx,mpy,mpz,mperr,mpz2,mpt1;
- p=6;
- __dbl_mp(ax,&mpx,p); __dbl_mp(ay,&mpy,p); __dvd(&mpy,&mpx,&mpz,p);
- __dbl_mp(ue.d,&mpt1,p); __mul(&mpz,&mpt1,&mperr,p);
- __sub(&mpz,&mperr,&mpz2,p); __mp_dbl(&mpz2,&z,p);
- return signArctan2(y,z);
+normalized (double ax, double ay, double y, double z)
+{
+ int p;
+ mp_no mpx, mpy, mpz, mperr, mpz2, mpt1;
+ p = 6;
+ __dbl_mp (ax, &mpx, p);
+ __dbl_mp (ay, &mpy, p);
+ __dvd (&mpy, &mpx, &mpz, p);
+ __dbl_mp (ue.d, &mpt1, p);
+ __mul (&mpz, &mpt1, &mperr, p);
+ __sub (&mpz, &mperr, &mpz2, p);
+ __mp_dbl (&mpz2, &z, p);
+ return signArctan2 (y, z);
}
- /* Stage 3: Perform a multi-Precision computation */
+
+/* Stage 3: Perform a multi-Precision computation */
static double
SECTION
-atan2Mp(double x,double y,const int pr[])
+atan2Mp (double x, double y, const int pr[])
{
- double z1,z2;
- int i,p;
- mp_no mpx,mpy,mpz,mpz1,mpz2,mperr,mpt1;
- for (i=0; i<MM; i++) {
- p = pr[i];
- __dbl_mp(x,&mpx,p); __dbl_mp(y,&mpy,p);
- __mpatan2(&mpy,&mpx,&mpz,p);
- __dbl_mp(ud[i].d,&mpt1,p); __mul(&mpz,&mpt1,&mperr,p);
- __add(&mpz,&mperr,&mpz1,p); __sub(&mpz,&mperr,&mpz2,p);
- __mp_dbl(&mpz1,&z1,p); __mp_dbl(&mpz2,&z2,p);
- if (z1==z2) return z1;
- }
- return z1; /*if unpossible to do exact computing */
+ double z1, z2;
+ int i, p;
+ mp_no mpx, mpy, mpz, mpz1, mpz2, mperr, mpt1;
+ for (i = 0; i < MM; i++)
+ {
+ p = pr[i];
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __mpatan2 (&mpy, &mpx, &mpz, p);
+ __dbl_mp (ud[i].d, &mpt1, p);
+ __mul (&mpz, &mpt1, &mperr, p);
+ __add (&mpz, &mperr, &mpz1, p);
+ __sub (&mpz, &mperr, &mpz2, p);
+ __mp_dbl (&mpz1, &z1, p);
+ __mp_dbl (&mpz2, &z2, p);
+ if (z1 == z2)
+ return z1;
+ }
+ return z1; /* if exact computation was not possible */
}
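The reindented fast paths keep the original control flow: each one computes a base value, a small correction and an error bound, and escalates to atan2Mp only when adding and subtracting the bound round to different doubles. A minimal sketch of that rounding test, with a hypothetical slow_path callback standing in for the multi-precision stage (not the glibc helpers themselves):

/* Stage-1 output: value t, correction zz, error bound u.  */
struct approx { double t, zz, u; };

static double
round_or_retry (struct approx a, double (*slow_path) (void))
{
  double lo = a.t + (a.zz - a.u);
  double hi = a.t + (a.zz + a.u);

  /* If the bracketed sums round to the same double, the stage-1 result
     is already correctly rounded; otherwise escalate to a slower, more
     precise stage.  */
  return lo == hi ? lo : slow_path ();
}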
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index 0f9d87ba59..07cc4a91b6 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -55,9 +55,6 @@ SECTION
__ieee754_exp(double x) {
double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
mynumber junk1, junk2, binexp = {{0,0}};
-#if 0
- int4 k;
-#endif
int4 i,j,m,n,ex;
double retval;
@@ -174,9 +171,6 @@ SECTION
__exp1(double x, double xx, double error) {
double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
mynumber junk1, junk2, binexp = {{0,0}};
-#if 0
- int4 k;
-#endif
int4 i,j,m,n,ex;
junk1.x = x;
diff --git a/sysdeps/ieee754/dbl-64/e_j0.c b/sysdeps/ieee754/dbl-64/e_j0.c
index f393a762b2..d641a09149 100644
--- a/sysdeps/ieee754/dbl-64/e_j0.c
+++ b/sysdeps/ieee754/dbl-64/e_j0.c
@@ -293,7 +293,8 @@ pzero(double x)
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff;
- if(ix>=0x40200000) {p = pR8; q= pS8;}
+ if (ix>=0x41b00000) {return one;}
+ else if(ix>=0x40200000){p = pR8; q= pS8;}
else if(ix>=0x40122E8B){p = pR5; q= pS5;}
else if(ix>=0x4006DB6D){p = pR3; q= pS3;}
else if(ix>=0x40000000){p = pR2; q= pS2;}
@@ -400,7 +401,8 @@ qzero(double x)
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff;
- if(ix>=0x40200000) {p = qR8; q= qS8;}
+ if (ix>=0x41b00000) {return -.125/x;}
+ else if(ix>=0x40200000){p = qR8; q= qS8;}
else if(ix>=0x40122E8B){p = qR5; q= qS5;}
else if(ix>=0x4006DB6D){p = qR3; q= qS3;}
else if(ix>=0x40000000){p = qR2; q= qS2;}
diff --git a/sysdeps/ieee754/dbl-64/e_j1.c b/sysdeps/ieee754/dbl-64/e_j1.c
index cba4d46b18..cca5f20b4f 100644
--- a/sysdeps/ieee754/dbl-64/e_j1.c
+++ b/sysdeps/ieee754/dbl-64/e_j1.c
@@ -291,7 +291,8 @@ pone(double x)
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff;
- if(ix>=0x40200000) {p = pr8; q= ps8;}
+ if (ix>=0x41b00000) {return one;}
+ else if(ix>=0x40200000){p = pr8; q= ps8;}
else if(ix>=0x40122E8B){p = pr5; q= ps5;}
else if(ix>=0x4006DB6D){p = pr3; q= ps3;}
else if(ix>=0x40000000){p = pr2; q= ps2;}
@@ -399,7 +400,8 @@ qone(double x)
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff;
- if(ix>=0x40200000) {p = qr8; q= qs8;}
+ if (ix>=0x41b00000) {return .375/x;}
+ else if(ix>=0x40200000){p = qr8; q= qs8;}
else if(ix>=0x40122E8B){p = qr5; q= qs5;}
else if(ix>=0x4006DB6D){p = qr3; q= qs3;}
else if(ix>=0x40000000){p = qr2; q= qs2;}
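The new 0x41b00000 cutoffs in e_j0.c and e_j1.c (high word of |x| at least 0x1p28) skip the rational approximations and return the leading terms of the large-argument asymptotic expansions directly; the discarded terms are O(1/x^2) and therefore negligible at that size. A sketch of just those limits (illustrative helpers, not the glibc internals):

/* Leading asymptotic terms used once |x| >= 0x1p28:
   P0(x) -> 1,  Q0(x) -> -1/(8x)   (e_j0.c)
   P1(x) -> 1,  Q1(x) ->  3/(8x)   (e_j1.c)
   Below the cutoff the rational approximations are still evaluated.  */
static double p0_large (double x) { (void) x; return 1.0; }
static double q0_large (double x) { return -0.125 / x; }
static double p1_large (double x) { (void) x; return 1.0; }
static double q1_large (double x) { return 0.375 / x; }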
diff --git a/sysdeps/ieee754/dbl-64/e_log.c b/sysdeps/ieee754/dbl-64/e_log.c
index 762639bcdf..58c9a8e76b 100644
--- a/sysdeps/ieee754/dbl-64/e_log.c
+++ b/sysdeps/ieee754/dbl-64/e_log.c
@@ -56,9 +56,6 @@ __ieee754_log(double x) {
#define M 4
static const int pr[M]={8,10,18,32};
int i,j,n,ux,dx,p;
-#if 0
- int k;
-#endif
double dbl_n,u,p0,q,r0,w,nln2a,luai,lubi,lvaj,lvbj,
sij,ssij,ttij,A,B,B0,y,y1,y2,polI,polII,sa,sb,
t1,t2,t7,t8,t,ra,rb,ww,
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index ee2711322f..f8962a7dce 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -64,9 +64,6 @@ double
SECTION
__ieee754_pow(double x, double y) {
double z,a,aa,error, t,a1,a2,y1,y2;
-#if 0
- double gor=1.0;
-#endif
mynumber u,v;
int k;
int4 qx,qy;
@@ -206,13 +203,7 @@ static double
SECTION
log1(double x, double *delta, double *error) {
int i,j,m;
-#if 0
- int n;
-#endif
double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
-#if 0
- double cor;
-#endif
mynumber u,v;
#ifdef BIG_ENDI
mynumber
@@ -300,13 +291,7 @@ static double
SECTION
my_log2(double x, double *delta, double *error) {
int i,j,m;
-#if 0
- int n;
-#endif
double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
-#if 0
- double cor;
-#endif
double ou1,ou2,lu1,lu2,ov,lv1,lv2,a,a1,a2;
double y,yy,z,zz,j1,j2,j7,j8;
#ifndef DLA_FMS
@@ -397,9 +382,6 @@ SECTION
checkint(double x) {
union {int4 i[2]; double x;} u;
int k,m,n;
-#if 0
- int l;
-#endif
u.x = x;
m = u.i[HIGH_HALF]&0x7fffffff; /* no sign */
if (m >= 0x7ff00000) return 0; /* x is +/-inf or NaN */
diff --git a/sysdeps/ieee754/dbl-64/e_remainder.c b/sysdeps/ieee754/dbl-64/e_remainder.c
index ac4b55f9d4..c4db9316c7 100644
--- a/sysdeps/ieee754/dbl-64/e_remainder.c
+++ b/sysdeps/ieee754/dbl-64/e_remainder.c
@@ -42,13 +42,7 @@
double __ieee754_remainder(double x, double y)
{
double z,d,xx;
-#if 0
- double yy;
-#endif
int4 kx,ky,n,nn,n1,m1,l;
-#if 0
- int4 m;
-#endif
mynumber u,t,w={{0,0}},v={{0,0}},ww={{0,0}},r;
u.x=x;
t.x=y;
diff --git a/sysdeps/ieee754/dbl-64/mpa-arch.h b/sysdeps/ieee754/dbl-64/mpa-arch.h
new file mode 100644
index 0000000000..7de9d51ae2
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/mpa-arch.h
@@ -0,0 +1,47 @@
+/* Overridable constants and operations.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+
+typedef long mantissa_t;
+typedef int64_t mantissa_store_t;
+
+#define TWOPOW(i) (1L << i)
+
+#define RADIX_EXP 24
+#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */
+
+/* Divide D by RADIX and put the remainder in R. D must be a non-negative
+ integral value. */
+#define DIV_RADIX(d, r) \
+ ({ \
+ r = d & (RADIX - 1); \
+ d >>= RADIX_EXP; \
+ })
+
+/* Put the integer component of a double X in R and retain the fraction in
+ X. This is used in extracting mantissa digits for MP_NO by using the
+ integer portion of the current value of the number as the current mantissa
+ digit and then scaling by RADIX to get the next mantissa digit in the same
+ manner. */
+#define INTEGER_OF(x, i) \
+ ({ \
+ i = (mantissa_t) x; \
+ x -= i; \
+ })
+
+/* Align IN down to F. The code assumes that F is a power of two. */
+#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
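DIV_RADIX and INTEGER_OF bundle the two digit-manipulation steps that mpa.c repeats everywhere: splitting an accumulated product into a base-2^24 digit plus carry, and peeling the integer part off a double. A small standalone illustration with the macros expanded inline (values are arbitrary; the printing is only for demonstration):

#include <stdint.h>
#include <stdio.h>

#define RADIX_EXP 24
#define RADIX (1L << RADIX_EXP)

int
main (void)
{
  /* DIV_RADIX: split an accumulator d into digit r and carry d.  */
  int64_t d = ((int64_t) 5 << RADIX_EXP) + 123;
  long r = d & (RADIX - 1);             /* low 24 bits: digit 123 */
  d >>= RADIX_EXP;                      /* remaining carry: 5 */
  printf ("digit=%ld carry=%lld\n", r, (long long) d);

  /* INTEGER_OF: extract base-2^24 mantissa digits of a double, most
     significant first, the way __dbl_mp fills Y[1..4].  */
  double x = 3.14159265358979;
  for (int i = 1; i <= 3; i++)
    {
      long digit = (long) x;            /* integer part */
      x -= digit;                       /* keep only the fraction */
      x *= RADIX;                       /* expose the next digit */
      printf ("Y[%d]=%ld\n", i, digit);
    }
  return 0;
}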
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index ede8ed1986..a3feb175ed 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -43,6 +43,7 @@
#include "endian.h"
#include "mpa.h"
#include <sys/param.h>
+#include <alloca.h>
#ifndef SECTION
# define SECTION
@@ -59,8 +60,9 @@ const mp_no mptwo = {1, {1.0, 2.0}};
static int
mcr (const mp_no *x, const mp_no *y, int p)
{
- int i;
- for (i = 1; i <= p; i++)
+ long i;
+ long p2 = p;
+ for (i = 1; i <= p2; i++)
{
if (X[i] == Y[i])
continue;
@@ -76,16 +78,16 @@ mcr (const mp_no *x, const mp_no *y, int p)
int
__acr (const mp_no *x, const mp_no *y, int p)
{
- int i;
+ long i;
- if (X[0] == ZERO)
+ if (X[0] == 0)
{
- if (Y[0] == ZERO)
+ if (Y[0] == 0)
i = 0;
else
i = -1;
}
- else if (Y[0] == ZERO)
+ else if (Y[0] == 0)
i = 1;
else
{
@@ -107,8 +109,10 @@ __acr (const mp_no *x, const mp_no *y, int p)
void
__cpy (const mp_no *x, mp_no *y, int p)
{
+ long i;
+
EY = EX;
- for (int i = 0; i <= p; i++)
+ for (i = 0; i <= p; i++)
Y[i] = X[i];
}
#endif
@@ -119,9 +123,10 @@ __cpy (const mp_no *x, mp_no *y, int p)
static void
norm (const mp_no *x, double *y, int p)
{
-#define R RADIXI
- int i;
- double a, c, u, v, z[5];
+#define R RADIXI
+ long i;
+ double c;
+ mantissa_t a, u, v, z[5];
if (p < 5)
{
if (p == 1)
@@ -135,44 +140,41 @@ norm (const mp_no *x, double *y, int p)
}
else
{
- for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
+ for (a = 1, z[1] = X[1]; z[1] < TWO23;)
{
- a *= TWO;
- z[1] *= TWO;
+ a *= 2;
+ z[1] *= 2;
}
for (i = 2; i < 5; i++)
{
- z[i] = X[i] * a;
- u = (z[i] + CUTTER) - CUTTER;
- if (u > z[i])
- u -= RADIX;
- z[i] -= u;
- z[i - 1] += u * RADIXI;
+ mantissa_store_t d, r;
+ d = X[i] * (mantissa_store_t) a;
+ DIV_RADIX (d, r);
+ z[i] = r;
+ z[i - 1] += d;
}
- u = (z[3] + TWO71) - TWO71;
- if (u > z[3])
- u -= TWO19;
+ u = ALIGN_DOWN_TO (z[3], TWO19);
v = z[3] - u;
if (v == TWO18)
{
- if (z[4] == ZERO)
+ if (z[4] == 0)
{
for (i = 5; i <= p; i++)
{
- if (X[i] == ZERO)
+ if (X[i] == 0)
continue;
else
{
- z[3] += ONE;
+ z[3] += 1;
break;
}
}
}
else
- z[3] += ONE;
+ z[3] += 1;
}
c = (z[1] + R * (z[2] + R * z[3])) / a;
@@ -194,47 +196,49 @@ norm (const mp_no *x, double *y, int p)
static void
denorm (const mp_no *x, double *y, int p)
{
- int i, k;
- double c, u, z[5];
+ long i, k;
+ long p2 = p;
+ double c;
+ mantissa_t u, z[5];
-#define R RADIXI
+#define R RADIXI
if (EX < -44 || (EX == -44 && X[1] < TWO5))
{
- *y = ZERO;
+ *y = 0;
return;
}
- if (p == 1)
+ if (p2 == 1)
{
if (EX == -42)
{
z[1] = X[1] + TWO10;
- z[2] = ZERO;
- z[3] = ZERO;
+ z[2] = 0;
+ z[3] = 0;
k = 3;
}
else if (EX == -43)
{
z[1] = TWO10;
z[2] = X[1];
- z[3] = ZERO;
+ z[3] = 0;
k = 2;
}
else
{
z[1] = TWO10;
- z[2] = ZERO;
+ z[2] = 0;
z[3] = X[1];
k = 1;
}
}
- else if (p == 2)
+ else if (p2 == 2)
{
if (EX == -42)
{
z[1] = X[1] + TWO10;
z[2] = X[2];
- z[3] = ZERO;
+ z[3] = 0;
k = 3;
}
else if (EX == -43)
@@ -247,7 +251,7 @@ denorm (const mp_no *x, double *y, int p)
else
{
z[1] = TWO10;
- z[2] = ZERO;
+ z[2] = 0;
z[3] = X[1];
k = 1;
}
@@ -269,25 +273,23 @@ denorm (const mp_no *x, double *y, int p)
else
{
z[1] = TWO10;
- z[2] = ZERO;
+ z[2] = 0;
k = 1;
}
z[3] = X[k];
}
- u = (z[3] + TWO57) - TWO57;
- if (u > z[3])
- u -= TWO5;
+ u = ALIGN_DOWN_TO (z[3], TWO5);
if (u == z[3])
{
- for (i = k + 1; i <= p; i++)
+ for (i = k + 1; i <= p2; i++)
{
- if (X[i] == ZERO)
+ if (X[i] == 0)
continue;
else
{
- z[3] += ONE;
+ z[3] += 1;
break;
}
}
@@ -304,9 +306,9 @@ denorm (const mp_no *x, double *y, int p)
void
__mp_dbl (const mp_no *x, double *y, int p)
{
- if (X[0] == ZERO)
+ if (X[0] == 0)
{
- *y = ZERO;
+ *y = 0;
return;
}
@@ -323,42 +325,38 @@ void
SECTION
__dbl_mp (double x, mp_no *y, int p)
{
- int i, n;
- double u;
+ long i, n;
+ long p2 = p;
/* Sign. */
- if (x == ZERO)
+ if (x == 0)
{
- Y[0] = ZERO;
+ Y[0] = 0;
return;
}
- else if (x > ZERO)
- Y[0] = ONE;
+ else if (x > 0)
+ Y[0] = 1;
else
{
- Y[0] = MONE;
+ Y[0] = -1;
x = -x;
}
/* Exponent. */
- for (EY = ONE; x >= RADIX; EY += ONE)
+ for (EY = 1; x >= RADIX; EY += 1)
x *= RADIXI;
- for (; x < ONE; EY -= ONE)
+ for (; x < 1; EY -= 1)
x *= RADIX;
/* Digits. */
- n = MIN (p, 4);
+ n = MIN (p2, 4);
for (i = 1; i <= n; i++)
{
- u = (x + TWO52) - TWO52;
- if (u > x)
- u -= ONE;
- Y[i] = u;
- x -= u;
+ INTEGER_OF (x, Y[i]);
x *= RADIX;
}
- for (; i <= p; i++)
- Y[i] = ZERO;
+ for (; i <= p2; i++)
+ Y[i] = 0;
}
/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
@@ -369,53 +367,64 @@ static void
SECTION
add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
- int i, j, k;
+ long i, j, k;
+ long p2 = p;
+ mantissa_t zk;
EZ = EX;
- i = p;
- j = p + EY - EX;
- k = p + 1;
+ i = p2;
+ j = p2 + EY - EX;
+ k = p2 + 1;
- if (j < 1)
+ if (__glibc_unlikely (j < 1))
{
__cpy (x, z, p);
return;
}
- else
- Z[k] = ZERO;
+
+ zk = 0;
for (; j > 0; i--, j--)
{
- Z[k] += X[i] + Y[j];
- if (Z[k] >= RADIX)
+ zk += X[i] + Y[j];
+ if (zk >= RADIX)
{
- Z[k] -= RADIX;
- Z[--k] = ONE;
+ Z[k--] = zk - RADIX;
+ zk = 1;
}
else
- Z[--k] = ZERO;
+ {
+ Z[k--] = zk;
+ zk = 0;
+ }
}
for (; i > 0; i--)
{
- Z[k] += X[i];
- if (Z[k] >= RADIX)
+ zk += X[i];
+ if (zk >= RADIX)
{
- Z[k] -= RADIX;
- Z[--k] = ONE;
+ Z[k--] = zk - RADIX;
+ zk = 1;
}
else
- Z[--k] = ZERO;
+ {
+ Z[k--] = zk;
+ zk = 0;
+ }
}
- if (Z[1] == ZERO)
+ if (zk == 0)
{
- for (i = 1; i <= p; i++)
+ for (i = 1; i <= p2; i++)
Z[i] = Z[i + 1];
}
else
- EZ += ONE;
+ {
+ Z[1] = zk;
+ EZ += 1;
+ }
}
/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
@@ -426,72 +435,71 @@ static void
SECTION
sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
- int i, j, k;
+ long i, j, k;
+ long p2 = p;
+ mantissa_t zk;
EZ = EX;
+ i = p2;
+ j = p2 + EY - EX;
+ k = p2;
- if (EX == EY)
+ /* Y is too small compared to X, copy X over to the result. */
+ if (__glibc_unlikely (j < 1))
{
- i = j = k = p;
- Z[k] = Z[k + 1] = ZERO;
+ __cpy (x, z, p);
+ return;
}
- else
+
+ /* The relevant least significant digit in Y is non-zero, so we factor it in
+ to enhance accuracy. */
+ if (j < p2 && Y[j + 1] > 0)
{
- j = EX - EY;
- if (j > p)
- {
- __cpy (x, z, p);
- return;
- }
- else
- {
- i = p;
- j = p + 1 - j;
- k = p;
- if (Y[j] > ZERO)
- {
- Z[k + 1] = RADIX - Y[j--];
- Z[k] = MONE;
- }
- else
- {
- Z[k + 1] = ZERO;
- Z[k] = ZERO;
- j--;
- }
- }
+ Z[k + 1] = RADIX - Y[j + 1];
+ zk = -1;
}
+ else
+ zk = Z[k + 1] = 0;
+ /* Subtract and borrow. */
for (; j > 0; i--, j--)
{
- Z[k] += (X[i] - Y[j]);
- if (Z[k] < ZERO)
+ zk += (X[i] - Y[j]);
+ if (zk < 0)
{
- Z[k] += RADIX;
- Z[--k] = MONE;
+ Z[k--] = zk + RADIX;
+ zk = -1;
}
else
- Z[--k] = ZERO;
+ {
+ Z[k--] = zk;
+ zk = 0;
+ }
}
+ /* We're done with digits from Y, so it's just digits in X. */
for (; i > 0; i--)
{
- Z[k] += X[i];
- if (Z[k] < ZERO)
+ zk += X[i];
+ if (zk < 0)
{
- Z[k] += RADIX;
- Z[--k] = MONE;
+ Z[k--] = zk + RADIX;
+ zk = -1;
}
else
- Z[--k] = ZERO;
+ {
+ Z[k--] = zk;
+ zk = 0;
+ }
}
- for (i = 1; Z[i] == ZERO; i++);
+ /* Normalize. */
+ for (i = 1; Z[i] == 0; i++);
EZ = EZ - i + 1;
- for (k = 1; i <= p + 1;)
+ for (k = 1; i <= p2 + 1;)
Z[k++] = Z[i++];
- for (; k <= p;)
- Z[k++] = ZERO;
+ for (; k <= p2;)
+ Z[k++] = 0;
}
/* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
@@ -503,12 +511,12 @@ __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
int n;
- if (X[0] == ZERO)
+ if (X[0] == 0)
{
__cpy (y, z, p);
return;
}
- else if (Y[0] == ZERO)
+ else if (Y[0] == 0)
{
__cpy (x, z, p);
return;
@@ -540,7 +548,7 @@ __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
Z[0] = Y[0];
}
else
- Z[0] = ZERO;
+ Z[0] = 0;
}
}
@@ -553,13 +561,13 @@ __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
int n;
- if (X[0] == ZERO)
+ if (X[0] == 0)
{
__cpy (y, z, p);
Z[0] = -Z[0];
return;
}
- else if (Y[0] == ZERO)
+ else if (Y[0] == 0)
{
__cpy (x, z, p);
return;
@@ -591,10 +599,11 @@ __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
Z[0] = -Y[0];
}
else
- Z[0] = ZERO;
+ Z[0] = 0;
}
}
+#ifndef NO__MUL
/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
digits. In case P > 3 the error is bounded by 1.001 ULP. */
@@ -602,57 +611,241 @@ void
SECTION
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
- int i, j, k, k2;
- double u, zk;
+ long i, j, k, ip, ip2;
+ long p2 = p;
+ mantissa_store_t zk;
+ const mp_no *a;
+ mantissa_store_t *diag;
/* Is z=0? */
- if (__glibc_unlikely (X[0] * Y[0] == ZERO))
+ if (__glibc_unlikely (X[0] * Y[0] == 0))
{
- Z[0] = ZERO;
+ Z[0] = 0;
return;
}
- /* Multiply, add and carry. */
- k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
- zk = Z[k2] = ZERO;
+ /* We need not iterate through all X's and Y's since it's pointless to
+ multiply zeroes. Here, both are zero... */
+ for (ip2 = p2; ip2 > 0; ip2--)
+ if (X[ip2] != 0 || Y[ip2] != 0)
+ break;
+
+ a = X[ip2] != 0 ? y : x;
+
+ /* ... and here, at least one of them is still zero. */
+ for (ip = ip2; ip > 0; ip--)
+ if (a->d[ip] != 0)
+ break;
+
+ /* The product looks like this for p = 3 (as an example):
+
+
+ a1 a2 a3
+ x b1 b2 b3
+ -----------------------------
+ a1*b3 a2*b3 a3*b3
+ a1*b2 a2*b2 a3*b2
+ a1*b1 a2*b1 a3*b1
+
+ So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
+ for P >= 3. We compute the above digits in two parts; the last P-1
+ digits and then the first P digits. The last P-1 digits are a sum of
+ products of the input digits from P to P-k where K is 0 for the least
+ significant digit and increases as we go towards the left. The product
+ term is of the form X[k]*Y[P-k] as can be seen in the above example.
+
+ The first P digits are also a sum of products with the same product term,
+ except that the sum is from 1 to k. This is also evident from the above
+ example.
+
+ Another thing that becomes evident is that only the most significant
+ ip+ip2 digits of the result are non-zero, where ip and ip2 are the
+ 'internal precision' of the input numbers, i.e. digits after ip and ip2
+ are all 0. */
+
+ k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;
+
+ while (k > ip + ip2 + 1)
+ Z[k--] = 0;
+
+ zk = 0;
- for (k = k2; k > p; k--)
+ /* Precompute sums of diagonal elements so that we can directly use them
+ later. See the next comment to know why we need them. */
+ diag = alloca (k * sizeof (mantissa_store_t));
+ mantissa_store_t d = 0;
+ for (i = 1; i <= ip; i++)
{
- for (i = k - p, j = p; i < p + 1; i++, j--)
- zk += X[i] * Y[j];
+ d += X[i] * (mantissa_store_t) Y[i];
+ diag[i] = d;
+ }
+ while (i < k)
+ diag[i++] = d;
+
+ while (k > p2)
+ {
+ long lim = k / 2;
+
+ if (k % 2 == 0)
+ /* We want to add this only once, but since we subtract it in the sum
+ of products above, we add twice. */
+ zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
+
+ for (i = k - p2, j = p2; i < j; i++, j--)
+ zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
+
+ zk -= diag[k - 1];
- u = (zk + CUTTER) - CUTTER;
- if (u > zk)
- u -= RADIX;
- Z[k] = zk - u;
- zk = u * RADIXI;
+ DIV_RADIX (zk, Z[k]);
+ k--;
}
+ /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
+ goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
+ number of multiplications, we halve the range and if k is an even number,
+ add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
+ X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
+
+ This reduction tells us that we're summing two things, the first term
+ through the half range and the negative of the sum of the product of all
+ terms of X and Y in the full range. i.e.
+
+ SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
+ a single loop so that it completes in O(n) time and can hence be directly
+ used in the loop below. */
while (k > 1)
{
- for (i = 1, j = k - 1; i < k; i++, j--)
- zk += X[i] * Y[j];
+ long lim = k / 2;
+
+ if (k % 2 == 0)
+ /* We want to add this only once, but since we subtract it in the sum
+ of products above, we add twice. */
+ zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
+
+ for (i = 1, j = k - 1; i < j; i++, j--)
+ zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
- u = (zk + CUTTER) - CUTTER;
- if (u > zk)
- u -= RADIX;
- Z[k] = zk - u;
- zk = u * RADIXI;
+ zk -= diag[k - 1];
+
+ DIV_RADIX (zk, Z[k]);
k--;
}
Z[k] = zk;
- EZ = EX + EY;
+ /* Get the exponent sum into an intermediate variable. This is a subtle
+ optimization, where given enough registers, all operations on the exponent
+ happen in registers and the result is written out only once into EZ.  */
+ int e = EX + EY;
+
/* Is there a carry beyond the most significant digit? */
- if (__glibc_unlikely (Z[1] == ZERO))
+ if (__glibc_unlikely (Z[1] == 0))
{
- for (i = 1; i <= p; i++)
+ for (i = 1; i <= p2; i++)
Z[i] = Z[i + 1];
- EZ--;
+ e--;
}
+ EZ = e;
Z[0] = X[0] * Y[0];
}
+#endif
+
+#ifndef NO__SQR
+/* Square *X and store result in *Y. X and Y may not overlap. For P in
+ [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
+ error is bounded by 1.001 ULP. This is a faster special case of
+ multiplication. */
+void
+SECTION
+__sqr (const mp_no *x, mp_no *y, int p)
+{
+ long i, j, k, ip;
+ mantissa_store_t yk;
+
+ /* Is z=0? */
+ if (__glibc_unlikely (X[0] == 0))
+ {
+ Y[0] = 0;
+ return;
+ }
+
+ /* We need not iterate through all X's since it's pointless to
+ multiply zeroes. */
+ for (ip = p; ip > 0; ip--)
+ if (X[ip] != 0)
+ break;
+
+ k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+ while (k > 2 * ip + 1)
+ Y[k--] = 0;
+
+ yk = 0;
+
+ while (k > p)
+ {
+ mantissa_store_t yk2 = 0;
+ long lim = k / 2;
+
+ if (k % 2 == 0)
+ yk += X[lim] * (mantissa_store_t) X[lim];
+
+ /* In __mul, this loop (and the one within the next while loop) run
+ between a range to calculate the mantissa as follows:
+
+ Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
+ + X[n] * Y[k]
+
+ For X == Y, we can get away with summing halfway and doubling the
+ result. For cases where the range size is even, the mid-point needs
+ to be added separately (above). */
+ for (i = k - p, j = p; i < j; i++, j--)
+ yk2 += X[i] * (mantissa_store_t) X[j];
+
+ yk += 2 * yk2;
+
+ DIV_RADIX (yk, Y[k]);
+ k--;
+ }
+
+ while (k > 1)
+ {
+ mantissa_store_t yk2 = 0;
+ long lim = k / 2;
+
+ if (k % 2 == 0)
+ yk += X[lim] * (mantissa_store_t) X[lim];
+
+ /* Likewise for this loop. */
+ for (i = 1, j = k - 1; i < j; i++, j--)
+ yk2 += X[i] * (mantissa_store_t) X[j];
+
+ yk += 2 * yk2;
+
+ DIV_RADIX (yk, Y[k]);
+ k--;
+ }
+ Y[k] = yk;
+
+ /* Squares are always positive. */
+ Y[0] = 1;
+
+ /* Get the exponent sum into an intermediate variable. This is a subtle
+ optimization, where given enough registers, all operations on the exponent
+ happen in registers and the result is written out only once into EY.  */
+ int e = EX * 2;
+
+ /* Is there a carry beyond the most significant digit? */
+ if (__glibc_unlikely (Y[1] == 0))
+ {
+ for (i = 1; i <= p; i++)
+ Y[i] = Y[i + 1];
+ e--;
+ }
+
+ EY = e;
+}
+#endif
/* Invert *X and store in *Y. Relative error bound:
- For P = 2: 1.001 * R ^ (1 - P)
@@ -664,7 +857,7 @@ static void
SECTION
__inv (const mp_no *x, mp_no *y, int p)
{
- int i;
+ long i;
double t;
mp_no z, w;
static const int np1[] =
@@ -675,7 +868,7 @@ __inv (const mp_no *x, mp_no *y, int p)
__cpy (x, &z, p);
z.e = 0;
__mp_dbl (&z, &t, p);
- t = ONE / t;
+ t = 1 / t;
__dbl_mp (t, y, p);
EY -= EX;
@@ -701,8 +894,8 @@ __dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
mp_no w;
- if (X[0] == ZERO)
- Z[0] = ZERO;
+ if (X[0] == 0)
+ Z[0] = 0;
else
{
__inv (y, &w, p);
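The reworked __mul (and the new __sqr) roughly halve the number of 64-bit multiplications per output digit by pairing symmetric terms, x_i*y_j + x_j*y_i = (x_i + x_j)*(y_i + y_j) - x_i*y_i - x_j*y_j, with the diagonal products removed via the precomputed prefix sums in diag[]. A toy check of that rearrangement for one digit, using int64_t in place of glibc's mantissa types (the digit values are arbitrary):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  /* Three-digit toy operands; index 1 is the most significant digit,
     as in mp_no, and each digit is below 2^24.  */
  int64_t x[] = { 0, 0x123456, 0x00abcd, 0x7fff01 };
  int64_t y[] = { 0, 0x654321, 0x00dcba, 0x000003 };
  int p = 3, k = 4;                     /* product digit k: i + j == k */

  /* Prefix sums of the diagonal products x[i]*y[i], like diag[].  */
  int64_t diag[4] = { 0 };
  for (int i = 1; i <= p; i++)
    diag[i] = diag[i - 1] + x[i] * y[i];

  /* Naive convolution for digit k.  */
  int64_t naive = 0;
  for (int i = k - p; i <= p; i++)
    naive += x[i] * y[k - i];

  /* Pair-summed form: the middle term is added twice because one copy
     is removed again by the diag[] subtraction.  */
  int64_t paired = 2 * x[k / 2] * y[k / 2];
  for (int i = k - p, j = p; i < j; i++, j--)
    paired += (x[i] + x[j]) * (y[i] + y[j]);
  paired -= diag[k - 1];

  assert (naive == paired);
  return 0;
}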
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 06343d46d1..54044a0586 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -35,6 +35,7 @@
/* Common types and definition */
/************************************************************************/
+#include <mpa-arch.h>
/* The mp_no structure holds the details of a multi-precision floating point
number.
@@ -61,7 +62,7 @@
typedef struct
{
int e;
- double d[40];
+ mantissa_t d[40];
} mp_no;
typedef union
@@ -82,9 +83,13 @@ extern const mp_no mptwo;
#define ABS(x) ((x) < 0 ? -(x) : (x))
-#define RADIX 0x1.0p24 /* 2^24 */
-#define RADIXI 0x1.0p-24 /* 2^-24 */
-#define CUTTER 0x1.0p76 /* 2^76 */
+#ifndef RADIXI
+# define RADIXI 0x1.0p-24 /* 2^-24 */
+#endif
+
+#ifndef TWO52
+# define TWO52 0x1.0p52 /* 2^52 */
+#endif
#define ZERO 0.0 /* 0 */
#define MZERO -0.0 /* 0 with the sign bit set */
@@ -92,13 +97,13 @@ extern const mp_no mptwo;
#define MONE -1.0 /* -1 */
#define TWO 2.0 /* 2 */
-#define TWO5 0x1.0p5 /* 2^5 */
-#define TWO8 0x1.0p8 /* 2^52 */
-#define TWO10 0x1.0p10 /* 2^10 */
-#define TWO18 0x1.0p18 /* 2^18 */
-#define TWO19 0x1.0p19 /* 2^19 */
-#define TWO23 0x1.0p23 /* 2^23 */
-#define TWO52 0x1.0p52 /* 2^52 */
+#define TWO5 TWOPOW (5) /* 2^5 */
+#define TWO8 TWOPOW (8) /* 2^8 */
+#define TWO10 TWOPOW (10) /* 2^10 */
+#define TWO18 TWOPOW (18) /* 2^18 */
+#define TWO19 TWOPOW (19) /* 2^19 */
+#define TWO23 TWOPOW (23) /* 2^23 */
+
#define TWO57 0x1.0p57 /* 2^57 */
#define TWO71 0x1.0p71 /* 2^71 */
#define TWOM1032 0x1.0p-1032 /* 2^-1032 */
@@ -115,6 +120,7 @@ void __dbl_mp (double, mp_no *, int);
void __add (const mp_no *, const mp_no *, mp_no *, int);
void __sub (const mp_no *, const mp_no *, mp_no *, int);
void __mul (const mp_no *, const mp_no *, mp_no *, int);
+void __sqr (const mp_no *, mp_no *, int);
void __dvd (const mp_no *, const mp_no *, mp_no *, int);
extern void __mpatan (mp_no *, mp_no *, int);
diff --git a/sysdeps/ieee754/dbl-64/mpatan.c b/sysdeps/ieee754/dbl-64/mpatan.c
index db5868092a..cc879d8ec4 100644
--- a/sysdeps/ieee754/dbl-64/mpatan.c
+++ b/sysdeps/ieee754/dbl-64/mpatan.c
@@ -39,63 +39,78 @@
#include "mpatan.h"
-void __mpsqrt(mp_no *, mp_no *, int);
-
void
SECTION
-__mpatan(mp_no *x, mp_no *y, int p) {
+__mpatan (mp_no *x, mp_no *y, int p)
+{
- int i,m,n;
+ int i, m, n;
double dx;
- mp_no
- mptwoim1 = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
+ mp_no mptwoim1 =
+ {
+ 0,
+ {
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+ }
+ };
- mp_no mps,mpsm,mpt,mpt1,mpt2,mpt3;
+ mp_no mps, mpsm, mpt, mpt1, mpt2, mpt3;
- /* Choose m and initiate mptwoim1 */
- if (EX>0) m=7;
- else if (EX<0) m=0;
- else {
- __mp_dbl(x,&dx,p); dx=ABS(dx);
- for (m=6; m>0; m--)
- {if (dx>__atan_xm[m].d) break;}
+ /* Choose m and initiate mptwoim1. */
+ if (EX > 0)
+ m = 7;
+ else if (EX < 0)
+ m = 0;
+ else
+ {
+ __mp_dbl (x, &dx, p);
+ dx = ABS (dx);
+ for (m = 6; m > 0; m--)
+ {
+ if (dx > __atan_xm[m].d)
+ break;
+ }
}
- mptwoim1.e = 1;
- mptwoim1.d[0] = ONE;
+ mptwoim1.e = 1;
+ mptwoim1.d[0] = ONE;
- /* Reduce x m times */
- __mul(x,x,&mpsm,p);
- if (m==0) __cpy(x,&mps,p);
- else {
- for (i=0; i<m; i++) {
- __add(&mpone,&mpsm,&mpt1,p);
- __mpsqrt(&mpt1,&mpt2,p);
- __add(&mpt2,&mpt2,&mpt1,p);
- __add(&mptwo,&mpsm,&mpt2,p);
- __add(&mpt1,&mpt2,&mpt3,p);
- __dvd(&mpsm,&mpt3,&mpt1,p);
- __cpy(&mpt1,&mpsm,p);
- }
- __mpsqrt(&mpsm,&mps,p); mps.d[0] = X[0];
+ /* Reduce x m times. */
+ __sqr (x, &mpsm, p);
+ if (m == 0)
+ __cpy (x, &mps, p);
+ else
+ {
+ for (i = 0; i < m; i++)
+ {
+ __add (&mpone, &mpsm, &mpt1, p);
+ __mpsqrt (&mpt1, &mpt2, p);
+ __add (&mpt2, &mpt2, &mpt1, p);
+ __add (&mptwo, &mpsm, &mpt2, p);
+ __add (&mpt1, &mpt2, &mpt3, p);
+ __dvd (&mpsm, &mpt3, &mpt1, p);
+ __cpy (&mpt1, &mpsm, p);
+ }
+ __mpsqrt (&mpsm, &mps, p);
+ mps.d[0] = X[0];
}
- /* Evaluate a truncated power series for Atan(s) */
- n=__atan_np[p]; mptwoim1.d[1] = __atan_twonm1[p].d;
- __dvd(&mpsm,&mptwoim1,&mpt,p);
- for (i=n-1; i>1; i--) {
+ /* Evaluate a truncated power series for Atan(s). */
+ n = __atan_np[p];
+ mptwoim1.d[1] = __atan_twonm1[p].d;
+ __dvd (&mpsm, &mptwoim1, &mpt, p);
+ for (i = n - 1; i > 1; i--)
+ {
mptwoim1.d[1] -= TWO;
- __dvd(&mpsm,&mptwoim1,&mpt1,p);
- __mul(&mpsm,&mpt,&mpt2,p);
- __sub(&mpt1,&mpt2,&mpt,p);
+ __dvd (&mpsm, &mptwoim1, &mpt1, p);
+ __mul (&mpsm, &mpt, &mpt2, p);
+ __sub (&mpt1, &mpt2, &mpt, p);
}
- __mul(&mps,&mpt,&mpt1,p);
- __sub(&mps,&mpt1,&mpt,p);
-
- /* Compute Atan(x) */
- mptwoim1.d[1] = 1 << m;
- __mul(&mptwoim1,&mpt,y,p);
+ __mul (&mps, &mpt, &mpt1, p);
+ __sub (&mps, &mpt1, &mpt, p);
- return;
+ /* Compute Atan(x). */
+ mptwoim1.d[1] = 1 << m;
+ __mul (&mptwoim1, &mpt, y, p);
}
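For reference, the scheme the reformatted __mpatan implements, condensed to plain double (atan_sketch is invented for this note, and m and the term count are hard-coded instead of coming from __atan_xm and __atan_np): square the argument, halve the angle m times via s -> s / (2 + s + 2 sqrt (1 + s)), sum the truncated odd series for atan of the reduced value, then scale the result back by 2^m.

#include <math.h>
#include <stdio.h>

static double
atan_sketch (double x, int m, int terms)
{
  int i;
  double s = x * x;
  double r, sum;

  /* Halve the angle m times: if s = tan(t)^2, then
     s / (2 + s + 2*sqrt(1 + s)) = tan(t/2)^2.  */
  for (i = 0; i < m; i++)
    s = s / (2.0 + s + 2.0 * sqrt (1.0 + s));

  r = copysign (sqrt (s), x);   /* reduced argument, with the sign of x */

  /* Truncated series atan(r) = r - r^3/3 + r^5/5 - ..., summed from the
     smallest term up as in the mp loop above.  */
  sum = s / (2.0 * terms - 1.0);
  for (i = terms - 1; i > 1; i--)
    sum = s / (2.0 * i - 1.0) - s * sum;
  sum = r - r * sum;

  /* Undo the m halvings: atan(x) = 2^m * atan(r).  */
  return ldexp (sum, m);
}

int
main (void)
{
  printf ("%.17g\n%.17g\n", atan_sketch (0.75, 7, 10), atan (0.75));
  return 0;
}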
diff --git a/sysdeps/ieee754/dbl-64/mpatan2.c b/sysdeps/ieee754/dbl-64/mpatan2.c
index c0b9aea1e2..d29c2fbade 100644
--- a/sysdeps/ieee754/dbl-64/mpatan2.c
+++ b/sysdeps/ieee754/dbl-64/mpatan2.c
@@ -32,37 +32,36 @@
/* */
/******************************************************************/
-
-
#include "mpa.h"
#ifndef SECTION
# define SECTION
#endif
-void __mpsqrt(mp_no *, mp_no *, int);
-void __mpatan(mp_no *, mp_no *, int);
-
-/* Multi-Precision Atan2(y,x) function subroutine, for p >= 4. */
-/* y=0 is not permitted if x<=0. No error messages are given. */
+/* Multi-Precision Atan2 (y, x) function subroutine, for p >= 4.
+ y = 0 is not permitted if x <= 0. No error messages are given. */
void
SECTION
-__mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) {
+__mpatan2 (mp_no *y, mp_no *x, mp_no *z, int p)
+{
+ mp_no mpt1, mpt2, mpt3;
- mp_no mpt1,mpt2,mpt3;
-
-
- if (X[0] <= ZERO) {
- __dvd(x,y,&mpt1,p); __mul(&mpt1,&mpt1,&mpt2,p);
- if (mpt1.d[0] != ZERO) mpt1.d[0] = ONE;
- __add(&mpt2,&mpone,&mpt3,p); __mpsqrt(&mpt3,&mpt2,p);
- __add(&mpt1,&mpt2,&mpt3,p); mpt3.d[0]=Y[0];
- __mpatan(&mpt3,&mpt1,p); __add(&mpt1,&mpt1,z,p);
- }
+ if (X[0] <= ZERO)
+ {
+ __dvd (x, y, &mpt1, p);
+ __mul (&mpt1, &mpt1, &mpt2, p);
+ if (mpt1.d[0] != ZERO)
+ mpt1.d[0] = ONE;
+ __add (&mpt2, &mpone, &mpt3, p);
+ __mpsqrt (&mpt3, &mpt2, p);
+ __add (&mpt1, &mpt2, &mpt3, p);
+ mpt3.d[0] = Y[0];
+ __mpatan (&mpt3, &mpt1, p);
+ __add (&mpt1, &mpt1, z, p);
+ }
else
- { __dvd(y,x,&mpt1,p);
- __mpatan(&mpt1,z,p);
- }
-
- return;
+ {
+ __dvd (y, x, &mpt1, p);
+ __mpatan (&mpt1, z, p);
+ }
}
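The x <= 0 branch is the half-angle form of atan2: with q = x/y, tan (atan2 (y, x) / 2) equals |q| + sqrt (1 + q^2) taken with the sign of y. A plain-double sketch of the same control flow (atan2_sketch is a made-up helper using libm's atan in place of __mpatan):

#include <math.h>
#include <stdio.h>

static double
atan2_sketch (double y, double x)
{
  if (x <= 0.0)
    {
      /* Half-angle form, valid for x <= 0 and y != 0:
         tan (atan2(y, x) / 2) = |x/y| + sqrt (1 + (x/y)^2), sign of y.  */
      double q = x / y;
      double t = fabs (q) + sqrt (1.0 + q * q);
      return 2.0 * atan (copysign (t, y));
    }
  /* x > 0: plain arc tangent of the slope.  */
  return atan (y / x);
}

int
main (void)
{
  printf ("%.17g\n%.17g\n", atan2_sketch (1.0, -1.0), atan2 (1.0, -1.0));
  return 0;
}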
diff --git a/sysdeps/ieee754/dbl-64/mpexp.c b/sysdeps/ieee754/dbl-64/mpexp.c
index 8d288ff9a1..565c6c8531 100644
--- a/sysdeps/ieee754/dbl-64/mpexp.c
+++ b/sysdeps/ieee754/dbl-64/mpexp.c
@@ -49,6 +49,7 @@ __mpexp (mp_no *x, mp_no *y, int p)
0, 0, 0, 0, 3, 3, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8
};
+
static const int m1p[33] =
{
0, 0, 0, 0,
@@ -71,16 +72,7 @@ __mpexp (mp_no *x, mp_no *y, int p)
{0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 39, 43, 47, 51, 55, 59, 63},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 47, 50, 54}
};
- mp_no mpk =
- {
- 0,
- {
- 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
- 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
- 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
- }
- };
- mp_no mps, mpak, mpt1, mpt2;
+ mp_no mps, mpk, mpt1, mpt2;
/* Choose m,n and compute a=2**(-m). */
n = np[p];
@@ -115,37 +107,52 @@ __mpexp (mp_no *x, mp_no *y, int p)
break;
}
- /* Compute s=x*2**(-m). Put result in mps. */
+ /* Compute s=x*2**(-m). Put result in mps. This is the range-reduced input
+ from which e^s is computed; the final result is then obtained by raising
+ e^s to the power 2^m. */
__pow_mp (-m, &mpt1, p);
__mul (x, &mpt1, &mps, p);
- /* Evaluate the polynomial. Put result in mpt2. */
- mpk.e = 1;
- mpk.d[0] = ONE;
- mpk.d[1] = n;
- __dvd (&mps, &mpk, &mpt1, p);
- __add (&mpone, &mpt1, &mpak, p);
- for (k = n - 1; k > 1; k--)
+ /* Compute the Taylor series for e^s:
+
+ 1 + x/1! + x^2/2! + x^3/3! ...
+
+ for N iterations. We compute this as:
+
+ e^x = 1 + (x * n!/1! + x^2 * n!/2! + x^3 * n!/3!) / n!
+ = 1 + (x * (n!/1! + x * (n!/2! + x * (n!/3! + x ...)))) / n!
+
+ The factor n!/k! is computed on the fly as KF; at the end of the polynomial
+ loop KF is n!, which can be used directly. */
+ __cpy (&mps, &mpt2, p);
+
+ double kf = 1.0;
+
+ /* Evaluate the rest. The result will be in mpt2. */
+ for (k = n - 1; k > 0; k--)
{
- __mul (&mps, &mpak, &mpt1, p);
- mpk.d[1] = k;
- __dvd (&mpt1, &mpk, &mpt2, p);
- __add (&mpone, &mpt2, &mpak, p);
+ /* n! / k! = n * (n - 1) ... * (n - k + 1) */
+ kf *= k + 1;
+
+ __dbl_mp (kf, &mpk, p);
+ __add (&mpt2, &mpk, &mpt1, p);
+ __mul (&mps, &mpt1, &mpt2, p);
}
- __mul (&mps, &mpak, &mpt1, p);
+ __dbl_mp (kf, &mpk, p);
+ __dvd (&mpt2, &mpk, &mpt1, p);
__add (&mpone, &mpt1, &mpt2, p);
/* Raise polynomial value to the power of 2**m. Put result in y. */
for (k = 0, j = 0; k < m;)
{
- __mul (&mpt2, &mpt2, &mpt1, p);
+ __sqr (&mpt2, &mpt1, p);
k++;
if (k == m)
{
j = 1;
break;
}
- __mul (&mpt1, &mpt1, &mpt2, p);
+ __sqr (&mpt1, &mpt2, p);
k++;
}
if (j)
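The rewritten polynomial loop and the squaring loop, reduced to ordinary doubles so the n!/k! bookkeeping is easier to follow (exp_sketch and its fixed m and n are inventions of this note, not part of the patch):

#include <math.h>
#include <stdio.h>

static double
exp_sketch (double x, int m, int n)
{
  int k;
  double s = ldexp (x, -m);   /* range-reduced argument s = x * 2^-m */
  double acc = s;             /* innermost term x * n!/n! */
  double kf = 1.0;
  double r;

  for (k = n - 1; k > 0; k--)
    {
      kf *= k + 1;            /* kf = n!/k!, equal to n! after the last pass */
      acc = s * (acc + kf);
    }
  r = 1.0 + acc / kf;         /* 1 + (...)/n! = e^s */

  for (k = 0; k < m; k++)     /* (e^s)^(2^m) = e^x by repeated squaring */
    r *= r;
  return r;
}

int
main (void)
{
  printf ("%.17g\n%.17g\n", exp_sketch (1.0, 8, 18), exp (1.0));
  return 0;
}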
diff --git a/sysdeps/ieee754/dbl-64/mplog.c b/sysdeps/ieee754/dbl-64/mplog.c
index e3d10846e2..f8d5c1095f 100644
--- a/sysdeps/ieee754/dbl-64/mplog.c
+++ b/sysdeps/ieee754/dbl-64/mplog.c
@@ -1,4 +1,3 @@
-
/*
* IBM Accurate Mathematical Library
* written by International Business Machines Corp.
@@ -37,27 +36,30 @@
#include "endian.h"
#include "mpa.h"
-void __mpexp(mp_no *, mp_no *, int);
-
-void __mplog(mp_no *x, mp_no *y, int p) {
- int i,m;
- static const int mp[33] = {0,0,0,0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4};
- mp_no mpt1,mpt2;
+void
+__mplog (mp_no *x, mp_no *y, int p)
+{
+ int i, m;
+ static const int mp[33] =
+ {
+ 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
+ };
+ mp_no mpt1, mpt2;
- /* Choose m */
+ /* Choose m. */
m = mp[p];
- /* Perform m newton iterations to solve for y: exp(y)-x=0. */
- /* The iterations formula is: y(n+1)=y(n)+(x*exp(-y(n))-1). */
- __cpy(y,&mpt1,p);
- for (i=0; i<m; i++) {
- mpt1.d[0]=-mpt1.d[0];
- __mpexp(&mpt1,&mpt2,p);
- __mul(x,&mpt2,&mpt1,p);
- __sub(&mpt1,&mpone,&mpt2,p);
- __add(y,&mpt2,&mpt1,p);
- __cpy(&mpt1,y,p);
- }
- return;
+ /* Perform m Newton iterations to solve for y: exp(y) - x = 0. The
+ iteration formula is: y(n + 1) = y(n) + (x * exp(-y(n)) - 1). */
+ __cpy (y, &mpt1, p);
+ for (i = 0; i < m; i++)
+ {
+ mpt1.d[0] = -mpt1.d[0];
+ __mpexp (&mpt1, &mpt2, p);
+ __mul (x, &mpt2, &mpt1, p);
+ __sub (&mpt1, &mpone, &mpt2, p);
+ __add (y, &mpt2, &mpt1, p);
+ __cpy (&mpt1, y, p);
+ }
}
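The loop is plain Newton iteration on f(y) = exp (y) - x, refining the initial estimate supplied by the caller. A minimal double-precision sketch (log_refine is a made-up name; the real routine runs the same loop on mp_no values with __mpexp):

#include <math.h>
#include <stdio.h>

/* One Newton step for f(y) = exp(y) - x is
   y - (exp(y) - x) / exp(y) = y + (x * exp(-y) - 1).  */
static double
log_refine (double x, double y, int m)
{
  int i;
  for (i = 0; i < m; i++)
    y = y + (x * exp (-y) - 1.0);
  return y;
}

int
main (void)
{
  double y0 = (double) (float) log (2.5);   /* deliberately coarse start */
  printf ("%.17g\n%.17g\n", log_refine (2.5, y0, 3), log (2.5));
  return 0;
}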
diff --git a/sysdeps/ieee754/dbl-64/mpsqrt.c b/sysdeps/ieee754/dbl-64/mpsqrt.c
index 65df9fd067..230d1f36e8 100644
--- a/sysdeps/ieee754/dbl-64/mpsqrt.c
+++ b/sysdeps/ieee754/dbl-64/mpsqrt.c
@@ -45,33 +45,37 @@
/* p as integer. Routine computes sqrt(*x) and stores result in *y */
/****************************************************************************/
-static double fastiroot(double);
+static double fastiroot (double);
void
SECTION
-__mpsqrt(mp_no *x, mp_no *y, int p) {
- int i,m,ey;
- double dx,dy;
- static const mp_no
- mphalf = {0,{1.0,8388608.0 /* 2^23 */}},
- mp3halfs = {1,{1.0,1.0,8388608.0 /* 2^23 */}};
- mp_no mpxn,mpz,mpu,mpt1,mpt2;
+__mpsqrt (mp_no *x, mp_no *y, int p)
+{
+ int i, m, ey;
+ double dx, dy;
+ static const mp_no mphalf = {0, {1.0, HALFRAD}};
+ static const mp_no mp3halfs = {1, {1.0, 1.0, HALFRAD}};
+ mp_no mpxn, mpz, mpu, mpt1, mpt2;
- ey=EX/2; __cpy(x,&mpxn,p); mpxn.e -= (ey+ey);
- __mp_dbl(&mpxn,&dx,p); dy=fastiroot(dx); __dbl_mp(dy,&mpu,p);
- __mul(&mpxn,&mphalf,&mpz,p);
+ ey = EX / 2;
+ __cpy (x, &mpxn, p);
+ mpxn.e -= (ey + ey);
+ __mp_dbl (&mpxn, &dx, p);
+ dy = fastiroot (dx);
+ __dbl_mp (dy, &mpu, p);
+ __mul (&mpxn, &mphalf, &mpz, p);
- m=__mpsqrt_mp[p];
- for (i=0; i<m; i++) {
- __mul(&mpu,&mpu,&mpt1,p);
- __mul(&mpt1,&mpz,&mpt2,p);
- __sub(&mp3halfs,&mpt2,&mpt1,p);
- __mul(&mpu,&mpt1,&mpt2,p);
- __cpy(&mpt2,&mpu,p);
- }
- __mul(&mpxn,&mpu,y,p); EY += ey;
-
- return;
+ m = __mpsqrt_mp[p];
+ for (i = 0; i < m; i++)
+ {
+ __sqr (&mpu, &mpt1, p);
+ __mul (&mpt1, &mpz, &mpt2, p);
+ __sub (&mp3halfs, &mpt2, &mpt1, p);
+ __mul (&mpu, &mpt1, &mpt2, p);
+ __cpy (&mpt2, &mpu, p);
+ }
+ __mul (&mpxn, &mpu, y, p);
+ EY += ey;
}
/***********************************************************/
@@ -80,22 +84,28 @@ __mpsqrt(mp_no *x, mp_no *y, int p) {
/***********************************************************/
static double
SECTION
-fastiroot(double x) {
- union {int i[2]; double d;} p,q;
- double y,z, t;
+fastiroot (double x)
+{
+ union
+ {
+ int i[2];
+ double d;
+ } p, q;
+ double y, z, t;
int n;
- static const double c0 = 0.99674, c1 = -0.53380, c2 = 0.45472, c3 = -0.21553;
+ static const double c0 = 0.99674, c1 = -0.53380;
+ static const double c2 = 0.45472, c3 = -0.21553;
p.d = x;
- p.i[HIGH_HALF] = (p.i[HIGH_HALF] & 0x3FFFFFFF ) | 0x3FE00000 ;
+ p.i[HIGH_HALF] = (p.i[HIGH_HALF] & 0x3FFFFFFF) | 0x3FE00000;
q.d = x;
y = p.d;
- z = y -1.0;
- n = (q.i[HIGH_HALF] - p.i[HIGH_HALF])>>1;
- z = ((c3*z + c2)*z + c1)*z + c0; /* 2**-7 */
- z = z*(1.5 - 0.5*y*z*z); /* 2**-14 */
- p.d = z*(1.5 - 0.5*y*z*z); /* 2**-28 */
+ z = y - 1.0;
+ n = (q.i[HIGH_HALF] - p.i[HIGH_HALF]) >> 1;
+ z = ((c3 * z + c2) * z + c1) * z + c0; /* 2**-7 */
+ z = z * (1.5 - 0.5 * y * z * z); /* 2**-14 */
+ p.d = z * (1.5 - 0.5 * y * z * z); /* 2**-28 */
p.i[HIGH_HALF] -= n;
- t = x*p.d;
- return p.d*(1.5 - 0.5*p.d*t);
+ t = x * p.d;
+ return p.d * (1.5 - 0.5 * p.d * t);
}
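The loop body is the classic Newton-Raphson step for the reciprocal square root, u <- u * (1.5 - 0.5 * x * u * u), with fastiroot supplying the starting value and __mpsqrt_mp[p] the iteration count. A plain-double sketch (sqrt_sketch and its crude starting guess are illustrative only):

#include <math.h>
#include <stdio.h>

static double
sqrt_sketch (double x, int iters)
{
  int i;
  double u = 1.0 / sqrt (x + 0.01);      /* any rough 1/sqrt(x) estimate */

  for (i = 0; i < iters; i++)
    u = u * (1.5 - 0.5 * x * u * u);     /* error roughly squares each step */
  return x * u;                          /* x / sqrt(x) = sqrt(x) */
}

int
main (void)
{
  printf ("%.17g\n%.17g\n", sqrt_sketch (2.0, 4), sqrt (2.0));
  return 0;
}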
diff --git a/sysdeps/ieee754/dbl-64/mptan.c b/sysdeps/ieee754/dbl-64/mptan.c
index 234108e373..51b5718e73 100644
--- a/sysdeps/ieee754/dbl-64/mptan.c
+++ b/sysdeps/ieee754/dbl-64/mptan.c
@@ -40,23 +40,25 @@
# define SECTION
#endif
-int __mpranred(double, mp_no *, int);
-void __c32(mp_no *, mp_no *, mp_no *, int);
-
void
SECTION
-__mptan(double x, mp_no *mpy, int p) {
+__mptan (double x, mp_no *mpy, int p)
+{
int n;
mp_no mpw, mpc, mps;
- n = __mpranred(x, &mpw, p) & 0x00000001; /* negative or positive result */
- __c32(&mpw, &mpc, &mps, p); /* computing sin(x) and cos(x) */
- if (n) /* second or fourth quarter of unit circle */
- { __dvd(&mpc,&mps,mpy,p);
- mpy->d[0] *= MONE;
- } /* tan is negative in this area */
- else __dvd(&mps,&mpc,mpy,p);
-
- return;
+ /* Negative or positive result. */
+ n = __mpranred (x, &mpw, p) & 0x00000001;
+ /* Computing sin(x) and cos(x). */
+ __c32 (&mpw, &mpc, &mps, p);
+ /* Second or fourth quarter of unit circle, where tan is negative. */
+ if (n)
+ {
+ __dvd (&mpc, &mps, mpy, p);
+ mpy->d[0] *= MONE;
+ }
+ else
+ __dvd (&mps, &mpc, mpy, p);
}
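The structure of __mptan in ordinary double terms: reduce modulo pi/2 while keeping the quadrant parity, then form either sin/cos or -cos/sin. The sketch below uses remquo and libm as stand-ins for __mpranred and __c32, so its reduction is far less accurate than the multi-precision one; it only illustrates the parity logic.

#include <math.h>
#include <stdio.h>

static const double PIO2 = 1.5707963267948966;   /* pi/2 rounded to double */

static double
tan_sketch (double x)
{
  int quo;
  double w = remquo (x, PIO2, &quo);   /* x = quo * pi/2 + w, |w| <= pi/4 */
  double s = sin (w), c = cos (w);

  if (quo & 1)        /* odd quadrant count: tan(x) = -cot(w) */
    return -c / s;
  return s / c;       /* even: tan(x) = tan(w) */
}

int
main (void)
{
  printf ("%.17g\n%.17g\n", tan_sketch (2.0), tan (2.0));
  return 0;
}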
diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c
index 37442b72e8..aa3564d560 100644
--- a/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/sysdeps/ieee754/dbl-64/s_atan.c
@@ -62,18 +62,9 @@ double atan(double x) {
#ifndef DLA_FMS
double t4,t5,t6;
#endif
-#if 0
- double y1,y2;
-#endif
int i,ux,dx;
-#if 0
- int p;
-#endif
static const int pr[M]={6,8,10,32};
number num;
-#if 0
- mp_no mpt1,mpx,mpy,mpy1,mpy2,mperr;
-#endif
num.d = x; ux = num.i[HIGH_HALF]; dx = num.i[LOW_HALF];
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 496f1e1917..5038b72612 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -100,14 +100,8 @@ double
SECTION
__sin(double x){
double xx,res,t,cor,y,s,c,sn,ssn,cs,ccs,xn,a,da,db,eps,xn1,xn2;
-#if 0
- double w[2];
-#endif
mynumber u,v;
int4 k,m,n;
-#if 0
- int4 nn;
-#endif
double retval = 0;
SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
@@ -902,10 +896,6 @@ SECTION
bsloww(double x,double dx, double orig,int n) {
static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
double y,x1,x2,xx,r,t,res,cor,w[2];
-#if 0
- double a,da,xn;
- union {int4 i[2]; double x;} v;
-#endif
x1=(x+th2_36)-th2_36;
y = aa.x*x1*x1*x1;
r=x+y;
diff --git a/sysdeps/ieee754/dbl-64/s_tan.c b/sysdeps/ieee754/dbl-64/s_tan.c
index c5446a6e56..faa5221e5e 100644
--- a/sysdeps/ieee754/dbl-64/s_tan.c
+++ b/sysdeps/ieee754/dbl-64/s_tan.c
@@ -64,9 +64,6 @@ tan(double x) {
int p;
number num,v;
mp_no mpa,mpt1,mpt2;
-#if 0
- mp_no mpy;
-#endif
double retval;
diff --git a/sysdeps/ieee754/dbl-64/sincos32.c b/sysdeps/ieee754/dbl-64/sincos32.c
index 6c5ffded52..954db66d6b 100644
--- a/sysdeps/ieee754/dbl-64/sincos32.c
+++ b/sysdeps/ieee754/dbl-64/sincos32.c
@@ -57,17 +57,10 @@ SECTION
ss32(mp_no *x, mp_no *y, int p) {
int i;
double a;
-#if 0
- double b;
- static const mp_no mpone = {1,{1.0,1.0}};
-#endif
mp_no mpt1,x2,gor,sum ,mpk={1,{1.0}};
-#if 0
- mp_no mpt2;
-#endif
for (i=1;i<=p;i++) mpk.d[i]=0;
- __mul(x,x,&x2,p);
+ __sqr(x,&x2,p);
__cpy(&oofac27,&gor,p);
__cpy(&gor,&sum,p);
for (a=27.0;a>1.0;a-=2.0) {
@@ -89,17 +82,10 @@ SECTION
cc32(mp_no *x, mp_no *y, int p) {
int i;
double a;
-#if 0
- double b;
- static const mp_no mpone = {1,{1.0,1.0}};
-#endif
mp_no mpt1,x2,gor,sum ,mpk={1,{1.0}};
-#if 0
- mp_no mpt2;
-#endif
for (i=1;i<=p;i++) mpk.d[i]=0;
- __mul(x,x,&x2,p);
+ __sqr(x,&x2,p);
mpk.d[1]=27.0;
__mul(&oofac27,&mpk,&gor,p);
__cpy(&gor,&sum,p);
@@ -119,7 +105,6 @@ cc32(mp_no *x, mp_no *y, int p) {
void
SECTION
__c32(mp_no *x, mp_no *y, mp_no *z, int p) {
- static const mp_no mpt={1,{1.0,2.0}}, one={1,{1.0,1.0}};
mp_no u,t,t1,t2,c,s;
int i;
__cpy(x,&u,p);
@@ -130,11 +115,11 @@ __c32(mp_no *x, mp_no *y, mp_no *z, int p) {
__mul(&c,&s,&t,p);
__sub(&s,&t,&t1,p);
__add(&t1,&t1,&s,p);
- __sub(&mpt,&c,&t1,p);
+ __sub(&mptwo,&c,&t1,p);
__mul(&t1,&c,&t2,p);
__add(&t2,&t2,&c,p);
}
- __sub(&one,&c,y,p);
+ __sub(&mpone,&c,y,p);
__cpy(&s,z,p);
}
@@ -251,7 +236,6 @@ __mpranred(double x, mp_no *y, int p)
number v;
double t,xn;
int i,k,n;
- static const mp_no one = {1,{1.0,1.0}};
mp_no a,b,c;
if (ABS(x) < 2.8e14) {
@@ -278,9 +262,9 @@ __mpranred(double x, mp_no *y, int p)
for (i=1;i<=p-c.e;i++) c.d[i]=c.d[i+c.e];
for (i=p+1-c.e;i<=p;i++) c.d[i]=0;
c.e=0;
- if (c.d[1] >= 8388608.0)
+ if (c.d[1] >= HALFRAD)
{ t +=1.0;
- __sub(&c,&one,&b,p);
+ __sub(&c,&mpone,&b,p);
__mul(&b,&hp,y,p);
}
else __mul(&c,&hp,y,p);
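In __c32 the series routines appear to produce sin t and 1 - cos t of a scaled-down argument, and the loop doubles the angle back up; the removed local constants mpt (value 2) and one (value 1) were private copies of mptwo and mpone used by those recurrences, which is why the shared constants can substitute for them. The doubling identities, checked in plain double (everything below is illustrative):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  int i;
  double t = 0.6 / 8.0;         /* scaled-down angle, doubled back 3 times */
  double s = sin (t);           /* the mp code gets these from the series */
  double c = 1.0 - cos (t);

  for (i = 0; i < 3; i++)
    {
      double sc = s * c;
      s = 2.0 * (s - sc);       /* sin(2t)     = 2 sin t (1 - (1 - cos t)) */
      c = 2.0 * c * (2.0 - c);  /* 1 - cos(2t) = 2 (1 - cos t) (1 + cos t) */
    }

  printf ("%.17g %.17g\n", s, sin (0.6));         /* equal up to rounding */
  printf ("%.17g %.17g\n", 1.0 - c, cos (0.6));
  return 0;
}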
diff --git a/sysdeps/ieee754/dbl-64/slowexp.c b/sysdeps/ieee754/dbl-64/slowexp.c
index 34ca3275eb..8f353f634f 100644
--- a/sysdeps/ieee754/dbl-64/slowexp.c
+++ b/sysdeps/ieee754/dbl-64/slowexp.c
@@ -27,45 +27,49 @@
/*Converting from double precision to Multi-precision and calculating */
/* e^x */
/**************************************************************************/
-#include "mpa.h"
#include <math_private.h>
+#ifndef USE_LONG_DOUBLE_FOR_MP
+# include "mpa.h"
+void __mpexp (mp_no *x, mp_no *y, int p);
+#endif
+
#ifndef SECTION
# define SECTION
#endif
-void __mpexp(mp_no *x, mp_no *y, int p);
-
/*Converting from double precision to Multi-precision and calculating e^x */
double
SECTION
-__slowexp(double x) {
- double w,z,res,eps=3.0e-26;
-#if 0
- double y;
-#endif
+__slowexp (double x)
+{
+#ifndef USE_LONG_DOUBLE_FOR_MP
+ double w, z, res, eps = 3.0e-26;
int p;
-#if 0
- int orig,i;
-#endif
- mp_no mpx, mpy, mpz,mpw,mpeps,mpcor;
+ mp_no mpx, mpy, mpz, mpw, mpeps, mpcor;
- p=6;
- __dbl_mp(x,&mpx,p); /* Convert a double precision number x */
- /* into a multiple precision number mpx with prec. p. */
- __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */
- __dbl_mp(eps,&mpeps,p);
- __mul(&mpeps,&mpy,&mpcor,p);
- __add(&mpy,&mpcor,&mpw,p);
- __sub(&mpy,&mpcor,&mpz,p);
- __mp_dbl(&mpw, &w, p);
- __mp_dbl(&mpz, &z, p);
- if (w == z) return w;
- else { /* if calculating is not exactly */
- p = 32;
- __dbl_mp(x,&mpx,p);
- __mpexp(&mpx, &mpy, p);
- __mp_dbl(&mpy, &res, p);
- return res;
- }
+ /* Use the multiple precision __MPEXP function to compute the exponential,
+ first at 144 bits and, if that is not accurate enough, at 768 bits. */
+ p = 6;
+ __dbl_mp (x, &mpx, p);
+ __mpexp (&mpx, &mpy, p);
+ __dbl_mp (eps, &mpeps, p);
+ __mul (&mpeps, &mpy, &mpcor, p);
+ __add (&mpy, &mpcor, &mpw, p);
+ __sub (&mpy, &mpcor, &mpz, p);
+ __mp_dbl (&mpw, &w, p);
+ __mp_dbl (&mpz, &z, p);
+ if (w == z)
+ return w;
+ else
+ {
+ p = 32;
+ __dbl_mp (x, &mpx, p);
+ __mpexp (&mpx, &mpy, p);
+ __mp_dbl (&mpy, &res, p);
+ return res;
+ }
+#else
+ return (double) __ieee754_expl ((long double) x);
+#endif
}
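The shape of the accuracy check that decides whether the p = 6 result can be returned, sketched with long double standing in for the 144-bit multiple-precision value (slowexp_pattern and its eps are inventions of this sketch, not glibc code): widen the result by a small error bound, round both ends to double, and fall back to the slow 768-bit pass only when the two roundings disagree.

#include <math.h>
#include <stdio.h>

static double
slowexp_pattern (double x)
{
  long double y = expl ((long double) x);   /* stand-in for the 144-bit value */
  long double eps = 3.0e-20L * fabsl (y);   /* error bound chosen for this sketch */
  double hi = (double) (y + eps);
  double lo = (double) (y - eps);

  if (hi == lo)       /* both brackets round to the same double: accept it */
    return hi;

  /* Otherwise the real code redoes everything at p = 32 (768 bits).  */
  return hi;          /* placeholder for the higher-precision result */
}

int
main (void)
{
  printf ("%.17g\n%.17g\n", slowexp_pattern (1.0), exp (1.0));
  return 0;
}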
diff --git a/sysdeps/ieee754/dbl-64/slowpow.c b/sysdeps/ieee754/dbl-64/slowpow.c
index c303eaa5af..a379728b14 100644
--- a/sysdeps/ieee754/dbl-64/slowpow.c
+++ b/sysdeps/ieee754/dbl-64/slowpow.c
@@ -38,42 +38,76 @@
# define SECTION
#endif
-void __mpexp(mp_no *x, mp_no *y, int p);
-void __mplog(mp_no *x, mp_no *y, int p);
-double ulog(double);
-double __halfulp(double x,double y);
+void __mpexp (mp_no *x, mp_no *y, int p);
+void __mplog (mp_no *x, mp_no *y, int p);
+double ulog (double);
+double __halfulp (double x, double y);
double
SECTION
-__slowpow(double x, double y, double z) {
- double res,res1;
- mp_no mpx, mpy, mpz,mpw,mpp,mpr,mpr1;
- static const mp_no eps = {-3,{1.0,4.0}};
+__slowpow (double x, double y, double z)
+{
+ double res, res1;
+ mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
+ static const mp_no eps = {-3, {1.0, 4.0}};
int p;
- res = __halfulp(x,y); /* halfulp() returns -10 or x^y */
- if (res >= 0) return res; /* if result was really computed by halfulp */
- /* else, if result was not really computed by halfulp */
- p = 10; /* p=precision */
- __dbl_mp(x,&mpx,p);
- __dbl_mp(y,&mpy,p);
- __dbl_mp(z,&mpz,p);
- __mplog(&mpx, &mpz, p); /* log(x) = z */
- __mul(&mpy,&mpz,&mpw,p); /* y * z =w */
- __mpexp(&mpw, &mpp, p); /* e^w =pp */
- __add(&mpp,&eps,&mpr,p); /* pp+eps =r */
- __mp_dbl(&mpr, &res, p);
- __sub(&mpp,&eps,&mpr1,p); /* pp -eps =r1 */
- __mp_dbl(&mpr1, &res1, p); /* converting into double precision */
- if (res == res1) return res;
+ /* __HALFULP returns -10 or X^Y. */
+ res = __halfulp (x, y);
- p = 32; /* if we get here result wasn't calculated exactly, continue */
- __dbl_mp(x,&mpx,p); /* for more exact calculation */
- __dbl_mp(y,&mpy,p);
- __dbl_mp(z,&mpz,p);
- __mplog(&mpx, &mpz, p); /* log(c)=z */
- __mul(&mpy,&mpz,&mpw,p); /* y*z =w */
- __mpexp(&mpw, &mpp, p); /* e^w=pp */
- __mp_dbl(&mpp, &res, p); /* converting into double precision */
+ /* Return if the result was computed by __HALFULP. */
+ if (res >= 0)
+ return res;
+
+ /* Compute pow as long double. This is currently only used by powerpc, where
+ one may get 106 bits of accuracy. */
+#ifdef USE_LONG_DOUBLE_FOR_MP
+ long double ldw, ldz, ldpp;
+ static const long double ldeps = 0x4.0p-96;
+
+ ldz = __ieee754_logl ((long double) x);
+ ldw = (long double) y * ldz;
+ ldpp = __ieee754_expl (ldw);
+ res = (double) (ldpp + ldeps);
+ res1 = (double) (ldpp - ldeps);
+
+ /* Return the result if it is accurate enough. */
+ if (res == res1)
+ return res;
+#endif
+
+ /* Or else, calculate using multiple precision. P = 10 implies an accuracy
+ of 240 bits, since MP_NO has a radix of 2^24. */
+ p = 10;
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+
+ /* z = x ^ y
+ log (z) = y * log (x)
+ z = exp (y * log (x)) */
+ __mplog (&mpx, &mpz, p);
+ __mul (&mpy, &mpz, &mpw, p);
+ __mpexp (&mpw, &mpp, p);
+
+ /* Add and subtract EPS and check whether both round to the same double,
+ i.e. whether we have last bit accuracy. */
+ __add (&mpp, &eps, &mpr, p);
+ __mp_dbl (&mpr, &res, p);
+ __sub (&mpp, &eps, &mpr1, p);
+ __mp_dbl (&mpr1, &res1, p);
+ if (res == res1)
+ return res;
+
+ /* If they do not, repeat the computation with a higher precision. 768 bits
+ of precision ought to be enough for anybody. */
+ p = 32;
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p);
+ __mul (&mpy, &mpz, &mpw, p);
+ __mpexp (&mpw, &mpp, p);
+ __mp_dbl (&mpp, &res, p);
return res;
}
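The new USE_LONG_DOUBLE_FOR_MP fast path in isolation, as a standalone sketch (pow_via_ld is a made-up name; on powerpc the extra precision would come from the 106-bit IBM long double rather than whatever long double the host provides):

#include <math.h>
#include <stdio.h>

/* Returns 1 and stores the result when the long double value is unambiguous
   after perturbing by ldeps, 0 when the caller should fall back to mp.  */
static int
pow_via_ld (double x, double y, double *result)
{
  static const long double ldeps = 0x4.0p-96L;
  long double z = expl ((long double) y * logl ((long double) x));
  double hi = (double) (z + ldeps);
  double lo = (double) (z - ldeps);

  if (hi != lo)
    return 0;
  *result = hi;
  return 1;
}

int
main (void)
{
  double r;
  if (pow_via_ld (1.5, 10.0, &r))
    printf ("%.17g\n%.17g\n", r, pow (1.5, 10.0));
  return 0;
}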
diff --git a/sysdeps/ieee754/dbl-64/x2y2m1.c b/sysdeps/ieee754/dbl-64/x2y2m1.c
index 0b73f0a2ee..d36a950e36 100644
--- a/sysdeps/ieee754/dbl-64/x2y2m1.c
+++ b/sysdeps/ieee754/dbl-64/x2y2m1.c
@@ -37,7 +37,7 @@ add_split (double *hi, double *lo, double x, double y)
given that the values are small enough that no overflow occurs and
large enough (or zero) that no underflow occurs. */
-static inline void
+static void
mul_split (double *hi, double *lo, double x, double y)
{
#ifdef __FP_FAST_FMA
diff --git a/sysdeps/ieee754/ldbl-128/e_j0l.c b/sysdeps/ieee754/ldbl-128/e_j0l.c
index 1b18289588..108eff4435 100644
--- a/sysdeps/ieee754/ldbl-128/e_j0l.c
+++ b/sysdeps/ieee754/ldbl-128/e_j0l.c
@@ -93,6 +93,7 @@
#include <math.h>
#include <math_private.h>
+#include <float.h>
/* 1 / sqrt(pi) */
static const long double ONEOSQPI = 5.6418958354775628694807945156077258584405E-1L;
@@ -700,6 +701,28 @@ __ieee754_j0l (long double x)
return p;
}
+ /* X = x - pi/4
+ cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
+ = 1/sqrt(2) * (cos(x) + sin(x))
+ sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
+ = 1/sqrt(2) * (sin(x) - cos(x))
+ sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+ cf. Fdlibm. */
+ __sincosl (xx, &s, &c);
+ ss = s - c;
+ cc = s + c;
+ if (xx <= LDBL_MAX / 2.0L)
+ {
+ z = -__cosl (xx + xx);
+ if ((s * c) < 0)
+ cc = z / ss;
+ else
+ ss = z / cc;
+ }
+
+ if (xx > 0x1p256L)
+ return ONEOSQPI * cc / __ieee754_sqrtl (xx);
+
xinv = 1.0L / xx;
z = xinv * xinv;
if (xinv <= 0.25)
@@ -761,21 +784,6 @@ __ieee754_j0l (long double x)
p = 1.0L + z * p;
q = z * xinv * q;
q = q - 0.125L * xinv;
- /* X = x - pi/4
- cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
- = 1/sqrt(2) * (cos(x) + sin(x))
- sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
- = 1/sqrt(2) * (sin(x) - cos(x))
- sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
- cf. Fdlibm. */
- __sincosl (xx, &s, &c);
- ss = s - c;
- cc = s + c;
- z = -__cosl (xx + xx);
- if ((s * c) < 0)
- cc = z / ss;
- else
- ss = z / cc;
z = ONEOSQPI * (p * cc - q * ss) / __ieee754_sqrtl (xx);
return z;
}
@@ -843,6 +851,28 @@ long double
return p;
}
+ /* X = x - pi/4
+ cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
+ = 1/sqrt(2) * (cos(x) + sin(x))
+ sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
+ = 1/sqrt(2) * (sin(x) - cos(x))
+ sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+ cf. Fdlibm. */
+ __sincosl (x, &s, &c);
+ ss = s - c;
+ cc = s + c;
+ if (xx <= LDBL_MAX / 2.0L)
+ {
+ z = -__cosl (x + x);
+ if ((s * c) < 0)
+ cc = z / ss;
+ else
+ ss = z / cc;
+ }
+
+ if (xx > 0x1p256L)
+ return ONEOSQPI * ss / __ieee754_sqrtl (x);
+
xinv = 1.0L / xx;
z = xinv * xinv;
if (xinv <= 0.25)
@@ -904,21 +934,6 @@ long double
p = 1.0L + z * p;
q = z * xinv * q;
q = q - 0.125L * xinv;
- /* X = x - pi/4
- cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
- = 1/sqrt(2) * (cos(x) + sin(x))
- sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
- = 1/sqrt(2) * (sin(x) - cos(x))
- sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
- cf. Fdlibm. */
- __sincosl (x, &s, &c);
- ss = s - c;
- cc = s + c;
- z = -__cosl (x + x);
- if ((s * c) < 0)
- cc = z / ss;
- else
- ss = z / cc;
z = ONEOSQPI * (p * ss + q * cc) / __ieee754_sqrtl (x);
return z;
}
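Hoisting the sin/cos block ahead of the polynomial evaluation lets j0 and y0 (and, with the 3 pi/4 sign changes, the j1/y1 hunks below) return early for x > 2^256, where the p and q asymptotic series degenerate to 1 and 0, while the new LDBL_MAX / 2 test keeps x + x from overflowing. The identity the block relies on, checked in plain double (illustration only):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 123.456;
  double s = sin (x), c = cos (x);
  double ss = s - c;
  double cc = s + c;
  /* -(cos^2 - sin^2) = (sin - cos) * (sin + cos), so whichever of ss and cc
     lost bits to cancellation can be recovered from the other.  */
  double z = -cos (x + x);

  printf ("%.17g %.17g\n", cc, z / ss);   /* equal up to rounding */
  printf ("%.17g %.17g\n", ss, z / cc);
  return 0;
}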
diff --git a/sysdeps/ieee754/ldbl-128/e_j1l.c b/sysdeps/ieee754/ldbl-128/e_j1l.c
index f16343b26b..70a1c86fd2 100644
--- a/sysdeps/ieee754/ldbl-128/e_j1l.c
+++ b/sysdeps/ieee754/ldbl-128/e_j1l.c
@@ -97,6 +97,7 @@
#include <math.h>
#include <math_private.h>
+#include <float.h>
/* 1 / sqrt(pi) */
static const long double ONEOSQPI = 5.6418958354775628694807945156077258584405E-1L;
@@ -706,6 +707,32 @@ __ieee754_j1l (long double x)
return p;
}
+ /* X = x - 3 pi/4
+ cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
+ = 1/sqrt(2) * (-cos(x) + sin(x))
+ sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
+ = -1/sqrt(2) * (sin(x) + cos(x))
+ cf. Fdlibm. */
+ __sincosl (xx, &s, &c);
+ ss = -s - c;
+ cc = s - c;
+ if (xx <= LDBL_MAX / 2.0L)
+ {
+ z = __cosl (xx + xx);
+ if ((s * c) > 0)
+ cc = z / ss;
+ else
+ ss = z / cc;
+ }
+
+ if (xx > 0x1p256L)
+ {
+ z = ONEOSQPI * cc / __ieee754_sqrtl (xx);
+ if (x < 0)
+ z = -z;
+ return z;
+ }
+
xinv = 1.0L / xx;
z = xinv * xinv;
if (xinv <= 0.25)
@@ -767,20 +794,6 @@ __ieee754_j1l (long double x)
p = 1.0L + z * p;
q = z * q;
q = q * xinv + 0.375L * xinv;
- /* X = x - 3 pi/4
- cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
- = 1/sqrt(2) * (-cos(x) + sin(x))
- sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
- = -1/sqrt(2) * (sin(x) + cos(x))
- cf. Fdlibm. */
- __sincosl (xx, &s, &c);
- ss = -s - c;
- cc = s - c;
- z = __cosl (xx + xx);
- if ((s * c) > 0)
- cc = z / ss;
- else
- ss = z / cc;
z = ONEOSQPI * (p * cc - q * ss) / __ieee754_sqrtl (xx);
if (x < 0)
z = -z;
@@ -850,6 +863,27 @@ __ieee754_y1l (long double x)
return p;
}
+ /* X = x - 3 pi/4
+ cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
+ = 1/sqrt(2) * (-cos(x) + sin(x))
+ sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
+ = -1/sqrt(2) * (sin(x) + cos(x))
+ cf. Fdlibm. */
+ __sincosl (xx, &s, &c);
+ ss = -s - c;
+ cc = s - c;
+ if (xx <= LDBL_MAX / 2.0L)
+ {
+ z = __cosl (xx + xx);
+ if ((s * c) > 0)
+ cc = z / ss;
+ else
+ ss = z / cc;
+ }
+
+ if (xx > 0x1p256L)
+ return ONEOSQPI * ss / __ieee754_sqrtl (xx);
+
xinv = 1.0L / xx;
z = xinv * xinv;
if (xinv <= 0.25)
@@ -911,20 +945,6 @@ __ieee754_y1l (long double x)
p = 1.0L + z * p;
q = z * q;
q = q * xinv + 0.375L * xinv;
- /* X = x - 3 pi/4
- cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
- = 1/sqrt(2) * (-cos(x) + sin(x))
- sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
- = -1/sqrt(2) * (sin(x) + cos(x))
- cf. Fdlibm. */
- __sincosl (xx, &s, &c);
- ss = -s - c;
- cc = s - c;
- z = __cosl (xx + xx);
- if ((s * c) > 0)
- cc = z / ss;
- else
- ss = z / cc;
z = ONEOSQPI * (p * ss + q * cc) / __ieee754_sqrtl (xx);
return z;
}
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c b/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
index 117bd0f052..abc78a35bd 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
@@ -52,7 +52,7 @@ __ieee754_acoshl(long double x)
return __ieee754_logl(2.0*x-one/(x+__ieee754_sqrtl(t-one)));
} else { /* 1<x<2 */
t = x-one;
- return __log1p(t+__sqrtl(2.0*t+t*t));
+ return __log1p(t+__ieee754_sqrtl(2.0*t+t*t));
}
}
strong_alias (__ieee754_acoshl, __acoshl_finite)
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
index 82363906b0..9fd61983e3 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_expl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
@@ -70,11 +70,11 @@
static const long double C[] = {
/* Smallest integer x for which e^x overflows. */
#define himark C[0]
- 709.08956571282405153382846025171462914L,
+ 709.78271289338399678773454114191496482L,
/* Largest integer x for which e^x underflows. */
#define lomark C[1]
--744.44007192138121808966388925909996033L,
+-744.44007192138126231410729844608163411L,
/* 3x2^96 */
#define THREEp96 C[2]
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_logl.c b/sysdeps/ieee754/ldbl-128ibm/e_logl.c
index 14f47ebade..15b5edfab3 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_logl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_logl.c
@@ -182,6 +182,9 @@ static const long double
ln2a = 6.93145751953125e-1L,
ln2b = 1.4286068203094172321214581765680755001344E-6L;
+static const long double
+ ldbl_epsilon = 0x1p-106L;
+
long double
__ieee754_logl(long double x)
{
@@ -258,7 +261,12 @@ __ieee754_logl(long double x)
}
/* Series expansion of log(1+z). */
w = z * z;
- y = ((((((((((((l15 * z
+ /* Avoid spurious underflows. */
+ if (__glibc_unlikely(w <= ldbl_epsilon))
+ y = 0.0L;
+ else
+ {
+ y = ((((((((((((l15 * z
+ l14) * z
+ l13) * z
+ l12) * z
@@ -271,7 +279,8 @@ __ieee754_logl(long double x)
+ l5) * z
+ l4) * z
+ l3) * z * w;
- y -= 0.5 * w;
+ y -= 0.5 * w;
+ }
y += e * ln2b; /* Base 2 exponent offset times ln(2). */
y += z;
y += logtbl[k-26]; /* log(t) - (t-1) */
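The new test skips the series when w = z^2 is at most 2^-106, i.e. when |z| <= 2^-53; the dropped terms then amount to roughly 2^-54 of the result at most, and evaluating them for very small z is what raised the spurious underflows. The same rounding effect shown in plain double, where well below the analogous cutoff the terms vanish entirely (illustration only):

#include <stdio.h>

int
main (void)
{
  double z = 0x1p-60;     /* well below the double analogue of the cutoff */
  double w = z * z;
  double with_series = z - 0.5 * w + w * z / 3.0;   /* leading series terms */
  double without = z;                               /* early-exit value */

  printf ("%d\n", with_series == without);          /* prints 1 */
  return 0;
}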
diff --git a/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h b/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
index be9ac71cb0..1cce1fc4dc 100644
--- a/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
+++ b/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
@@ -125,7 +125,7 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
/* Handy utility functions to pack/unpack/cononicalize and find the nearbyint
of long double implemented as double double. */
static inline long double
-ldbl_pack (double a, double aa)
+default_ldbl_pack (double a, double aa)
{
union ibm_extended_long_double u;
u.dd[0] = a;
@@ -134,7 +134,7 @@ ldbl_pack (double a, double aa)
}
static inline void
-ldbl_unpack (long double l, double *a, double *aa)
+default_ldbl_unpack (long double l, double *a, double *aa)
{
union ibm_extended_long_double u;
u.d = l;
@@ -142,6 +142,12 @@ ldbl_unpack (long double l, double *a, double *aa)
*aa = u.dd[1];
}
+#ifndef ldbl_pack
+# define ldbl_pack default_ldbl_pack
+#endif
+#ifndef ldbl_unpack
+# define ldbl_unpack default_ldbl_unpack
+#endif
/* Convert a finite long double to canonical form.
Does not handle +/-Inf properly. */
diff --git a/sysdeps/ieee754/ldbl-128ibm/w_expl.c b/sysdeps/ieee754/ldbl-128ibm/w_expl.c
index a5e72b2170..70fe5f693e 100644
--- a/sysdeps/ieee754/ldbl-128ibm/w_expl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/w_expl.c
@@ -1,6 +1,24 @@
-/* Looks like we can use ieee854 w_expl.c as is for IBM extended format. */
+#include <math.h>
+#include <math_private.h>
#include <math_ldbl_opt.h>
-#undef weak_alias
-#define weak_alias(n,a)
-#include <sysdeps/ieee754/ldbl-128/w_expl.c>
+
+static const long double o_thres = 709.78271289338399678773454114191496482L;
+static const long double u_thres = -744.44007192138126231410729844608163411L;
+
+long double __expl(long double x) /* wrapper exp */
+{
+ long double z;
+ z = __ieee754_expl(x);
+ if (_LIB_VERSION == _IEEE_)
+ return z;
+ if (__finitel(x))
+ {
+ if (x >= o_thres)
+ return __kernel_standard_l(x,x,206); /* exp overflow */
+ else if (x <= u_thres)
+ return __kernel_standard_l(x,x,207); /* exp underflow */
+ }
+ return z;
+}
+hidden_def (__expl)
long_double_symbol (libm, __expl, expl);
diff --git a/sysdeps/ieee754/ldbl-96/e_j1l.c b/sysdeps/ieee754/ldbl-96/e_j1l.c
index 785c0b0676..4c13018aea 100644
--- a/sysdeps/ieee754/ldbl-96/e_j1l.c
+++ b/sysdeps/ieee754/ldbl-96/e_j1l.c
@@ -203,7 +203,7 @@ __ieee754_y1l (long double x)
__sincosl (x, &s, &c);
ss = -s - c;
cc = s - c;
- if (ix < 0x7fe00000)
+ if (ix < 0x7ffe)
{ /* make sure x+x not overflow */
z = __cosl (x + x);
if ((s * c) > zero)