35 files changed, 1367 insertions, 893 deletions
diff --git a/sysdeps/ieee754/bits/nan.h b/sysdeps/ieee754/bits/nan.h
index d3ab38ba72..41f47ba097 100644
--- a/sysdeps/ieee754/bits/nan.h
+++ b/sysdeps/ieee754/bits/nan.h
@@ -39,14 +39,14 @@
 # include <endian.h>
 
 # if __BYTE_ORDER == __BIG_ENDIAN
-#  define __nan_bytes		{ 0x7f, 0xc0, 0, 0 }
+#  define __qnan_bytes		{ 0x7f, 0xc0, 0, 0 }
 # endif
 # if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define __nan_bytes		{ 0, 0, 0xc0, 0x7f }
+#  define __qnan_bytes		{ 0, 0, 0xc0, 0x7f }
 # endif
 
-static union { unsigned char __c[4]; float __d; } __nan_union
-    __attribute_used__ = { __nan_bytes };
-# define NAN	(__nan_union.__d)
+static union { unsigned char __c[4]; float __d; } __qnan_union
+  __attribute__ ((__unused__)) = { __qnan_bytes };
+# define NAN	(__qnan_union.__d)
 
 #endif	/* GCC.  */
diff --git a/sysdeps/ieee754/dbl-64/branred.c b/sysdeps/ieee754/dbl-64/branred.c
index ecceeca634..524d091dc3 100644
--- a/sysdeps/ieee754/dbl-64/branred.c
+++ b/sysdeps/ieee754/dbl-64/branred.c
@@ -53,13 +53,7 @@ SECTION
 __branred(double x, double *a, double *aa)
 {
   int i,k;
-#if 0
-  int n;
-#endif
   mynumber  u,gor;
-#if 0
-  mynumber v;
-#endif
   double r[6],s,t,sum,b,bb,sum1,sum2,b1,bb1,b2,bb2,x1,x2,t1,t2;
 
   x*=tm600.x;
diff --git a/sysdeps/ieee754/dbl-64/dosincos.c b/sysdeps/ieee754/dbl-64/dosincos.c
index bbef186ad1..00726285a9 100644
--- a/sysdeps/ieee754/dbl-64/dosincos.c
+++ b/sysdeps/ieee754/dbl-64/dosincos.c
@@ -63,9 +63,6 @@ __dubsin(double x, double dx, double v[]) {
 #ifndef DLA_FMS
   double p,hx,tx,hy,ty,q;
 #endif
-#if 0
-  double xx,y,yy,z,zz;
-#endif
   mynumber u;
   int4 k;
 
@@ -119,9 +116,6 @@ __dubcos(double x, double dx, double v[]) {
 #ifndef DLA_FMS
   double p,hx,tx,hy,ty,q;
 #endif
-#if 0
-  double xx,y,yy,z,zz;
-#endif
   mynumber u;
   int4 k;
   u.x=x+big.x;
diff --git a/sysdeps/ieee754/dbl-64/e_asin.c b/sysdeps/ieee754/dbl-64/e_asin.c
index 5585ad1d0d..27cd2a10d3 100644
--- a/sysdeps/ieee754/dbl-64/e_asin.c
+++ b/sysdeps/ieee754/dbl-64/e_asin.c
@@ -62,9 +62,6 @@ __ieee754_asin(double x){
   double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2];
   mynumber u,v;
   int4 k,m,n;
-#if 0
-  int4 nn;
-#endif
 
   u.x = x;
   m = u.i[HIGH_HALF];
@@ -344,14 +341,8 @@ SECTION
 __ieee754_acos(double x)
 {
   double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2],eps;
-#if 0
-  double fc;
-#endif
   mynumber u,v;
   int4 k,m,n;
-#if 0
-  int4 nn;
-#endif
   u.x = x;
   m = u.i[HIGH_HALF];
   k = 0x7fffffff&m;
diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c
index ee3215ed3f..bfe0b3b632 100644
--- a/sysdeps/ieee754/dbl-64/e_atan2.c
+++ b/sysdeps/ieee754/dbl-64/e_atan2.c
@@ -53,105 +53,171 @@
 /* Assumption: Machine arithmetic operations are performed in           */
 /* round to nearest mode of IEEE 754 standard.                          */
 /************************************************************************/
-static double atan2Mp(double ,double ,const int[]);
+static double atan2Mp (double, double, const int[]);
   /* Fix the sign and return after stage 1 or stage 2 */
-static double signArctan2(double y,double z)
+static double
+signArctan2 (double y, double z)
 {
-  return __copysign(z, y);
+  return __copysign (z, y);
 }
-static double normalized(double ,double,double ,double);
-void __mpatan2(mp_no *,mp_no *,mp_no *,int);
+
+static double normalized (double, double, double, double);
+void __mpatan2 (mp_no *, mp_no *, mp_no *, int);
 
 double
 SECTION
-__ieee754_atan2(double y,double x) {
-
-  int i,de,ux,dx,uy,dy;
-#if 0
-  int p;
-#endif
-  static const int pr[MM]={6,8,10,20,32};
-  double ax,ay,u,du,u9,ua,v,vv,dv,t1,t2,t3,t7,t8,
-	 z,zz,cor,s1,ss1,s2,ss2;
+__ieee754_atan2 (double y, double x)
+{
+  int i, de, ux, dx, uy, dy;
+  static const int pr[MM] = { 6, 8, 10, 20, 32 };
+  double ax, ay, u, du, u9, ua, v, vv, dv, t1, t2, t3, t7, t8,
+    z, zz, cor, s1, ss1, s2, ss2;
 #ifndef DLA_FMS
-  double t4,t5,t6;
-#endif
-#if 0
-  double z1,z2;
+  double t4, t5, t6;
 #endif
   number num;
-#if 0
-  mp_no mperr,mpt1,mpx,mpy,mpz,mpz1,mpz2;
-#endif
 
-  static const int ep= 59768832,   /*  57*16**5   */
-		   em=-59768832;   /* -57*16**5   */
+  static const int ep = 59768832,	/*  57*16**5   */
+    em = -59768832;		/* -57*16**5   */
 
   /* x=NaN or y=NaN */
-  num.d = x;  ux = num.i[HIGH_HALF];  dx = num.i[LOW_HALF];
-  if   ((ux&0x7ff00000)    ==0x7ff00000) {
-    if (((ux&0x000fffff)|dx)!=0x00000000) return x+x; }
-  num.d = y;  uy = num.i[HIGH_HALF];  dy = num.i[LOW_HALF];
-  if   ((uy&0x7ff00000)    ==0x7ff00000) {
-    if (((uy&0x000fffff)|dy)!=0x00000000) return y+y; }
+  num.d = x;
+  ux = num.i[HIGH_HALF];
+  dx = num.i[LOW_HALF];
+  if ((ux & 0x7ff00000) == 0x7ff00000)
+    {
+      if (((ux & 0x000fffff) | dx) != 0x00000000)
+	return x + x;
+    }
+  num.d = y;
+  uy = num.i[HIGH_HALF];
+  dy = num.i[LOW_HALF];
+  if ((uy & 0x7ff00000) == 0x7ff00000)
+    {
+      if (((uy & 0x000fffff) | dy) != 0x00000000)
+	return y + y;
+    }
 
   /* y=+-0 */
-  if      (uy==0x00000000) {
-    if    (dy==0x00000000) {
-      if  ((ux&0x80000000)==0x00000000)  return ZERO;
-      else                               return opi.d; } }
-  else if (uy==0x80000000) {
-    if    (dy==0x00000000) {
-      if  ((ux&0x80000000)==0x00000000)  return MZERO;
-      else                               return mopi.d;} }
+  if (uy == 0x00000000)
+    {
+      if (dy == 0x00000000)
+	{
+	  if ((ux & 0x80000000) == 0x00000000)
+	    return ZERO;
+	  else
+	    return opi.d;
+	}
+    }
+  else if (uy == 0x80000000)
+    {
+      if (dy == 0x00000000)
+	{
+	  if ((ux & 0x80000000) == 0x00000000)
+	    return MZERO;
+	  else
+	    return mopi.d;
+	}
+    }
 
   /* x=+-0 */
-  if (x==ZERO) {
-    if ((uy&0x80000000)==0x00000000)     return hpi.d;
-    else                                 return mhpi.d; }
+  if (x == ZERO)
+    {
+      if ((uy & 0x80000000) == 0x00000000)
+	return hpi.d;
+      else
+	return mhpi.d;
+    }
 
   /* x=+-INF */
-  if          (ux==0x7ff00000) {
-    if        (dx==0x00000000) {
-      if      (uy==0x7ff00000) {
-	if    (dy==0x00000000)  return qpi.d; }
-      else if (uy==0xfff00000) {
-	if    (dy==0x00000000)  return mqpi.d; }
-      else {
-	if    ((uy&0x80000000)==0x00000000)  return ZERO;
-	else                                 return MZERO; }
+  if (ux == 0x7ff00000)
+    {
+      if (dx == 0x00000000)
+	{
+	  if (uy == 0x7ff00000)
+	    {
+	      if (dy == 0x00000000)
+		return qpi.d;
+	    }
+	  else if (uy == 0xfff00000)
+	    {
+	      if (dy == 0x00000000)
+		return mqpi.d;
+	    }
+	  else
+	    {
+	      if ((uy & 0x80000000) == 0x00000000)
+		return ZERO;
+	      else
+		return MZERO;
+	    }
+	}
     }
-  }
-  else if     (ux==0xfff00000) {
-    if        (dx==0x00000000) {
-      if      (uy==0x7ff00000) {
-	if    (dy==0x00000000)  return tqpi.d; }
-      else if (uy==0xfff00000) {
-	if    (dy==0x00000000)  return mtqpi.d; }
-      else                     {
-	if    ((uy&0x80000000)==0x00000000)  return opi.d;
-	else                                 return mopi.d; }
+  else if (ux == 0xfff00000)
+    {
+      if (dx == 0x00000000)
+	{
+	  if (uy == 0x7ff00000)
+	    {
+	      if (dy == 0x00000000)
+		return tqpi.d;
+	    }
+	  else if (uy == 0xfff00000)
+	    {
+	      if (dy == 0x00000000)
+		return mtqpi.d;
+	    }
+	  else
+	    {
+	      if ((uy & 0x80000000) == 0x00000000)
+		return opi.d;
+	      else
+		return mopi.d;
+	    }
+	}
     }
-  }
 
   /* y=+-INF */
-  if      (uy==0x7ff00000) {
-    if    (dy==0x00000000)  return hpi.d; }
-  else if (uy==0xfff00000) {
-    if    (dy==0x00000000)  return mhpi.d; }
+  if (uy == 0x7ff00000)
+    {
+      if (dy == 0x00000000)
+	return hpi.d;
+    }
+  else if (uy == 0xfff00000)
+    {
+      if (dy == 0x00000000)
+	return mhpi.d;
+    }
 
   /* either x/y or y/x is very close to zero */
-  ax = (x<ZERO) ? -x : x;    ay = (y<ZERO) ? -y : y;
+  ax = (x < ZERO) ? -x : x;
+  ay = (y < ZERO) ? -y : y;
   de = (uy & 0x7ff00000) - (ux & 0x7ff00000);
-  if      (de>=ep)  { return ((y>ZERO) ? hpi.d : mhpi.d); }
-  else if (de<=em)  {
-    if    (x>ZERO)  {
-      if  ((z=ay/ax)<TWOM1022)  return normalized(ax,ay,y,z);
-      else                      return signArctan2(y,z); }
-    else            { return ((y>ZERO) ? opi.d : mopi.d); } }
+  if (de >= ep)
+    {
+      return ((y > ZERO) ? hpi.d : mhpi.d);
+    }
+  else if (de <= em)
+    {
+      if (x > ZERO)
+	{
+	  if ((z = ay / ax) < TWOM1022)
+	    return normalized (ax, ay, y, z);
+	  else
+	    return signArctan2 (y, z);
+	}
+      else
+	{
+	  return ((y > ZERO) ? opi.d : mopi.d);
+	}
+    }
 
   /* if either x or y is extremely close to zero, scale abs(x), abs(y). */
-  if (ax<twom500.d || ay<twom500.d) { ax*=two500.d;  ay*=two500.d; }
+  if (ax < twom500.d || ay < twom500.d)
+    {
+      ax *= two500.d;
+      ay *= two500.d;
+    }
 
   /* Likewise for large x and y.  */
   if (ax > two500.d || ay > two500.d)
@@ -161,268 +227,377 @@ __ieee754_atan2(double y,double x) {
     }
 
   /* x,y which are neither special nor extreme */
-  if (ay<ax) {
-    u=ay/ax;
-    EMULV(ax,u,v,vv,t1,t2,t3,t4,t5)
-    du=((ay-v)-vv)/ax; }
-  else {
-    u=ax/ay;
-    EMULV(ay,u,v,vv,t1,t2,t3,t4,t5)
-    du=((ax-v)-vv)/ay; }
-
-  if (x>ZERO) {
-
-    /* (i)   x>0, abs(y)< abs(x):  atan(ay/ax) */
-    if (ay<ax) {
-      if (u<inv16.d) {
-	v=u*u;  zz=du+u*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
-	if ((z=u+(zz-u1.d*u)) == u+(zz+u1.d*u))  return signArctan2(y,z);
-
-	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
-	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	MUL2(u,du,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(u,du,s2,ss2,s1,ss1,t1,t2)
-	if ((z=s1+(ss1-u5.d*s1)) == s1+(ss1+u5.d*s1))  return signArctan2(y,z);
-	return atan2Mp(x,y,pr);
-      }
-      else {
-	i=(TWO52+TWO8*u)-TWO52;  i-=16;
-	t3=u-cij[i][0].d;
-	EADD(t3,du,v,dv)
-	t1=cij[i][1].d;  t2=cij[i][2].d;
-	zz=v*t2+(dv*t2+v*v*(cij[i][3].d+v*(cij[i][4].d+
-			 v*(cij[i][5].d+v* cij[i][6].d))));
-	if (i<112) {
-	  if (i<48)  u9=u91.d;    /* u < 1/4        */
-	  else       u9=u92.d; }  /* 1/4 <= u < 1/2 */
-	else {
-	  if (i<176) u9=u93.d;    /* 1/2 <= u < 3/4 */
-	  else       u9=u94.d; }  /* 3/4 <= u <= 1  */
-	if ((z=t1+(zz-u9*t1)) == t1+(zz+u9*t1))  return signArctan2(y,z);
-
-	t1=u-hij[i][0].d;
-	EADD(t1,du,v,vv)
-	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
-	   v*(hij[i][14].d+v* hij[i][15].d))));
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
-	if ((z=s2+(ss2-ub.d*s2)) == s2+(ss2+ub.d*s2))  return signArctan2(y,z);
-	return atan2Mp(x,y,pr);
-      }
+  if (ay < ax)
+    {
+      u = ay / ax;
+      EMULV (ax, u, v, vv, t1, t2, t3, t4, t5);
+      du = ((ay - v) - vv) / ax;
+    }
+  else
+    {
+      u = ax / ay;
+      EMULV (ay, u, v, vv, t1, t2, t3, t4, t5);
+      du = ((ax - v) - vv) / ay;
     }
 
-    /* (ii)  x>0, abs(x)<=abs(y):  pi/2-atan(ax/ay) */
-    else {
-      if (u<inv16.d) {
-	v=u*u;
-	zz=u*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
-	ESUB(hpi.d,u,t2,cor)
-	t3=((hpi1.d+cor)-du)-zz;
-	if ((z=t2+(t3-u2.d)) == t2+(t3+u2.d))  return signArctan2(y,z);
-
-	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
-	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	MUL2(u,du,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(u,du,s2,ss2,s1,ss1,t1,t2)
-	SUB2(hpi.d,hpi1.d,s1,ss1,s2,ss2,t1,t2)
-	if ((z=s2+(ss2-u6.d)) == s2+(ss2+u6.d))  return signArctan2(y,z);
-	return atan2Mp(x,y,pr);
-      }
-      else {
-	i=(TWO52+TWO8*u)-TWO52;  i-=16;
-	v=(u-cij[i][0].d)+du;
-	zz=hpi1.d-v*(cij[i][2].d+v*(cij[i][3].d+v*(cij[i][4].d+
-				 v*(cij[i][5].d+v* cij[i][6].d))));
-	t1=hpi.d-cij[i][1].d;
-	if (i<112)  ua=ua1.d;  /* w <  1/2 */
-	else        ua=ua2.d;  /* w >= 1/2 */
-	if ((z=t1+(zz-ua)) == t1+(zz+ua))  return signArctan2(y,z);
-
-	t1=u-hij[i][0].d;
-	EADD(t1,du,v,vv)
-	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
-	   v*(hij[i][14].d+v* hij[i][15].d))));
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
-	SUB2(hpi.d,hpi1.d,s2,ss2,s1,ss1,t1,t2)
-	if ((z=s1+(ss1-uc.d)) == s1+(ss1+uc.d))  return signArctan2(y,z);
-	return atan2Mp(x,y,pr);
-      }
+  if (x > ZERO)
+    {
+      /* (i)   x>0, abs(y)< abs(x):  atan(ay/ax) */
+      if (ay < ax)
+	{
+	  if (u < inv16.d)
+	    {
+	      v = u * u;
+
+	      zz = du + u * v * (d3.d
+				 + v * (d5.d
+					+ v * (d7.d
+					       + v * (d9.d
+						      + v * (d11.d
+							     + v * d13.d)))));
+
+	      if ((z = u + (zz - u1.d * u)) == u + (zz + u1.d * u))
+		return signArctan2 (y, z);
+
+	      MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
+	      s1 = v * (f11.d + v * (f13.d
+				     + v * (f15.d + v * (f17.d + v * f19.d))));
+	      ADD2 (f9.d, ff9.d, s1, ZERO, s2, ss2, t1, t2);
+	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
+	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
+	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
+	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	      MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
+	      ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
+
+	      if ((z = s1 + (ss1 - u5.d * s1)) == s1 + (ss1 + u5.d * s1))
+		return signArctan2 (y, z);
+
+	      return atan2Mp (x, y, pr);
+	    }
+
+	  i = (TWO52 + TWO8 * u) - TWO52;
+	  i -= 16;
+	  t3 = u - cij[i][0].d;
+	  EADD (t3, du, v, dv);
+	  t1 = cij[i][1].d;
+	  t2 = cij[i][2].d;
+	  zz = v * t2 + (dv * t2
+			 + v * v * (cij[i][3].d
+				    + v * (cij[i][4].d
+					   + v * (cij[i][5].d
+						  + v * cij[i][6].d))));
+	  if (i < 112)
+	    {
+	      if (i < 48)
+		u9 = u91.d;	/* u < 1/4	*/
+	      else
+		u9 = u92.d;
+	    }		/* 1/4 <= u < 1/2 */
+	  else
+	    {
+	      if (i < 176)
+		u9 = u93.d;	/* 1/2 <= u < 3/4 */
+	      else
+		u9 = u94.d;
+	    }		/* 3/4 <= u <= 1  */
+	  if ((z = t1 + (zz - u9 * t1)) == t1 + (zz + u9 * t1))
+	    return signArctan2 (y, z);
+
+	  t1 = u - hij[i][0].d;
+	  EADD (t1, du, v, vv);
+	  s1 = v * (hij[i][11].d
+		    + v * (hij[i][12].d
+			   +  v * (hij[i][13].d
+				   + v * (hij[i][14].d
+					  + v * hij[i][15].d))));
+	  ADD2 (hij[i][9].d, hij[i][10].d, s1, ZERO, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
+
+	  if ((z = s2 + (ss2 - ub.d * s2)) == s2 + (ss2 + ub.d * s2))
+	    return signArctan2 (y, z);
+	  return atan2Mp (x, y, pr);
+	}
+
+      /* (ii)  x>0, abs(x)<=abs(y):  pi/2-atan(ax/ay) */
+      if (u < inv16.d)
+	{
+	  v = u * u;
+	  zz = u * v * (d3.d
+			+ v * (d5.d
+			       + v * (d7.d
+				      + v * (d9.d
+					     + v * (d11.d
+						    + v * d13.d)))));
+	  ESUB (hpi.d, u, t2, cor);
+	  t3 = ((hpi1.d + cor) - du) - zz;
+	  if ((z = t2 + (t3 - u2.d)) == t2 + (t3 + u2.d))
+	    return signArctan2 (y, z);
+
+	  MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
+	  s1 = v * (f11.d
+		    + v * (f13.d
+			   + v * (f15.d + v * (f17.d + v * f19.d))));
+	  ADD2 (f9.d, ff9.d, s1, ZERO, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
+	  SUB2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2);
+
+	  if ((z = s2 + (ss2 - u6.d)) == s2 + (ss2 + u6.d))
+	    return signArctan2 (y, z);
+	  return atan2Mp (x, y, pr);
+	}
+
+      i = (TWO52 + TWO8 * u) - TWO52;
+      i -= 16;
+      v = (u - cij[i][0].d) + du;
+
+      zz = hpi1.d - v * (cij[i][2].d
+			 + v * (cij[i][3].d
+				+ v * (cij[i][4].d
+				       + v * (cij[i][5].d
+					      + v * cij[i][6].d))));
+      t1 = hpi.d - cij[i][1].d;
+      if (i < 112)
+	ua = ua1.d;	/* w <  1/2 */
+      else
+	ua = ua2.d;	/* w >= 1/2 */
+      if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
+	return signArctan2 (y, z);
+
+      t1 = u - hij[i][0].d;
+      EADD (t1, du, v, vv);
+
+      s1 = v * (hij[i][11].d
+		+ v * (hij[i][12].d
+		       + v * (hij[i][13].d
+			      + v * (hij[i][14].d
+				     + v * hij[i][15].d))));
+
+      ADD2 (hij[i][9].d, hij[i][10].d, s1, ZERO, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
+      SUB2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2);
+
+      if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
+	return signArctan2 (y, z);
+      return atan2Mp (x, y, pr);
     }
-  }
-  else {
-
-    /* (iii) x<0, abs(x)< abs(y):  pi/2+atan(ax/ay) */
-    if (ax<ay) {
-      if (u<inv16.d) {
-	v=u*u;
-	zz=u*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
-	EADD(hpi.d,u,t2,cor)
-	t3=((hpi1.d+cor)+du)+zz;
-	if ((z=t2+(t3-u3.d)) == t2+(t3+u3.d))  return signArctan2(y,z);
-
-	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
-	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	MUL2(u,du,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(u,du,s2,ss2,s1,ss1,t1,t2)
-	ADD2(hpi.d,hpi1.d,s1,ss1,s2,ss2,t1,t2)
-	if ((z=s2+(ss2-u7.d)) == s2+(ss2+u7.d))  return signArctan2(y,z);
-	return atan2Mp(x,y,pr);
-      }
-      else {
-	i=(TWO52+TWO8*u)-TWO52;  i-=16;
-	v=(u-cij[i][0].d)+du;
-	zz=hpi1.d+v*(cij[i][2].d+v*(cij[i][3].d+v*(cij[i][4].d+
-				 v*(cij[i][5].d+v* cij[i][6].d))));
-	t1=hpi.d+cij[i][1].d;
-	if (i<112)  ua=ua1.d;  /* w <  1/2 */
-	else        ua=ua2.d;  /* w >= 1/2 */
-	if ((z=t1+(zz-ua)) == t1+(zz+ua))  return signArctan2(y,z);
-
-	t1=u-hij[i][0].d;
-	EADD(t1,du,v,vv)
-	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
-	   v*(hij[i][14].d+v* hij[i][15].d))));
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
-	ADD2(hpi.d,hpi1.d,s2,ss2,s1,ss1,t1,t2)
-	if ((z=s1+(ss1-uc.d)) == s1+(ss1+uc.d))  return signArctan2(y,z);
-	return atan2Mp(x,y,pr);
-      }
+
+  /* (iii) x<0, abs(x)< abs(y):  pi/2+atan(ax/ay) */
+  if (ax < ay)
+    {
+      if (u < inv16.d)
+	{
+	  v = u * u;
+	  zz = u * v * (d3.d
+			+ v * (d5.d
+			       + v * (d7.d
+				      + v * (d9.d
+					     + v * (d11.d + v * d13.d)))));
+	  EADD (hpi.d, u, t2, cor);
+	  t3 = ((hpi1.d + cor) + du) + zz;
+	  if ((z = t2 + (t3 - u3.d)) == t2 + (t3 + u3.d))
+	    return signArctan2 (y, z);
+
+	  MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
+	  s1 = v * (f11.d
+		    + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d))));
+	  ADD2 (f9.d, ff9.d, s1, ZERO, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
+	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+	  MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
+	  ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
+	  ADD2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2);
+
+	  if ((z = s2 + (ss2 - u7.d)) == s2 + (ss2 + u7.d))
+	    return signArctan2 (y, z);
+	  return atan2Mp (x, y, pr);
+	}
+
+      i = (TWO52 + TWO8 * u) - TWO52;
+      i -= 16;
+      v = (u - cij[i][0].d) + du;
+      zz = hpi1.d + v * (cij[i][2].d
+			 + v * (cij[i][3].d
+				+ v * (cij[i][4].d
+				       + v * (cij[i][5].d
+					      + v * cij[i][6].d))));
+      t1 = hpi.d + cij[i][1].d;
+      if (i < 112)
+	ua = ua1.d;	/* w <  1/2 */
+      else
+	ua = ua2.d;	/* w >= 1/2 */
+      if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
+	return signArctan2 (y, z);
+
+      t1 = u - hij[i][0].d;
+      EADD (t1, du, v, vv);
+      s1 = v * (hij[i][11].d
+		+ v * (hij[i][12].d
+		       + v * (hij[i][13].d
+			      + v * (hij[i][14].d
+				     + v * hij[i][15].d))));
+      ADD2 (hij[i][9].d, hij[i][10].d, s1, ZERO, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
+      ADD2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2);
+
+      if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
+	return signArctan2 (y, z);
+      return atan2Mp (x, y, pr);
     }
 
-    /* (iv)  x<0, abs(y)<=abs(x):  pi-atan(ax/ay) */
-    else {
-      if (u<inv16.d) {
-	v=u*u;
-	zz=u*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
-	ESUB(opi.d,u,t2,cor)
-	t3=((opi1.d+cor)-du)-zz;
-	if ((z=t2+(t3-u4.d)) == t2+(t3+u4.d))  return signArctan2(y,z);
-
-	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
-	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	MUL2(u,du,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(u,du,s2,ss2,s1,ss1,t1,t2)
-	SUB2(opi.d,opi1.d,s1,ss1,s2,ss2,t1,t2)
-	if ((z=s2+(ss2-u8.d)) == s2+(ss2+u8.d))  return signArctan2(y,z);
-	return atan2Mp(x,y,pr);
-      }
-      else {
-	i=(TWO52+TWO8*u)-TWO52;  i-=16;
-	v=(u-cij[i][0].d)+du;
-	zz=opi1.d-v*(cij[i][2].d+v*(cij[i][3].d+v*(cij[i][4].d+
-				 v*(cij[i][5].d+v* cij[i][6].d))));
-	t1=opi.d-cij[i][1].d;
-	if (i<112)  ua=ua1.d;  /* w <  1/2 */
-	else        ua=ua2.d;  /* w >= 1/2 */
-	if ((z=t1+(zz-ua)) == t1+(zz+ua))  return signArctan2(y,z);
-
-	t1=u-hij[i][0].d;
-	EADD(t1,du,v,vv)
-	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
-	   v*(hij[i][14].d+v* hij[i][15].d))));
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
-	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-	ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
-	SUB2(opi.d,opi1.d,s2,ss2,s1,ss1,t1,t2)
-	if ((z=s1+(ss1-uc.d)) == s1+(ss1+uc.d))  return signArctan2(y,z);
-	return atan2Mp(x,y,pr);
-      }
+  /* (iv)  x<0, abs(y)<=abs(x):  pi-atan(ax/ay) */
+  if (u < inv16.d)
+    {
+      v = u * u;
+      zz = u * v * (d3.d
+		    + v * (d5.d
+			   + v * (d7.d
+				  + v * (d9.d + v * (d11.d + v * d13.d)))));
+      ESUB (opi.d, u, t2, cor);
+      t3 = ((opi1.d + cor) - du) - zz;
+      if ((z = t2 + (t3 - u4.d)) == t2 + (t3 + u4.d))
+	return signArctan2 (y, z);
+
+      MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
+      s1 = v * (f11.d + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d))));
+      ADD2 (f9.d, ff9.d, s1, ZERO, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
+      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+      MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
+      ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
+      SUB2 (opi.d, opi1.d, s1, ss1, s2, ss2, t1, t2);
+
+      if ((z = s2 + (ss2 - u8.d)) == s2 + (ss2 + u8.d))
+	return signArctan2 (y, z);
+      return atan2Mp (x, y, pr);
     }
-  }
+
+  i = (TWO52 + TWO8 * u) - TWO52;
+  i -= 16;
+  v = (u - cij[i][0].d) + du;
+  zz = opi1.d - v * (cij[i][2].d
+		     + v * (cij[i][3].d
+			    + v * (cij[i][4].d
+				   + v * (cij[i][5].d + v * cij[i][6].d))));
+  t1 = opi.d - cij[i][1].d;
+  if (i < 112)
+    ua = ua1.d;	/* w <  1/2 */
+  else
+    ua = ua2.d;	/* w >= 1/2 */
+  if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
+    return signArctan2 (y, z);
+
+  t1 = u - hij[i][0].d;
+
+  EADD (t1, du, v, vv);
+
+  s1 = v * (hij[i][11].d
+	    + v * (hij[i][12].d
+		   + v * (hij[i][13].d
+			  + v * (hij[i][14].d + v * hij[i][15].d))));
+
+  ADD2 (hij[i][9].d, hij[i][10].d, s1, ZERO, s2, ss2, t1, t2);
+  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
+  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
+  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
+  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
+  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
+  SUB2 (opi.d, opi1.d, s2, ss2, s1, ss1, t1, t2);
+
+  if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
+    return signArctan2 (y, z);
+  return atan2Mp (x, y, pr);
 }
+
 #ifndef __ieee754_atan2
 strong_alias (__ieee754_atan2, __atan2_finite)
 #endif
 
-  /* Treat the Denormalized case */
+/* Treat the Denormalized case */
 static double
 SECTION
-normalized(double ax,double ay,double y, double z)
-    { int p;
-      mp_no mpx,mpy,mpz,mperr,mpz2,mpt1;
-  p=6;
-  __dbl_mp(ax,&mpx,p);  __dbl_mp(ay,&mpy,p);  __dvd(&mpy,&mpx,&mpz,p);
-  __dbl_mp(ue.d,&mpt1,p);   __mul(&mpz,&mpt1,&mperr,p);
-  __sub(&mpz,&mperr,&mpz2,p);  __mp_dbl(&mpz2,&z,p);
-  return signArctan2(y,z);
+normalized (double ax, double ay, double y, double z)
+{
+  int p;
+  mp_no mpx, mpy, mpz, mperr, mpz2, mpt1;
+  p = 6;
+  __dbl_mp (ax, &mpx, p);
+  __dbl_mp (ay, &mpy, p);
+  __dvd (&mpy, &mpx, &mpz, p);
+  __dbl_mp (ue.d, &mpt1, p);
+  __mul (&mpz, &mpt1, &mperr, p);
+  __sub (&mpz, &mperr, &mpz2, p);
+  __mp_dbl (&mpz2, &z, p);
+  return signArctan2 (y, z);
 }
-  /* Stage 3: Perform a multi-Precision computation */
+
+/* Stage 3: Perform a multi-Precision computation */
 static double
 SECTION
-atan2Mp(double x,double y,const int pr[])
+atan2Mp (double x, double y, const int pr[])
 {
-  double z1,z2;
-  int i,p;
-  mp_no mpx,mpy,mpz,mpz1,mpz2,mperr,mpt1;
-  for (i=0; i<MM; i++) {
-    p = pr[i];
-    __dbl_mp(x,&mpx,p);  __dbl_mp(y,&mpy,p);
-    __mpatan2(&mpy,&mpx,&mpz,p);
-    __dbl_mp(ud[i].d,&mpt1,p);   __mul(&mpz,&mpt1,&mperr,p);
-    __add(&mpz,&mperr,&mpz1,p);  __sub(&mpz,&mperr,&mpz2,p);
-    __mp_dbl(&mpz1,&z1,p);       __mp_dbl(&mpz2,&z2,p);
-    if (z1==z2)   return z1;
-  }
-  return z1; /*if unpossible to do exact computing */
+  double z1, z2;
+  int i, p;
+  mp_no mpx, mpy, mpz, mpz1, mpz2, mperr, mpt1;
+  for (i = 0; i < MM; i++)
+    {
+      p = pr[i];
+      __dbl_mp (x, &mpx, p);
+      __dbl_mp (y, &mpy, p);
+      __mpatan2 (&mpy, &mpx, &mpz, p);
+      __dbl_mp (ud[i].d, &mpt1, p);
+      __mul (&mpz, &mpt1, &mperr, p);
+      __add (&mpz, &mperr, &mpz1, p);
+      __sub (&mpz, &mperr, &mpz2, p);
+      __mp_dbl (&mpz1, &z1, p);
+      __mp_dbl (&mpz2, &z2, p);
+      if (z1 == z2)
+	return z1;
+    }
+  return z1;			/*if impossible to do exact computing */
 }
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index 0f9d87ba59..07cc4a91b6 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -55,9 +55,6 @@ SECTION
 __ieee754_exp(double x) {
   double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
   mynumber junk1, junk2, binexp  = {{0,0}};
-#if 0
-  int4 k;
-#endif
   int4 i,j,m,n,ex;
   double retval;
 
@@ -174,9 +171,6 @@ SECTION
 __exp1(double x, double xx, double error) {
   double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
   mynumber junk1, junk2, binexp  = {{0,0}};
-#if 0
-  int4 k;
-#endif
   int4 i,j,m,n,ex;
 
   junk1.x = x;
diff --git a/sysdeps/ieee754/dbl-64/e_j0.c b/sysdeps/ieee754/dbl-64/e_j0.c
index f393a762b2..d641a09149 100644
--- a/sysdeps/ieee754/dbl-64/e_j0.c
+++ b/sysdeps/ieee754/dbl-64/e_j0.c
@@ -293,7 +293,8 @@ pzero(double x)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;
-	if(ix>=0x40200000)     {p = pR8; q= pS8;}
+	if (ix>=0x41b00000)    {return one;}
+	else if(ix>=0x40200000){p = pR8; q= pS8;}
 	else if(ix>=0x40122E8B){p = pR5; q= pS5;}
 	else if(ix>=0x4006DB6D){p = pR3; q= pS3;}
 	else if(ix>=0x40000000){p = pR2; q= pS2;}
@@ -400,7 +401,8 @@ qzero(double x)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;
-	if(ix>=0x40200000)     {p = qR8; q= qS8;}
+	if (ix>=0x41b00000)    {return -.125/x;}
+	else if(ix>=0x40200000){p = qR8; q= qS8;}
 	else if(ix>=0x40122E8B){p = qR5; q= qS5;}
 	else if(ix>=0x4006DB6D){p = qR3; q= qS3;}
 	else if(ix>=0x40000000){p = qR2; q= qS2;}
diff --git a/sysdeps/ieee754/dbl-64/e_j1.c b/sysdeps/ieee754/dbl-64/e_j1.c
index cba4d46b18..cca5f20b4f 100644
--- a/sysdeps/ieee754/dbl-64/e_j1.c
+++ b/sysdeps/ieee754/dbl-64/e_j1.c
@@ -291,7 +291,8 @@ pone(double x)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;
-	if(ix>=0x40200000)     {p = pr8; q= ps8;}
+	if (ix>=0x41b00000)    {return one;}
+	else if(ix>=0x40200000){p = pr8; q= ps8;}
 	else if(ix>=0x40122E8B){p = pr5; q= ps5;}
 	else if(ix>=0x4006DB6D){p = pr3; q= ps3;}
 	else if(ix>=0x40000000){p = pr2; q= ps2;}
@@ -399,7 +400,8 @@ qone(double x)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;
-	if(ix>=0x40200000)     {p = qr8; q= qs8;}
+	if (ix>=0x41b00000)    {return .375/x;}
+	else if(ix>=0x40200000){p = qr8; q= qs8;}
 	else if(ix>=0x40122E8B){p = qr5; q= qs5;}
 	else if(ix>=0x4006DB6D){p = qr3; q= qs3;}
 	else if(ix>=0x40000000){p = qr2; q= qs2;}
diff --git a/sysdeps/ieee754/dbl-64/e_log.c b/sysdeps/ieee754/dbl-64/e_log.c
index 762639bcdf..58c9a8e76b 100644
--- a/sysdeps/ieee754/dbl-64/e_log.c
+++ b/sysdeps/ieee754/dbl-64/e_log.c
@@ -56,9 +56,6 @@ __ieee754_log(double x) {
 #define M 4
   static const int pr[M]={8,10,18,32};
   int i,j,n,ux,dx,p;
-#if 0
-  int k;
-#endif
   double dbl_n,u,p0,q,r0,w,nln2a,luai,lubi,lvaj,lvbj,
 	 sij,ssij,ttij,A,B,B0,y,y1,y2,polI,polII,sa,sb,
 	 t1,t2,t7,t8,t,ra,rb,ww,
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index ee2711322f..f8962a7dce 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -64,9 +64,6 @@ double
 SECTION
 __ieee754_pow(double x, double y) {
   double z,a,aa,error, t,a1,a2,y1,y2;
-#if 0
-  double gor=1.0;
-#endif
   mynumber u,v;
   int k;
   int4 qx,qy;
@@ -206,13 +203,7 @@ static double
 SECTION
 log1(double x, double *delta, double *error) {
   int i,j,m;
-#if 0
-  int n;
-#endif
   double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
-#if 0
-  double cor;
-#endif
   mynumber u,v;
 #ifdef BIG_ENDI
   mynumber
@@ -300,13 +291,7 @@ static double
 SECTION
 my_log2(double x, double *delta, double *error) {
   int i,j,m;
-#if 0
-  int n;
-#endif
   double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
-#if 0
-  double cor;
-#endif
   double ou1,ou2,lu1,lu2,ov,lv1,lv2,a,a1,a2;
   double y,yy,z,zz,j1,j2,j7,j8;
 #ifndef DLA_FMS
@@ -397,9 +382,6 @@ SECTION
 checkint(double x) {
   union {int4 i[2]; double x;} u;
   int k,m,n;
-#if 0
-  int l;
-#endif
   u.x = x;
   m = u.i[HIGH_HALF]&0x7fffffff;    /* no sign */
   if (m >= 0x7ff00000) return 0;    /*  x is +/-inf or NaN  */
diff --git a/sysdeps/ieee754/dbl-64/e_remainder.c b/sysdeps/ieee754/dbl-64/e_remainder.c
index ac4b55f9d4..c4db9316c7 100644
--- a/sysdeps/ieee754/dbl-64/e_remainder.c
+++ b/sysdeps/ieee754/dbl-64/e_remainder.c
@@ -42,13 +42,7 @@
 double __ieee754_remainder(double x, double y)
 {
   double z,d,xx;
-#if 0
-  double yy;
-#endif
   int4 kx,ky,n,nn,n1,m1,l;
-#if 0
-  int4 m;
-#endif
   mynumber u,t,w={{0,0}},v={{0,0}},ww={{0,0}},r;
   u.x=x;
   t.x=y;
diff --git a/sysdeps/ieee754/dbl-64/mpa-arch.h b/sysdeps/ieee754/dbl-64/mpa-arch.h
new file mode 100644
index 0000000000..7de9d51ae2
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/mpa-arch.h
@@ -0,0 +1,47 @@
+/* Overridable constants and operations.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+
+typedef long mantissa_t;
+typedef int64_t mantissa_store_t;
+
+#define TWOPOW(i) (1L << i)
+
+#define RADIX_EXP 24
+#define  RADIX TWOPOW (RADIX_EXP)		/* 2^24    */
+
+/* Divide D by RADIX and put the remainder in R.  D must be a non-negative
+   integral value.  */
+#define DIV_RADIX(d, r) \
+  ({									      \
+    r = d & (RADIX - 1);						      \
+    d >>= RADIX_EXP;							      \
+  })
+
+/* Put the integer component of a double X in R and retain the fraction in
+   X.  This is used in extracting mantissa digits for MP_NO by using the
+   integer portion of the current value of the number as the current mantissa
+   digit and then scaling by RADIX to get the next mantissa digit in the same
+   manner.  */
+#define INTEGER_OF(x, i) \
+  ({									      \
+    i = (mantissa_t) x;							      \
+    x -= i;								      \
+  })
+
+/* Align IN down to F.  The code assumes that F is a power of two.  */
+#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index ede8ed1986..a3feb175ed 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -43,6 +43,7 @@
 #include "endian.h"
 #include "mpa.h"
 #include <sys/param.h>
+#include <alloca.h>
 
 #ifndef SECTION
 # define SECTION
@@ -59,8 +60,9 @@ const mp_no mptwo = {1, {1.0, 2.0}};
 static int
 mcr (const mp_no *x, const mp_no *y, int p)
 {
-  int i;
-  for (i = 1; i <= p; i++)
+  long i;
+  long p2 = p;
+  for (i = 1; i <= p2; i++)
     {
       if (X[i] == Y[i])
 	continue;
@@ -76,16 +78,16 @@ mcr (const mp_no *x, const mp_no *y, int p)
 int
 __acr (const mp_no *x, const mp_no *y, int p)
 {
-  int i;
+  long i;
 
-  if (X[0] == ZERO)
+  if (X[0] == 0)
     {
-      if (Y[0] == ZERO)
+      if (Y[0] == 0)
 	i = 0;
       else
 	i = -1;
     }
-  else if (Y[0] == ZERO)
+  else if (Y[0] == 0)
     i = 1;
   else
     {
@@ -107,8 +109,10 @@ __acr (const mp_no *x, const mp_no *y, int p)
 void
 __cpy (const mp_no *x, mp_no *y, int p)
 {
+  long i;
+
   EY = EX;
-  for (int i = 0; i <= p; i++)
+  for (i = 0; i <= p; i++)
     Y[i] = X[i];
 }
 #endif
@@ -119,9 +123,10 @@ __cpy (const mp_no *x, mp_no *y, int p)
 static void
 norm (const mp_no *x, double *y, int p)
 {
-#define R  RADIXI
-  int i;
-  double a, c, u, v, z[5];
+#define R RADIXI
+  long i;
+  double c;
+  mantissa_t a, u, v, z[5];
   if (p < 5)
     {
       if (p == 1)
@@ -135,44 +140,41 @@ norm (const mp_no *x, double *y, int p)
     }
   else
     {
-      for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
+      for (a = 1, z[1] = X[1]; z[1] < TWO23;)
 	{
-	  a *= TWO;
-	  z[1] *= TWO;
+	  a *= 2;
+	  z[1] *= 2;
 	}
 
       for (i = 2; i < 5; i++)
 	{
-	  z[i] = X[i] * a;
-	  u = (z[i] + CUTTER) - CUTTER;
-	  if (u > z[i])
-	    u -= RADIX;
-	  z[i] -= u;
-	  z[i - 1] += u * RADIXI;
+	  mantissa_store_t d, r;
+	  d = X[i] * (mantissa_store_t) a;
+	  DIV_RADIX (d, r);
+	  z[i] = r;
+	  z[i - 1] += d;
 	}
 
-      u = (z[3] + TWO71) - TWO71;
-      if (u > z[3])
-	u -= TWO19;
+      u = ALIGN_DOWN_TO (z[3], TWO19);
       v = z[3] - u;
 
       if (v == TWO18)
 	{
-	  if (z[4] == ZERO)
+	  if (z[4] == 0)
 	    {
 	      for (i = 5; i <= p; i++)
 		{
-		  if (X[i] == ZERO)
+		  if (X[i] == 0)
 		    continue;
 		  else
 		    {
-		      z[3] += ONE;
+		      z[3] += 1;
 		      break;
 		    }
 		}
 	    }
 	  else
-	    z[3] += ONE;
+	    z[3] += 1;
 	}
 
       c = (z[1] + R * (z[2] + R * z[3])) / a;
@@ -194,47 +196,49 @@ norm (const mp_no *x, double *y, int p)
 static void
 denorm (const mp_no *x, double *y, int p)
 {
-  int i, k;
-  double c, u, z[5];
+  long i, k;
+  long p2 = p;
+  double c;
+  mantissa_t u, z[5];
 
-#define R  RADIXI
+#define R RADIXI
   if (EX < -44 || (EX == -44 && X[1] < TWO5))
     {
-      *y = ZERO;
+      *y = 0;
       return;
     }
 
-  if (p == 1)
+  if (p2 == 1)
     {
       if (EX == -42)
 	{
 	  z[1] = X[1] + TWO10;
-	  z[2] = ZERO;
-	  z[3] = ZERO;
+	  z[2] = 0;
+	  z[3] = 0;
 	  k = 3;
 	}
       else if (EX == -43)
 	{
 	  z[1] = TWO10;
 	  z[2] = X[1];
-	  z[3] = ZERO;
+	  z[3] = 0;
 	  k = 2;
 	}
       else
 	{
 	  z[1] = TWO10;
-	  z[2] = ZERO;
+	  z[2] = 0;
 	  z[3] = X[1];
 	  k = 1;
 	}
     }
-  else if (p == 2)
+  else if (p2 == 2)
     {
       if (EX == -42)
 	{
 	  z[1] = X[1] + TWO10;
 	  z[2] = X[2];
-	  z[3] = ZERO;
+	  z[3] = 0;
 	  k = 3;
 	}
       else if (EX == -43)
@@ -247,7 +251,7 @@ denorm (const mp_no *x, double *y, int p)
       else
 	{
 	  z[1] = TWO10;
-	  z[2] = ZERO;
+	  z[2] = 0;
 	  z[3] = X[1];
 	  k = 1;
 	}
@@ -269,25 +273,23 @@ denorm (const mp_no *x, double *y, int p)
       else
 	{
 	  z[1] = TWO10;
-	  z[2] = ZERO;
+	  z[2] = 0;
 	  k = 1;
 	}
       z[3] = X[k];
     }
 
-  u = (z[3] + TWO57) - TWO57;
-  if (u > z[3])
-    u -= TWO5;
+  u = ALIGN_DOWN_TO (z[3], TWO5);
 
   if (u == z[3])
     {
-      for (i = k + 1; i <= p; i++)
+      for (i = k + 1; i <= p2; i++)
 	{
-	  if (X[i] == ZERO)
+	  if (X[i] == 0)
 	    continue;
 	  else
 	    {
-	      z[3] += ONE;
+	      z[3] += 1;
 	      break;
 	    }
 	}
@@ -304,9 +306,9 @@ denorm (const mp_no *x, double *y, int p)
 void
 __mp_dbl (const mp_no *x, double *y, int p)
 {
-  if (X[0] == ZERO)
+  if (X[0] == 0)
     {
-      *y = ZERO;
+      *y = 0;
       return;
     }
 
@@ -323,42 +325,38 @@ void
 SECTION
 __dbl_mp (double x, mp_no *y, int p)
 {
-  int i, n;
-  double u;
+  long i, n;
+  long p2 = p;
 
   /* Sign.  */
-  if (x == ZERO)
+  if (x == 0)
     {
-      Y[0] = ZERO;
+      Y[0] = 0;
       return;
     }
-  else if (x > ZERO)
-    Y[0] = ONE;
+  else if (x > 0)
+    Y[0] = 1;
   else
     {
-      Y[0] = MONE;
+      Y[0] = -1;
       x = -x;
     }
 
   /* Exponent.  */
-  for (EY = ONE; x >= RADIX; EY += ONE)
+  for (EY = 1; x >= RADIX; EY += 1)
     x *= RADIXI;
-  for (; x < ONE; EY -= ONE)
+  for (; x < 1; EY -= 1)
     x *= RADIX;
 
   /* Digits.  */
-  n = MIN (p, 4);
+  n = MIN (p2, 4);
   for (i = 1; i <= n; i++)
     {
-      u = (x + TWO52) - TWO52;
-      if (u > x)
-	u -= ONE;
-      Y[i] = u;
-      x -= u;
+      INTEGER_OF (x, Y[i]);
       x *= RADIX;
     }
-  for (; i <= p; i++)
-    Y[i] = ZERO;
+  for (; i <= p2; i++)
+    Y[i] = 0;
 }
 
 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
@@ -369,53 +367,64 @@ static void
 SECTION
 add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
-  int i, j, k;
+  long i, j, k;
+  long p2 = p;
+  mantissa_t zk;
 
   EZ = EX;
 
-  i = p;
-  j = p + EY - EX;
-  k = p + 1;
+  i = p2;
+  j = p2 + EY - EX;
+  k = p2 + 1;
 
-  if (j < 1)
+  if (__glibc_unlikely (j < 1))
     {
       __cpy (x, z, p);
       return;
     }
-  else
-    Z[k] = ZERO;
+
+  zk = 0;
 
   for (; j > 0; i--, j--)
     {
-      Z[k] += X[i] + Y[j];
-      if (Z[k] >= RADIX)
+      zk += X[i] + Y[j];
+      if (zk >= RADIX)
 	{
-	  Z[k] -= RADIX;
-	  Z[--k] = ONE;
+	  Z[k--] = zk - RADIX;
+	  zk = 1;
 	}
       else
-	Z[--k] = ZERO;
+        {
+	  Z[k--] = zk;
+	  zk = 0;
+	}
     }
 
   for (; i > 0; i--)
     {
-      Z[k] += X[i];
-      if (Z[k] >= RADIX)
+      zk += X[i];
+      if (zk >= RADIX)
 	{
-	  Z[k] -= RADIX;
-	  Z[--k] = ONE;
+	  Z[k--] = zk - RADIX;
+	  zk = 1;
 	}
       else
-	Z[--k] = ZERO;
+        {
+	  Z[k--] = zk;
+	  zk = 0;
+	}
     }
 
-  if (Z[1] == ZERO)
+  if (zk == 0)
     {
-      for (i = 1; i <= p; i++)
+      for (i = 1; i <= p2; i++)
 	Z[i] = Z[i + 1];
     }
   else
-    EZ += ONE;
+    {
+      Z[1] = zk;
+      EZ += 1;
+    }
 }
 
 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
@@ -426,72 +435,71 @@ static void
 SECTION
 sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
-  int i, j, k;
+  long i, j, k;
+  long p2 = p;
+  mantissa_t zk;
 
   EZ = EX;
+  i = p2;
+  j = p2 + EY - EX;
+  k = p2;
 
-  if (EX == EY)
+  /* Y is too small compared to X, copy X over to the result.  */
+  if (__glibc_unlikely (j < 1))
     {
-      i = j = k = p;
-      Z[k] = Z[k + 1] = ZERO;
+      __cpy (x, z, p);
+      return;
     }
-  else
+
+  /* The relevant least significant digit in Y is non-zero, so we factor it in
+     to enhance accuracy.  */
+  if (j < p2 && Y[j + 1] > 0)
     {
-      j = EX - EY;
-      if (j > p)
-	{
-	  __cpy (x, z, p);
-	  return;
-	}
-      else
-	{
-	  i = p;
-	  j = p + 1 - j;
-	  k = p;
-	  if (Y[j] > ZERO)
-	    {
-	      Z[k + 1] = RADIX - Y[j--];
-	      Z[k] = MONE;
-	    }
-	  else
-	    {
-	      Z[k + 1] = ZERO;
-	      Z[k] = ZERO;
-	      j--;
-	    }
-	}
+      Z[k + 1] = RADIX - Y[j + 1];
+      zk = -1;
     }
+  else
+    zk = Z[k + 1] = 0;
 
+  /* Subtract and borrow.  */
   for (; j > 0; i--, j--)
     {
-      Z[k] += (X[i] - Y[j]);
-      if (Z[k] < ZERO)
+      zk += (X[i] - Y[j]);
+      if (zk < 0)
 	{
-	  Z[k] += RADIX;
-	  Z[--k] = MONE;
+	  Z[k--] = zk + RADIX;
+	  zk = -1;
 	}
       else
-	Z[--k] = ZERO;
+        {
+	  Z[k--] = zk;
+	  zk = 0;
+	}
     }
 
+  /* We're done with digits from Y, so it's just digits in X.  */
   for (; i > 0; i--)
     {
-      Z[k] += X[i];
-      if (Z[k] < ZERO)
+      zk += X[i];
+      if (zk < 0)
 	{
-	  Z[k] += RADIX;
-	  Z[--k] = MONE;
+	  Z[k--] = zk + RADIX;
+	  zk = -1;
 	}
       else
-	Z[--k] = ZERO;
+        {
+	  Z[k--] = zk;
+	  zk = 0;
+	}
     }
 
-  for (i = 1; Z[i] == ZERO; i++);
+  /* Normalize.  */
+  for (i = 1; Z[i] == 0; i++);
   EZ = EZ - i + 1;
-  for (k = 1; i <= p + 1;)
+  for (k = 1; i <= p2 + 1;)
     Z[k++] = Z[i++];
-  for (; k <= p;)
-    Z[k++] = ZERO;
+  for (; k <= p2;)
+    Z[k++] = 0;
 }
 
 /* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
@@ -503,12 +511,12 @@ __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   int n;
 
-  if (X[0] == ZERO)
+  if (X[0] == 0)
     {
       __cpy (y, z, p);
       return;
     }
-  else if (Y[0] == ZERO)
+  else if (Y[0] == 0)
     {
       __cpy (x, z, p);
       return;
@@ -540,7 +548,7 @@ __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
 	  Z[0] = Y[0];
 	}
       else
-	Z[0] = ZERO;
+	Z[0] = 0;
     }
 }
 
@@ -553,13 +561,13 @@ __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   int n;
 
-  if (X[0] == ZERO)
+  if (X[0] == 0)
     {
       __cpy (y, z, p);
       Z[0] = -Z[0];
       return;
     }
-  else if (Y[0] == ZERO)
+  else if (Y[0] == 0)
     {
       __cpy (x, z, p);
       return;
@@ -591,10 +599,11 @@ __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
 	  Z[0] = -Y[0];
 	}
       else
-	Z[0] = ZERO;
+	Z[0] = 0;
     }
 }
 
+#ifndef NO__MUL
 /* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
    and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
    digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
@@ -602,57 +611,241 @@ void
 SECTION
 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
-  int i, j, k, k2;
-  double u, zk;
+  long i, j, k, ip, ip2;
+  long p2 = p;
+  mantissa_store_t zk;
+  const mp_no *a;
+  mantissa_store_t *diag;
 
   /* Is z=0?  */
-  if (__glibc_unlikely (X[0] * Y[0] == ZERO))
+  if (__glibc_unlikely (X[0] * Y[0] == 0))
     {
-      Z[0] = ZERO;
+      Z[0] = 0;
       return;
     }
 
-  /* Multiply, add and carry.  */
-  k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
-  zk = Z[k2] = ZERO;
+  /* We need not iterate through all X's and Y's since it's pointless to
+     multiply zeroes.  Here, both are zero...  */
+  for (ip2 = p2; ip2 > 0; ip2--)
+    if (X[ip2] != 0 || Y[ip2] != 0)
+      break;
+
+  a = X[ip2] != 0 ? y : x;
+
+  /* ... and here, at least one of them is still zero.  */
+  for (ip = ip2; ip > 0; ip--)
+    if (a->d[ip] != 0)
+      break;
+
+  /* The product looks like this for p = 3 (as an example):
+
+
+				a1    a2    a3
+		 x		b1    b2    b3
+		 -----------------------------
+			     a1*b3 a2*b3 a3*b3
+		       a1*b2 a2*b2 a3*b2
+		 a1*b1 a2*b1 a3*b1
+
+     So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
+     for P >= 3.  We compute the above digits in two parts; the last P-1
+     digits and then the first P digits.  The last P-1 digits are a sum of
+     products of the input digits from P to P-k where K is 0 for the least
+     significant digit and increases as we go towards the left.  The product
+     term is of the form X[k]*X[P-k] as can be seen in the above example.
+
+     The first P digits are also a sum of products with the same product term,
+     except that the sum is from 1 to k.  This is also evident from the above
+     example.
+
+     Another thing that becomes evident is that only the most significant
+     ip+ip2 digits of the result are non-zero, where ip and ip2 are the
+     'internal precision' of the input numbers, i.e. digits after ip and ip2
+     are all 0.  */
+
+  k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;
+
+  while (k > ip + ip2 + 1)
+    Z[k--] = 0;
+
+  zk = 0;
 
-  for (k = k2; k > p; k--)
+  /* Precompute sums of diagonal elements so that we can directly use them
+     later.  See the next comment to know we why need them.  */
+  diag = alloca (k * sizeof (mantissa_store_t));
+  mantissa_store_t d = 0;
+  for (i = 1; i <= ip; i++)
     {
-      for (i = k - p, j = p; i < p + 1; i++, j--)
-	zk += X[i] * Y[j];
+      d += X[i] * (mantissa_store_t) Y[i];
+      diag[i] = d;
+    }
+  while (i < k)
+    diag[i++] = d;
+
+  while (k > p2)
+    {
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+	/* We want to add this only once, but since we subtract it in the sum
+	   of products above, we add twice.  */
+	zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
+
+      for (i = k - p2, j = p2; i < j; i++, j--)
+	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
+
+      zk -= diag[k - 1];
 
-      u = (zk + CUTTER) - CUTTER;
-      if (u > zk)
-	u -= RADIX;
-      Z[k] = zk - u;
-      zk = u * RADIXI;
+      DIV_RADIX (zk, Z[k]);
+      k--;
     }
 
+  /* The real deal.  Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
+     goes from 1 -> k - 1 and j goes the same range in reverse.  To reduce the
+     number of multiplications, we halve the range and if k is an even number,
+     add the diagonal element X[k/2]Y[k/2].  Through the half range, we compute
+     X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
+
+     This reduction tells us that we're summing two things, the first term
+     through the half range and the negative of the sum of the product of all
+     terms of X and Y in the full range.  i.e.
+
+     SUM(X[i] * Y[i]) for k terms.  This is precalculated above for each k in
+     a single loop so that it completes in O(n) time and can hence be directly
+     used in the loop below.  */
   while (k > 1)
     {
-      for (i = 1, j = k - 1; i < k; i++, j--)
-	zk += X[i] * Y[j];
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+	/* We want to add this only once, but since we subtract it in the sum
+	   of products above, we add twice.  */
+        zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
+
+      for (i = 1, j = k - 1; i < j; i++, j--)
+	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
 
-      u = (zk + CUTTER) - CUTTER;
-      if (u > zk)
-	u -= RADIX;
-      Z[k] = zk - u;
-      zk = u * RADIXI;
+      zk -= diag[k - 1];
+
+      DIV_RADIX (zk, Z[k]);
       k--;
     }
   Z[k] = zk;
 
-  EZ = EX + EY;
+  /* Get the exponent sum into an intermediate variable.  This is a subtle
+     optimization, where given enough registers, all operations on the exponent
+     happen in registers and the result is written out only once into EZ.  */
+  int e = EX + EY;
+
   /* Is there a carry beyond the most significant digit?  */
-  if (__glibc_unlikely (Z[1] == ZERO))
+  if (__glibc_unlikely (Z[1] == 0))
     {
-      for (i = 1; i <= p; i++)
+      for (i = 1; i <= p2; i++)
 	Z[i] = Z[i + 1];
-      EZ--;
+      e--;
     }
 
+  EZ = e;
   Z[0] = X[0] * Y[0];
 }
+#endif
+
+#ifndef NO__SQR
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
+   error is bounded by 1.001 ULP.  This is a faster special case of
+   multiplication.  */
+void
+SECTION
+__sqr (const mp_no *x, mp_no *y, int p)
+{
+  long i, j, k, ip;
+  mantissa_store_t yk;
+
+  /* Is z=0?  */
+  if (__glibc_unlikely (X[0] == 0))
+    {
+      Y[0] = 0;
+      return;
+    }
+
+  /* We need not iterate through all X's since it's pointless to
+     multiply zeroes.  */
+  for (ip = p; ip > 0; ip--)
+    if (X[ip] != 0)
+      break;
+
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+  while (k > 2 * ip + 1)
+    Y[k--] = 0;
+
+  yk = 0;
+
+  while (k > p)
+    {
+      mantissa_store_t yk2 = 0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+	yk += X[lim] * (mantissa_store_t) X[lim];
+
+      /* In __mul, this loop (and the one within the next while loop) run
+         between a range to calculate the mantissa as follows:
+
+         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
+		+ X[n] * Y[k]
+
+         For X == Y, we can get away with summing halfway and doubling the
+	 result.  For cases where the range size is even, the mid-point needs
+	 to be added separately (above).  */
+      for (i = k - p, j = p; i < j; i++, j--)
+	yk2 += X[i] * (mantissa_store_t) X[j];
+
+      yk += 2 * yk2;
+
+      DIV_RADIX (yk, Y[k]);
+      k--;
+    }
+
+  while (k > 1)
+    {
+      mantissa_store_t yk2 = 0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+	yk += X[lim] * (mantissa_store_t) X[lim];
+
+      /* Likewise for this loop.  */
+      for (i = 1, j = k - 1; i < j; i++, j--)
+	yk2 += X[i] * (mantissa_store_t) X[j];
+
+      yk += 2 * yk2;
+
+      DIV_RADIX (yk, Y[k]);
+      k--;
+    }
+  Y[k] = yk;
+
+  /* Squares are always positive.  */
+  Y[0] = 1;
+
+  /* Get the exponent sum into an intermediate variable.  This is a subtle
+     optimization, where given enough registers, all operations on the exponent
+     happen in registers and the result is written out only once into EZ.  */
+  int e = EX * 2;
+
+  /* Is there a carry beyond the most significant digit?  */
+  if (__glibc_unlikely (Y[1] == 0))
+    {
+      for (i = 1; i <= p; i++)
+	Y[i] = Y[i + 1];
+      e--;
+    }
+
+  EY = e;
+}
+#endif
 
 /* Invert *X and store in *Y.  Relative error bound:
    - For P = 2: 1.001 * R ^ (1 - P)
@@ -664,7 +857,7 @@ static void
 SECTION
 __inv (const mp_no *x, mp_no *y, int p)
 {
-  int i;
+  long i;
   double t;
   mp_no z, w;
   static const int np1[] =
@@ -675,7 +868,7 @@ __inv (const mp_no *x, mp_no *y, int p)
   __cpy (x, &z, p);
   z.e = 0;
   __mp_dbl (&z, &t, p);
-  t = ONE / t;
+  t = 1 / t;
   __dbl_mp (t, y, p);
   EY -= EX;
 
@@ -701,8 +894,8 @@ __dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   mp_no w;
 
-  if (X[0] == ZERO)
-    Z[0] = ZERO;
+  if (X[0] == 0)
+    Z[0] = 0;
   else
     {
       __inv (y, &w, p);
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 06343d46d1..54044a0586 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -35,6 +35,7 @@
 /* Common types and definition                                          */
 /************************************************************************/
 
+#include <mpa-arch.h>
 
 /* The mp_no structure holds the details of a multi-precision floating point
    number.
@@ -61,7 +62,7 @@
 typedef struct
 {
   int e;
-  double d[40];
+  mantissa_t d[40];
 } mp_no;
 
 typedef union
@@ -82,9 +83,13 @@ extern const mp_no mptwo;
 
 #define ABS(x)   ((x) <  0  ? -(x) : (x))
 
-#define  RADIX     0x1.0p24		/* 2^24    */
-#define  RADIXI    0x1.0p-24		/* 2^-24   */
-#define  CUTTER    0x1.0p76		/* 2^76    */
+#ifndef RADIXI
+# define  RADIXI    0x1.0p-24		/* 2^-24   */
+#endif
+
+#ifndef TWO52
+# define  TWO52     0x1.0p52		/* 2^52    */
+#endif
 
 #define  ZERO      0.0			/* 0       */
 #define  MZERO     -0.0			/* 0 with the sign bit set */
@@ -92,13 +97,13 @@ extern const mp_no mptwo;
 #define  MONE      -1.0			/* -1      */
 #define  TWO       2.0			/*  2      */
 
-#define  TWO5      0x1.0p5		/* 2^5     */
-#define  TWO8      0x1.0p8		/* 2^52    */
-#define  TWO10     0x1.0p10		/* 2^10    */
-#define  TWO18     0x1.0p18		/* 2^18    */
-#define  TWO19     0x1.0p19		/* 2^19    */
-#define  TWO23     0x1.0p23		/* 2^23    */
-#define  TWO52     0x1.0p52		/* 2^52    */
+#define  TWO5      TWOPOW (5)		/* 2^5     */
+#define  TWO8      TWOPOW (8)		/* 2^52    */
+#define  TWO10     TWOPOW (10)		/* 2^10    */
+#define  TWO18     TWOPOW (18)		/* 2^18    */
+#define  TWO19     TWOPOW (19)		/* 2^19    */
+#define  TWO23     TWOPOW (23)		/* 2^23    */
+
 #define  TWO57     0x1.0p57		/* 2^57    */
 #define  TWO71     0x1.0p71		/* 2^71    */
 #define  TWOM1032  0x1.0p-1032		/* 2^-1032 */
@@ -115,6 +120,7 @@ void __dbl_mp (double, mp_no *, int);
 void __add (const mp_no *, const mp_no *, mp_no *, int);
 void __sub (const mp_no *, const mp_no *, mp_no *, int);
 void __mul (const mp_no *, const mp_no *, mp_no *, int);
+void __sqr (const mp_no *, mp_no *, int);
 void __dvd (const mp_no *, const mp_no *, mp_no *, int);
 
 extern void __mpatan (mp_no *, mp_no *, int);
diff --git a/sysdeps/ieee754/dbl-64/mpatan.c b/sysdeps/ieee754/dbl-64/mpatan.c
index db5868092a..cc879d8ec4 100644
--- a/sysdeps/ieee754/dbl-64/mpatan.c
+++ b/sysdeps/ieee754/dbl-64/mpatan.c
@@ -39,63 +39,78 @@
 
 #include "mpatan.h"
 
-void __mpsqrt(mp_no *, mp_no *, int);
-
 void
 SECTION
-__mpatan(mp_no *x, mp_no *y, int p) {
+__mpatan (mp_no *x, mp_no *y, int p)
+{
 
-  int i,m,n;
+  int i, m, n;
   double dx;
-  mp_no
-    mptwoim1 = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-		0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-		0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
+  mp_no mptwoim1 =
+  {
+    0,
+    {
+      0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+      0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+      0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+    }
+  };
 
-  mp_no mps,mpsm,mpt,mpt1,mpt2,mpt3;
+  mp_no mps, mpsm, mpt, mpt1, mpt2, mpt3;
 
-		      /* Choose m and initiate mptwoim1 */
-    if      (EX>0) m=7;
-    else if (EX<0) m=0;
-    else {
-      __mp_dbl(x,&dx,p);  dx=ABS(dx);
-      for (m=6; m>0; m--)
-	{if (dx>__atan_xm[m].d) break;}
+  /* Choose m and initiate mptwoim1.  */
+  if (EX > 0)
+    m = 7;
+  else if (EX < 0)
+    m = 0;
+  else
+    {
+      __mp_dbl (x, &dx, p);
+      dx = ABS (dx);
+      for (m = 6; m > 0; m--)
+	{
+	  if (dx > __atan_xm[m].d)
+	    break;
+	}
     }
-    mptwoim1.e = 1;
-    mptwoim1.d[0] = ONE;
+  mptwoim1.e = 1;
+  mptwoim1.d[0] = ONE;
 
-				 /* Reduce x m times */
-    __mul(x,x,&mpsm,p);
-    if (m==0) __cpy(x,&mps,p);
-    else {
-      for (i=0; i<m; i++) {
-	__add(&mpone,&mpsm,&mpt1,p);
-	__mpsqrt(&mpt1,&mpt2,p);
-	__add(&mpt2,&mpt2,&mpt1,p);
-	__add(&mptwo,&mpsm,&mpt2,p);
-	__add(&mpt1,&mpt2,&mpt3,p);
-	__dvd(&mpsm,&mpt3,&mpt1,p);
-	__cpy(&mpt1,&mpsm,p);
-      }
-      __mpsqrt(&mpsm,&mps,p);    mps.d[0] = X[0];
+  /* Reduce x m times.  */
+  __sqr (x, &mpsm, p);
+  if (m == 0)
+    __cpy (x, &mps, p);
+  else
+    {
+      for (i = 0; i < m; i++)
+	{
+	  __add (&mpone, &mpsm, &mpt1, p);
+	  __mpsqrt (&mpt1, &mpt2, p);
+	  __add (&mpt2, &mpt2, &mpt1, p);
+	  __add (&mptwo, &mpsm, &mpt2, p);
+	  __add (&mpt1, &mpt2, &mpt3, p);
+	  __dvd (&mpsm, &mpt3, &mpt1, p);
+	  __cpy (&mpt1, &mpsm, p);
+	}
+      __mpsqrt (&mpsm, &mps, p);
+      mps.d[0] = X[0];
     }
 
-		    /* Evaluate a truncated power series for Atan(s) */
-    n=__atan_np[p];    mptwoim1.d[1] = __atan_twonm1[p].d;
-    __dvd(&mpsm,&mptwoim1,&mpt,p);
-    for (i=n-1; i>1; i--) {
+  /* Evaluate a truncated power series for Atan(s).  */
+  n = __atan_np[p];
+  mptwoim1.d[1] = __atan_twonm1[p].d;
+  __dvd (&mpsm, &mptwoim1, &mpt, p);
+  for (i = n - 1; i > 1; i--)
+    {
       mptwoim1.d[1] -= TWO;
-      __dvd(&mpsm,&mptwoim1,&mpt1,p);
-      __mul(&mpsm,&mpt,&mpt2,p);
-      __sub(&mpt1,&mpt2,&mpt,p);
+      __dvd (&mpsm, &mptwoim1, &mpt1, p);
+      __mul (&mpsm, &mpt, &mpt2, p);
+      __sub (&mpt1, &mpt2, &mpt, p);
     }
-    __mul(&mps,&mpt,&mpt1,p);
-    __sub(&mps,&mpt1,&mpt,p);
-
-			  /* Compute Atan(x) */
-    mptwoim1.d[1] = 1 << m;
-    __mul(&mptwoim1,&mpt,y,p);
+  __mul (&mps, &mpt, &mpt1, p);
+  __sub (&mps, &mpt1, &mpt, p);
 
-  return;
+  /* Compute Atan(x).  */
+  mptwoim1.d[1] = 1 << m;
+  __mul (&mptwoim1, &mpt, y, p);
 }
diff --git a/sysdeps/ieee754/dbl-64/mpatan2.c b/sysdeps/ieee754/dbl-64/mpatan2.c
index c0b9aea1e2..d29c2fbade 100644
--- a/sysdeps/ieee754/dbl-64/mpatan2.c
+++ b/sysdeps/ieee754/dbl-64/mpatan2.c
@@ -32,37 +32,36 @@
 /*                                                                */
 /******************************************************************/
 
-
-
 #include "mpa.h"
 
 #ifndef SECTION
 # define SECTION
 #endif
 
-void __mpsqrt(mp_no *, mp_no *, int);
-void __mpatan(mp_no *, mp_no *, int);
-
-/* Multi-Precision Atan2(y,x) function subroutine, for p >= 4.    */
-/* y=0 is not permitted if x<=0. No error messages are given.     */
+/* Multi-Precision Atan2 (y, x) function subroutine, for p >= 4.
+   y = 0 is not permitted if x <= 0. No error messages are given.  */
 void
 SECTION
-__mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) {
+__mpatan2 (mp_no *y, mp_no *x, mp_no *z, int p)
+{
+  mp_no mpt1, mpt2, mpt3;
 
-  mp_no mpt1,mpt2,mpt3;
-
-
-  if (X[0] <= ZERO) {
-    __dvd(x,y,&mpt1,p);          __mul(&mpt1,&mpt1,&mpt2,p);
-    if (mpt1.d[0] != ZERO)       mpt1.d[0] = ONE;
-    __add(&mpt2,&mpone,&mpt3,p); __mpsqrt(&mpt3,&mpt2,p);
-    __add(&mpt1,&mpt2,&mpt3,p);  mpt3.d[0]=Y[0];
-    __mpatan(&mpt3,&mpt1,p);     __add(&mpt1,&mpt1,z,p);
-  }
+  if (X[0] <= ZERO)
+    {
+      __dvd (x, y, &mpt1, p);
+      __mul (&mpt1, &mpt1, &mpt2, p);
+      if (mpt1.d[0] != ZERO)
+	mpt1.d[0] = ONE;
+      __add (&mpt2, &mpone, &mpt3, p);
+      __mpsqrt (&mpt3, &mpt2, p);
+      __add (&mpt1, &mpt2, &mpt3, p);
+      mpt3.d[0] = Y[0];
+      __mpatan (&mpt3, &mpt1, p);
+      __add (&mpt1, &mpt1, z, p);
+    }
   else
-  { __dvd(y,x,&mpt1,p);
-    __mpatan(&mpt1,z,p);
-  }
-
-  return;
+    {
+      __dvd (y, x, &mpt1, p);
+      __mpatan (&mpt1, z, p);
+    }
 }
diff --git a/sysdeps/ieee754/dbl-64/mpexp.c b/sysdeps/ieee754/dbl-64/mpexp.c
index 8d288ff9a1..565c6c8531 100644
--- a/sysdeps/ieee754/dbl-64/mpexp.c
+++ b/sysdeps/ieee754/dbl-64/mpexp.c
@@ -49,6 +49,7 @@ __mpexp (mp_no *x, mp_no *y, int p)
       0, 0, 0, 0, 3, 3, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8
     };
+
   static const int m1p[33] =
     {
       0, 0, 0, 0,
@@ -71,16 +72,7 @@ __mpexp (mp_no *x, mp_no *y, int p)
       {0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 39, 43, 47, 51, 55, 59, 63},
       {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 47, 50, 54}
     };
-  mp_no mpk =
-    {
-      0,
-      {
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-      }
-    };
-  mp_no mps, mpak, mpt1, mpt2;
+  mp_no mps, mpk, mpt1, mpt2;
 
   /* Choose m,n and compute a=2**(-m).  */
   n = np[p];
@@ -115,37 +107,52 @@ __mpexp (mp_no *x, mp_no *y, int p)
 	  break;
     }
 
-  /* Compute s=x*2**(-m). Put result in mps.  */
+  /* Compute s=x*2**(-m). Put result in mps.  This is the range-reduced input
+     that we will use to compute e^s.  For the final result, simply raise it
+     to 2^m.  */
   __pow_mp (-m, &mpt1, p);
   __mul (x, &mpt1, &mps, p);
 
-  /* Evaluate the polynomial. Put result in mpt2.  */
-  mpk.e = 1;
-  mpk.d[0] = ONE;
-  mpk.d[1] = n;
-  __dvd (&mps, &mpk, &mpt1, p);
-  __add (&mpone, &mpt1, &mpak, p);
-  for (k = n - 1; k > 1; k--)
+  /* Compute the Taylor series for e^s:
+
+         1 + x/1! + x^2/2! + x^3/3! ...
+
+     for N iterations.  We compute this as:
+
+         e^x = 1 + (x * n!/1! + x^2 * n!/2! + x^3 * n!/3!) / n!
+             = 1 + (x * (n!/1! + x * (n!/2! + x * (n!/3! + x ...)))) / n!
+
+     k! is computed on the fly as KF and at the end of the polynomial loop, KF
+     is n!, which can be used directly.  */
+  __cpy (&mps, &mpt2, p);
+
+  double kf = 1.0;
+
+  /* Evaluate the rest.  The result will be in mpt2.  */
+  for (k = n - 1; k > 0; k--)
     {
-      __mul (&mps, &mpak, &mpt1, p);
-      mpk.d[1] = k;
-      __dvd (&mpt1, &mpk, &mpt2, p);
-      __add (&mpone, &mpt2, &mpak, p);
+      /* n! / k! = n * (n - 1) ... * (n - k + 1) */
+      kf *= k + 1;
+
+      __dbl_mp (kf, &mpk, p);
+      __add (&mpt2, &mpk, &mpt1, p);
+      __mul (&mps, &mpt1, &mpt2, p);
     }
-  __mul (&mps, &mpak, &mpt1, p);
+  __dbl_mp (kf, &mpk, p);
+  __dvd (&mpt2, &mpk, &mpt1, p);
   __add (&mpone, &mpt1, &mpt2, p);
 
   /* Raise polynomial value to the power of 2**m. Put result in y.  */
   for (k = 0, j = 0; k < m;)
     {
-      __mul (&mpt2, &mpt2, &mpt1, p);
+      __sqr (&mpt2, &mpt1, p);
       k++;
       if (k == m)
 	{
 	  j = 1;
 	  break;
 	}
-      __mul (&mpt1, &mpt1, &mpt2, p);
+      __sqr (&mpt1, &mpt2, p);
       k++;
     }
   if (j)
diff --git a/sysdeps/ieee754/dbl-64/mplog.c b/sysdeps/ieee754/dbl-64/mplog.c
index e3d10846e2..f8d5c1095f 100644
--- a/sysdeps/ieee754/dbl-64/mplog.c
+++ b/sysdeps/ieee754/dbl-64/mplog.c
@@ -1,4 +1,3 @@
-
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
@@ -37,27 +36,30 @@
 #include "endian.h"
 #include "mpa.h"
 
-void __mpexp(mp_no *, mp_no *, int);
-
-void __mplog(mp_no *x, mp_no *y, int p) {
-  int i,m;
-  static const int mp[33] = {0,0,0,0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,
-                             4,4,4,4,4,4,4,4,4,4,4,4,4,4};
-  mp_no mpt1,mpt2;
+void
+__mplog (mp_no *x, mp_no *y, int p)
+{
+  int i, m;
+  static const int mp[33] =
+    {
+      0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
+    };
+  mp_no mpt1, mpt2;
 
-  /* Choose m */
+  /* Choose m.  */
   m = mp[p];
 
-  /* Perform m newton iterations to solve for y: exp(y)-x=0.     */
-  /* The iterations formula is:  y(n+1)=y(n)+(x*exp(-y(n))-1).   */
-  __cpy(y,&mpt1,p);
-  for (i=0; i<m; i++) {
-    mpt1.d[0]=-mpt1.d[0];
-    __mpexp(&mpt1,&mpt2,p);
-    __mul(x,&mpt2,&mpt1,p);
-    __sub(&mpt1,&mpone,&mpt2,p);
-    __add(y,&mpt2,&mpt1,p);
-    __cpy(&mpt1,y,p);
-  }
-  return;
+  /* Perform m newton iterations to solve for y: exp(y) - x = 0.  The
+     iterations formula is:  y(n + 1) = y(n) + (x * exp(-y(n)) - 1).   */
+  __cpy (y, &mpt1, p);
+  for (i = 0; i < m; i++)
+    {
+      mpt1.d[0] = -mpt1.d[0];
+      __mpexp (&mpt1, &mpt2, p);
+      __mul (x, &mpt2, &mpt1, p);
+      __sub (&mpt1, &mpone, &mpt2, p);
+      __add (y, &mpt2, &mpt1, p);
+      __cpy (&mpt1, y, p);
+    }
 }
diff --git a/sysdeps/ieee754/dbl-64/mpsqrt.c b/sysdeps/ieee754/dbl-64/mpsqrt.c
index 65df9fd067..230d1f36e8 100644
--- a/sysdeps/ieee754/dbl-64/mpsqrt.c
+++ b/sysdeps/ieee754/dbl-64/mpsqrt.c
@@ -45,33 +45,37 @@
 /* p as integer. Routine computes sqrt(*x) and stores result in *y          */
 /****************************************************************************/
 
-static double fastiroot(double);
+static double fastiroot (double);
 
 void
 SECTION
-__mpsqrt(mp_no *x, mp_no *y, int p) {
-  int i,m,ey;
-  double dx,dy;
-  static const mp_no
-    mphalf   = {0,{1.0,8388608.0 /* 2^23 */}},
-    mp3halfs = {1,{1.0,1.0,8388608.0 /* 2^23 */}};
-  mp_no mpxn,mpz,mpu,mpt1,mpt2;
+__mpsqrt (mp_no *x, mp_no *y, int p)
+{
+  int i, m, ey;
+  double dx, dy;
+  static const mp_no mphalf = {0, {1.0, HALFRAD}};
+  static const mp_no mp3halfs = {1, {1.0, 1.0, HALFRAD}};
+  mp_no mpxn, mpz, mpu, mpt1, mpt2;
 
-  ey=EX/2;     __cpy(x,&mpxn,p);    mpxn.e -= (ey+ey);
-  __mp_dbl(&mpxn,&dx,p);   dy=fastiroot(dx);    __dbl_mp(dy,&mpu,p);
-  __mul(&mpxn,&mphalf,&mpz,p);
+  ey = EX / 2;
+  __cpy (x, &mpxn, p);
+  mpxn.e -= (ey + ey);
+  __mp_dbl (&mpxn, &dx, p);
+  dy = fastiroot (dx);
+  __dbl_mp (dy, &mpu, p);
+  __mul (&mpxn, &mphalf, &mpz, p);
 
-  m=__mpsqrt_mp[p];
-  for (i=0; i<m; i++) {
-    __mul(&mpu,&mpu,&mpt1,p);
-    __mul(&mpt1,&mpz,&mpt2,p);
-    __sub(&mp3halfs,&mpt2,&mpt1,p);
-    __mul(&mpu,&mpt1,&mpt2,p);
-    __cpy(&mpt2,&mpu,p);
-  }
-  __mul(&mpxn,&mpu,y,p);  EY += ey;
-
-  return;
+  m = __mpsqrt_mp[p];
+  for (i = 0; i < m; i++)
+    {
+      __sqr (&mpu, &mpt1, p);
+      __mul (&mpt1, &mpz, &mpt2, p);
+      __sub (&mp3halfs, &mpt2, &mpt1, p);
+      __mul (&mpu, &mpt1, &mpt2, p);
+      __cpy (&mpt2, &mpu, p);
+    }
+  __mul (&mpxn, &mpu, y, p);
+  EY += ey;
 }
 
 /***********************************************************/
@@ -80,22 +84,28 @@ __mpsqrt(mp_no *x, mp_no *y, int p) {
 /***********************************************************/
 static double
 SECTION
-fastiroot(double x) {
-  union {int i[2]; double d;} p,q;
-  double y,z, t;
+fastiroot (double x)
+{
+  union
+  {
+    int i[2];
+    double d;
+  } p, q;
+  double y, z, t;
   int n;
-  static const double c0 = 0.99674, c1 = -0.53380, c2 = 0.45472, c3 = -0.21553;
+  static const double c0 = 0.99674, c1 = -0.53380;
+  static const double c2 = 0.45472, c3 = -0.21553;
 
   p.d = x;
-  p.i[HIGH_HALF] = (p.i[HIGH_HALF] & 0x3FFFFFFF ) | 0x3FE00000 ;
+  p.i[HIGH_HALF] = (p.i[HIGH_HALF] & 0x3FFFFFFF) | 0x3FE00000;
   q.d = x;
   y = p.d;
-  z = y -1.0;
-  n = (q.i[HIGH_HALF] - p.i[HIGH_HALF])>>1;
-  z = ((c3*z + c2)*z + c1)*z + c0;            /* 2**-7         */
-  z = z*(1.5 - 0.5*y*z*z);                    /* 2**-14        */
-  p.d = z*(1.5 - 0.5*y*z*z);                  /* 2**-28        */
+  z = y - 1.0;
+  n = (q.i[HIGH_HALF] - p.i[HIGH_HALF]) >> 1;
+  z = ((c3 * z + c2) * z + c1) * z + c0;	/* 2**-7         */
+  z = z * (1.5 - 0.5 * y * z * z);		/* 2**-14        */
+  p.d = z * (1.5 - 0.5 * y * z * z);		/* 2**-28        */
   p.i[HIGH_HALF] -= n;
-  t = x*p.d;
-  return p.d*(1.5 - 0.5*p.d*t);
+  t = x * p.d;
+  return p.d * (1.5 - 0.5 * p.d * t);
 }
diff --git a/sysdeps/ieee754/dbl-64/mptan.c b/sysdeps/ieee754/dbl-64/mptan.c
index 234108e373..51b5718e73 100644
--- a/sysdeps/ieee754/dbl-64/mptan.c
+++ b/sysdeps/ieee754/dbl-64/mptan.c
@@ -40,23 +40,25 @@
 # define SECTION
 #endif
 
-int __mpranred(double, mp_no *, int);
-void __c32(mp_no *, mp_no *, mp_no *, int);
-
 void
 SECTION
-__mptan(double x, mp_no *mpy, int p) {
+__mptan (double x, mp_no *mpy, int p)
+{
 
   int n;
   mp_no mpw, mpc, mps;
 
-  n = __mpranred(x, &mpw, p) & 0x00000001; /* negative or positive result */
-  __c32(&mpw, &mpc, &mps, p);              /* computing sin(x) and cos(x) */
-  if (n)                     /* second or fourth quarter of unit circle */
-  { __dvd(&mpc,&mps,mpy,p);
-    mpy->d[0] *= MONE;
-  }                          /* tan is negative in this area */
-  else  __dvd(&mps,&mpc,mpy,p);
-
-  return;
+  /* Negative or positive result.  */
+  n = __mpranred (x, &mpw, p) & 0x00000001;
+  /* Computing sin(x) and cos(x).  */
+  __c32 (&mpw, &mpc, &mps, p);
+  /* Second or fourth quarter of unit circle.  */
+  if (n)
+    {
+      __dvd (&mpc, &mps, mpy, p);
+      mpy->d[0] *= MONE;
+    }
+  /* tan is negative in this area.  */
+  else
+    __dvd (&mps, &mpc, mpy, p);
 }
diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c
index 37442b72e8..aa3564d560 100644
--- a/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/sysdeps/ieee754/dbl-64/s_atan.c
@@ -62,18 +62,9 @@ double atan(double x) {
 #ifndef DLA_FMS
   double t4,t5,t6;
 #endif
-#if 0
-  double y1,y2;
-#endif
   int i,ux,dx;
-#if 0
-  int p;
-#endif
   static const int pr[M]={6,8,10,32};
   number num;
-#if 0
-  mp_no mpt1,mpx,mpy,mpy1,mpy2,mperr;
-#endif
 
   num.d = x;  ux = num.i[HIGH_HALF];  dx = num.i[LOW_HALF];
 
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 496f1e1917..5038b72612 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -100,14 +100,8 @@ double
 SECTION
 __sin(double x){
 	double xx,res,t,cor,y,s,c,sn,ssn,cs,ccs,xn,a,da,db,eps,xn1,xn2;
-#if 0
-	double w[2];
-#endif
 	mynumber u,v;
 	int4 k,m,n;
-#if 0
-	int4 nn;
-#endif
 	double retval = 0;
 
 	SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
@@ -902,10 +896,6 @@ SECTION
 bsloww(double x,double dx, double orig,int n) {
   static const double th2_36 = 206158430208.0;   /*    1.5*2**37   */
   double y,x1,x2,xx,r,t,res,cor,w[2];
-#if 0
-  double a,da,xn;
-  union {int4 i[2]; double x;} v;
-#endif
   x1=(x+th2_36)-th2_36;
   y = aa.x*x1*x1*x1;
   r=x+y;
diff --git a/sysdeps/ieee754/dbl-64/s_tan.c b/sysdeps/ieee754/dbl-64/s_tan.c
index c5446a6e56..faa5221e5e 100644
--- a/sysdeps/ieee754/dbl-64/s_tan.c
+++ b/sysdeps/ieee754/dbl-64/s_tan.c
@@ -64,9 +64,6 @@ tan(double x) {
   int p;
   number num,v;
   mp_no mpa,mpt1,mpt2;
-#if 0
-  mp_no mpy;
-#endif
 
   double retval;
 
diff --git a/sysdeps/ieee754/dbl-64/sincos32.c b/sysdeps/ieee754/dbl-64/sincos32.c
index 6c5ffded52..954db66d6b 100644
--- a/sysdeps/ieee754/dbl-64/sincos32.c
+++ b/sysdeps/ieee754/dbl-64/sincos32.c
@@ -57,17 +57,10 @@ SECTION
 ss32(mp_no *x, mp_no *y, int p) {
   int i;
   double a;
-#if 0
-  double b;
-  static const mp_no mpone = {1,{1.0,1.0}};
-#endif
   mp_no mpt1,x2,gor,sum ,mpk={1,{1.0}};
-#if 0
-  mp_no mpt2;
-#endif
   for (i=1;i<=p;i++) mpk.d[i]=0;
 
-  __mul(x,x,&x2,p);
+  __sqr(x,&x2,p);
   __cpy(&oofac27,&gor,p);
   __cpy(&gor,&sum,p);
   for (a=27.0;a>1.0;a-=2.0) {
@@ -89,17 +82,10 @@ SECTION
 cc32(mp_no *x, mp_no *y, int p) {
   int i;
   double a;
-#if 0
-  double b;
-  static const mp_no mpone = {1,{1.0,1.0}};
-#endif
   mp_no mpt1,x2,gor,sum ,mpk={1,{1.0}};
-#if 0
-  mp_no mpt2;
-#endif
   for (i=1;i<=p;i++) mpk.d[i]=0;
 
-  __mul(x,x,&x2,p);
+  __sqr(x,&x2,p);
   mpk.d[1]=27.0;
   __mul(&oofac27,&mpk,&gor,p);
   __cpy(&gor,&sum,p);
@@ -119,7 +105,6 @@ cc32(mp_no *x, mp_no *y, int p) {
 void
 SECTION
 __c32(mp_no *x, mp_no *y, mp_no *z, int p) {
-  static const mp_no mpt={1,{1.0,2.0}}, one={1,{1.0,1.0}};
   mp_no u,t,t1,t2,c,s;
   int i;
   __cpy(x,&u,p);
@@ -130,11 +115,11 @@ __c32(mp_no *x, mp_no *y, mp_no *z, int p) {
     __mul(&c,&s,&t,p);
     __sub(&s,&t,&t1,p);
     __add(&t1,&t1,&s,p);
-    __sub(&mpt,&c,&t1,p);
+    __sub(&mptwo,&c,&t1,p);
     __mul(&t1,&c,&t2,p);
     __add(&t2,&t2,&c,p);
   }
-  __sub(&one,&c,y,p);
+  __sub(&mpone,&c,y,p);
   __cpy(&s,z,p);
 }
 
@@ -251,7 +236,6 @@ __mpranred(double x, mp_no *y, int p)
   number v;
   double t,xn;
   int i,k,n;
-  static const mp_no one = {1,{1.0,1.0}};
   mp_no a,b,c;
 
   if (ABS(x) < 2.8e14) {
@@ -278,9 +262,9 @@ __mpranred(double x, mp_no *y, int p)
     for (i=1;i<=p-c.e;i++) c.d[i]=c.d[i+c.e];
     for (i=p+1-c.e;i<=p;i++) c.d[i]=0;
     c.e=0;
-    if (c.d[1] >=  8388608.0)
+    if (c.d[1] >= HALFRAD)
     { t +=1.0;
-      __sub(&c,&one,&b,p);
+      __sub(&c,&mpone,&b,p);
       __mul(&b,&hp,y,p);
     }
     else __mul(&c,&hp,y,p);
diff --git a/sysdeps/ieee754/dbl-64/slowexp.c b/sysdeps/ieee754/dbl-64/slowexp.c
index 34ca3275eb..8f353f634f 100644
--- a/sysdeps/ieee754/dbl-64/slowexp.c
+++ b/sysdeps/ieee754/dbl-64/slowexp.c
@@ -27,45 +27,49 @@
 /*Converting from double precision to Multi-precision and calculating     */
 /* e^x                                                                    */
 /**************************************************************************/
-#include "mpa.h"
 #include <math_private.h>
 
+#ifndef USE_LONG_DOUBLE_FOR_MP
+# include "mpa.h"
+void __mpexp (mp_no *x, mp_no *y, int p);
+#endif
+
 #ifndef SECTION
 # define SECTION
 #endif
 
-void __mpexp(mp_no *x, mp_no *y, int p);
-
 /*Converting from double precision to Multi-precision and calculating  e^x */
 double
 SECTION
-__slowexp(double x) {
-  double w,z,res,eps=3.0e-26;
-#if 0
-  double y;
-#endif
+__slowexp (double x)
+{
+#ifndef USE_LONG_DOUBLE_FOR_MP
+  double w, z, res, eps = 3.0e-26;
   int p;
-#if 0
-  int orig,i;
-#endif
-  mp_no mpx, mpy, mpz,mpw,mpeps,mpcor;
+  mp_no mpx, mpy, mpz, mpw, mpeps, mpcor;
 
-  p=6;
-  __dbl_mp(x,&mpx,p); /* Convert a double precision number  x               */
-		    /* into a multiple precision number mpx with prec. p. */
-  __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */
-  __dbl_mp(eps,&mpeps,p);
-  __mul(&mpeps,&mpy,&mpcor,p);
-  __add(&mpy,&mpcor,&mpw,p);
-  __sub(&mpy,&mpcor,&mpz,p);
-  __mp_dbl(&mpw, &w, p);
-  __mp_dbl(&mpz, &z, p);
-  if (w == z) return w;
-  else  {                   /* if calculating is not exactly   */
-    p = 32;
-    __dbl_mp(x,&mpx,p);
-    __mpexp(&mpx, &mpy, p);
-    __mp_dbl(&mpy, &res, p);
-    return res;
-  }
+  /* Use the multiple precision __MPEXP function to compute the exponential
+     First at 144 bits and if it is not accurate enough, at 768 bits.  */
+  p = 6;
+  __dbl_mp (x, &mpx, p);
+  __mpexp (&mpx, &mpy, p);
+  __dbl_mp (eps, &mpeps, p);
+  __mul (&mpeps, &mpy, &mpcor, p);
+  __add (&mpy, &mpcor, &mpw, p);
+  __sub (&mpy, &mpcor, &mpz, p);
+  __mp_dbl (&mpw, &w, p);
+  __mp_dbl (&mpz, &z, p);
+  if (w == z)
+    return w;
+  else
+    {
+      p = 32;
+      __dbl_mp (x, &mpx, p);
+      __mpexp (&mpx, &mpy, p);
+      __mp_dbl (&mpy, &res, p);
+      return res;
+    }
+#else
+  return (double) __ieee754_expl((long double)x);
+#endif
 }
diff --git a/sysdeps/ieee754/dbl-64/slowpow.c b/sysdeps/ieee754/dbl-64/slowpow.c
index c303eaa5af..a379728b14 100644
--- a/sysdeps/ieee754/dbl-64/slowpow.c
+++ b/sysdeps/ieee754/dbl-64/slowpow.c
@@ -38,42 +38,76 @@
 # define SECTION
 #endif
 
-void __mpexp(mp_no *x, mp_no *y, int p);
-void __mplog(mp_no *x, mp_no *y, int p);
-double ulog(double);
-double __halfulp(double x,double y);
+void __mpexp (mp_no *x, mp_no *y, int p);
+void __mplog (mp_no *x, mp_no *y, int p);
+double ulog (double);
+double __halfulp (double x, double y);
 
 double
 SECTION
-__slowpow(double x, double y, double z) {
-  double res,res1;
-  mp_no mpx, mpy, mpz,mpw,mpp,mpr,mpr1;
-  static const mp_no eps = {-3,{1.0,4.0}};
+__slowpow (double x, double y, double z)
+{
+  double res, res1;
+  mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
+  static const mp_no eps = {-3, {1.0, 4.0}};
   int p;
 
-  res = __halfulp(x,y);        /* halfulp() returns -10 or x^y             */
-  if (res >= 0) return res;  /* if result was really computed by halfulp */
-		  /*  else, if result was not really computed by halfulp */
-  p = 10;         /*  p=precision   */
-  __dbl_mp(x,&mpx,p);
-  __dbl_mp(y,&mpy,p);
-  __dbl_mp(z,&mpz,p);
-  __mplog(&mpx, &mpz, p);     /* log(x) = z   */
-  __mul(&mpy,&mpz,&mpw,p);    /*  y * z =w    */
-  __mpexp(&mpw, &mpp, p);     /*  e^w =pp     */
-  __add(&mpp,&eps,&mpr,p);    /*  pp+eps =r   */
-  __mp_dbl(&mpr, &res, p);
-  __sub(&mpp,&eps,&mpr1,p);   /*  pp -eps =r1 */
-  __mp_dbl(&mpr1, &res1, p);  /*  converting into double precision */
-  if (res == res1) return res;
+  /* __HALFULP returns -10 or X^Y.  */
+  res = __halfulp (x, y);
 
-  p = 32;     /* if we get here result wasn't calculated exactly, continue */
-  __dbl_mp(x,&mpx,p);                          /* for more exact calculation */
-  __dbl_mp(y,&mpy,p);
-  __dbl_mp(z,&mpz,p);
-  __mplog(&mpx, &mpz, p);   /* log(c)=z  */
-  __mul(&mpy,&mpz,&mpw,p);  /* y*z =w    */
-  __mpexp(&mpw, &mpp, p);   /* e^w=pp    */
-  __mp_dbl(&mpp, &res, p);  /* converting into double precision */
+  /* Return if the result was computed by __HALFULP.  */
+  if (res >= 0)
+    return res;
+
+  /* Compute pow as long double.  This is currently only used by powerpc, where
+     one may get 106 bits of accuracy.  */
+#ifdef USE_LONG_DOUBLE_FOR_MP
+  long double ldw, ldz, ldpp;
+  static const long double ldeps = 0x4.0p-96;
+
+  ldz = __ieee754_logl ((long double) x);
+  ldw = (long double) y *ldz;
+  ldpp = __ieee754_expl (ldw);
+  res = (double) (ldpp + ldeps);
+  res1 = (double) (ldpp - ldeps);
+
+  /* Return the result if it is accurate enough.  */
+  if (res == res1)
+    return res;
+#endif
+
+  /* Or else, calculate using multiple precision.  P = 10 implies accuracy of
+     240 bits accuracy, since MP_NO has a radix of 2^24.  */
+  p = 10;
+  __dbl_mp (x, &mpx, p);
+  __dbl_mp (y, &mpy, p);
+  __dbl_mp (z, &mpz, p);
+
+  /* z = x ^ y
+     log (z) = y * log (x)
+     z = exp (y * log (x))  */
+  __mplog (&mpx, &mpz, p);
+  __mul (&mpy, &mpz, &mpw, p);
+  __mpexp (&mpw, &mpp, p);
+
+  /* Add and subtract EPS to ensure that the result remains unchanged, i.e. we
+     have last bit accuracy.  */
+  __add (&mpp, &eps, &mpr, p);
+  __mp_dbl (&mpr, &res, p);
+  __sub (&mpp, &eps, &mpr1, p);
+  __mp_dbl (&mpr1, &res1, p);
+  if (res == res1)
+    return res;
+
+  /* If we don't, then we repeat using a higher precision.  768 bits of
+     precision ought to be enough for anybody.  */
+  p = 32;
+  __dbl_mp (x, &mpx, p);
+  __dbl_mp (y, &mpy, p);
+  __dbl_mp (z, &mpz, p);
+  __mplog (&mpx, &mpz, p);
+  __mul (&mpy, &mpz, &mpw, p);
+  __mpexp (&mpw, &mpp, p);
+  __mp_dbl (&mpp, &res, p);
   return res;
 }
diff --git a/sysdeps/ieee754/dbl-64/x2y2m1.c b/sysdeps/ieee754/dbl-64/x2y2m1.c
index 0b73f0a2ee..d36a950e36 100644
--- a/sysdeps/ieee754/dbl-64/x2y2m1.c
+++ b/sysdeps/ieee754/dbl-64/x2y2m1.c
@@ -37,7 +37,7 @@ add_split (double *hi, double *lo, double x, double y)
    given that the values are small enough that no overflow occurs and
    large enough (or zero) that no underflow occurs.  */
 
-static inline void
+static void
 mul_split (double *hi, double *lo, double x, double y)
 {
 #ifdef __FP_FAST_FMA
diff --git a/sysdeps/ieee754/ldbl-128/e_j0l.c b/sysdeps/ieee754/ldbl-128/e_j0l.c
index 1b18289588..108eff4435 100644
--- a/sysdeps/ieee754/ldbl-128/e_j0l.c
+++ b/sysdeps/ieee754/ldbl-128/e_j0l.c
@@ -93,6 +93,7 @@
 
 #include <math.h>
 #include <math_private.h>
+#include <float.h>
 
 /* 1 / sqrt(pi) */
 static const long double ONEOSQPI = 5.6418958354775628694807945156077258584405E-1L;
@@ -700,6 +701,28 @@ __ieee754_j0l (long double x)
       return p;
     }
 
+  /* X = x - pi/4
+     cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
+     = 1/sqrt(2) * (cos(x) + sin(x))
+     sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
+     = 1/sqrt(2) * (sin(x) - cos(x))
+     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+     cf. Fdlibm.  */
+  __sincosl (xx, &s, &c);
+  ss = s - c;
+  cc = s + c;
+  if (xx <= LDBL_MAX / 2.0L)
+    {
+      z = -__cosl (xx + xx);
+      if ((s * c) < 0)
+	cc = z / ss;
+      else
+	ss = z / cc;
+    }
+
+  if (xx > 0x1p256L)
+    return ONEOSQPI * cc / __ieee754_sqrtl (xx);
+
   xinv = 1.0L / xx;
   z = xinv * xinv;
   if (xinv <= 0.25)
@@ -761,21 +784,6 @@ __ieee754_j0l (long double x)
   p = 1.0L + z * p;
   q = z * xinv * q;
   q = q - 0.125L * xinv;
-  /* X = x - pi/4
-     cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
-     = 1/sqrt(2) * (cos(x) + sin(x))
-     sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
-     = 1/sqrt(2) * (sin(x) - cos(x))
-     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
-     cf. Fdlibm.  */
-  __sincosl (xx, &s, &c);
-  ss = s - c;
-  cc = s + c;
-  z = -__cosl (xx + xx);
-  if ((s * c) < 0)
-    cc = z / ss;
-  else
-    ss = z / cc;
   z = ONEOSQPI * (p * cc - q * ss) / __ieee754_sqrtl (xx);
   return z;
 }
@@ -843,6 +851,28 @@ long double
       return p;
     }
 
+  /* X = x - pi/4
+     cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
+     = 1/sqrt(2) * (cos(x) + sin(x))
+     sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
+     = 1/sqrt(2) * (sin(x) - cos(x))
+     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+     cf. Fdlibm.  */
+  __sincosl (x, &s, &c);
+  ss = s - c;
+  cc = s + c;
+  if (xx <= LDBL_MAX / 2.0L)
+    {
+      z = -__cosl (x + x);
+      if ((s * c) < 0)
+	cc = z / ss;
+      else
+	ss = z / cc;
+    }
+
+  if (xx > 0x1p256L)
+    return ONEOSQPI * ss / __ieee754_sqrtl (x);
+
   xinv = 1.0L / xx;
   z = xinv * xinv;
   if (xinv <= 0.25)
@@ -904,21 +934,6 @@ long double
   p = 1.0L + z * p;
   q = z * xinv * q;
   q = q - 0.125L * xinv;
-  /* X = x - pi/4
-     cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4)
-     = 1/sqrt(2) * (cos(x) + sin(x))
-     sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4)
-     = 1/sqrt(2) * (sin(x) - cos(x))
-     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
-     cf. Fdlibm.  */
-  __sincosl (x, &s, &c);
-  ss = s - c;
-  cc = s + c;
-  z = -__cosl (x + x);
-  if ((s * c) < 0)
-    cc = z / ss;
-  else
-    ss = z / cc;
   z = ONEOSQPI * (p * ss + q * cc) / __ieee754_sqrtl (x);
   return z;
 }
diff --git a/sysdeps/ieee754/ldbl-128/e_j1l.c b/sysdeps/ieee754/ldbl-128/e_j1l.c
index f16343b26b..70a1c86fd2 100644
--- a/sysdeps/ieee754/ldbl-128/e_j1l.c
+++ b/sysdeps/ieee754/ldbl-128/e_j1l.c
@@ -97,6 +97,7 @@
 
 #include <math.h>
 #include <math_private.h>
+#include <float.h>
 
 /* 1 / sqrt(pi) */
 static const long double ONEOSQPI = 5.6418958354775628694807945156077258584405E-1L;
@@ -706,6 +707,32 @@ __ieee754_j1l (long double x)
       return p;
     }
 
+  /* X = x - 3 pi/4
+     cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
+     = 1/sqrt(2) * (-cos(x) + sin(x))
+     sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
+     = -1/sqrt(2) * (sin(x) + cos(x))
+     cf. Fdlibm.  */
+  __sincosl (xx, &s, &c);
+  ss = -s - c;
+  cc = s - c;
+  if (xx <= LDBL_MAX / 2.0L)
+    {
+      z = __cosl (xx + xx);
+      if ((s * c) > 0)
+	cc = z / ss;
+      else
+	ss = z / cc;
+    }
+
+  if (xx > 0x1p256L)
+    {
+      z = ONEOSQPI * cc / __ieee754_sqrtl (xx);
+      if (x < 0)
+	z = -z;
+      return z;
+    }
+
   xinv = 1.0L / xx;
   z = xinv * xinv;
   if (xinv <= 0.25)
@@ -767,20 +794,6 @@ __ieee754_j1l (long double x)
   p = 1.0L + z * p;
   q = z * q;
   q = q * xinv + 0.375L * xinv;
-  /* X = x - 3 pi/4
-     cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
-     = 1/sqrt(2) * (-cos(x) + sin(x))
-     sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
-     = -1/sqrt(2) * (sin(x) + cos(x))
-     cf. Fdlibm.  */
-  __sincosl (xx, &s, &c);
-  ss = -s - c;
-  cc = s - c;
-  z = __cosl (xx + xx);
-  if ((s * c) > 0)
-    cc = z / ss;
-  else
-    ss = z / cc;
   z = ONEOSQPI * (p * cc - q * ss) / __ieee754_sqrtl (xx);
   if (x < 0)
     z = -z;
@@ -850,6 +863,27 @@ __ieee754_y1l (long double x)
       return p;
     }
 
+  /* X = x - 3 pi/4
+     cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
+     = 1/sqrt(2) * (-cos(x) + sin(x))
+     sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
+     = -1/sqrt(2) * (sin(x) + cos(x))
+     cf. Fdlibm.  */
+  __sincosl (xx, &s, &c);
+  ss = -s - c;
+  cc = s - c;
+  if (xx <= LDBL_MAX / 2.0L)
+    {
+      z = __cosl (xx + xx);
+      if ((s * c) > 0)
+	cc = z / ss;
+      else
+	ss = z / cc;
+    }
+
+  if (xx > 0x1p256L)
+    return ONEOSQPI * ss / __ieee754_sqrtl (xx);
+
   xinv = 1.0L / xx;
   z = xinv * xinv;
   if (xinv <= 0.25)
@@ -911,20 +945,6 @@ __ieee754_y1l (long double x)
   p = 1.0L + z * p;
   q = z * q;
   q = q * xinv + 0.375L * xinv;
-  /* X = x - 3 pi/4
-     cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4)
-     = 1/sqrt(2) * (-cos(x) + sin(x))
-     sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4)
-     = -1/sqrt(2) * (sin(x) + cos(x))
-     cf. Fdlibm.  */
-  __sincosl (xx, &s, &c);
-  ss = -s - c;
-  cc = s - c;
-  z = __cosl (xx + xx);
-  if ((s * c) > 0)
-    cc = z / ss;
-  else
-    ss = z / cc;
   z = ONEOSQPI * (p * ss + q * cc) / __ieee754_sqrtl (xx);
   return z;
 }
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c b/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
index 117bd0f052..abc78a35bd 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
@@ -52,7 +52,7 @@ __ieee754_acoshl(long double x)
 	    return __ieee754_logl(2.0*x-one/(x+__ieee754_sqrtl(t-one)));
 	} else {			/* 1<x<2 */
 	    t = x-one;
-	    return __log1p(t+__sqrtl(2.0*t+t*t));
+	    return __log1p(t+__ieee754_sqrtl(2.0*t+t*t));
 	}
 }
 strong_alias (__ieee754_acoshl, __acoshl_finite)
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
index 82363906b0..9fd61983e3 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_expl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
@@ -70,11 +70,11 @@
 static const long double C[] = {
 /* Smallest integer x for which e^x overflows.  */
 #define himark C[0]
- 709.08956571282405153382846025171462914L,
+ 709.78271289338399678773454114191496482L,
 
 /* Largest integer x for which e^x underflows.  */
 #define lomark C[1]
--744.44007192138121808966388925909996033L,
+-744.44007192138126231410729844608163411L,
 
 /* 3x2^96 */
 #define THREEp96 C[2]
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_logl.c b/sysdeps/ieee754/ldbl-128ibm/e_logl.c
index 14f47ebade..15b5edfab3 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_logl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_logl.c
@@ -182,6 +182,9 @@ static const long double
   ln2a = 6.93145751953125e-1L,
   ln2b = 1.4286068203094172321214581765680755001344E-6L;
 
+static const long double
+  ldbl_epsilon = 0x1p-106L;
+
 long double
 __ieee754_logl(long double x)
 {
@@ -258,7 +261,12 @@ __ieee754_logl(long double x)
     }
   /* Series expansion of log(1+z).  */
   w = z * z;
-  y = ((((((((((((l15 * z
+  /* Avoid spurious underflows.  */
+  if (__glibc_unlikely(w <= ldbl_epsilon))
+    y = 0.0L;
+  else
+    {
+      y = ((((((((((((l15 * z
 		  + l14) * z
 		 + l13) * z
 		+ l12) * z
@@ -271,7 +279,8 @@ __ieee754_logl(long double x)
 	 + l5) * z
 	+ l4) * z
        + l3) * z * w;
-  y -= 0.5 * w;
+      y -= 0.5 * w;
+    }
   y += e * ln2b;  /* Base 2 exponent offset times ln(2).  */
   y += z;
   y += logtbl[k-26]; /* log(t) - (t-1) */
diff --git a/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h b/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
index be9ac71cb0..1cce1fc4dc 100644
--- a/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
+++ b/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
@@ -125,7 +125,7 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
 /* Handy utility functions to pack/unpack/cononicalize and find the nearbyint
    of long double implemented as double double.  */
 static inline long double
-ldbl_pack (double a, double aa)
+default_ldbl_pack (double a, double aa)
 {
   union ibm_extended_long_double u;
   u.dd[0] = a;
@@ -134,7 +134,7 @@ ldbl_pack (double a, double aa)
 }
 
 static inline void
-ldbl_unpack (long double l, double *a, double *aa)
+default_ldbl_unpack (long double l, double *a, double *aa)
 {
   union ibm_extended_long_double u;
   u.d = l;
@@ -142,6 +142,12 @@ ldbl_unpack (long double l, double *a, double *aa)
   *aa = u.dd[1];
 }
 
+#ifndef ldbl_pack
+# define ldbl_pack   default_ldbl_pack
+#endif
+#ifndef ldbl_unpack
+# define ldbl_unpack default_ldbl_unpack
+#endif
 
 /* Convert a finite long double to canonical form.
    Does not handle +/-Inf properly.  */
diff --git a/sysdeps/ieee754/ldbl-128ibm/w_expl.c b/sysdeps/ieee754/ldbl-128ibm/w_expl.c
index a5e72b2170..70fe5f693e 100644
--- a/sysdeps/ieee754/ldbl-128ibm/w_expl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/w_expl.c
@@ -1,6 +1,24 @@
-/* Looks like we can use ieee854 w_expl.c as is for IBM extended format. */
+#include <math.h>
+#include <math_private.h>
 #include <math_ldbl_opt.h>
-#undef weak_alias
-#define weak_alias(n,a)
-#include <sysdeps/ieee754/ldbl-128/w_expl.c>
+
+static const long double o_thres = 709.78271289338399678773454114191496482L;
+static const long double u_thres = -744.44007192138126231410729844608163411L;
+
+long double __expl(long double x)	/* wrapper exp  */
+{
+  long double z;
+  z = __ieee754_expl(x);
+  if (_LIB_VERSION == _IEEE_)
+    return z;
+  if (__finitel(x))
+    {
+      if (x >= o_thres)
+	return __kernel_standard_l(x,x,206); /* exp overflow  */
+      else if (x <= u_thres)
+	return __kernel_standard_l(x,x,207); /* exp underflow  */
+    }
+  return z;
+}
+hidden_def (__expl)
 long_double_symbol (libm, __expl, expl);
diff --git a/sysdeps/ieee754/ldbl-96/e_j1l.c b/sysdeps/ieee754/ldbl-96/e_j1l.c
index 785c0b0676..4c13018aea 100644
--- a/sysdeps/ieee754/ldbl-96/e_j1l.c
+++ b/sysdeps/ieee754/ldbl-96/e_j1l.c
@@ -203,7 +203,7 @@ __ieee754_y1l (long double x)
       __sincosl (x, &s, &c);
       ss = -s - c;
       cc = s - c;
-      if (ix < 0x7fe00000)
+      if (ix < 0x7ffe)
 	{			/* make sure x+x not overflow */
 	  z = __cosl (x + x);
 	  if ((s * c) > zero)