summaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
diff options
context:
space:
mode:
authorSiddhesh Poyarekar <siddhesh@redhat.com>2013-02-14 10:31:09 +0530
committerSiddhesh Poyarekar <siddhesh@redhat.com>2013-02-14 10:31:09 +0530
commitd6752ccd696c71d23cd3df8fb9cc60b61c32e65a (patch)
tree7fc0fe0151bc004056a5002c1b81c92159988085 /sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
parent70d9946a44ba381f81eb08c71cc150315cc112ad (diff)
New __sqr function as a faster special case of __mul
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power4/fpu/mpa.c')
-rw-r--r--sysdeps/powerpc/powerpc64/power4/fpu/mpa.c100
1 files changed, 100 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
index b1784f27c3..7ebf50b95d 100644
--- a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
@@ -687,6 +687,106 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
return;
}
+/* Square *X and store result in *Y. X and Y may not overlap. For P in
+ [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
+ error is bounded by 1.001 ULP. This is a faster special case of
+ multiplication. */
+void
+__sqr (const mp_no *x, mp_no *y, int p)
+{
+ long i, j, k, ip;
+ double u, yk;
+
+ /* Is z=0? */
+ if (__glibc_unlikely (X[0] == ZERO))
+ {
+ Y[0] = ZERO;
+ return;
+ }
+
+ /* We need not iterate through all X's since it's pointless to
+ multiply zeroes. */
+ for (ip = p; ip > 0; ip--)
+ if (X[ip] != ZERO)
+ break;
+
+ k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+ while (k > 2 * ip + 1)
+ Y[k--] = ZERO;
+
+ yk = ZERO;
+
+ while (k > p)
+ {
+ double yk2 = 0.0;
+ long lim = k / 2;
+
+ if (k % 2 == 0)
+ {
+ yk += X[lim] * X[lim];
+ lim--;
+ }
+
+ /* In __mul, this loop (and the one within the next while loop) run
+ between a range to calculate the mantissa as follows:
+
+ Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
+ + X[n] * Y[k]
+
+ For X == Y, we can get away with summing halfway and doubling the
+ result. For cases where the range size is even, the mid-point needs
+ to be added separately (above). */
+ for (i = k - p, j = p; i <= lim; i++, j--)
+ yk2 += X[i] * X[j];
+
+ yk += 2.0 * yk2;
+
+ u = (yk + CUTTER) - CUTTER;
+ if (u > yk)
+ u -= RADIX;
+ Y[k--] = yk - u;
+ yk = u * RADIXI;
+ }
+
+ while (k > 1)
+ {
+ double yk2 = 0.0;
+ long lim = k / 2;
+
+ if (k % 2 == 0)
+ {
+ yk += X[lim] * X[lim];
+ lim--;
+ }
+
+ /* Likewise for this loop. */
+ for (i = 1, j = k - 1; i <= lim; i++, j--)
+ yk2 += X[i] * X[j];
+
+ yk += 2.0 * yk2;
+
+ u = (yk + CUTTER) - CUTTER;
+ if (u > yk)
+ u -= RADIX;
+ Y[k--] = yk - u;
+ yk = u * RADIXI;
+ }
+ Y[k] = yk;
+
+ /* Squares are always positive. */
+ Y[0] = 1.0;
+
+ EY = 2 * EX;
+ /* Is there a carry beyond the most significant digit? */
+ if (__glibc_unlikely (Y[1] == ZERO))
+ {
+ for (i = 1; i <= p; i++)
+ Y[i] = Y[i + 1];
+ EY--;
+ }
+}
+
/* Invert *X and store in *Y. Relative error bound:
- For P = 2: 1.001 * R ^ (1 - P)
- For P = 3: 1.063 * R ^ (1 - P)