author     Ulrich Drepper <drepper@redhat.com>    2001-05-27 06:45:14 +0000
committer  Ulrich Drepper <drepper@redhat.com>    2001-05-27 06:45:14 +0000
commit     995a692a486b2e250f598097543dd04135cec327 (patch)
tree       e50617b52eb6477edee1186983cb687e60125c38 /sysdeps/ia64/memmove.S
parent     17ffa4986db1ce263a8a3c4c322dc237cfaa2777 (diff)
Update.
2001-05-22  David Mosberger  <davidm@hpl.hp.com>

	* sysdeps/ia64/memmove.S: Increase MEMLAT from 6 to 21 for
	better performance.
	* sysdeps/ia64/memcpy.S: Likewise.
	* sysdeps/ia64/bcopy.S: New file.
	* sysdeps/ia64/bzero.S: New file (derived from memset.S).

2001-05-26  Ulrich Drepper  <drepper@redhat.com>

	* sysdeps/ia64/fpu/libm-test-ulps: Add deltas for tanh(-0.7).
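The patch below raises MEMLAT from 6 to 21 and defines it locally instead of pulling it in via softpipe.h (the #include is dropped). MEMLAT is the number of software-pipeline stages between each load and the store that consumes it, so a larger value keeps more loads in flight and hides memory latency. The new Nrot macro then rounds the required rotating-register count, 2*MEMLAT + 3, up to the next multiple of 8, because the IA-64 rotating register region is allocated in units of 8 registers. A minimal C sketch of that rounding arithmetic, using the values from the diff (the program itself is illustrative, not glibc code):

#include <stdio.h>

/* Mirror the definitions added by the patch.  */
#define MEMLAT 21
#define NROT (((2 * MEMLAT + 3) + 7) & ~7)

int
main (void)
{
  /* 2*21 + 3 = 45 registers are needed; adding 7 and clearing the
     low three bits rounds that up to 48, the next multiple of 8.  */
  printf ("MEMLAT = %d, Nrot = %d\n", MEMLAT, NROT);
  return 0;
}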
Diffstat (limited to 'sysdeps/ia64/memmove.S')
-rw-r--r--    sysdeps/ia64/memmove.S    22
1 file changed, 12 insertions, 10 deletions
diff --git a/sysdeps/ia64/memmove.S b/sysdeps/ia64/memmove.S
index a3f7edef92..7b9fe2265d 100644
--- a/sysdeps/ia64/memmove.S
+++ b/sysdeps/ia64/memmove.S
@@ -26,7 +26,7 @@
in2: byte count
The core of the function is the memcpy implementation used in memcpy.S.
- When bytes have to be copied backwards, only the easy case, when
+ When bytes have to be copied backwards, only the easy case, when
all arguments are multiples of 8, is optimised.
In this form, it assumes little endian mode. For big endian mode,
@@ -67,10 +67,12 @@
br.ctop.sptk .loop##shift ; \
br.cond.sptk .cpyfew ; /* deal with the remaining bytes */
+#define MEMLAT 21
+#define Nrot (((2*MEMLAT+3) + 7) & ~7)
+
ENTRY(memmove)
.prologue
- alloc r2 = ar.pfs, 3, 29, 0, 32
-#include "softpipe.h"
+ alloc r2 = ar.pfs, 3, Nrot - 3, 0, Nrot
.rotr r[MEMLAT + 2], q[MEMLAT + 1]
.rotp p[MEMLAT + 2]
mov ret0 = in0 // return value = dest
@@ -90,7 +92,7 @@ ENTRY(memmove)
and tmp4 = 7, tmp3 // tmp4 = (dest | src | len) & 7
cmp.le p6, p0 = dest, src // if dest <= src it's always safe
(p6) br.cond.spnt .forward // to copy forward
- add tmp3 = src, len;;
+ add tmp3 = src, len;;
cmp.lt p6, p0 = dest, tmp3 // if dest > src && dest < src + len
(p6) br.cond.spnt .backward // we have to copy backward
@@ -113,7 +115,7 @@ ENTRY(memmove)
(p6) br.cond.spnt .restore_and_exit;;// the one-word special case
adds adest = 8, dest // set adest one word ahead of dest
adds asrc = 8, src ;; // set asrc one word ahead of src
- nop.b 0 // get the "golden" alignment for
+ nop.b 0 // get the "golden" alignment for
nop.b 0 // the next loop
.l0:
(p[0]) ld8 r[0] = [src], 16
@@ -139,8 +141,8 @@ ENTRY(memmove)
.l1: // copy -dest % 8 bytes
ld1 value = [src], 1 // value = *src++
;;
- st1 [dest] = value, 1 // *dest++ = value
- br.cloop.dptk .l1
+ st1 [dest] = value, 1 // *dest++ = value
+ br.cloop.dptk .l1
.dest_aligned:
and sh1 = 7, src // sh1 = src % 8
and tmp2 = -8, len // tmp2 = len & -OPSIZ
@@ -148,7 +150,7 @@ ENTRY(memmove)
shr.u loopcnt = len, 3 // loopcnt = len / 8
and len = 7, len;; // len = len % 8
adds loopcnt = -1, loopcnt // --loopcnt
- addl tmp4 = @ltoff(.table), gp
+ addl tmp4 = @ltoff(.table), gp
addl tmp3 = @ltoff(.loop56), gp
mov ar.ec = MEMLAT + 1 // set EC
mov pr.rot = 1 << 16;; // set rotating predicates
@@ -174,7 +176,7 @@ ENTRY(memmove)
LOOP(40)
LOOP(48)
LOOP(56)
-
+
.src_aligned:
.l3:
(p[0]) ld8 r[0] = [src], 8
@@ -220,7 +222,7 @@ ENTRY(memmove)
adds src = -1, src // src points to the last byte
adds dest = -1, dest // dest points to the last byte
adds loopcnt = -1, len;; // loopcnt = len - 1
- mov ar.lc = loopcnt;; // set the loop counter
+ mov ar.lc = loopcnt;; // set the loop counter
.l6:
(p[0]) ld1 r[0] = [src], -1
(p[MEMLAT]) st1 [dest] = r[MEMLAT], -1
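The comments in the diff spell out the dispatch: copying forward is always safe when dest <= src, and also when dest >= src + len (no overlap); only when dest falls inside [src, src + len) must the copy run backward, so each source byte is read before it is overwritten. A C sketch of that control flow, with illustrative names and none of the software pipelining:

#include <stddef.h>

void *
sketch_memmove (void *dest, const void *src, size_t n)
{
  char *d = dest;
  const char *s = src;

  if (d <= s || d >= s + n)          /* safe to copy forward */
    for (size_t i = 0; i < n; i++)   /* the .forward path */
      d[i] = s[i];
  else                               /* dest > src && dest < src + len */
    for (size_t i = n; i > 0; i--)   /* the .backward path */
      d[i - 1] = s[i - 1];

  return d;                          /* mov ret0 = in0: return dest */
}

Comparing pointers into separate objects is technically unspecified in ISO C; the assembly operates on raw addresses, so the sketch only illustrates the branch structure, not a portable implementation.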