summaryrefslogtreecommitdiff
path: root/sysdeps/alpha/memcpy.S
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1996-11-06 04:24:40 +0000
committerUlrich Drepper <drepper@redhat.com>1996-11-06 04:24:40 +0000
commit2c6fe0bd3b270fc644dd4c773f2d47b93f404efe (patch)
treea578bcc93bbeaafacb6012213c458e33b7907528 /sysdeps/alpha/memcpy.S
parentf5311448f83eada5c5cabf55aae2619dcb1869c0 (diff)
update from main archive 961105cvs/libc-961106
Wed Nov 6 04:30:26 1996 Ulrich Drepper <drepper@cygnus.com> * sysdeps/unix/sysv/linux/syscalls.list: Add weak alias llseek for _llseek syscall. Reported by Andy Sewell <puck@pookhill.demon.co.uk>. * string/argz.h: Don't protect by __USE_GNU. Tue Nov 5 23:38:28 1996 Ulrich Drepper <drepper@cygnus.com> * Lots of files: Update and reformat copyright. * Makefile (headers): Add xopen_lim.h. * catgets/nl_types.h: Move __BEGIN_DECLS before definition of nl_catd. * grp/grp.h: Define setgrent, getgrent, endgrent, and getgrent_r if __USE_XOPEN_EXTENDED is defined. * pwd/pwd.h: Define setpwent, getpwent, endpwent, and getpwent_r if __USE_XOPEN_EXTENDED is defined. * io/Makefile (routines): Add lchown. * io/sys/poll.h: Add definition of POLLWRNORM. * io/sys/stat.h: Declare lstat, fchmod, mknod when __USE_XOPEN_EXTENDED is defined. * libio/Makefile (routines): Add obprintf. * libio/obprintf.c: New file. * libio/iolibio.h: Add prototypes for _IO_obstack_vprintf and _IO_obstack_printf. * libio/libio.h: Fix typo. * libio/stdio.h: Declare tempnam if __USE_XOPEN_EXTENDED is defined. Add prototypes for obstack_vprintf and obstack_printf. * manual/creature.texi: Describe _XOPEN_SOURCE macro. * manual/intro.texi: Add reference to NSS chapter. * manual/libc.texinfo: Update UPDATED. Comment out `@printindex cp'. It works again. * manual/memory.texi: Add description for obstack_ptr_grow, obstack_int_grow, obstack_ptr_grow_fast, and obstack_int_grow_fast. * manual/nss.texi: Add a few @cindex entries and change NSS_STATUS_* index entries to @vindex. * manual/users.texi: Correct @cindex entry for Netgroup. * math/mathcalls.h: Use __USE_XOPEN and __USE_XOPEN_EXTENDED to make declarations visible for X/Open sources. * misc/search.h: Declare insque/remque only is __USE_SVID or __USE_XOPEN_EXTENDED is defined. * misc/sys/uio.h (readv, writev): Change return value from int to ssize_t. * posix/Makefile (headers): Add re_comp.h. * posix/re_comp.h: New file. XPG interface to regex functions. 
* posix/getconf.c: Add all names from XPG4.2. * posix/posix1_lim.h: Increase minimum values for _POSIX_CHILD_MAX and _POSIX_OPEN_MAX to minimums from XPG4.2. * sysdeps/generic/confname.h: Add all _SC_* names from XPG4.2. * sysdeps/posix/sysconf.c: Handle new _SC_* values. * sysdeps/stub/sysconf.c: Likewise. * posix/unistd.h: Add declaration of ualarm and lchown. Declare usleep, fchown, fchdir, nice, getpgid, setsid, getsid, setreuid, setregid, vfork, ttyslot, symlink, readlink, gethostid, truncate, ftruncate, getdtablesize, brk, sbrk, lockf when __USE_XOPEN_EXTENDED is defined. * posix/sys/wait.h: Declare wait3 if __USE_XOPEN_EXTENDED is defined. * shadow/shadow.h: Define SHADOW using _PATH_SHADOW. * sysdeps/generic/paths.h: Define _PATH_SHADOW. * sysdeps/unix/sysv/linux/paths.h: Likewise. * signal/signal.h: Declare killpg, sigstack and sigaltstack when __USE_XOPEN_EXTENDED is defined. * stdio/stdio.h: Declare tempnam when __USE_XOPEN is defined. * stdlib/stdlib.h: Make rand48 functions available when __USE_XOPEN is defined. Likewise for valloc, putenv, realpath, [efg]cvt*, and getsubopt functions. * string/string.h: Make memccpy, strdup, bcmp, bcopy, bzero, index, and rindex available when __USE_XOPEN_EXTENDED is defined. * sysdeps/mach/getpagesize.c: De-ANSI-fy. Change return type to int. * sysdeps/posix/getpagesize.c: Likewise. * sysdeps/stub/getpagesize.c: Likewise. * sysdeps/unix/getpagesize.c: Likewise. * time/africa: Update from tzdata1996l. * time/asia: Likewise. * time/australia: Likewise. * time/europe: Likewise. * time/northamerica: Likewise. * time/pacificnew: Likewise. * time/southamerica: Likewise. * time/tzfile.h: Update from tzcode1996m. * time/time.h: Declare strptime if __USE_XOPEN. Declare daylight and timezone also if __USE_XOPEN. * time/sys/time.h: Remove declaration of ualarm. * wctype/wctype.h: Just reference ISO C standard. 
Tue Nov 5 01:26:32 1996 Richard Henderson <rth@tamu.edu> * crypt/Makefile: Add crypt routines to libc as well iff $(crypt-in-libc) is set. Do this for temporary binary compatibility on existing Linux/Alpha installations. * stdlib/div.c, sysdeps/generic/div.c: Move file to .../generic/. * stdlib/ldiv.c, sysdeps/generic/ldiv.c: Likewise. * stdlib/lldiv.c, sysdeps/generic/lldiv.c: Likewise. * sysdeps/alpha/Makefile (divrem): Add divlu, dviqu, remlu, and remqu. * sysdeps/alpha/div.S: New file. * sysdeps/alpha/ldiv.S: New file. * sysdeps/alpha/lldiv.S: New file. * sysdeps/alpha/divrem.h: Merge signed and unsigned division. Take pointers from Linus and tighten the inner loops a bit. * sysdeps/alpha/divl.S: Change defines for merged routines. * sysdeps/alpha/divq.S: Likewise. * sysdeps/alpha/reml.S: Likewise. * sysdeps/alpha/remq.S: Likewise. * sysdeps/alpha/divlu.S: Remove file. * sysdeps/alpha/divqu.S: Likewise. * sysdeps/alpha/remlu.S: Likewise. * sysdeps/alpha/remqu.S: Likewise. * sysdeps/alpha/bsd-_setjmp.S: If PROF, call _mcount. * sysdeps/alpha/bsd-setjmp.S: Likewise. * sysdeps/alpha/bzero.S: Likewise. * sysdeps/alpha/ffs.S: Likewise. * sysdeps/alpha/htonl.S: Likewise. * sysdeps/alpha/htons.S: Likewise. * sysdeps/alpha/memchr.S: Likewise. * sysdeps/alpha/memset.S: Likewise. * sysdeps/alpha/s_copysign.S: Likewise. * sysdeps/alpha/s_fabs.S: Likewise. * sysdeps/alpha/setjmp.S: Likewise. * sysdeps/alpha/stpcpy.S: Likewise. * sysdeps/alpha/stpncpy.S: Likewise. * sysdeps/alpha/strcat.S: Likewise. * sysdeps/alpha/strchr.S: Likewise. * sysdeps/alpha/strcpy.S: Likewise. * sysdeps/alpha/strlen.S: Likewise. * sysdeps/alpha/strncat.S: Likewise. * sysdeps/alpha/strncpy.S: Likewise. * sysdeps/alpha/strrchr.S: Likewise. * sysdeps/alpha/udiv_qrnnd.S: Likewise. Fix private labels. Convert two small jumps to use conditional moves. * sysdeps/unix/alpha/sysdep.h: Compress all __STDC__ nastiness. (PSEUDO): If PROF, call _mcount. 
* sysdeps/unix/sysv/linux/alpha/brk.S: If PROF, call _mcount. * sysdeps/unix/sysv/linux/alpha/clone.S: Likewise. * sysdeps/unix/sysv/linux/alpha/ieee_get_fp_control.S: Likewise. * sysdeps/unix/sysv/linux/alpha/ieee_set_fp_control.S: Likewise. * sysdeps/unix/sysv/linux/alpha/llseek.S: Likewise. * sysdeps/unix/sysv/linux/alpha/sigsuspend.S: Likewise. * sysdeps/unix/sysv/linux/alpha/syscall.S: Likewise. * sysdeps/alpha/memcpy.S: New file. Odd layout because it should eventually contain memmove as well. * sysdeps/alpha/strcmp.S: New file. * sysdeps/alpha/strncmp.S: New file. * sysdeps/alpha/w_sqrt.S: New file. Tue Nov 5 18:06:06 1996 Ulrich Drepper <drepper@cygnus.com> * sysdeps/mach/hurd/ttyname_r.c: Use `size_t' for len variable. Tue Nov 5 12:09:29 1996 Ulrich Drepper <drepper@cygnus.com> * sysdep/generic/sysdep.h: Define END only if not yet defined. * sysdep/unix/sysdep.h: Define PSEUDO_END only if not yet defined. Reported by Thomas Bushnell, n/BSG. Mon Nov 4 22:46:53 1996 Ulrich Drepper <drepper@cygnus.com> * manual/users.texi (Netgroup Data): Remove { } around @cindex. Mon Nov 4 19:07:05 1996 Ulrich Drepper <drepper@cygnus.com> * malloc/calloc.c: Check for overflow before trying to allocate memory. Proposed by Neil Matthews <nm@adv.sbc.sony.co.jp>. Fri Nov 1 18:18:32 1996 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * manual/llio.texi (Operating Modes): Add missing arguments to @deftypevr in O_NONBLOCK description. * manual/time.texi (Time Zone Functions): Enclose type name in braces in description of tzname. FIXME: this does not yet work correctly in info. Sun Nov 3 17:29:06 1996 Ulrich Drepper <drepper@cygnus.com> * features.h: Add X/Open macros. * posix/unistd.h: Define X/Open macros. * sysdeps/generic/confname.h: Add _SC_XOPEN_XCU_VERSION, _SC_XOPEN_UNIX, _SC_XOPEN_CRYPT, _SC_XOPEN_ENH_I18N, _SC_XOPEN_SHM, _SC_2_CHAR_TERM, _SC_2_C_VERSION, and _SC_2_UPE. * sysdeps/posix/sysconf.c: Handle new constants. * sysdeps/stub/sysconf.c: Likewise. 
* sysdeps/unix/sysv/linux/posix_opt.h: Add definition of _XOPEN_SHM. * catgets/catgets.c (catopen): Set errno to ENOMEM when we run out of memory. (catgets): Set errno to EBADF when catalog handle is invalid. Set errno to ENOMSG when translation is not available. (catclose): Set errno to EBADF when catalog handle is invalid. * ctype/ctype.h: Declare isascii and toascii when __USE_XOPEN. Likewise for _toupper and _tolower. * manual/arith.texi: Document strtoq, strtoll, strtouq, strtoull, strtof, and strtold. * manual/math.texi: Document HUGE_VALf and HUGE_VALl. * manual/stdio.h: Document ' flag for numeric formats of scanf. * manual/users.texi: Document that cuserid shouldn't be used. * misc/Makefile (routines): Add dirname. (headers): Add libgen.h. (tests): Add tst-dirname. * misc/dirname.c: New file. * misc/libgen.h: New file. * misc/tst-dirname.c: New file. * misc/search.h: Parameter of hcreate must be of type size_t. * misc/hsearch.c: Likewise. * misc/hsearch_r.c: Likewise for hcreate_r. * misc/search.h: Parameters of insque and remque must be `void *'. * misc/insremque.c: Likewise. * posix/unistd.h: Move declarations of mktemp and mkstemp to... * stdlib/stdlib.h: ...here. * posix/unistd.h [__USE_XOPEN]: Add prototypes for crypt, setkey, encrypt, and swab. * stdio-common/printf-parse.h (struct printf_spec): Add pa_wchar and pa_wstring. (parse_one_spec): Remove Linux compatibility code. Recognize %C and %S formats. * stdio-common/printf.h: Add PA_WCHAR and PA_WSTRING. * stdio-common/vfprintf.c: Add implementation of %C and %S format. * stdio-common/vfscanf.c: Likewise for scanf. * stdlib/l64a.c: Return value for 0 must be the empty string. * stdlib/stdlib.h: Declare reentrant function from rand49 family only if __USE_REENTRANT. Declare rand48 functions also if __USE_XOPEN. * stdlib/strtol.c: Return 0 and set errno to EINVAL when BASE is not a legal value. Return 0 and set errno to EINVAL when strou* sees negativ number. * stdlib/tst-strtol.c: De-ANSI-fy. 
Change expected results for test of unsigned function and negative input. * string/stratcliff.c: Prevent warnings. * string.h: Move declaration of swab to <unistd.h>. * string/swab.c: De-ANSI-fy. * sysdeps/posix/cuserid.c: Implement using getpwuid_r. * sysdeps/posix/mkstemp.c: Include <stdlib.h> for prototype. * sysdeps/posix/mktemp.c: Likewise. * sysdeps/stub/mkstemp.c: Likewise. * sysdeps/stub/mktemp.c: Likewise. * sysvipc/sys/ipc.h: Prototypes of ftok have to be of types `const char *' and `int'. * sysvipc/ftok.c: Likewise. Make sure only lower 8 bits of PROJ_ID are used. Sun Nov 3 03:21:28 1996 Heiko Schroeder <Heiko.Schroeder@post.rwth-aachen.de> * locale/programs/ld-numeric.c (numeric_output): Compute idx[0] correctly. Sat Nov 2 17:44:32 1996 Ulrich Drepper <drepper@cygnus.com> * sysdeps/posix/cuserid.c: Use reentrant functions. * manual/users.texi: Tell that cuserid is marked to be withdrawn in XPG4.2. Sat Nov 2 14:26:37 1996 Ulrich Drepper <drepper@cygnus.com> Linus said he will make sure no system call will return a value in -1 ... -4095 as a valid result. * sysdeps/unix/sysv/linux/i386/sysdep.h: Correct test for error. * sysdeps/unix/sysv/linux/i386/syscall.S: Likewise. * sysdeps/unix/sysv/linux/m68k/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/m68k/syscall.S: Likewise. Sat Nov 2 16:54:49 1996 NIIBE Yutaka <gniibe@mri.co.jp> * sysdeps/stub/lockfile.c [!USE_IN_LIBIO]: Define weak alias for __funlockfile, not a circular alias. Define __IO_ftrylockfile if USE_IN_LIBIO and __ftrylockfile if not, not vice versa. * sysdeps/unix/sysv/linux/i386/sysdep.S (__errno_location): Make it a weak symbol. * sysdeps/unix/sysv/linux/m68k/sysdep.S (__errno_location): Likewise. Likewise. * crypt/Makefile (rpath-link): Extend search path to current directory.
Diffstat (limited to 'sysdeps/alpha/memcpy.S')
-rw-r--r--sysdeps/alpha/memcpy.S276
1 files changed, 276 insertions, 0 deletions
diff --git a/sysdeps/alpha/memcpy.S b/sysdeps/alpha/memcpy.S
new file mode 100644
index 0000000000..4ee9c115db
--- /dev/null
+++ b/sysdeps/alpha/memcpy.S
@@ -0,0 +1,276 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+ Contributed by Richard Henderson (rth@tamu.edu)
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If
+ not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+ Cambridge, MA 02139, USA. */
+
+
+/* This is the child of the C-with-inline-assembly memcpy posted by
+ Martin Ostermann (ost@comnets.rwth-aachen.de).
+
+ This is generally scheduled for the EV5, but whenever necessary and
+ possible, the autoswap slotting feature of the EV5 is used so that the
+ code lays out nicely for the EV4 as well. */
+
+#include <alpha/regdef.h>
+
+ .set noreorder
+
+ .text
+
+ .ent copy_fwd_aligned
+copy_fwd_aligned:
+ .frame sp, 0, ra, 0
+ .prologue 0
+
+ /* Aligned forward copy main loop. On entry to this basic block:
+ t0 == source word waiting to be stored
+ t2 == loop counter (whole words to store before the final word)
+ a0 == destination pointer
+ a1 == source pointer
+ a2 mod 8 == byte count in final word (0 means a full final word) */
+ .align 4
+$fa_loop:
+ and t2, 7, t1 # e0 : t1 = word count mod 8
+ beq t1, 1f # .. e1 : nothing odd, go straight to the unrolled loop
+
+0: stq_u t0, 0(a0) # e0 : store the pending word
+ subq t1, 1, t1 # .. e1 :
+ ldq_u t0, 8(a1) # e0 : copy up to seven words
+ addq a0, 8, a0 # .. e1 :
+ addq a1, 8, a1 # e0 :
+ bne t1, 0b # .. e1 :
+
+1: bic t2, 7, t2 # e0 : t2 = remaining count, a multiple of 8
+ beq t2, $fa_tail # .. e1 :
+
+2: stq_u t0, 0(a0) # e0 : store the pending word
+ addq a0, 64, a0 # .. e1 : advance dst early, stores below use negative offsets
+ ldq_u t3, 8(a1) # e0 : copy eight words as fast as we can
+ ldq_u t4, 16(a1) # .. e1 :
+ ldq_u t5, 24(a1) # e0 :
+ ldq_u t6, 32(a1) # .. e1 :
+ ldq_u t7, 40(a1) # e0 :
+ ldq_u t8, 48(a1) # .. e1 :
+ ldq_u t9, 56(a1) # e0 :
+ ldq_u t0, 64(a1) # .. e1 : becomes the pending word of the next pass
+ stq_u t3, -56(a0) # e0 :
+ subq t2, 8, t2 # .. e1 :
+ stq_u t4, -48(a0) # e0 :
+ addq a1, 64, a1 # .. e1 :
+ stq_u t5, -40(a0) # e0 :
+ stq_u t6, -32(a0) # e0 :
+ stq_u t7, -24(a0) # e0 :
+ stq_u t8, -16(a0) # e0 :
+ stq_u t9, -8(a0) # e0 :
+ bne t2, 2b # .. e1 :
+
+ /* Take care of a partial word tail; t0 still holds the final source word. */
+$fa_tail:
+ and a2, 7, t3 # e0 : t3 = bytes used in the final word
+ bne t3, 1f # .. e1 (zdb)
+
+ /* Aligned copy, aligned tail, final store. */
+ stq_u t0, 0(a0)
+ ret
+
+1: ldq_u t1, 0(a0) # e1 : fetch the dst word to be merged
+ mskql t0, a2, t0 # .. e1 : keep src bytes below the end offset
+ mskqh t1, a2, t1 # e0 (stall) keep dst bytes at and above the end offset
+ bis t0, t1, t0 # e1 :
+ stq_u t0, 0(a0) # e0 :
+ ret # .. e1 :
+
+ /* Actual entry point: memcpy branches here when src and dst are co-aligned (equal mod 8). */
+ .align 3
+$fwd_aligned:
+ ldq_u t0, 0(a1) # e0 : first source word
+ and a0, 7, t3 # .. e1 : t3 = dst offset within its word
+ addq a2, t3, a2 # e0 : a2 = byte count measured from the word start
+ subq a2, 1, t2 # e1 :
+ sra t2, 3, t2 # e0 : t2 = words to store ahead of the final word
+ beq t3, $fa_loop # .. e1 : dst word-aligned, no head merge needed
+
+ ldq_u t1, 0(a0) # e0 : dst word holding the bytes in front of the copy
+ beq t2, $fa_small # .. e1 : everything lands in this one word
+ mskqh t0, a0, t0 # e0 : drop src bytes below the start offset
+ mskql t1, a0, t3 # e0 : keep dst bytes below the start offset
+ bis t0, t3, t0 # e0 :
+ br $fa_loop # .. e1 :
+
+ /* The move affects exactly one destination word. */
+$fa_small:
+ mskqh t0, a0, t0 # e0 : drop src bytes below the start offset
+ and a2, 7, t4 # .. e1 : t4 = end offset, 0 if the copy reaches the word end
+ mskql t1, a0, t3 # e0 : t3 = dst bytes below the start offset
+ bne t4, 1f # .. e1 :
+
+ or t0, t3, t0 # e0 : no tail to preserve
+ unop # :
+ stq_u t0, 0(a0) # e0 :
+ ret # .. e1 :
+
+1: mskql t0, a2, t0 # e0 : drop src bytes at and above the end offset
+ mskqh t1, a2, t1 # e0 : keep dst bytes at and above the end offset
+ or t0, t3, t0 # e0 :
+ or t0, t1, t0 # e1 :
+ stq_u t0, 0(a0) # e0 :
+ ret # .. e1 :
+
+ .end copy_fwd_aligned
+
+ .ent memcpy
+ .globl memcpy
+ .align 3
+memcpy:
+ .frame sp, 0, ra, 0
+#ifdef PROF
+ ldgp gp, 0(pv) # at entry gp is derived from pv; ra points into the caller
+ lda AT, _mcount # NOTE(review): AT is used with no visible .set noat, confirm the assembler accepts it
+ jsr AT, (AT), _mcount
+ .prologue 1
+#else
+ .prologue 0
+#endif
+ /* memcpy: a0 = dst, a1 = src, a2 = byte count; v0 returns the original dst. */
+ mov a0, v0 # return value is set before a0 is advanced
+ beq a2, $zero_length # nothing to copy, return at once
+
+ /* Are source and destination co-aligned? */
+ xor a0, a1, t0
+ unop
+ and t0, 7, t0 # zero when src and dst agree in their low three bits
+ beq t0, $fwd_aligned # whole-word copy path
+ br $fwd_unaligned # shift-and-merge copy path
+
+ .end memcpy
+
+ .ent copy_fwd_unaligned
+copy_fwd_unaligned:
+ .frame sp, 0, ra, 0
+ .prologue 0
+
+ /* Unaligned forward copy main loop. On entry to this basic block:
+ t0 == source low word, unshifted
+ t2 == loop counter (whole destination words before the final one)
+ t7 == last source byte + 1
+ a0 == destination pointer
+ a1 == source pointer, biased so that a1 mod 8 is the src/dst byte skew
+ a2 mod 8 == byte count in final word (0 means a full final word) */
+ .align 4
+$fu_loop:
+ beq t2, $fu_tail # e1 : no whole words left
+ blbc t2, 0f # e1 : even count, use the two-word loop directly
+
+ ldq_u t1, 8(a1) # e1 : copy one unaligned word
+ extql t0, a1, t3 # .. e0 : low part comes from the current source word
+ addq a1, 8, a1 # e0 :
+ addq a0, 8, a0 # .. e1 :
+ extqh t1, a1, t4 # e0 : high part comes from the next source word
+ subq t2, 1, t2 # .. e1 :
+ mov t1, t0 # e0 : next word becomes the current one
+ or t3, t4, t3 # .. e1 :
+ stq_u t3, -8(a0) # e0 :
+ beq t2, $fu_tail # .. e1 :
+
+0: ldq_u t1, 8(a1) # e1 : copy two unaligned words
+ extql t0, a1, t3 # .. e0 :
+ ldq_u t0, 16(a1) # e0 : becomes the current word of the next pass
+ subq t2, 2, t2 # .. e1 :
+ extqh t1, a1, t4 # e0 :
+ addq a0, 16, a0 # .. e1 :
+ extql t1, a1, t5 # e0 :
+ or t3, t4, t3 # .. e1 : first destination word
+ extqh t0, a1, t6 # e0 :
+ addq a1, 16, a1 # .. e1 :
+ stq_u t3, -16(a0) # e0 :
+ or t5, t6, t5 # .. e1 : second destination word
+ stq_u t5, -8(a0) # e0 :
+ bne t2, 0b # .. e1 :
+
+ /* Take care of a partial word tail. */
+$fu_tail:
+ ldq_u t4, -1(t7) # e1 : word holding the last source byte
+ extql t0, a1, t3 # .. e0 :
+ extqh t4, a1, t4 # e0 (stall)
+ and a2, 7, t5 # .. e1 : t5 = bytes used in the final word
+ or t3, t4, t3 # e0 :
+ beq t5, 1f # .. e1 : full final word, store it unmerged
+
+ ldq_u t1, 0(a0) # e1 : fetch the dst word to be merged
+ mskql t3, a2, t3 # .. e0 : keep src bytes below the end offset
+ mskqh t1, a2, t1 # e0 (stall) keep dst bytes at and above the end offset
+ or t1, t3, t3 # e1 :
+
+1: stq_u t3, 0(a0) # e0 :
+ ret # .. e1 :
+
+ /* The entry point to the unaligned forward copy. */
+ .align 3
+$fwd_unaligned:
+ ldq_u t0, 0(a1) # e0 : load initial bits of src
+ addq a1, a2, t7 # .. e1 : record last byte + 1 of src
+ and a0, 7, t3 # e0 : find dst misalignment
+ addq a2, t3, a2 # e1 : find number of words affected
+ subq a2, 1, t2 # e0 :
+ cmple a2, 8, t4 # .. e1 : are we dealing with a small block?
+ subq a1, t3, a1 # e0 : bias src so extracts line up with dst words
+ bne t4, $fu_small # .. e1 :
+ srl t2, 3, t2 # e0 : t2 = words to store ahead of the final word
+ beq t3, $fu_loop # .. e1 : dst word-aligned, no head merge needed
+
+ /* Take care of an unaligned dst head. */
+ ldq_u t5, 0(a0) # e0 : dst word holding the bytes in front of the copy
+ ldq_u t1, 8(a1) # .. e1 :
+ extql t0, a1, t3 # e0 :
+ addq a0, 8, a0 # .. e1 : low three bits of a0 are unchanged
+ extqh t1, a1, t4 # e0 :
+ addq a1, 8, a1 # .. e1 :
+ mskql t5, a0, t5 # e0 : keep dst bytes below the start offset
+ or t3, t4, t3 # .. e1 :
+ mskqh t3, a0, t3 # e0 : drop src bytes below the start offset
+ subq t2, 1, t2 # .. e1 :
+ or t3, t5, t3 # e0 :
+ mov t1, t0 # .. e1 :
+ stq_u t3, -8(a0) # e0 :
+ br $fu_loop # .. e1 :
+
+ /* The move affects exactly one destination word; keep dst bytes on both sides. */
+ .align 3
+$fu_small:
+ ldq_u t2, 0(a0) # e1 : the single destination word
+ extql t0, a1, t3 # .. e0 :
+ ldq_u t1, -1(t7) # e0 : word holding the last source byte
+ and a2, 7, t8 # .. e1 : t8 = end offset, 0 if the copy reaches the word end
+ mskqh t2, a2, t6 # e0 : dst bytes at and above the end offset
+ mskql t2, a0, t5 # e0 : dst bytes below the start offset
+ extqh t1, a1, t4 # e0 :
+ cmovne t8, t6, t8 # .. e1 : t8 = dst tail, or 0 when there is no tail
+ or t3, t4, t3 # e0 : assembled source bytes
+ or t5, t8, t5 # .. e1 : t5 = all dst bytes that must survive
+ mskqh t3, a0, t3 # e0 : drop src bytes below the start offset
+ and a2, 7, t8 # .. e1 :
+ mskql t3, a2, t6 # e0 : src bytes below the end offset
+ cmovne t8, t6, t3 # e1 : clip src at the end offset (the result must land in t3)
+ or t3, t5, t3 # e0 :
+ unop # :
+ stq_u t3, 0(a0) # e0 :
+
+$zero_length:
+ ret # .. e1 :
+
+ .end copy_fwd_unaligned