summaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/powerpc32/power7/memcpy.S
diff options
context:
space:
mode:
authorSamuel Thibault <samuel.thibault@ens-lyon.org>2015-03-25 02:28:16 +0100
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2015-03-25 02:28:16 +0100
commitbf8c09f8484522144b4dddd9cf74efbd998e8f8f (patch)
tree8d85e1359a3f8c19adaf9bf7f2eeed5a5393ecd7 /sysdeps/powerpc/powerpc32/power7/memcpy.S
parent2bfa087aa11717761246f52b96c50454c3e4004e (diff)
parent58695b88a9deaecbcf7794760cc333177edaa2b4 (diff)
Merge commit 'refs/top-bases/t/ifaddrs_v6' into t/ifaddrs_v6
Diffstat (limited to 'sysdeps/powerpc/powerpc32/power7/memcpy.S')
-rw-r--r--sysdeps/powerpc/powerpc32/power7/memcpy.S32
1 files changed, 22 insertions, 10 deletions
diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 1f805d1198..52c2a6bcf4 100644
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -1,5 +1,5 @@
/* Optimized memcpy implementation for PowerPC32/POWER7.
- Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2010-2014 Free Software Foundation, Inc.
Contributed by Luis Machado <luisgpm@br.ibm.com>.
This file is part of the GNU C Library.
@@ -18,14 +18,12 @@
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
Returns 'dst'. */
.machine power7
-EALIGN (BP_SYM (memcpy), 5, 0)
+EALIGN (memcpy, 5, 0)
CALL_MCOUNT
stwu 1,-32(1)
@@ -385,7 +383,7 @@ L(copy_GE_32_unaligned):
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
mtcrf 0x01,0
subf 31,0,5
@@ -437,13 +435,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmplwi cr6,9,1
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop . */
lvx 4,12,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -463,11 +469,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
@@ -522,5 +534,5 @@ L(end_unaligned_loop):
addi 1,1,32
blr
-END (BP_SYM (memcpy))
+END (memcpy)
libc_hidden_builtin_def (memcpy)