diff options
author | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2015-03-25 02:28:16 +0100 |
---|---|---|
committer | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2015-03-25 02:28:16 +0100 |
commit | bf8c09f8484522144b4dddd9cf74efbd998e8f8f (patch) | |
tree | 8d85e1359a3f8c19adaf9bf7f2eeed5a5393ecd7 /sysdeps/powerpc/powerpc32/power7/memcpy.S | |
parent | 2bfa087aa11717761246f52b96c50454c3e4004e (diff) | |
parent | 58695b88a9deaecbcf7794760cc333177edaa2b4 (diff) |
Merge commit 'refs/top-bases/t/ifaddrs_v6' into t/ifaddrs_v6
Diffstat (limited to 'sysdeps/powerpc/powerpc32/power7/memcpy.S')
-rw-r--r-- | sysdeps/powerpc/powerpc32/power7/memcpy.S | 32 |
1 files changed, 22 insertions, 10 deletions
diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 1f805d1198..52c2a6bcf4 100644
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -1,5 +1,5 @@
 /* Optimized memcpy implementation for PowerPC32/POWER7.
-   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2010-2014 Free Software Foundation, Inc.
    Contributed by Luis Machado <luisgpm@br.ibm.com>.
    This file is part of the GNU C Library.
 
@@ -18,14 +18,12 @@
    <http://www.gnu.org/licenses/>. */
 
 #include <sysdep.h>
-#include <bp-sym.h>
-#include <bp-asm.h>
 
 /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
    Returns 'dst'. */
 
 	.machine power7
-EALIGN (BP_SYM (memcpy), 5, 0)
+EALIGN (memcpy, 5, 0)
 	CALL_MCOUNT
 
 	stwu 1,-32(1)
@@ -385,7 +383,7 @@ L(copy_GE_32_unaligned):
 
 	beq L(copy_GE_32_unaligned_cont)
 
-	/* SRC is not quadword aligned, get it aligned. */
+	/* DST is not quadword aligned, get it aligned. */
 
 	mtcrf 0x01,0
 	subf 31,0,5
@@ -437,13 +435,21 @@ L(copy_GE_32_unaligned_cont):
 	mr 11,12
 	mtcrf 0x01,9
 	cmplwi cr6,9,1
+#ifdef __LITTLE_ENDIAN__
+	lvsr 5,0,12
+#else
 	lvsl 5,0,12
+#endif
 	lvx 3,0,12
 	bf 31,L(setup_unaligned_loop)
 
 	/* Copy another 16 bytes to align to 32-bytes due to the loop . */
 	lvx 4,12,6
+#ifdef __LITTLE_ENDIAN__
+	vperm 6,4,3,5
+#else
 	vperm 6,3,4,5
+#endif
 	addi 11,12,16
 	addi 10,3,16
 	stvx 6,0,3
@@ -463,11 +469,17 @@ L(unaligned_loop):
 	   vector instructions though. */
 
 	lvx 4,11,6 /* vr4 = r11+16. */
-	vperm 6,3,4,5 /* Merge the correctly-aligned portions
-	   of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+	vperm 6,4,3,5
+#else
+	vperm 6,3,4,5
+#endif
 	lvx 3,11,7 /* vr3 = r11+32. */
-	vperm 10,4,3,5 /* Merge the correctly-aligned portions
-	   of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+	vperm 10,3,4,5
+#else
+	vperm 10,4,3,5
+#endif
 	addi 11,11,32
 	stvx 6,0,10
 	stvx 10,10,6
@@ -522,5 +534,5 @@ L(end_unaligned_loop):
 	addi 1,1,32
 	blr
 
-END (BP_SYM (memcpy))
+END (memcpy)
 libc_hidden_builtin_def (memcpy)