summary | refs | log | tree | commit | diff
path: root/sysdeps/powerpc/powerpc64/power7/memmove.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power7/memmove.S')
-rw-r--r-- sysdeps/powerpc/powerpc64/power7/memmove.S 139
1 files changed, 71 insertions, 68 deletions
diff --git a/sysdeps/powerpc/powerpc64/power7/memmove.S b/sysdeps/powerpc/powerpc64/power7/memmove.S
index e263ba9608..b7f3dc28d1 100644
--- a/sysdeps/powerpc/powerpc64/power7/memmove.S
+++ b/sysdeps/powerpc/powerpc64/power7/memmove.S
@@ -1,5 +1,5 @@
/* Optimized memmove implementation for PowerPC64/POWER7.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
+ Copyright (C) 2014-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -27,8 +27,11 @@
If source and destination overlap, an optimized backwards memcpy is used
instead. */
+#ifndef MEMMOVE
+# define MEMMOVE memmove
+#endif
.machine power7
-EALIGN (memmove, 5, 0)
+ENTRY_TOCLESS (MEMMOVE, 5)
CALL_MCOUNT 3
L(_memmove):
@@ -89,63 +92,63 @@ L(aligned_copy):
srdi 12,r5,7
cmpdi 12,0
beq L(aligned_tail)
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
+ lvx 6,0,r4
+ lvx 7,r4,6
mtctr 12
b L(aligned_128loop)
.align 4
L(aligned_128head):
/* For the 2nd and subsequent iterations of this loop. */
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
+ lvx 6,0,r4
+ lvx 7,r4,6
L(aligned_128loop):
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
- stxvd2x 6,0,r11
+ lvx 8,r4,7
+ lvx 9,r4,8
+ stvx 6,0,r11
addi r4,r4,64
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
+ stvx 7,r11,6
+ stvx 8,r11,7
+ stvx 9,r11,8
+ lvx 6,0,r4
+ lvx 7,r4,6
addi r11,r11,64
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
+ lvx 8,r4,7
+ lvx 9,r4,8
addi r4,r4,64
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
+ stvx 6,0,r11
+ stvx 7,r11,6
+ stvx 8,r11,7
+ stvx 9,r11,8
addi r11,r11,64
bdnz L(aligned_128head)
L(aligned_tail):
mtocrf 0x01,r5
bf 25,32f
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
+ lvx 6,0,r4
+ lvx 7,r4,6
+ lvx 8,r4,7
+ lvx 9,r4,8
addi r4,r4,64
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
+ stvx 6,0,r11
+ stvx 7,r11,6
+ stvx 8,r11,7
+ stvx 9,r11,8
addi r11,r11,64
32:
bf 26,16f
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
+ lvx 6,0,r4
+ lvx 7,r4,6
addi r4,r4,32
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
+ stvx 6,0,r11
+ stvx 7,r11,6
addi r11,r11,32
16:
bf 27,8f
- lxvd2x 6,0,r4
+ lvx 6,0,r4
addi r4,r4,16
- stxvd2x 6,0,r11
+ stvx 6,0,r11
addi r11,r11,16
8:
bf 28,4f
@@ -485,63 +488,63 @@ L(aligned_copy_bwd):
srdi r12,r5,7
cmpdi r12,0
beq L(aligned_tail_bwd)
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
+ lvx v6,r4,r6
+ lvx v7,r4,r7
mtctr 12
b L(aligned_128loop_bwd)
.align 4
L(aligned_128head_bwd):
/* For the 2nd and subsequent iterations of this loop. */
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
+ lvx v6,r4,r6
+ lvx v7,r4,r7
L(aligned_128loop_bwd):
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
- stxvd2x v6,r11,r6
+ lvx v8,r4,r8
+ lvx v9,r4,r9
+ stvx v6,r11,r6
subi r4,r4,64
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,7
+ stvx v7,r11,r7
+ stvx v8,r11,r8
+ stvx v9,r11,r9
+ lvx v6,r4,r6
+ lvx v7,r4,7
subi r11,r11,64
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
+ lvx v8,r4,r8
+ lvx v9,r4,r9
subi r4,r4,64
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
+ stvx v6,r11,r6
+ stvx v7,r11,r7
+ stvx v8,r11,r8
+ stvx v9,r11,r9
subi r11,r11,64
bdnz L(aligned_128head_bwd)
L(aligned_tail_bwd):
mtocrf 0x01,r5
bf 25,32f
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
+ lvx v6,r4,r6
+ lvx v7,r4,r7
+ lvx v8,r4,r8
+ lvx v9,r4,r9
subi r4,r4,64
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
+ stvx v6,r11,r6
+ stvx v7,r11,r7
+ stvx v8,r11,r8
+ stvx v9,r11,r9
subi r11,r11,64
32:
bf 26,16f
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
+ lvx v6,r4,r6
+ lvx v7,r4,r7
subi r4,r4,32
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
+ stvx v6,r11,r6
+ stvx v7,r11,r7
subi r11,r11,32
16:
bf 27,8f
- lxvd2x v6,r4,r6
+ lvx v6,r4,r6
subi r4,r4,16
- stxvd2x v6,r11,r6
+ stvx v6,r11,r6
subi r11,r11,16
8:
bf 28,4f
@@ -816,14 +819,14 @@ L(end_unaligned_loop_bwd):
stb r8,-7(r11)
/* Return original DST pointer. */
blr
-END_GEN_TB (memmove, TB_TOCLESS)
+END_GEN_TB (MEMMOVE, TB_TOCLESS)
libc_hidden_builtin_def (memmove)
/* void bcopy(const void *src [r3], void *dest [r4], size_t n [r5])
Implemented in this file so that the linker does not create a stub function
call for the branch to '_memmove'. */
-ENTRY (__bcopy)
+ENTRY_TOCLESS (__bcopy)
mr r6,r3
mr r3,r4
mr r4,r6