path: root/sysdeps/powerpc/powerpc64/power7/strcpy.S
author     Thomas Schwinge <thomas@codesourcery.com>  2013-12-20 09:29:29 +0100
committer  Thomas Schwinge <thomas@codesourcery.com>  2013-12-20 09:29:29 +0100
commit     a65dd355fb80a05215e15ae97649de52aec885e3 (patch)
tree       81701bb0c6b648630f2bf1729a85d7f5eb49e67b /sysdeps/powerpc/powerpc64/power7/strcpy.S
parent     296a5732f94abe4d5699dc981e4ccfb950b48cee (diff)
parent     b4578bab30f72cddd2cf38abfb39f9c8dc892249 (diff)
Merge branch 'baseline' into refs/top-bases/tschwinge/Roger_Whittaker
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power7/strcpy.S')
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strcpy.S | 274
1 file changed, 274 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc64/power7/strcpy.S b/sysdeps/powerpc/powerpc64/power7/strcpy.S
new file mode 100644
index 0000000000..5c341a1483
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/strcpy.S
@@ -0,0 +1,274 @@
+/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Implements the function
+
+ char * [r3] strcpy (char *dest [r3], const char *src [r4])
+
+ or
+
+   char * [r3] stpcpy (char *dest [r3], const char *src [r4])
+
+ if USE_AS_STPCPY is defined. It tries to use aligned memory accesses
+   when possible, using the following algorithm:
+
+ if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0))
+ goto aligned_doubleword_copy;
+ if (((((uintptr_t)dst & 0x3UL) == 0) && ((uintptr_t)src & 0x3UL) == 0))
+ goto aligned_word_copy;
+ if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL))
+ goto same_alignment;
+ goto unaligned;
+
+   The aligned comparisons are made using cmpb instructions.  */
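+
+/* Illustrative only (not part of this file's logic): cmpb sets each
+   byte of its result to 0xff where the corresponding bytes of its two
+   operands are equal, so comparing a word against an all-zero register
+   yields a nonzero result exactly when the word contains a null byte.
+   A rough, self-contained C analogue of that check (null_byte_mask is
+   a made-up name used only in this sketch):
+
+     #include <stdint.h>
+
+     static inline uint64_t
+     null_byte_mask (uint64_t w)     // emulates cmpb w, 0
+     {
+       uint64_t m = 0;
+       for (int i = 0; i < 8; i++)
+         if (((w >> (8 * i)) & 0xff) == 0)
+           m |= (uint64_t) 0xff << (8 * i);
+       return m;                     // nonzero iff w has a zero byte
+     }  */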
+
+#ifdef USE_AS_STPCPY
+# define FUNC_NAME __stpcpy
+#else
+# define FUNC_NAME strcpy
+#endif
+
+ .machine power7
+EALIGN (FUNC_NAME, 4, 0)
+ CALL_MCOUNT 2
+
+#define rTMP r0
+#ifdef USE_AS_STPCPY
+#define rRTN r3 /* pointer to previous word/doubleword in dest */
+#else
+#define rRTN r12 /* pointer to previous word/doubleword in dest */
+#endif
+#define rSRC r4 /* pointer to previous word/doubleword in src */
+#define rMASK r5 /* all-zero word/doubleword compared against src with cmpb */
+#define rWORD r6 /* current word from src */
+#define rALT r7 /* alternate word from src */
+#define rRTNAL r8 /* alignment of return pointer */
+#define rSRCAL r9 /* alignment of source pointer */
+#define rALCNT r10 /* bytes to read to reach 8 bytes alignment */
+#define rSUBAL r11 /* doubleword minus unaligned displacement */
+
+#ifndef USE_AS_STPCPY
+/* Save the dst pointer to use as return value. */
+ mr rRTN, r3
+#endif
+ or rTMP, rSRC, rRTN
+ clrldi. rTMP, rTMP, 61
+ bne L(check_word_alignment)
+ b L(aligned_doubleword_copy)
+
+L(same_alignment):
+/* Src and dst with same alignment: align both to doubleword. */
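+/* Copy 8 - (dst & 7) bytes one at a time (stopping early if a null
+   byte is copied) so that both pointers reach a doubleword boundary,
+   then fall through to the doubleword loop below.  */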
+ mr rALCNT, rRTN
+ lbz rWORD, 0(rSRC)
+ subfic rSUBAL, rRTNAL, 8
+ addi rRTN, rRTN, 1
+ addi rSRC, rSRC, 1
+ cmpdi cr7, rWORD, 0
+ stb rWORD, 0(rALCNT)
+ beq cr7, L(s2)
+
+ add rALCNT, rALCNT, rSUBAL
+ subf rALCNT, rRTN, rALCNT
+ addi rALCNT, rALCNT, 1
+ mtctr rALCNT
+ b L(s1)
+
+ .align 4
+L(s0):
+ addi rSRC, rSRC, 1
+ lbz rWORD, -1(rSRC)
+ cmpdi cr7, rWORD, 0
+ stb rWORD, -1(rALCNT)
+ beqlr cr7
+ mr rRTN, rALCNT
+L(s1):
+	addi	rALCNT, rRTN, 1
+ bdnz L(s0)
+ b L(aligned_doubleword_copy)
+ .align 4
+L(s2):
+ mr rRTN, rALCNT
+ blr
+
+/* For doubleword aligned memory, operate using doubleword loads and stores.  */
+ .align 4
+L(aligned_doubleword_copy):
+ li rMASK, 0
+ addi rRTN, rRTN, -8
+ ld rWORD, 0(rSRC)
+ b L(g2)
+
+ .align 4
+L(g0): ldu rALT, 8(rSRC)
+ stdu rWORD, 8(rRTN)
+ cmpb rTMP, rALT, rMASK
+ cmpdi rTMP, 0
+ bne L(g1)
+ ldu rWORD, 8(rSRC)
+ stdu rALT, 8(rRTN)
+L(g2): cmpb rTMP, rWORD, rMASK
+	cmpdi	rTMP, 0 /* If rTMP is 0, no null byte has been found.  */
+ beq L(g0)
+
+ mr rALT, rWORD
+/* We've hit the end of the string. Do the rest byte-by-byte. */
+L(g1):
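+/* rALT holds the doubleword containing the terminating null byte.
+   Extract its bytes in memory order with extrdi, storing each one and
+   returning as soon as the null byte has been written.  */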
+#ifdef __LITTLE_ENDIAN__
+ extrdi. rTMP, rALT, 8, 56
+ stbu rALT, 8(rRTN)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 48
+ stbu rTMP, 1(rRTN)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 40
+ stbu rTMP, 1(rRTN)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 32
+ stbu rTMP, 1(rRTN)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 24
+ stbu rTMP, 1(rRTN)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 16
+ stbu rTMP, 1(rRTN)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 8
+ stbu rTMP, 1(rRTN)
+ beqlr-
+ extrdi rTMP, rALT, 8, 0
+ stbu rTMP, 1(rRTN)
+#else
+ extrdi. rTMP, rALT, 8, 0
+ stbu rTMP, 8(rRTN)
+ beqlr
+ extrdi. rTMP, rALT, 8, 8
+ stbu rTMP, 1(rRTN)
+ beqlr
+ extrdi. rTMP, rALT, 8, 16
+ stbu rTMP, 1(rRTN)
+ beqlr
+ extrdi. rTMP, rALT, 8, 24
+ stbu rTMP, 1(rRTN)
+ beqlr
+ extrdi. rTMP, rALT, 8, 32
+ stbu rTMP, 1(rRTN)
+ beqlr
+ extrdi. rTMP, rALT, 8, 40
+ stbu rTMP, 1(rRTN)
+ beqlr
+ extrdi. rTMP, rALT, 8, 48
+ stbu rTMP, 1(rRTN)
+ beqlr
+ stbu rALT, 1(rRTN)
+#endif
+ blr
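+
+/* Roughly, the doubleword loop above behaves like the following C
+   sketch (illustrative only: src8/dst8 are hypothetical uint64_t
+   pointers and null_byte_mask stands for the cmpb/cmpdi pair, as in
+   the sketch after the header comment):
+
+     uint64_t w = *src8++;
+     while (null_byte_mask (w) == 0)  // no null byte in this doubleword
+       {
+         *dst8++ = w;                 // safe to store all eight bytes
+         w = *src8++;
+       }
+     // w holds the null byte; finish with the byte-by-byte tail above
+*/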
+
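+/* Neither pointer is doubleword aligned.  Check whether both are at
+   least word aligned; failing that, check whether they share the same
+   alignment and can be brought to a doubleword boundary together.  */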
+L(check_word_alignment):
+ clrldi. rTMP, rTMP, 62
+ beq L(aligned_word_copy)
+ rldicl rRTNAL, rRTN, 0, 61
+ rldicl rSRCAL, rSRC, 0, 61
+ cmpld cr7, rSRCAL, rRTNAL
+ beq cr7, L(same_alignment)
+ b L(unaligned)
+
+/* For word aligned memory, operate using word loads and stores.  */
+ .align 4
+L(aligned_word_copy):
+ li rMASK, 0
+ addi rRTN, rRTN, -4
+ lwz rWORD, 0(rSRC)
+ b L(g5)
+
+ .align 4
+L(g3): lwzu rALT, 4(rSRC)
+ stwu rWORD, 4(rRTN)
+ cmpb rTMP, rALT, rMASK
+ cmpwi rTMP, 0
+ bne L(g4)
+ lwzu rWORD, 4(rSRC)
+ stwu rALT, 4(rRTN)
+L(g5): cmpb rTMP, rWORD, rMASK
+	cmpwi	rTMP, 0 /* If rTMP is 0, there is no null byte in the word.  */
+ beq L(g3)
+
+ mr rALT, rWORD
+/* We've hit the end of the string. Do the rest byte-by-byte. */
+L(g4):
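+/* Same byte-by-byte tail as L(g1) above, but for the final word: rALT
+   holds the word containing the null byte, and rlwinm extracts its
+   bytes in memory order until the null byte has been stored.  */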
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rRTN)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rRTN)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rRTN)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rRTN)
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
+ stbu rTMP, 4(rRTN)
+ beqlr
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rRTN)
+ beqlr
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rRTN)
+ beqlr
+ stbu rALT, 1(rRTN)
+#endif
+ blr
+
+/* Oh well. In this case, we just do a byte-by-byte copy. */
+ .align 4
+L(unaligned):
+ lbz rWORD, 0(rSRC)
+ addi rRTN, rRTN, -1
+ cmpdi rWORD, 0
+ beq L(u2)
+
+ .align 5
+L(u0): lbzu rALT, 1(rSRC)
+ stbu rWORD, 1(rRTN)
+ cmpdi rALT, 0
+ beq L(u1)
+ lbzu rWORD, 1(rSRC)
+ stbu rALT, 1(rRTN)
+ cmpdi rWORD, 0
+ beq L(u2)
+ lbzu rALT, 1(rSRC)
+ stbu rWORD, 1(rRTN)
+ cmpdi rALT, 0
+ beq L(u1)
+ lbzu rWORD, 1(rSRC)
+ stbu rALT, 1(rRTN)
+ cmpdi rWORD, 0
+ bne L(u0)
+L(u2): stbu rWORD, 1(rRTN)
+ blr
+L(u1): stbu rALT, 1(rRTN)
+ blr
+END (FUNC_NAME)
+
+#ifndef USE_AS_STPCPY
+libc_hidden_builtin_def (strcpy)
+#endif