summary | refs | log | tree | commit | diff
path: root/sysdeps/s390/memcpy-z900.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/s390/memcpy-z900.S')
-rw-r--r-- sysdeps/s390/memcpy-z900.S 366
1 files changed, 366 insertions, 0 deletions
diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S
new file mode 100644
index 0000000000..b5e8c2e1ab
--- /dev/null
+++ b/sysdeps/s390/memcpy-z900.S
@@ -0,0 +1,366 @@
+/* memcpy - copy a block from source to destination. 31/64 bit S/390 version.
+ Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include <ifunc-memcpy.h>
+
+/* INPUT PARAMETERS
+ %r2 = address of destination memory area
+ %r3 = address of source memory area
+ %r4 = number of bytes to copy. */
+
+ .text
+
+#if defined __s390x__ /* 64-bit build: use the 64-bit instruction forms. */
+# define LTGR ltgr /* Load and test a register (sets the cc). */
+# define CGHI cghi /* Signed compare with a halfword immediate. */
+# define LGR lgr /* Register to register copy. */
+# define AGHI aghi /* Add a halfword immediate. */
+# define BRCTG brctg /* Decrement and branch if the result is not zero. */
+#else /* 31-bit build: 32-bit counterparts of the instructions above. */
+# define LTGR ltr
+# define CGHI chi
+# define LGR lr
+# define AGHI ahi
+# define BRCTG brct
+#endif /* ! defined __s390x__ */
+
+#if HAVE_MEMCPY_Z900_G5
+ENTRY(MEMPCPY_Z900_G5) /* mempcpy: copy like MEMCPY_Z900_G5, but return dest + n. */
+# if defined __s390x__
+ .machine "z900"
+# else
+ .machine "g5"
+# endif /* ! defined __s390x__ */
+ LGR %r1,%r2 # r1: Use as dest in the shared copy code
+ la %r2,0(%r4,%r2) # r2: Return dest + n
+ j .L_Z900_G5_start # Continue in MEMCPY_Z900_G5 behind its own r1 setup
+END(MEMPCPY_Z900_G5)
+
+ENTRY(MEMCPY_Z900_G5) /* memcpy: copy r4 bytes from r3 to r2; r2 is returned unchanged. */
+# if defined __s390x__
+ .machine "z900"
+# else
+ .machine "g5"
+# endif /* ! defined __s390x__ */
+ LGR %r1,%r2 # r1: Use as dest ; r2: Return dest
+.L_Z900_G5_start: # Shared entry: MEMPCPY_Z900_G5 jumps here with r1/r2 set up
+ LTGR %r4,%r4 # Test the length (sets the cc)
+ je .L_Z900_G5_4 # n == 0: nothing to copy
+ AGHI %r4,-1 # r4 = n - 1 (the mvc length code is length - 1)
+# if defined __s390x__
+ srlg %r5,%r4,8 # r5 = (n - 1) / 256 = number of full 256-byte blocks
+# else
+ lr %r5,%r4
+ srl %r5,8 # r5 = (n - 1) / 256 = number of full 256-byte blocks
+# endif /* ! defined __s390x__ */
+ LTGR %r5,%r5
+ jne .L_Z900_G5_13 # At least one full block: run the block loop first
+.L_Z900_G5_3: # Copy the last ((n - 1) % 256) + 1 bytes
+# if defined __s390x__
+ larl %r5,.L_Z900_G5_15 # r5 = address of the mvc template
+# define Z900_G5_EX_D 0
+# else
+ basr %r5,0 # r5 = address of the next instruction (.L_Z900_G5_14)
+.L_Z900_G5_14:
+# define Z900_G5_EX_D .L_Z900_G5_15-.L_Z900_G5_14
+# endif /* ! defined __s390x__ */
+ ex %r4,Z900_G5_EX_D(%r5) # Run the mvc template; low byte of r4 supplies its length code
+.L_Z900_G5_4:
+ br %r14 # Return; r2 holds the return value
+.L_Z900_G5_13:
+ CGHI %r5,4096 # Switch to mvcle for copies >1MB
+ jh __memcpy_mvcle # Tail jump with r1 = dest, r3 = src, r4 = n - 1
+.L_Z900_G5_12:
+ mvc 0(256,%r1),0(%r3) # Copy one full 256-byte block
+ la %r1,256(%r1) # Advance dest
+ la %r3,256(%r3) # Advance src
+ BRCTG %r5,.L_Z900_G5_12 # Decrement the block count and loop while it is not zero
+ j .L_Z900_G5_3 # Then copy the remaining bytes
+.L_Z900_G5_15:
+ mvc 0(1,%r1),0(%r3) # Template for the ex above; not reached by fall-through
+END(MEMCPY_Z900_G5)
+#endif /* HAVE_MEMCPY_Z900_G5 */
+
+ENTRY(__memcpy_mvcle) /* Tail of the mem[p]cpy variants for large copies; copies with mvcle. */
+ # Not usable as a standalone function: the copy routines in this file
+ # jump here with r1 = dest, r3 = src and r4 = n - 1, so the length is
+ # incremented by 1 again below. r2 holds the value to return.
+ LGR %r0,%r2 # backup return dest [ + n ]
+ AGHI %r4,1 # length + 1 (undo the decrement done by the caller)
+ LGR %r5,%r4 # source length
+ LGR %r4,%r3 # source address
+ LGR %r2,%r1 # destination address
+ LGR %r3,%r5 # destination length = source length
+.L_MVCLE_1:
+ mvcle %r2,%r4,0 # that is it, MVCLE is your friend (pairs r2/r3 and r4/r5)
+ jo .L_MVCLE_1 # cc 3: the copy is not complete yet, so resume it
+ LGR %r2,%r0 # restore the return value saved in r0
+ br %r14
+END(__memcpy_mvcle)
+
+#undef LTGR /* The code below names its instructions directly, so drop the macros. */
+#undef CGHI
+#undef LGR
+#undef AGHI
+#undef BRCTG
+
+#if HAVE_MEMCPY_Z10
+ENTRY(MEMPCPY_Z10) /* mempcpy: copy like MEMCPY_Z10, but return dest + n. */
+ .machine "z10"
+ .machinemode "zarch_nohighgprs"
+ lgr %r1,%r2 # r1: Use as dest in the shared copy code
+ la %r2,0(%r4,%r2) # r2: Return dest + n
+ j .L_Z10_start # Continue in MEMCPY_Z10 behind its own r1 setup
+END(MEMPCPY_Z10)
+
+ENTRY(MEMCPY_Z10) /* memcpy: copy r4 bytes from r3 to r2; r2 is returned unchanged. */
+ .machine "z10"
+ .machinemode "zarch_nohighgprs"
+ lgr %r1,%r2 # r1: Use as dest ; r2: Return dest
+.L_Z10_start: # Shared entry: MEMPCPY_Z10 jumps here with r1/r2 set up
+# if !defined __s390x__
+ llgfr %r4,%r4 # 31-bit build: zero-extend the length to 64 bit
+# endif /* !defined __s390x__ */
+ cgije %r4,0,.L_Z10_4 # n == 0: nothing to copy
+ aghi %r4,-1 # r4 = n - 1 (the mvc length code is length - 1)
+ srlg %r5,%r4,8 # r5 = (n - 1) / 256 = number of full 256-byte blocks
+ cgijlh %r5,0,.L_Z10_13 # At least one full block: run the block loop first
+.L_Z10_3: # Copy the last ((n - 1) % 256) + 1 bytes
+ exrl %r4,.L_Z10_15 # Run the mvc template; low byte of r4 supplies its length code
+.L_Z10_4:
+ br %r14 # Return; r2 holds the return value
+.L_Z10_13:
+ cgfi %r5,65535 # Switch to mvcle for copies >16MB
+ jh __memcpy_mvcle # Tail jump with r1 = dest, r3 = src, r4 = n - 1
+.L_Z10_12:
+ pfd 1,768(%r3) # Prefetch src 768 bytes ahead (code 1: fetch access)
+ pfd 2,768(%r1) # Prefetch dest 768 bytes ahead (code 2: store access)
+ mvc 0(256,%r1),0(%r3) # Copy one full 256-byte block
+ la %r1,256(%r1) # Advance dest
+ la %r3,256(%r3) # Advance src
+ brctg %r5,.L_Z10_12 # Decrement the block count and loop while it is not zero
+ j .L_Z10_3 # Then copy the remaining bytes
+.L_Z10_15:
+ mvc 0(1,%r1),0(%r3) # Template for the exrl above; not reached by fall-through
+END(MEMCPY_Z10)
+#endif /* HAVE_MEMCPY_Z10 */
+
+#if HAVE_MEMCPY_Z196
+ENTRY(MEMPCPY_Z196) /* mempcpy: copy like MEMCPY_Z196, but return dest + n. */
+ .machine "z196"
+ .machinemode "zarch_nohighgprs"
+ lgr %r1,%r2 # r1: Use as dest in the shared copy code
+ la %r2,0(%r4,%r2) # r2: Return dest + n
+ j .L_Z196_start # Continue in MEMCPY_Z196 behind its own r1 setup
+END(MEMPCPY_Z196)
+
+ENTRY(MEMCPY_Z196) /* memcpy: copy r4 bytes from r3 to r2; r2 is returned unchanged. */
+ .machine "z196"
+ .machinemode "zarch_nohighgprs"
+ lgr %r1,%r2 # r1: Use as dest ; r2: Return dest
+.L_Z196_start: # Shared entry: MEMPCPY_Z196 jumps here with r1/r2 set up
+# if !defined __s390x__
+ llgfr %r4,%r4 # 31-bit build: zero-extend the length to 64 bit
+# endif /* !defined __s390x__ */
+ ltgr %r4,%r4 # Test the length (sets the cc)
+ je .L_Z196_4 # n == 0: nothing to copy
+.L_Z196_start2: # Also entered by the memmove variants (forward case, n > 16)
+ aghi %r4,-1 # r4 = n - 1 (the mvc length code is length - 1)
+ srlg %r5,%r4,8 # r5 = (n - 1) / 256 = number of full 256-byte blocks
+ ltgr %r5,%r5
+ jne .L_Z196_5 # At least one full block: run the block loop first
+.L_Z196_3: # Copy the last ((n - 1) % 256) + 1 bytes
+ exrl %r4,.L_Z196_14 # Run the mvc template; low byte of r4 supplies its length code
+.L_Z196_4:
+ br %r14 # Return; r2 holds the return value
+.L_Z196_5:
+ cgfi %r5,262144 # Switch to mvcle for copies >64MB
+ jh __memcpy_mvcle # Tail jump with r1 = dest, r3 = src, r4 = n - 1
+.L_Z196_2:
+ pfd 1,768(%r3) # Prefetch src 768 bytes ahead (code 1: fetch access)
+ pfd 2,768(%r1) # Prefetch dest 768 bytes ahead (code 2: store access)
+ mvc 0(256,%r1),0(%r3) # Copy one full 256-byte block
+ aghi %r5,-1 # Decrement the block count; this cc is tested by the jne below
+ la %r1,256(%r1) # Advance dest (la leaves the cc unchanged)
+ la %r3,256(%r3) # Advance src (la leaves the cc unchanged)
+ jne .L_Z196_2 # Loop while blocks remain (cc of the aghi above)
+ j .L_Z196_3 # Then copy the remaining bytes
+.L_Z196_14:
+ mvc 0(1,%r1),0(%r3) # Template for the exrl above; not reached by fall-through
+END(MEMCPY_Z196)
+#endif /* HAVE_MEMCPY_Z196 */
+
+#if HAVE_MEMMOVE_Z13
+ENTRY(MEMMOVE_Z13) /* memmove: r2 = dst, r3 = src, r4 = len; dst is returned in r2. */
+ .machine "z13"
+ .machinemode "zarch_nohighgprs"
+# if !defined __s390x__
+ /* Note: The 31bit dst and src pointers are prefixed with zeroes. */
+ llgfr %r4,%r4
+ llgfr %r3,%r3
+ llgfr %r2,%r2
+# endif /* !defined __s390x__ */
+ sgrk %r0,%r2,%r3 /* r0 = dst - src */
+ clgijh %r4,16,.L_MEMMOVE_Z13_LARGE /* len > 16 (unsigned) */
+ aghik %r5,%r4,-1 /* r5 = len - 1; sets the cc for the jl below. */
+.L_MEMMOVE_Z13_SMALL:
+ jl .L_MEMMOVE_Z13_END /* Jump away if r5 is negative, i.e. no bytes are left. */
+ /* Store up to 16 bytes with vll/vstl which need the highest index
+ (r5) instead of the length. All bytes are loaded before the store. */
+ vll %v16,%r5,0(%r3)
+ vstl %v16,%r5,0(%r2)
+.L_MEMMOVE_Z13_END:
+ br %r14
+.L_MEMMOVE_Z13_LARGE:
+ lgr %r1,%r2 /* For memcpy: r1: Use as dest ;
+ r2: Return dest */
+ /* The unsigned comparison (dst - src >= len) determines if we can
+ execute the forward case with memcpy. */
+#if ! HAVE_MEMCPY_Z196
+# error The z13 variant of memmove needs the z196 variant of memcpy!
+#endif /* ! HAVE_MEMCPY_Z196 */
+ clgrjhe %r0,%r4,.L_Z196_start2 /* Forward case: continue in MEMCPY_Z196. */
+ risbgn %r5,%r4,4,128+63,60 /* r5 = r4 / 16 */
+ aghi %r4,-16 /* r4 = offset of the last 16 bytes. */
+ clgijhe %r5,8,.L_MEMMOVE_Z13_LARGE_64B /* At least 8 blocks (128 bytes). */
+.L_MEMMOVE_Z13_LARGE_16B_LOOP:
+ /* Backward case: store 16 bytes per iteration with vl/vst, starting
+ at the end. The number of 16byte blocks is stored in r5. */
+ vl %v16,0(%r4,%r3)
+ vst %v16,0(%r4,%r2)
+ aghi %r4,-16 /* Step back to the preceding 16 bytes. */
+ brctg %r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP
+ aghik %r5,%r4,15 /* r5 = remaining bytes - 1; sets the cc for the jl. */
+ j .L_MEMMOVE_Z13_SMALL
+.L_MEMMOVE_Z13_LARGE_64B:
+ /* Store at least 128 bytes with 4x vl/vst. The number of 64byte blocks
+ will be stored in r0. */
+ aghi %r4,-48 /* r4 = offset of the last 64 bytes. */
+ srlg %r0,%r5,2 /* r0 = r5 / 4
+ => Number of 64byte blocks. */
+.L_MEMMOVE_Z13_LARGE_64B_LOOP:
+ vl %v20,48(%r4,%r3) /* Load all 64 bytes of the block ... */
+ vl %v19,32(%r4,%r3)
+ vl %v18,16(%r4,%r3)
+ vl %v17,0(%r4,%r3)
+ vst %v20,48(%r4,%r2) /* ... before any of them is stored. */
+ vst %v19,32(%r4,%r2)
+ vst %v18,16(%r4,%r2)
+ vst %v17,0(%r4,%r2)
+ aghi %r4,-64 /* Step back to the preceding 64 bytes. */
+ brctg %r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP
+ aghi %r4,48 /* r4 = offset of the last 16 bytes still to move. */
+ /* Recalculate the number of 16byte blocks. */
+ risbg %r5,%r5,62,128+63,0 /* r5 = r5 & 3
+ => Remaining 16byte blocks; risbg sets the cc. */
+ jne .L_MEMMOVE_Z13_LARGE_16B_LOOP /* r5 != 0 (cc of risbg). */
+ aghik %r5,%r4,15 /* r5 = remaining bytes - 1; sets the cc for the jl. */
+ j .L_MEMMOVE_Z13_SMALL
+END(MEMMOVE_Z13)
+#endif /* HAVE_MEMMOVE_Z13 */
+
+#if HAVE_MEMMOVE_ARCH13
+ENTRY(MEMMOVE_ARCH13) /* memmove: r2 = dst, r3 = src, r4 = len; dst is returned in r2. */
+ .machine "arch13"
+ .machinemode "zarch_nohighgprs"
+# if ! defined __s390x__
+ /* Note: The 31bit dst and src pointers are prefixed with zeroes. */
+ llgfr %r4,%r4
+ llgfr %r3,%r3
+ llgfr %r2,%r2
+# endif /* ! defined __s390x__ */
+ sgrk %r5,%r2,%r3 /* r5 = dst - src */
+ aghik %r0,%r4,-1 /* r0 = len - 1: both vstl and mvcrl need the highest index. */
+ clgijh %r4,16,.L_MEMMOVE_ARCH13_LARGE /* len > 16 (unsigned) */
+.L_MEMMOVE_ARCH13_SMALL:
+ jl .L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik). */
+ /* Store up to 16 bytes with vll/vstl (needs highest index). */
+ vll %v16,%r0,0(%r3)
+ vstl %v16,%r0,0(%r2)
+.L_MEMMOVE_ARCH13_END:
+ br %r14
+.L_MEMMOVE_ARCH13_LARGE:
+ lgr %r1,%r2 /* For memcpy: r1: Use as dest ; r2: Return dest */
+ /* The unsigned comparison (dst - src >= len) determines if we can
+ execute the forward case with memcpy. */
+#if ! HAVE_MEMCPY_Z196
+# error The arch13 variant of memmove needs the z196 variant of memcpy!
+#endif /* ! HAVE_MEMCPY_Z196 */
+ /* Forward case: continue in MEMCPY_Z196. Otherwise: backward case. */
+ clgrjhe %r5,%r4,.L_Z196_start2
+ clgijh %r0,255,.L_MEMMOVE_ARCH13_LARGER_256B /* len > 256 */
+ /* Move up to 256bytes with mvcrl (move right to left). */
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
+ br %r14
+.L_MEMMOVE_ARCH13_LARGER_256B:
+ /* First move the "remaining" block of up to 256 bytes at the end of
+ src/dst buffers. Then move blocks of 256bytes in a loop starting
+ with the block at the end.
+ (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
+ passed to mvcrl instructions are aligned, too) */
+ risbgn %r5,%r0,8,128+63,56 /* r5 = r0 / 256 => Number of 256byte blocks. */
+ risbgn %r0,%r0,56,128+63,0 /* r0 = r0 & 0xFF => Highest index of the end block. */
+ slgr %r4,%r0 /* r4 = len - r0 */
+ lay %r1,-1(%r4,%r1) /* r1 = dst + len - r0 - 1 => Start of the end block. */
+ lay %r3,-1(%r4,%r3) /* r3 = src + len - r0 - 1 */
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
+ lghi %r0,255 /* Always copy 256 bytes in the loop below! */
+.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
+ aghi %r1,-256 /* Step back to the preceding 256byte block. */
+ aghi %r3,-256
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
+ brctg %r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
+ br %r14
+END(MEMMOVE_ARCH13)
+#endif /* HAVE_MEMMOVE_ARCH13 */
+
+#if ! HAVE_MEMCPY_IFUNC
+/* If we do not use ifunc, define the aliases for mem[p]cpy here, bound to
+ the default variants. Otherwise see sysdeps/s390/mem[p]cpy.c. */
+strong_alias (MEMCPY_DEFAULT, memcpy)
+strong_alias (MEMPCPY_DEFAULT, __mempcpy)
+weak_alias (__mempcpy, mempcpy)
+#endif /* ! HAVE_MEMCPY_IFUNC */
+
+#if ! HAVE_MEMMOVE_IFUNC
+/* If we do not use ifunc, define an alias for memmove here.
+ Otherwise see sysdeps/s390/memmove.c. */
+# if ! HAVE_MEMMOVE_C
+/* If the c variant is needed, then sysdeps/s390/memmove-c.c
+ defines memmove.
+ Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it. */
+strong_alias (MEMMOVE_DEFAULT, memmove)
+# endif /* ! HAVE_MEMMOVE_C */
+#endif /* ! HAVE_MEMMOVE_IFUNC */
+
+#if defined SHARED && IS_IN (libc)
+/* Defines the internal (__GI_ prefixed) symbols as aliases of the defaults.
+ Compare to libc_hidden_[builtin_]def (mem[p]cpy) in string/mem[p]cpy.c. */
+strong_alias (MEMCPY_DEFAULT, __GI_memcpy)
+strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy)
+strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy)
+# if ! HAVE_MEMMOVE_C
+/* If the c variant is needed, then sysdeps/s390/memmove-c.c
+ defines the internal symbol.
+ Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it. */
+strong_alias (MEMMOVE_DEFAULT, __GI_memmove)
+# endif /* ! HAVE_MEMMOVE_C */
+#endif /* defined SHARED && IS_IN (libc) */