diff options
Diffstat (limited to 'sysdeps/s390/memcpy-z900.S')
-rw-r--r-- | sysdeps/s390/memcpy-z900.S | 366 |
1 file changed, 366 insertions, 0 deletions
diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S
new file mode 100644
index 0000000000..b5e8c2e1ab
--- /dev/null
+++ b/sysdeps/s390/memcpy-z900.S
@@ -0,0 +1,366 @@
+/* memcpy - copy a block from source to destination. 31/64 bit S/390 version.
+   Copyright (C) 2012-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>. */
+
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include <ifunc-memcpy.h>
+
+/* INPUT PARAMETERS
+     %r2 = address of destination memory area
+     %r3 = address of source memory area
+     %r4 = number of bytes to copy. */
+
+        .text
+
+#if defined __s390x__
+# define LTGR   ltgr
+# define CGHI   cghi
+# define LGR    lgr
+# define AGHI   aghi
+# define BRCTG  brctg
+#else
+# define LTGR   ltr
+# define CGHI   chi
+# define LGR    lr
+# define AGHI   ahi
+# define BRCTG  brct
+#endif /* ! defined __s390x__ */
+
+#if HAVE_MEMCPY_Z900_G5
+ENTRY(MEMPCPY_Z900_G5)
+# if defined __s390x__
+        .machine "z900"
+# else
+        .machine "g5"
+# endif /* ! defined __s390x__ */
+        LGR     %r1,%r2             # Use as dest
+        la      %r2,0(%r4,%r2)      # Return dest + n
+        j       .L_Z900_G5_start
+END(MEMPCPY_Z900_G5)
+
+ENTRY(MEMCPY_Z900_G5)
+# if defined __s390x__
+        .machine "z900"
+# else
+        .machine "g5"
+# endif /* ! defined __s390x__ */
+        LGR     %r1,%r2             # r1: Use as dest ; r2: Return dest
+.L_Z900_G5_start:
+        LTGR    %r4,%r4
+        je      .L_Z900_G5_4
+        AGHI    %r4,-1
+# if defined __s390x__
+        srlg    %r5,%r4,8
+# else
+        lr      %r5,%r4
+        srl     %r5,8
+# endif /* ! defined __s390x__ */
+        LTGR    %r5,%r5
+        jne     .L_Z900_G5_13
+.L_Z900_G5_3:
+# if defined __s390x__
+        larl    %r5,.L_Z900_G5_15
+# define Z900_G5_EX_D 0
+# else
+        basr    %r5,0
+.L_Z900_G5_14:
+# define Z900_G5_EX_D .L_Z900_G5_15-.L_Z900_G5_14
+# endif /* ! defined __s390x__ */
+        ex      %r4,Z900_G5_EX_D(%r5)
+.L_Z900_G5_4:
+        br      %r14
+.L_Z900_G5_13:
+        CGHI    %r5,4096            # Switch to mvcle for copies >1MB
+        jh      __memcpy_mvcle
+.L_Z900_G5_12:
+        mvc     0(256,%r1),0(%r3)
+        la      %r1,256(%r1)
+        la      %r3,256(%r3)
+        BRCTG   %r5,.L_Z900_G5_12
+        j       .L_Z900_G5_3
+.L_Z900_G5_15:
+        mvc     0(1,%r1),0(%r3)
+END(MEMCPY_Z900_G5)
+#endif /* HAVE_MEMCPY_Z900_G5 */
+
+ENTRY(__memcpy_mvcle)
+        # Note: Using this as a standalone function will produce unexpected
+        # results, since the length field is incremented by 1 in order to
+        # compensate for the changes already done in the functions above.
+        LGR     %r0,%r2             # backup return dest [ + n ]
+        AGHI    %r4,1               # length + 1
+        LGR     %r5,%r4             # source length
+        LGR     %r4,%r3             # source address
+        LGR     %r2,%r1             # destination address
+        LGR     %r3,%r5             # destination length = source length
+.L_MVCLE_1:
+        mvcle   %r2,%r4,0           # that's it, MVCLE is your friend
+        jo      .L_MVCLE_1
+        LGR     %r2,%r0             # return destination address
+        br      %r14
+END(__memcpy_mvcle)
+
+#undef LTGR
+#undef CGHI
+#undef LGR
+#undef AGHI
+#undef BRCTG
+
+#if HAVE_MEMCPY_Z10
+ENTRY(MEMPCPY_Z10)
+        .machine "z10"
+        .machinemode "zarch_nohighgprs"
+        lgr     %r1,%r2             # Use as dest
+        la      %r2,0(%r4,%r2)      # Return dest + n
+        j       .L_Z10_start
+END(MEMPCPY_Z10)
+
+ENTRY(MEMCPY_Z10)
+        .machine "z10"
+        .machinemode "zarch_nohighgprs"
+        lgr     %r1,%r2             # r1: Use as dest ; r2: Return dest
+.L_Z10_start:
+# if !defined __s390x__
+        llgfr   %r4,%r4
+# endif /* !defined __s390x__ */
+        cgije   %r4,0,.L_Z10_4
+        aghi    %r4,-1
+        srlg    %r5,%r4,8
+        cgijlh  %r5,0,.L_Z10_13
+.L_Z10_3:
+        exrl    %r4,.L_Z10_15
+.L_Z10_4:
+        br      %r14
+.L_Z10_13:
+        cgfi    %r5,65535           # Switch to mvcle for copies >16MB
+        jh      __memcpy_mvcle
+.L_Z10_12:
+        pfd     1,768(%r3)
+        pfd     2,768(%r1)
+        mvc     0(256,%r1),0(%r3)
+        la      %r1,256(%r1)
+        la      %r3,256(%r3)
+        brctg   %r5,.L_Z10_12
+        j       .L_Z10_3
+.L_Z10_15:
+        mvc     0(1,%r1),0(%r3)
+END(MEMCPY_Z10)
+#endif /* HAVE_MEMCPY_Z10 */
+
+#if HAVE_MEMCPY_Z196
+ENTRY(MEMPCPY_Z196)
+        .machine "z196"
+        .machinemode "zarch_nohighgprs"
+        lgr     %r1,%r2             # Use as dest
+        la      %r2,0(%r4,%r2)      # Return dest + n
+        j       .L_Z196_start
+END(MEMPCPY_Z196)
+
+ENTRY(MEMCPY_Z196)
+        .machine "z196"
+        .machinemode "zarch_nohighgprs"
+        lgr     %r1,%r2             # r1: Use as dest ; r2: Return dest
+.L_Z196_start:
+# if !defined __s390x__
+        llgfr   %r4,%r4
+# endif /* !defined __s390x__ */
+        ltgr    %r4,%r4
+        je      .L_Z196_4
+.L_Z196_start2:
+        aghi    %r4,-1
+        srlg    %r5,%r4,8
+        ltgr    %r5,%r5
+        jne     .L_Z196_5
+.L_Z196_3:
+        exrl    %r4,.L_Z196_14
+.L_Z196_4:
+        br      %r14
+.L_Z196_5:
+        cgfi    %r5,262144          # Switch to mvcle for copies >64MB
+        jh      __memcpy_mvcle
+.L_Z196_2:
+        pfd     1,768(%r3)
+        pfd     2,768(%r1)
+        mvc     0(256,%r1),0(%r3)
+        aghi    %r5,-1
+        la      %r1,256(%r1)
+        la      %r3,256(%r3)
+        jne     .L_Z196_2
+        j       .L_Z196_3
+.L_Z196_14:
+        mvc     0(1,%r1),0(%r3)
+END(MEMCPY_Z196)
+#endif /* HAVE_MEMCPY_Z196 */
+
+#if HAVE_MEMMOVE_Z13
+ENTRY(MEMMOVE_Z13)
+        .machine "z13"
+        .machinemode "zarch_nohighgprs"
+# if !defined __s390x__
+        /* Note: The 31bit dst and src pointers are prefixed with zeroes. */
+        llgfr   %r4,%r4
+        llgfr   %r3,%r3
+        llgfr   %r2,%r2
+# endif /* !defined __s390x__ */
+        sgrk    %r0,%r2,%r3
+        clgijh  %r4,16,.L_MEMMOVE_Z13_LARGE
+        aghik   %r5,%r4,-1
+.L_MEMMOVE_Z13_SMALL:
+        jl      .L_MEMMOVE_Z13_END /* Jump away if len was zero. */
+        /* Store up to 16 bytes with vll/vstl which needs the index
+           instead of lengths. */
+        vll     %v16,%r5,0(%r3)
+        vstl    %v16,%r5,0(%r2)
+.L_MEMMOVE_Z13_END:
+        br      %r14
+.L_MEMMOVE_Z13_LARGE:
+        lgr     %r1,%r2 /* For memcpy: r1: Use as dest ;
+                           r2: Return dest */
+        /* The unsigned comparison (dst - src >= len) determines if we can
+           execute the forward case with memcpy. */
+#if ! HAVE_MEMCPY_Z196
+# error The z13 variant of memmove needs the z196 variant of memcpy!
+#endif
+        clgrjhe %r0,%r4,.L_Z196_start2
+        risbgn  %r5,%r4,4,128+63,60 /* r5 = r4 / 16 */
+        aghi    %r4,-16
+        clgijhe %r5,8,.L_MEMMOVE_Z13_LARGE_64B
+.L_MEMMOVE_Z13_LARGE_16B_LOOP:
+        /* Store at least 16 bytes with vl/vst. The number of 16byte blocks
+           is stored in r5. */
+        vl      %v16,0(%r4,%r3)
+        vst     %v16,0(%r4,%r2)
+        aghi    %r4,-16
+        brctg   %r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP
+        aghik   %r5,%r4,15
+        j       .L_MEMMOVE_Z13_SMALL
+.L_MEMMOVE_Z13_LARGE_64B:
+        /* Store at least 128 bytes with 4x vl/vst. The number of 64byte blocks
+           will be stored in r0. */
+        aghi    %r4,-48
+        srlg    %r0,%r5,2 /* r0 = r5 / 4
+                             => Number of 64byte blocks. */
+.L_MEMMOVE_Z13_LARGE_64B_LOOP:
+        vl      %v20,48(%r4,%r3)
+        vl      %v19,32(%r4,%r3)
+        vl      %v18,16(%r4,%r3)
+        vl      %v17,0(%r4,%r3)
+        vst     %v20,48(%r4,%r2)
+        vst     %v19,32(%r4,%r2)
+        vst     %v18,16(%r4,%r2)
+        vst     %v17,0(%r4,%r2)
+        aghi    %r4,-64
+        brctg   %r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP
+        aghi    %r4,48
+        /* Recalculate the number of 16byte blocks. */
+        risbg   %r5,%r5,62,128+63,0 /* r5 = r5 & 3
+                                       => Remaining 16byte blocks. */
+        jne     .L_MEMMOVE_Z13_LARGE_16B_LOOP
+        aghik   %r5,%r4,15
+        j       .L_MEMMOVE_Z13_SMALL
+END(MEMMOVE_Z13)
+#endif /* HAVE_MEMMOVE_Z13 */
+
+#if HAVE_MEMMOVE_ARCH13
+ENTRY(MEMMOVE_ARCH13)
+        .machine "arch13"
+        .machinemode "zarch_nohighgprs"
+# if ! defined __s390x__
+        /* Note: The 31bit dst and src pointers are prefixed with zeroes. */
+        llgfr   %r4,%r4
+        llgfr   %r3,%r3
+        llgfr   %r2,%r2
+# endif /* ! defined __s390x__ */
+        sgrk    %r5,%r2,%r3
+        aghik   %r0,%r4,-1 /* Both vstl and mvcrl need the highest index. */
+        clgijh  %r4,16,.L_MEMMOVE_ARCH13_LARGE
+.L_MEMMOVE_ARCH13_SMALL:
+        jl      .L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik). */
+        /* Store up to 16 bytes with vll/vstl (needs highest index). */
+        vll     %v16,%r0,0(%r3)
+        vstl    %v16,%r0,0(%r2)
+.L_MEMMOVE_ARCH13_END:
+        br      %r14
+.L_MEMMOVE_ARCH13_LARGE:
+        lgr     %r1,%r2 /* For memcpy: r1: Use as dest ; r2: Return dest */
+        /* The unsigned comparison (dst - src >= len) determines if we can
+           execute the forward case with memcpy. */
+#if ! HAVE_MEMCPY_Z196
+# error The arch13 variant of memmove needs the z196 variant of memcpy!
+#endif
+        /* Backward case. */
+        clgrjhe %r5,%r4,.L_Z196_start2
+        clgijh  %r0,255,.L_MEMMOVE_ARCH13_LARGER_256B
+        /* Move up to 256bytes with mvcrl (move right to left). */
+        mvcrl   0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
+        br      %r14
+.L_MEMMOVE_ARCH13_LARGER_256B:
+        /* First move the "remaining" block of up to 256 bytes at the end of
+           src/dst buffers. Then move blocks of 256bytes in a loop starting
+           with the block at the end.
+           (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
+           passed to mvcrl instructions are aligned, too) */
+        risbgn  %r5,%r0,8,128+63,56 /* r5 = r0 / 256 */
+        risbgn  %r0,%r0,56,128+63,0 /* r0 = r0 & 0xFF */
+        slgr    %r4,%r0
+        lay     %r1,-1(%r4,%r1)
+        lay     %r3,-1(%r4,%r3)
+        mvcrl   0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
+        lghi    %r0,255 /* Always copy 256 bytes in the loop below! */
+.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
+        aghi    %r1,-256
+        aghi    %r3,-256
+        mvcrl   0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
+        brctg   %r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
+        br      %r14
+END(MEMMOVE_ARCH13)
+#endif /* HAVE_MEMMOVE_ARCH13 */
+
+#if ! HAVE_MEMCPY_IFUNC
+/* If we don't use ifunc, define an alias for mem[p]cpy here.
+   Otherwise see sysdeps/s390/mem[p]cpy.c. */
+strong_alias (MEMCPY_DEFAULT, memcpy)
+strong_alias (MEMPCPY_DEFAULT, __mempcpy)
+weak_alias (__mempcpy, mempcpy)
+#endif
+
+#if ! HAVE_MEMMOVE_IFUNC
+/* If we don't use ifunc, define an alias for memmove here.
+   Otherwise see sysdeps/s390/memmove.c. */
+# if ! HAVE_MEMMOVE_C
+/* If the c variant is needed, then sysdeps/s390/memmove-c.c
+   defines memmove.
+   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it. */
+strong_alias (MEMMOVE_DEFAULT, memmove)
+# endif
+#endif
+
+#if defined SHARED && IS_IN (libc)
+/* Defines the internal symbols.
+   Compare to libc_hidden_[builtin_]def (mem[p]cpy) in string/mem[p]cpy.c. */
+strong_alias (MEMCPY_DEFAULT, __GI_memcpy)
+strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy)
+strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy)
+# if ! HAVE_MEMMOVE_C
+/* If the c variant is needed, then sysdeps/s390/memmove-c.c
+   defines the internal symbol.
+   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it. */
+strong_alias (MEMMOVE_DEFAULT, __GI_memmove)
+# endif
+#endif