diff options
Diffstat (limited to 'sysdeps/s390/multiarch')
116 files changed, 8994 insertions, 0 deletions
diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile new file mode 100644 index 0000000000..0805b07984 --- /dev/null +++ b/sysdeps/s390/multiarch/Makefile @@ -0,0 +1,44 @@ +ifeq ($(subdir),string) +sysdep_routines += strlen strlen-vx strlen-c \ + strnlen strnlen-vx strnlen-c \ + strcpy strcpy-vx \ + stpcpy stpcpy-vx stpcpy-c \ + strncpy strncpy-vx \ + stpncpy stpncpy-vx stpncpy-c \ + strcat strcat-vx strcat-c \ + strncat strncat-vx strncat-c \ + strcmp strcmp-vx \ + strncmp strncmp-vx strncmp-c \ + strchr strchr-vx strchr-c \ + strchrnul strchrnul-vx strchrnul-c \ + strrchr strrchr-vx strrchr-c \ + strspn strspn-vx strspn-c \ + strpbrk strpbrk-vx strpbrk-c \ + strcspn strcspn-vx strcspn-c \ + memchr memchr-vx \ + rawmemchr rawmemchr-vx rawmemchr-c \ + memccpy memccpy-vx memccpy-c \ + memrchr memrchr-vx memrchr-c +endif + +ifeq ($(subdir),wcsmbs) +sysdep_routines += wcslen wcslen-vx wcslen-c \ + wcsnlen wcsnlen-vx wcsnlen-c \ + wcscpy wcscpy-vx wcscpy-c \ + wcpcpy wcpcpy-vx wcpcpy-c \ + wcsncpy wcsncpy-vx wcsncpy-c \ + wcpncpy wcpncpy-vx wcpncpy-c \ + wcscat wcscat-vx wcscat-c \ + wcsncat wcsncat-vx wcsncat-c \ + wcscmp wcscmp-vx wcscmp-c \ + wcsncmp wcsncmp-vx wcsncmp-c \ + wcschr wcschr-vx wcschr-c \ + wcschrnul wcschrnul-vx wcschrnul-c \ + wcsrchr wcsrchr-vx wcsrchr-c \ + wcsspn wcsspn-vx wcsspn-c \ + wcspbrk wcspbrk-vx wcspbrk-c \ + wcscspn wcscspn-vx wcscspn-c \ + wmemchr wmemchr-vx wmemchr-c \ + wmemset wmemset-vx wmemset-c \ + wmemcmp wmemcmp-vx wmemcmp-c +endif diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..62a435983c --- /dev/null +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -0,0 +1,145 @@ +/* Enumerate available IFUNC implementations of a function. s390/s390x version. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ifunc-impl-list.h> +#include <ifunc-resolve.h> + +/* Maximum number of IFUNC implementations. */ +#define MAX_IFUNC 3 + +/* Fill ARRAY of MAX elements with IFUNC implementations for function + NAME supported on target machine and return the number of valid + entries. */ +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); + + size_t i = 0; + + /* Get hardware information. 
*/ + unsigned long int dl_hwcap = GLRO (dl_hwcap); + unsigned long long stfle_bits = 0ULL; + if ((dl_hwcap & HWCAP_S390_STFLE) + && (dl_hwcap & HWCAP_S390_ZARCH) + && (dl_hwcap & HWCAP_S390_HIGH_GPRS)) + { + S390_STORE_STFLE (stfle_bits); + } + + IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, + S390_IS_Z196 (stfle_bits), __memset_z196) + IFUNC_IMPL_ADD (array, i, memset, + S390_IS_Z10 (stfle_bits), __memset_z10) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_default)) + + IFUNC_IMPL (i, name, memcmp, + IFUNC_IMPL_ADD (array, i, memcmp, + S390_IS_Z196 (stfle_bits), __memcmp_z196) + IFUNC_IMPL_ADD (array, i, memcmp, + S390_IS_Z10 (stfle_bits), __memcmp_z10) + IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_default)) + +#ifdef SHARED + + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, + S390_IS_Z196 (stfle_bits), __memcpy_z196) + IFUNC_IMPL_ADD (array, i, memcpy, + S390_IS_Z10 (stfle_bits), __memcpy_z10) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_default)) + +#endif /* SHARED */ + +#ifdef HAVE_S390_VX_ASM_SUPPORT + +# define IFUNC_VX_IMPL(FUNC) \ + IFUNC_IMPL (i, name, FUNC, \ + IFUNC_IMPL_ADD (array, i, FUNC, dl_hwcap & HWCAP_S390_VX, \ + __##FUNC##_vx) \ + IFUNC_IMPL_ADD (array, i, FUNC, 1, __##FUNC##_c)) + + IFUNC_VX_IMPL (strlen); + IFUNC_VX_IMPL (wcslen); + + IFUNC_VX_IMPL (strnlen); + IFUNC_VX_IMPL (wcsnlen); + + IFUNC_VX_IMPL (strcpy); + IFUNC_VX_IMPL (wcscpy); + + IFUNC_VX_IMPL (stpcpy); + IFUNC_VX_IMPL (wcpcpy); + + IFUNC_VX_IMPL (strncpy); + IFUNC_VX_IMPL (wcsncpy); + + IFUNC_VX_IMPL (stpncpy); + IFUNC_VX_IMPL (wcpncpy); + + IFUNC_VX_IMPL (strcat); + IFUNC_VX_IMPL (wcscat); + + IFUNC_VX_IMPL (strncat); + IFUNC_VX_IMPL (wcsncat); + + IFUNC_VX_IMPL (strcmp); + IFUNC_VX_IMPL (wcscmp); + + IFUNC_VX_IMPL (strncmp); + IFUNC_VX_IMPL (wcsncmp); + + IFUNC_VX_IMPL (strchr); + IFUNC_VX_IMPL (wcschr); + + IFUNC_VX_IMPL (strchrnul); + IFUNC_VX_IMPL (wcschrnul); + + IFUNC_VX_IMPL (strrchr); + IFUNC_VX_IMPL (wcsrchr); + + 
IFUNC_VX_IMPL (strspn); + IFUNC_VX_IMPL (wcsspn); + + IFUNC_VX_IMPL (strpbrk); + IFUNC_VX_IMPL (wcspbrk); + + IFUNC_VX_IMPL (strcspn); + IFUNC_VX_IMPL (wcscspn); + + IFUNC_VX_IMPL (memchr); + IFUNC_VX_IMPL (wmemchr); + IFUNC_VX_IMPL (rawmemchr); + + IFUNC_VX_IMPL (memccpy); + + IFUNC_VX_IMPL (wmemset); + + IFUNC_VX_IMPL (wmemcmp); + + IFUNC_VX_IMPL (memrchr); + +#endif /* HAVE_S390_VX_ASM_SUPPORT */ + + return i; +} diff --git a/sysdeps/s390/multiarch/ifunc-resolve.h b/sysdeps/s390/multiarch/ifunc-resolve.h new file mode 100644 index 0000000000..744a0d8d6d --- /dev/null +++ b/sysdeps/s390/multiarch/ifunc-resolve.h @@ -0,0 +1,94 @@ +/* IFUNC resolver function for CPU specific functions. + 32/64 bit S/390 version. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <unistd.h> +#include <dl-procinfo.h> + +#define S390_STFLE_BITS_Z10 34 /* General instructions extension */ +#define S390_STFLE_BITS_Z196 45 /* Distinct operands, pop ... */ + +#define S390_IS_Z196(STFLE_BITS) \ + ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z196))) != 0) + +#define S390_IS_Z10(STFLE_BITS) \ + ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z10))) != 0) + +#define S390_STORE_STFLE(STFLE_BITS) \ + /* We want just 1 double word to be returned. 
*/ \ + register unsigned long reg0 __asm__("0") = 0; \ + \ + __asm__ __volatile__(".machine push" "\n\t" \ + ".machine \"z9-109\"" "\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + "stfle %0" "\n\t" \ + ".machine pop" "\n" \ + : "=QS" (STFLE_BITS), "+d" (reg0) \ + : : "cc"); + +#define s390_libc_ifunc(FUNC) \ + __asm__ (".globl " #FUNC "\n\t" \ + ".type " #FUNC ",@gnu_indirect_function\n\t" \ + ".set " #FUNC ",__resolve_" #FUNC "\n\t" \ + ".globl __GI_" #FUNC "\n\t" \ + ".set __GI_" #FUNC "," #FUNC "\n"); \ + \ + /* Make the declarations of the optimized functions hidden in order + to prevent GOT slots being generated for them. */ \ + extern void *__##FUNC##_z196 attribute_hidden; \ + extern void *__##FUNC##_z10 attribute_hidden; \ + extern void *__##FUNC##_default attribute_hidden; \ + \ + void *__resolve_##FUNC (unsigned long int dl_hwcap) \ + { \ + if ((dl_hwcap & HWCAP_S390_STFLE) \ + && (dl_hwcap & HWCAP_S390_ZARCH) \ + && (dl_hwcap & HWCAP_S390_HIGH_GPRS)) \ + { \ + unsigned long long stfle_bits; \ + S390_STORE_STFLE (stfle_bits); \ + \ + if (S390_IS_Z196 (stfle_bits)) \ + return &__##FUNC##_z196; \ + else if (S390_IS_Z10 (stfle_bits)) \ + return &__##FUNC##_z10; \ + else \ + return &__##FUNC##_default; \ + } \ + else \ + return &__##FUNC##_default; \ + } + +#define s390_vx_libc_ifunc(FUNC) \ + s390_vx_libc_ifunc2(FUNC, FUNC) + +#define s390_vx_libc_ifunc2(RESOLVERFUNC, FUNC) \ + /* Make the declarations of the optimized functions hidden in order + to prevent GOT slots being generated for them. 
*/ \ + extern __typeof (FUNC) RESOLVERFUNC##_vx attribute_hidden; \ + extern __typeof (FUNC) RESOLVERFUNC##_c attribute_hidden; \ + extern void *__resolve_##RESOLVERFUNC (unsigned long int) __asm__ (#FUNC); \ + \ + void *__resolve_##RESOLVERFUNC (unsigned long int dl_hwcap) \ + { \ + if (dl_hwcap & HWCAP_S390_VX) \ + return &RESOLVERFUNC##_vx; \ + else \ + return &RESOLVERFUNC##_c; \ + } \ + __asm__ (".type " #FUNC ", %gnu_indirect_function"); diff --git a/sysdeps/s390/multiarch/memccpy-c.c b/sysdeps/s390/multiarch/memccpy-c.c new file mode 100644 index 0000000000..9309bd108b --- /dev/null +++ b/sysdeps/s390/multiarch/memccpy-c.c @@ -0,0 +1,25 @@ +/* Default memccpy implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define MEMCCPY __memccpy_c + +# include <string.h> +extern __typeof (__memccpy) __memccpy_c; +# include <string/memccpy.c> +#endif diff --git a/sysdeps/s390/multiarch/memccpy-vx.S b/sysdeps/s390/multiarch/memccpy-vx.S new file mode 100644 index 0000000000..2db9b2cef4 --- /dev/null +++ b/sysdeps/s390/multiarch/memccpy-vx.S @@ -0,0 +1,156 @@ +/* Vector optimized 32/64 bit S/390 version of memccpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* void *memccpy (void * dest, const void *src, int c, size_t n) + Copies no more than n bytes from src to dest, + stopping when the character c is found + and returns pointer next to c in dest or null if c not found. + + Register usage: + -r0=tmp + -r1=tmp + -r2=dest + -r3=src + -r4=c + -r5=n + -r6=current_len + -v16=part of s + -v17=index of found c + -v18=c replicated + -v19=part #2 of s + -v31=save area for r6 +*/ +ENTRY(__memccpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r5,%r5 +# endif /* !defined __s390x__ */ + + vlvgp %v31,%r6,%r7 /* Save registers. */ + clgije %r5,0,.Lnf_end /* If len == 0 then exit. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r0,%r0 /* Convert 32bit to 64bit. */ + + vlvgb %v18,%r4,0 /* Generate vector which elements are all c. + if c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + lghi %r6,0 /* current_len = 0. */ + + clgrjle %r5,%r0,.Lremaining_v16 /* If maxlen <= loaded-bytes + -> Process remaining. */ + + vfeebs %v17,%v16,%v18 /* Find c. 
*/ + vlgvb %r1,%v17,7 /* Load byte index of c. */ + clgrjl %r1,%r0,.Lfound_v16 /* Found c is within loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r3,60,128+63,0 /* %r1 = bits 60-63 of %r3, i.e. src 'and' 15. */ + lghi %r6,15 /* current_len = 15. */ + slr %r6,%r1 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r6,0(%r2) /* Store processed bytes. */ + ahi %r6,1 + +.Lpreloop1: + /* Now we are 16byte aligned, so we can load + a full vreg without page fault. */ + vl %v16,0(%r6,%r3) /* Load s. */ + clgijl %r5,17,.Lremaining_v16 /* If n <= 16, + process remaining bytes. */ + lgr %r7,%r5 + slgfi %r7,16 /* border_len = n - 16. */ + j .Lloop1 + +.Lloop2: + vl %v16,16(%r6,%r3) + vst %v19,0(%r6,%r2) + aghi %r6,16 + +.Lloop1: + clgrjhe %r6,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfeebs %v17,%v16,%v18 /* Find c. */ + jl .Lfound_v16 /* Jump away if c was found. */ + vl %v19,16(%r6,%r3) /* Load next s part. */ + vst %v16,0(%r6,%r2) /* Store previous part without c. */ + aghi %r6,16 + + clgrjhe %r6,%r7,.Lremaining_v19 + vfeebs %v17,%v19,%v18 + jl .Lfound_v19 + vl %v16,16(%r6,%r3) + vst %v19,0(%r6,%r2) + aghi %r6,16 + + clgrjhe %r6,%r7,.Lremaining_v16 + vfeebs %v17,%v16,%v18 + jl .Lfound_v16 + vl %v19,16(%r6,%r3) + vst %v16,0(%r6,%r2) + aghi %r6,16 + + clgrjhe %r6,%r7,.Lremaining_v19 + vfeebs %v17,%v19,%v18 + jo .Lloop2 + +.Lfound_v19: + vlr %v16,%v19 +.Lfound_v16: + /* v16 contains c. Store the bytes up to and including c. current_len has not + reached border, thus checking for maxlen is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of c. */ + la %r2,0(%r6,%r2) /* vstl has no support for index-register. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy bytes including c. */ + la %r2,1(%r1,%r2) /* Return pointer next to c in dest. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + br %r14 + +.Lremaining_v19: + vlr %v16,%v19 +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Check and store remaining bytes. 
*/ + vfeebs %v17,%v16,%v18 + slgrk %r7,%r5,%r6 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r6,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load index of c or 16 if not found. */ + /* c in remaining bytes? -> Jump away (c-index <= max-index) */ + clrjle %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes. */ + +.Lnf_end: + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lghi %r2,0 /* Return null. */ + br %r14 +END(__memccpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/memccpy.c b/sysdeps/s390/multiarch/memccpy.c new file mode 100644 index 0000000000..0a0936e340 --- /dev/null +++ b/sysdeps/s390/multiarch/memccpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of memccpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__memccpy) +weak_alias (__memccpy, memccpy) + +#else +# include <string/memccpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/memchr-vx.S b/sysdeps/s390/multiarch/memchr-vx.S new file mode 100644 index 0000000000..875eee2b43 --- /dev/null +++ b/sysdeps/s390/multiarch/memchr-vx.S @@ -0,0 +1,159 @@ +/* Vector optimized 32/64 bit S/390 version of memchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* void *memchr (const void *s, int c, size_t n) + Scans memory for character c + and returns pointer to first c. + + Register usage: + -r0=tmp + -r1=tmp + -r2=s + -r3=c + -r4=n + -r5=current_len + -v16=part of s + -v17=index of found c + -v18=c replicated +*/ +ENTRY(__memchr_vx) + + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgije %r4,0,.Lnf_end /* If len == 0 then exit. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. 
*/ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r0,%r0 /* Convert 32bit to 64bit. */ + + vlvgb %v18,%r3,0 /* Generate vector which elements are all c. + if c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + lghi %r5,16 /* current_len = 16. */ + + clgrjhe %r0,%r4,.Llastcmp /* If (bytes to boundary) >= n, + jump to lastcmp. */ + + vfeebs %v17,%v16,%v18 /* Find c. */ + vlgvb %r1,%v17,7 /* Load byte index of c. */ + clgrjl %r1,%r0,.Lfound2 /* Found c is within loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2, i.e. s 'and' 15. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + lgr %r0,%r5 /* If %r5 + 64 < n? -> loop64. */ + aghi %r0,64 + clgrjl %r0,%r4,.Lloop64 +.Llt64: + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp /* Do last compare if curr-len >= n. */ + vfeebs %v17,%v16,%v18 /* Find c. */ + jl .Lfound /* Jump away if c was found. */ + + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfeebs %v17,%v16,%v18 + jl .Lfound + + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfeebs %v17,%v16,%v18 + jl .Lfound + + vl %v16,0(%r5,%r2) + aghi %r5,16 + +.Llastcmp: + /* Use comparison result only if located within first n characters. + %r5: current_len; + %r4: n; + (current_len - n): [0...16[ + first ignored match index: vr-width - (current_len - n) ]0...16] + */ + vfeebs %v17,%v16,%v18 /* Find c. */ + slgrk %r4,%r5,%r4 /* %r4 = current_len - n. */ + lghi %r0,16 /* Register width = 16. */ + vlgvb %r1,%v17,7 /* Extract found index or 16 if c was not found. */ + slr %r0,%r4 /* %r0 = first ignored match index. */ + clrjl %r1,%r0,.Lfound2 /* Jump away if the match is within the first n bytes. */ + /* c not found within n-bytes. */ +.Lnf_end: + lghi %r2,0 /* Return null. */ + br %r14 + +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound0: + aghi %r5,16 +.Lfound: + vlgvb %r1,%v17,7 /* Load byte index of c. 
*/ +.Lfound2: + slgfi %r5,16 /* current_len -= 16. */ + algr %r5,%r1 /* Byte index of c is added to current_len. */ + la %r2,0(%r5,%r2) /* Return pointer to c. */ + br %r14 + + +.Lloop64: + vl %v16,0(%r5,%r2) + vfeebs %v17,%v16,%v18 /* Find c. */ + jl .Lfound0 /* Jump away if c was found. */ + vl %v16,16(%r5,%r2) + vfeebs %v17,%v16,%v18 + jl .Lfound16 + vl %v16,32(%r5,%r2) + vfeebs %v17,%v16,%v18 + jl .Lfound32 + vl %v16,48(%r5,%r2) + vfeebs %v17,%v16,%v18 + jl .Lfound48 + + aghi %r5,64 + lgr %r0,%r5 /* If %r5 + 64 < n? -> loop64. */ + aghi %r0,64 + clgrjl %r0,%r4,.Lloop64 + + j .Llt64 +END(__memchr_vx) + +# define memchr __memchr_c +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) strong_alias(__memchr_c, __GI_memchr) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ + +#include <memchr.S> diff --git a/sysdeps/s390/multiarch/memchr.c b/sysdeps/s390/multiarch/memchr.c new file mode 100644 index 0000000000..f80de1cc1f --- /dev/null +++ b/sysdeps/s390/multiarch/memchr.c @@ -0,0 +1,24 @@ +/* Multiple versions of memchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__memchr, memchr) +#endif diff --git a/sysdeps/s390/multiarch/memrchr-c.c b/sysdeps/s390/multiarch/memrchr-c.c new file mode 100644 index 0000000000..af54097376 --- /dev/null +++ b/sysdeps/s390/multiarch/memrchr-c.c @@ -0,0 +1,25 @@ +/* Default memrchr implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define MEMRCHR __memrchr_c + +# include <string.h> +extern __typeof (__memrchr) __memrchr_c; +# include <string/memrchr.c> +#endif diff --git a/sysdeps/s390/multiarch/memrchr-vx.S b/sysdeps/s390/multiarch/memrchr-vx.S new file mode 100644 index 0000000000..fdb8c30ebe --- /dev/null +++ b/sysdeps/s390/multiarch/memrchr-vx.S @@ -0,0 +1,160 @@ +/* Vector optimized 32/64 bit S/390 version of memrchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* void *memrchr (const void *s, int c, size_t n) + Scans memory for character c backwards + and returns pointer to first c. + + Register usage: + -r0=tmp + -r1=tmp + -r2=s + -r3=c + -r4=n + -r5=s in loop + + -v16=part of s + -v17=index of found c + -v18=c replicated + -v20=permute pattern +*/ +ENTRY(__memrchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + clgije %r4,0,.Lnot_found + + vlvgb %v18,%r3,0 /* Generate vector which elements are all c. + If c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + llcr %r3,%r3 /* char c_char = (char) c. */ + + /* check byte n - 1. */ + llc %r0,-1(%r4,%r2) + slgfi %r4,1 + clrje %r0,%r3,.Lfound_end + jh .Lnot_found /* Return NULL if n is now 0. */ + + larl %r1,.Lpermute_mask /* Load permute mask. */ + vl %v20,0(%r1) + + /* check byte n - 2. */ + llc %r0,-1(%r4,%r2) + slgfi %r4,1 + clrje %r0,%r3,.Lfound_end + jh .Lnot_found /* Return NULL if n is now 0. */ + + clgijhe %r4,64,.Lloop64 /* If n >= 64 -> loop64. */ + +.Llt64: + /* Process n < 64 bytes. */ + clgijl %r4,16,.Llt16 /* Jump away if n < 16. 
*/ + aghi %r4,-16 + vl %v16,0(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound0 + clgijl %r4,16,.Llt16 + aghi %r4,-16 + vl %v16,0(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound0 + clgijl %r4,16,.Llt16 + aghi %r4,-16 + vl %v16,0(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound0 +.Llt16: + clgfi %r4,0 /* if remaining bytes == 0, return NULL. */ + locghie %r2,0 + ber %r14 + + aghi %r4,-1 /* vll needs highest index. */ + vll %v16,%r4,0(%r2) /* Load remaining bytes. */ + + /* Right-shift of v16 to mask bytes after highest index. */ + lhi %r0,15 + slr %r0,%r4 /* Compute byte count for vector shift right. */ + sll %r0,3 /* Convert to bit count. */ + vlvgb %v17,%r0,7 + vsrlb %v16,%v16,%v17 /* Vector shift right by byte by number of bytes + specified in bits 1-4 of byte 7 in v17. */ + j .Lfound_permute + +.Lfound48: + aghi %r4,16 +.Lfound32: + aghi %r4,16 +.Lfound16: + aghi %r4,16 +.Lfound0: + la %r2,0(%r4,%r2) /* Set pointer to start of v16. */ + lghi %r4,15 /* Set highest index in v16 to last index. */ +.Lfound_permute: + /* Search for a c in v16 in reversed byte order. v16 contains %r4 + 1 + bytes. If v16 was not fully loaded, the bytes are already + right shifted, so that the bytes in v16 can simply be reversed. */ + vperm %v16,%v16,%v16,%v20 /* Permute v16 to reversed order. */ + vfeeb %v16,%v16,%v18 /* Find c in reversed v16. */ + vlgvb %r1,%v16,7 /* Index of c or 16 if not found. */ + + /* Return NULL if there is no c in loaded bytes. */ + clrjh %r1,%r4,.Lnot_found + + slgr %r4,%r1 +.Lfound_end: + la %r2,0(%r4,%r2) /* Return pointer to c. */ + br %r14 + +.Lnot_found: + lghi %r2,0 + br %r14 + +.Lpermute_mask: + .byte 0x0F,0x0E,0x0D,0x0C,0x0B,0x0A,0x09,0x08 + .byte 0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00 + +.Lloop64: + aghi %r4,-64 + vl %v16,48(%r4,%r2) /* Load 16bytes of memory area. */ + vfeebs %v17,%v16,%v18 /* Find c. */ + jno .Lfound48 /* Jump away if c was found. 
*/ + vl %v16,32(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound32 + vl %v16,16(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound16 + vl %v16,0(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound0 + + clgijhe %r4,64,.Lloop64 /* If n >= 64 -> loop64. */ + j .Llt64 +END(__memrchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/memrchr.c b/sysdeps/s390/multiarch/memrchr.c new file mode 100644 index 0000000000..7681890d01 --- /dev/null +++ b/sysdeps/s390/multiarch/memrchr.c @@ -0,0 +1,28 @@ +/* Multiple versions of memrchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__memrchr) +weak_alias (__memrchr, memrchr) + +#else +# include <string/memrchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/rawmemchr-c.c b/sysdeps/s390/multiarch/rawmemchr-c.c new file mode 100644 index 0000000000..20dcdb5a28 --- /dev/null +++ b/sysdeps/s390/multiarch/rawmemchr-c.c @@ -0,0 +1,34 @@ +/* Default rawmemchr implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> + +# define RAWMEMCHR __rawmemchr_c +# undef weak_alias +# define weak_alias(a, b) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__rawmemchr_c, __GI___rawmemchr, __rawmemchr_c); +# endif /* SHARED */ + +extern __typeof (rawmemchr) __rawmemchr_c attribute_hidden; + +# include <string/rawmemchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/rawmemchr-vx.S b/sysdeps/s390/multiarch/rawmemchr-vx.S new file mode 100644 index 0000000000..5af2419e98 --- /dev/null +++ b/sysdeps/s390/multiarch/rawmemchr-vx.S @@ -0,0 +1,92 @@ +/* Vector optimized 32/64 bit S/390 version of rawmemchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* void *rawmemchr (const void *s, int c) + Scans memory for character c + and returns pointer to first c. + + Register usage: + -r1=tmp + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v17=index of found c + -v18=c replicated +*/ +ENTRY(__rawmemchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vlvgb %v18,%r3,0 /* Generate vector which elements are all c. + If c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + vfeeb %v17,%v16,%v18 /* Vector find element equal. */ + vlgvb %r5,%v17,7 /* Load byte index of c or 16 if not found. */ + clrjl %r5,%r1,.Lend_found /* If found c is in loaded bytes, end. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2, i.e. s 'and' 15. */ + lghi %r5,16 + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find c in a 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeebs %v17,%v16,%v18 /* Vector find element equal. */ + jno .Lcharacter /* Jump away if element found. */ + vl %v16,16(%r5,%r2) + vfeebs %v17,%v16,%v18 + jno .Lcharacter16 + vl %v16,32(%r5,%r2) + vfeebs %v17,%v16,%v18 + jno .Lcharacter32 + vl %v16,48(%r5,%r2) + vfeebs %v17,%v16,%v18 + jno .Lcharacter48 + + aghi %r5,64 + j .Lloop /* No character found -> loop. */ + + /* Found character. */ +.Lcharacter48: + aghi %r5,16 +.Lcharacter32: + aghi %r5,16 +.Lcharacter16: + aghi %r5,16 +.Lcharacter: + vlgvb %r1,%v17,7 /* Load byte index of character. */ + algr %r5,%r1 +.Lend_found: + la %r2,0(%r5,%r2) /* Return pointer to character. 
*/ + br %r14 +END(__rawmemchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/rawmemchr.c b/sysdeps/s390/multiarch/rawmemchr.c new file mode 100644 index 0000000000..7186ccd9d4 --- /dev/null +++ b/sysdeps/s390/multiarch/rawmemchr.c @@ -0,0 +1,28 @@ +/* Multiple versions of rawmemchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) + +#else +# include <string/rawmemchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/stpcpy-c.c b/sysdeps/s390/multiarch/stpcpy-c.c new file mode 100644 index 0000000000..85a8a93c7f --- /dev/null +++ b/sysdeps/s390/multiarch/stpcpy-c.c @@ -0,0 +1,35 @@ +/* Default stpcpy implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + /* STPCPY is presumably the function name used by string/stpcpy.c (not shown here); weak_alias is redefined to expand to nothing for that include. */ +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STPCPY __stpcpy_c +# undef weak_alias +# define weak_alias(a, b) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__stpcpy_c, __GI___stpcpy, __stpcpy_c); +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + strong_alias (__stpcpy_c, __stpcpy_c_1); \ + __hidden_ver1 (__stpcpy_c_1, __GI_stpcpy, __stpcpy_c_1); +# endif /* SHARED */ + /* NOTE(review): in SHARED builds the two overrides above appear to bind __GI___stpcpy and __GI_stpcpy to __stpcpy_c, the latter via the strong alias __stpcpy_c_1 - confirm against __hidden_ver1. */ + +# include <string/stpcpy.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/stpcpy-vx.S b/sysdeps/s390/multiarch/stpcpy-vx.S new file mode 100644 index 0000000000..da9f2760de --- /dev/null +++ b/sysdeps/s390/multiarch/stpcpy-vx.S @@ -0,0 +1,104 @@ +/* Vector optimized 32/64 bit S/390 version of stpcpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * stpcpy (char *dest, const char *src) + Copy string src to dest returning a pointer to the zero written to dest. + + Register usage: + -r1=tmp + -r2=dest and return value + -r3=src + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__stpcpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r3),6 /* Load src until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lfound_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align src to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r3) /* Load src. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of src. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst.
*/ + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 /* Skip the 64 bytes copied above. */ + j .Lloop /* No zero found -> loop. */ + +.Lfound_v16_32: + aghi %r5,32 +.Lfound_v16_0: + la %r3,0(%r5,%r2) /* %r3 = dest + current_len; src pointer is not used again. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + vstl %v16,%r1,0(%r3) /* Copy characters including zero. */ + la %r2,0(%r1,%r3) /* Return pointer to zero. */ + br %r14 + +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + la %r3,16(%r5,%r2) /* %r3 = dest + current_len + 16; src pointer is not used again. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + vstl %v18,%r1,0(%r3) /* Copy characters including zero. */ + la %r2,0(%r1,%r3) /* Return pointer to zero. */ + br %r14 + +.Lfound_align: + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + la %r2,0(%r5,%r2) /* Return pointer to zero. */ + br %r14 +END(__stpcpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/stpcpy.c b/sysdeps/s390/multiarch/stpcpy.c new file mode 100644 index 0000000000..dcde01278b --- /dev/null +++ b/sysdeps/s390/multiarch/stpcpy.c @@ -0,0 +1,30 @@ +/* Multiple versions of stpcpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define NO_MEMPCPY_STPCPY_REDIRECT +# include <string.h> +# include <ifunc-resolve.h> + /* NOTE(review): s390_vx_libc_ifunc comes from ifunc-resolve.h (not shown); presumably it resolves __stpcpy to the _vx or _c variant - confirm there. */ +s390_vx_libc_ifunc (__stpcpy) +weak_alias (__stpcpy, stpcpy) +libc_hidden_builtin_def (stpcpy) + +#else +# include <string/stpcpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/stpncpy-c.c b/sysdeps/s390/multiarch/stpncpy-c.c new file mode 100644 index 0000000000..32b61a8e3e --- /dev/null +++ b/sysdeps/s390/multiarch/stpncpy-c.c @@ -0,0 +1,28 @@ +/* Default stpncpy implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + /* STPNCPY is presumably the function name used by string/stpncpy.c (not shown here). */ +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STPNCPY __stpncpy_c +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__stpncpy_c, __GI___stpncpy, __stpncpy_c); +# endif /* SHARED */ + +# include <string/stpncpy.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/stpncpy-vx.S b/sysdeps/s390/multiarch/stpncpy-vx.S new file mode 100644 index 0000000000..2e536d9e0f --- /dev/null +++ b/sysdeps/s390/multiarch/stpncpy-vx.S @@ -0,0 +1,200 @@ +/* Vector optimized 32/64 bit S/390 version of stpncpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * stpncpy (char *dest, const char *src, size_t n) + Copies at most n characters of string src to dest + returning a pointer to the first zero written to dest, or dest+n + if src has no zero within its first n characters.
+ If a zero is found, the rest of dest up to n is filled with zeros. + Register usage: + -%r0 = return value + -%r1 = zero byte index + -%r2 = curr dst pointer + -%r3 = curr src pointer + -%r4 = n + -%r5 = current_len + -%r6 = loaded bytes + -%r7 = border, tmp +*/ +ENTRY(__stpncpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. */ + + la %r0,0(%r4,%r2) /* Save destination pointer + n for return. */ + vlvgp %v31,%r6,%r7 /* Save %r6 and %r7 in %v31; restored at .Lend. */ + + vlbb %v16,0(%r3),6 /* Load src until next 4k-byte boundary. */ + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + clgrjle %r4,%r6,.Lremaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r1,%r6,.Lfound_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align src to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Now we are 16byte aligned, so we can load a full vreg + without page fault. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load src. */ + clgijl %r4,17,.Lremaining_v16 /* If n <= 16, process remaining + bytes. */ +.Llt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + clgrjhe %r5,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search.
*/ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of src. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v18 + vfenezbs %v17,%v18,%v18 + je .Lfound_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v16 + vfenezbs %v17,%v16,%v16 + je .Lfound_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lremaining_v18: + vlr %v16,%v18 /* Move the pending part to %v16 for the common tail. */ +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + /* Zero in remaining bytes? -> jump away (zero-index <= max-index). */ + clrjle %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes without null + termination! */ +.Lend: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved return pointer (dest+n or pointer to zero). */ + br %r14 + +.Lfound_v16_32: + aghi %r5,32 + j .Lfound_v16 +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + aghi %r5,16 +.Lfound_v18: + vlr %v16,%v18 +.Lfound_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no support for index-register. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + /* Fill remaining bytes with zero - remaining count always > 0. */ + algr %r5,%r1 /* Remaining bytes (=%r4) = ... */ + slgr %r4,%r5 /* = maxlen - (currlen + zero_index + 1) */ + la %r2,0(%r1,%r2) /* Pointer to zero. start filling beyond.
*/ + lgr %r0,%r2 /* Save return-pointer to found zero. */ + clgije %r4,1,.Lend /* Skip zero-filling, if found zero is last + possible character. + (1 is subtracted from r4 below!). */ + aghi %r4,-2 /* mvc with exrl needs count - 1. + (additional -1, see remaining bytes above) */ + srlg %r6,%r4,8 /* Split into 256 byte blocks. */ + ltgr %r6,%r6 + je .Lzero_lt256 +.Lzero_loop256: + mvc 1(256,%r2),0(%r2) /* Fill 256 zeros at once. */ + la %r2,256(%r2) + brctg %r6,.Lzero_loop256 /* Loop until all blocks are processed. */ +.Lzero_lt256: + exrl %r4,.Lmvc_lt256 /* Fill the tail that is shorter than 256 bytes. */ + j .Lend +.Lmvc_lt256: + mvc 1(1,%r2),0(%r2) /* Target of the exrl above only; the preceding j .Lend prevents fall-through. */ + +.Lloop64: + vl %v16,0(%r5,%r3) + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of src. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load src. */ + j .Llt64 +END(__stpncpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/stpncpy.c b/sysdeps/s390/multiarch/stpncpy.c new file mode 100644 index 0000000000..f5335b42ac --- /dev/null +++ b/sysdeps/s390/multiarch/stpncpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of stpncpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + /* NOTE(review): s390_vx_libc_ifunc comes from ifunc-resolve.h (not shown); presumably it resolves __stpncpy to the _vx or _c variant - confirm there. */ +s390_vx_libc_ifunc (__stpncpy) +weak_alias (__stpncpy, stpncpy) + +#else +# include <string/stpncpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strcat-c.c b/sysdeps/s390/multiarch/strcat-c.c new file mode 100644 index 0000000000..ae7cc2149d --- /dev/null +++ b/sysdeps/s390/multiarch/strcat-c.c @@ -0,0 +1,28 @@ +/* Default strcat implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + /* STRCAT is presumably the function name used by string/strcat.c (not shown here). */ +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRCAT __strcat_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcat_c, __GI_strcat, __strcat_c); +# endif /* SHARED */ + +# include <string/strcat.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strcat-vx.S b/sysdeps/s390/multiarch/strcat-vx.S new file mode 100644 index 0000000000..e77fc2aa2f --- /dev/null +++ b/sysdeps/s390/multiarch/strcat-vx.S @@ -0,0 +1,161 @@ +/* Vector optimized 32/64 bit S/390 version of strcat. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * strcat (char *dest, const char *src) + Concatenate two strings: find the zero in dest, then copy src there. + Returns the original dest pointer. + Register usage: + -r0=saved dest pointer for return + -r1=tmp + -r2=dest + -r3=src + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__strcat_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + lgr %r0,%r2 /* Save destination pointer for return.
*/ + + /* STRLEN + r1 = loaded bytes (tmp) + r4 = zero byte index (tmp) + r2 = dst + */ + vlbb %v16,0(%r2),6 /* Load dst until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Llen_end /* Found zero within loaded bytes, end. */ + + /* Align dst to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Llen_loop: + vl %v16,0(%r5,%r2) /* Load dst. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Llen_found /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found16 + vl %v16,32(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found32 + vl %v16,48(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found48 + + aghi %r5,64 + j .Llen_loop /* No zero -> loop. */ + +.Llen_found48: + aghi %r5,16 +.Llen_found32: + aghi %r5,16 +.Llen_found16: + aghi %r5,16 +.Llen_found: + vlgvb %r4,%v16,7 /* Load byte index of zero. */ + algr %r5,%r4 /* current_len = offset of the zero in dst. */ + +.Llen_end: + /* STRCPY + %r1 = loaded bytes (tmp) + %r4 = zero byte index (tmp) + %r3 = curr src pointer + %r2 = curr dst pointer + */ + la %r2,0(%r5,%r2) /* strcpy at end of dst-string. */ + + vlbb %v16,0(%r3),6 /* Load src until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lcpy_found_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align src to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15.
*/ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. */ +.Lcpy_loop: + vl %v16,0(%r5,%r3) /* Load src. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3)/* Load next part of src. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lcpy_found_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lcpy_loop /* No zero -> loop. */ + +.Lcpy_found_v16_32: + aghi %r5,32 +.Lcpy_found_v16_0: + la %r4,0(%r5,%r2) /* %r4 = dst + current_len (vstl has no index register). */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + vstl %v16,%r1,0(%r4) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_v18_48: + aghi %r5,32 +.Lcpy_found_v18_16: + la %r4,16(%r5,%r2) /* %r4 = dst + current_len + 16. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + vstl %v18,%r1,0(%r4) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_align: + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 +END(__strcat_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strcat.c b/sysdeps/s390/multiarch/strcat.c new file mode 100644 index 0000000000..c3b5e1c9d6 --- /dev/null +++ b/sysdeps/s390/multiarch/strcat.c @@ -0,0 +1,27 @@ +/* Multiple versions of strcat. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + /* NOTE(review): s390_vx_libc_ifunc2 comes from ifunc-resolve.h (not shown); presumably it defines strcat as an ifunc choosing between __strcat_vx and __strcat_c - confirm there. */ +s390_vx_libc_ifunc2 (__strcat, strcat) + +#else +# include <string/strcat.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strchr-c.c b/sysdeps/s390/multiarch/strchr-c.c new file mode 100644 index 0000000000..2250dbbf5e --- /dev/null +++ b/sysdeps/s390/multiarch/strchr-c.c @@ -0,0 +1,29 @@ +/* Default strchr implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + /* STRCHR is presumably the function name used by string/strchr.c (not shown here). */ +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRCHR __strchr_c +# undef weak_alias /* Left undefined on purpose, unlike the sibling -c.c files. NOTE(review): presumably string/strchr.c only emits its alias when weak_alias is defined - confirm. */ +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strchr_c, __GI_strchr, __strchr_c); +# endif /* SHARED */ + +# include <string/strchr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strchr-vx.S b/sysdeps/s390/multiarch/strchr-vx.S new file mode 100644 index 0000000000..4fe5dc0293 --- /dev/null +++ b/sysdeps/s390/multiarch/strchr-vx.S @@ -0,0 +1,100 @@ +/* Vector optimized 32/64 bit S/390 version of strchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strchr (const char *s, int c) + Locate character in string. + Returns a pointer to the first c, or null if the zero comes first and c is not zero. + Register usage: + -r1=tmp + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s, then index of c/zero + -v17=unused + -v18=replicated c +*/ +ENTRY(__strchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + lghi %r5,0 /* current_len = 0.
*/ + + vlvgb %v18,%r3,0 /* Generate vector which elements are all c. + If c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + vfeezbs %v16,%v16,%v18 /* Find element equal with zero search. */ + vlgvb %r4,%v16,7 /* Load byte index of character or zero. */ + clrjl %r4,%r1,.Lfound /* Return if c/zero is in loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Find c/zero in 16 byte aligned loop */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezbs %v16,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + +.Lfound48: + la %r5,16(%r5) /* Use la since aghi would clobber cc. */ +.Lfound32: + la %r5,16(%r5) +.Lfound16: + la %r5,16(%r5) +.Lfound: + je .Lzero /* Found zero, but no c before that zero. */ + +.Lcharacter: + vlgvb %r4,%v16,7 /* Load byte index of character. */ + algr %r5,%r4 /* current_len += index within the block. */ + la %r2,0(%r5,%r2) /* Return pointer to character. */ + br %r14 + +.Lzero: + llgcr %r3,%r3 /* char c_char = (char) c. */ + clije %r3,0,.Lcharacter /* Found zero and c is zero. */ + lghi %r2,0 /* Return null if character not found. */ + br %r14 +END(__strchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strchr.c b/sysdeps/s390/multiarch/strchr.c new file mode 100644 index 0000000000..3c8c7e4600 --- /dev/null +++ b/sysdeps/s390/multiarch/strchr.c @@ -0,0 +1,28 @@ +/* Multiple versions of strchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + /* NOTE(review): s390_vx_libc_ifunc2 comes from ifunc-resolve.h (not shown); presumably it defines strchr as an ifunc choosing between __strchr_vx and __strchr_c - confirm there. index is a weak alias of strchr. */ +s390_vx_libc_ifunc2 (__strchr, strchr) +weak_alias (strchr, index) + +#else +# include <string/strchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strchrnul-c.c b/sysdeps/s390/multiarch/strchrnul-c.c new file mode 100644 index 0000000000..1f77c40cea --- /dev/null +++ b/sysdeps/s390/multiarch/strchrnul-c.c @@ -0,0 +1,26 @@ +/* Default strchrnul implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRCHRNUL __strchrnul_c +# define __strchrnul STRCHRNUL +# undef weak_alias +# define weak_alias(name, alias) + +# include <string/strchrnul.c> +#endif diff --git a/sysdeps/s390/multiarch/strchrnul-vx.S b/sysdeps/s390/multiarch/strchrnul-vx.S new file mode 100644 index 0000000000..43ca29ead0 --- /dev/null +++ b/sysdeps/s390/multiarch/strchrnul-vx.S @@ -0,0 +1,93 @@ +/* Vector optimized 32/64 bit S/390 version of strchrnul. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strchrnul (const char *s, int c) + Returns pointer to first c or to \0 if c not found. + + Register usage: + -r1=tmp + -r2=s and return pointer + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v18=vector with c replicated in every byte +*/ +ENTRY(__strchrnul_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + lghi %r5,0 /* current_len = 0. */ + + vlvgb %v18,%r3,0 /* Generate vector which elements are all c. + If c > 255, c will be truncated. 
*/ + vrepb %v18,%v18,0 + + vfeezbs %v16,%v16,%v18 /* Find element equal with zero search. */ + vlgvb %r4,%v16,7 /* Load byte index of character or zero. */ + clrjl %r4,%r1,.Lfound /* Return if c/zero is in loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Find c/zero in 16byte aligned loop */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s */ + vfeezbs %v16,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + + /* Found character or zero */ +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r1,%v16,7 /* Load byte index of character or zero. */ + algr %r5,%r1 + la %r2,0(%r5,%r2) /* Return pointer to character or zero. */ + +.Lend: + br %r14 +END(__strchrnul_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strchrnul.c b/sysdeps/s390/multiarch/strchrnul.c new file mode 100644 index 0000000000..627c084521 --- /dev/null +++ b/sysdeps/s390/multiarch/strchrnul.c @@ -0,0 +1,28 @@ +/* Multiple versions of strchrnul. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__strchrnul) +weak_alias (__strchrnul, strchrnul) + +#else +# include <string/strchrnul.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strcmp-vx.S b/sysdeps/s390/multiarch/strcmp-vx.S new file mode 100644 index 0000000000..edf557b5eb --- /dev/null +++ b/sysdeps/s390/multiarch/strcmp-vx.S @@ -0,0 +1,116 @@ +/* Vector optimized 32/64 bit S/390 version of strcmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int strcmp (const char *s1, const char *s2) + Compare two strings + + Register usage: + -r1=loaded byte count s1 + -r2=s1 + -r3=s2 + -r4=loaded byte count s2, tmp + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__strcmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + lghi %r5,0 /* current_len = 0. */ + +.Lloop: + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */ + lcbb %r1,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vr is not fully loaded. */ + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 /* Jump away if vr is not fully loaded. */ + /* Both vrs are fully loaded. */ + aghi %r5,16 + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */ + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezbs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezbs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezbs %v18,%v16,%v17 + jno .Lfound + j .Lloop + +.Llt16_1: + lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */ +.Llt16_2: + clr %r1,%r4 + locrh %r1,%r4 /* Get minimum of bytes loaded in s1/2. */ + algfr %r5,%r1 /* Add smallest loaded bytes to current_len. */ + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */ + vlgvb %r4,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r4,%r1,.Lfound /* Jump away if miscompare is within loaded + bytes. 
*/ + j .Lloop + +.Lfound: + je .Lend_equal + lghi %r2,1 + lghi %r1,-1 + locgrl %r2,%r1 + br %r14 +.Lend_equal: + lghi %r2,0 + br %r14 +END(__strcmp_vx) + +# define strcmp __strcmp_c +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) strong_alias(__strcmp_c, __GI_strcmp) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ + +#include <strcmp.S> diff --git a/sysdeps/s390/multiarch/strcmp.c b/sysdeps/s390/multiarch/strcmp.c new file mode 100644 index 0000000000..c4ccd34420 --- /dev/null +++ b/sysdeps/s390/multiarch/strcmp.c @@ -0,0 +1,26 @@ +/* Multiple versions of strcmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + + +# undef strcmp +s390_vx_libc_ifunc2 (__strcmp, strcmp) +#endif diff --git a/sysdeps/s390/multiarch/strcpy-vx.S b/sysdeps/s390/multiarch/strcpy-vx.S new file mode 100644 index 0000000000..d3472b821d --- /dev/null +++ b/sysdeps/s390/multiarch/strcpy-vx.S @@ -0,0 +1,109 @@ +/* Vector optimized 32/64 bit S/390 version of strcpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * strcpy (char *dest, const char *src) + Copy string src to dest. + + Register usage: + -r1=tmp + -r2=dest and return_value + -r3=src + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__strcpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lfound_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16_0 /* Jump away if zero was found. 
*/ + vl %v18,16(%r5,%r3)/* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lloop /* No zero found -> loop. */ + +.Lfound_v16_32: + aghi %r5,32 +.Lfound_v16_0: + la %r3,0(%r5,%r2) + vlgvb %r4,%v17,7 /* Load byte index of zero. */ + vstl %v16,%r4,0(%r3) /* Store characters including zero. */ + br %r14 + +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + la %r3,16(%r5,%r2) + vlgvb %r4,%v17,7 /* Load byte index of zero. */ + vstl %v18,%r4,0(%r3) /* Store characters including zero. */ + br %r14 + +.Lfound_align: + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + br %r14 +END(__strcpy_vx) + +/* Use mvst-strcpy-implementation as default implementation. */ +# define strcpy __strcpy_c +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) strong_alias(__strcpy_c, __GI_strcpy) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ + +/* Include mvst-strcpy-implementation in s390-32/s390-64 subdirectory. */ +#include <strcpy.S> diff --git a/sysdeps/s390/multiarch/strcpy.c b/sysdeps/s390/multiarch/strcpy.c new file mode 100644 index 0000000000..f348199112 --- /dev/null +++ b/sysdeps/s390/multiarch/strcpy.c @@ -0,0 +1,24 @@ +/* Multiple versions of strcpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strcpy, strcpy) +#endif diff --git a/sysdeps/s390/multiarch/strcspn-c.c b/sysdeps/s390/multiarch/strcspn-c.c new file mode 100644 index 0000000000..bc195b6625 --- /dev/null +++ b/sysdeps/s390/multiarch/strcspn-c.c @@ -0,0 +1,28 @@ +/* Default strcspn implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRCSPN __strcspn_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcspn_c, __GI_strcspn, __strcspn_c); +# endif /* SHARED */ + +# include <string/strcspn.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strcspn-vx.S b/sysdeps/s390/multiarch/strcspn-vx.S new file mode 100644 index 0000000000..1c6250661e --- /dev/null +++ b/sysdeps/s390/multiarch/strcspn-vx.S @@ -0,0 +1,281 @@ +/* Vector optimized 32/64 bit S/390 version of strcspn. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strcspn (const char *s, const char * reject) + The strcspn() function calculates the length of the initial segment + of s which consists entirely of characters not in reject. + + This method checks the length of reject string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of reject-string. Otherwise a slower full + check of reject-string is used. 
+ + register overview: + r3: pointer to start of reject-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string + r4: found byte index + r1: current return len + v16: search-string + v17: reject-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first reject-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any reject-character + v22: current mask; 1 indicates a match between + search-string-vreg and any reject-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of reject-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former reject-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb reject-string +*/ +ENTRY(__strcspn_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if reject-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load reject. */ + lghi %r1,0 /* Zero out current len. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if reject + lays on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> reject fits in one vreg. */ + j .Lslow /* No zero -> reject exceeds one vreg. */ + + +.Lcheck_onbb: + /* Reject lays on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded reject bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Reject fits in one vreg; + Fill with zeros and proceed + with FAST. */ + vl %v17,0(%r3) /* Load reject, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if reject fits in one vreg. 
*/ + + + /* + Search s for reject in one vreg + ------------------------------- + */ +.Lfast: + /* Complete reject-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 + equal to any in v17 + or first zero element. */ + + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + clrjl %r4,%r0,.Lfast_loop_found2 /* If found index is within loaded + bytes, return with found element + index (=equal count). */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to any reject + character and none was zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element or zero. */ +.Lfast_loop_found2: + algrk %r2,%r1,%r4 /* Add found index to current len. */ + br %r14 + + + + /* + Search s for reject in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* Reject in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. 
*/ + vone %v24 /* One for checking result of former + string-part. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero -> return 0. */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeeb %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) or 16. */ + /* Equal-index < min(zero-index, loaded byte count) + -> Return current len + equal-index. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> Former str-part was last str-part + -> Return current len + zero-index. */ + clrjl %r6,%r0,.Lslow_end_not_found + + /* All elements are zero (=no match) -> Proceed with next str-part. */ + vlr %v17,%v19 /* Load first part of reject (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. 
*/ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero (end of string) + -> Return current length. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any rejected character in + this reject-string-part) IN=0, RT=1. */ + vlgvb %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any rejected characters? + (all other parts of reject cannot lead to a match before this one) + -> Return current len, which is pointing to this element. */ + clijh %r4,0,.Lslow_end + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in last reject-part + -> Add found index to current len + and end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next reject part. */ + aghi %r5,16 /* Increment current len of reject-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string. */ + jo .Lslow_next_acc_onbb /* Jump away if reject-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in reject-part: fill zeros with first-reject-character. */ + vlgvb %r8,%v17,0 /* Load first element of reject-part. */ + clije %r8,0,.Lslow_next_str /* Process next str-part if first + character in this part of reject + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepb %v18,%v17,0 /* Replicate first char across all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Reject-string part is prepared. 
*/ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of reject part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_not_found: + algfr %r1,%r6 /* Add zero-index to current len. */ + j .Lslow_end +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end: + lgr %r2,%r1 + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 +END(__strcspn_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strcspn.c b/sysdeps/s390/multiarch/strcspn.c new file mode 100644 index 0000000000..c23452a791 --- /dev/null +++ b/sysdeps/s390/multiarch/strcspn.c @@ -0,0 +1,27 @@ +/* Multiple versions of strcspn. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strcspn, strcspn) + +#else +# include <string/strcspn.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strlen-c.c b/sysdeps/s390/multiarch/strlen-c.c new file mode 100644 index 0000000000..63c0d9e3e6 --- /dev/null +++ b/sysdeps/s390/multiarch/strlen-c.c @@ -0,0 +1,28 @@ +/* Default strlen implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRLEN __strlen_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strlen_c, __GI_strlen, __strlen_c); +# endif /* SHARED */ + +# include <string/strlen.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strlen-vx.S b/sysdeps/s390/multiarch/strlen-vx.S new file mode 100644 index 0000000000..3fe834a0c7 --- /dev/null +++ b/sysdeps/s390/multiarch/strlen-vx.S @@ -0,0 +1,84 @@ +/* Vector optimized 32/64 bit S/390 version of strlen. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strlen (const char *s) + Returns length of string s. + + Register usage: + -r1=bytes to 4k-byte boundary + -r2=s and return_value + -r3=tmp + -r4=tmp + -r5=current_len + -v16=part of s +*/ +ENTRY(__strlen_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r4,%v16,7 /* Load zero index or 16 if not found. */ + clr %r4,%r1 /* If found zero within loaded bytes? */ + locgrl %r2,%r4 /* Then copy return value. */ + blr %r14 /* And return. */ + + /* Align s to 16 byte. */ + risbgn %r3,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r3 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16 byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. 
*/ + vl %v16,16(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound16 + vl %v16,32(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound32 + vl %v16,48(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound48 + + aghi %r5,64 + j .Lloop /* No zero found -> loop. */ + +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r2,%v16,7 /* Load byte index of zero. */ + algr %r2,%r5 + br %r14 +END(__strlen_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strlen.c b/sysdeps/s390/multiarch/strlen.c new file mode 100644 index 0000000000..098d4e1e58 --- /dev/null +++ b/sysdeps/s390/multiarch/strlen.c @@ -0,0 +1,27 @@ +/* Multiple versions of strlen. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strlen, strlen) + +#else +# include <string/strlen.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strncat-c.c b/sysdeps/s390/multiarch/strncat-c.c new file mode 100644 index 0000000000..538b1fa51e --- /dev/null +++ b/sysdeps/s390/multiarch/strncat-c.c @@ -0,0 +1,23 @@ +/* Default strncat implementation for S/390. 
+ Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRNCAT __strncat_c + +# include <string/strncat.c> +#endif diff --git a/sysdeps/s390/multiarch/strncat-vx.S b/sysdeps/s390/multiarch/strncat-vx.S new file mode 100644 index 0000000000..b9857c1233 --- /dev/null +++ b/sysdeps/s390/multiarch/strncat-vx.S @@ -0,0 +1,239 @@ +/* Vector optimized 32/64 bit S/390 version of strncat. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * strncat (char *dest, const char *src, size_t n) + Append at most n characters of src to dest and terminate with zero. + + Register usage: + -r0=saved dest pointer for return + -r1=tmp + -r2=dest + -r3=src + -r4=n + -r5=current_len + -r6=tmp + -r7=tmp + -v16=part of src + -v17=index of zero + -v18=part of src + -v31=register save area for r6, r7 +*/ +ENTRY(__strncat_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0; %r2 still holds dest. */ + lgr %r0,%r2 /* Save destination pointer for return. */ + vlvgp %v31,%r6,%r7 /* Save registers. */ + + /* STRLEN + %r1 = loaded bytes, later zero byte index (tmp) + %r5 = zero byte index, becomes length of dst + %r2 = dst + */ + vlbb %v16,0(%r2),6 /* Load dst until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Llen_end /* Found zero within loaded bytes, end. */ + + /* Align dst to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2, i.e. %r2 & 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Llen_loop: + vl %v16,0(%r5,%r2) /* Load dst. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Llen_found /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found16 + vl %v16,32(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found32 + vl %v16,48(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found48 + + aghi %r5,64 + j .Llen_loop /* No zero -> loop. 
*/ + +.Llen_found48: + aghi %r5,16 +.Llen_found32: + aghi %r5,16 +.Llen_found16: + aghi %r5,16 +.Llen_found: + vlgvb %r1,%v16,7 /* Load byte index of zero. */ + algr %r5,%r1 /* %r5 = length of dst. */ + +.Llen_end: + /* STRCPY + %r1 = zero byte index (tmp) + %r6 = loaded bytes (tmp) + %r3 = curr src pointer + %r2 = curr dst pointer + %r7 = border, tmp + */ + la %r2,0(%r5,%r2) /* strcpy at end of dst-string. */ + + vlbb %v16,0(%r3),6 /* Load src until next 4k-byte boundary. */ + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + clgrjle %r4,%r6,.Lcpy_remaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r1,%r6,.Lcpy_found_v16_store /* Found zero within loaded + bytes, copy and return. */ + + /* Align src to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3, i.e. %r3 & 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* + Now we are 16byte aligned, so we can load a full vreg + without page fault. + */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lcpy_loop64 + + vl %v16,0(%r5,%r3) /* Load src. */ + clgijl %r4,17,.Lcpy_remaining_v16 /* If n <= 16, + process remaining bytes. */ +.Lcpy_lt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + /* If current_len >= border then process remaining bytes. */ + clgrjhe %r5,%r7,.Lcpy_remaining_v16 + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. 
*/ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lcpy_remaining_v18 + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lcpy_remaining_v16 + vfenezbs %v17,%v16,%v16 + je .Lcpy_found_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lcpy_remaining_v18: + vlr %v16,%v18 +.Lcpy_remaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + /* Zero-index within remaining-bytes, store up to zero and end. */ + clgrjle %r1,%r7,.Lcpy_found_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes. */ + lghi %r1,0 + stc %r1,1(%r7,%r2) /* Store string-null-termination beyond n. */ +.Lcpy_end: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_v16_32: + aghi %r5,32 + j .Lcpy_found_v16 +.Lcpy_found_v18_48: + aghi %r5,32 +.Lcpy_found_v18_16: + aghi %r5,16 +.Lcpy_found_v18: + vlr %v16,%v18 +.Lcpy_found_v16: + /* v16 contains a zero. Store remaining bytes up to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ +.Lcpy_found_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + j .Lcpy_end + + /* Find zero in 16byte aligned loop. */ +.Lcpy_loop64: + vl %v16,0(%r5,%r3) /* Load src. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. 
*/ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lcpy_found_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lcpy_loop64 + + vl %v16,0(%r5,%r3) /* Load src. */ + j .Lcpy_lt64 +END(__strncat_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strncat.c b/sysdeps/s390/multiarch/strncat.c new file mode 100644 index 0000000000..eb1410d5ac --- /dev/null +++ b/sysdeps/s390/multiarch/strncat.c @@ -0,0 +1,27 @@ +/* Multiple versions of strncat. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strncat, strncat) + +#else +# include <string/strncat.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strncmp-c.c b/sysdeps/s390/multiarch/strncmp-c.c new file mode 100644 index 0000000000..e781aefbe3 --- /dev/null +++ b/sysdeps/s390/multiarch/strncmp-c.c @@ -0,0 +1,28 @@ +/* Default strncmp implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRNCMP __strncmp_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strncmp_c, __GI_strncmp, __strncmp_c); +# endif /* SHARED */ + +# include <string/strncmp.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strncmp-vx.S b/sysdeps/s390/multiarch/strncmp-vx.S new file mode 100644 index 0000000000..9c4b207f41 --- /dev/null +++ b/sysdeps/s390/multiarch/strncmp-vx.S @@ -0,0 +1,137 @@ +/* Vector optimized 32/64 bit S/390 version of strncmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int strncmp (const char *s1, const char *s2, size_t n) + Compare at most n characters of two strings; returns -1, 0 or 1. + + Register usage: + -r0=tmp + -r1=tmp + -r2=s1 + -r3=s2 + -r4=n + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__strncmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgije %r4,0,.Lend_equal /* Return 0 (equal) if n == 0. */ + lghi %r5,0 /* current_len = 0. */ + +.Lloop: + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */ + lcbb %r0,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vr is not fully loaded. */ + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ + jo .Llt16_2 /* Jump away if vr is not fully loaded. */ + aghi %r5,16 /* Both vrs are fully loaded. */ + clgrjhe %r5,%r4,.Llastcmp /* If current_len >= n -> last compare. */ + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. 
*/ + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfenezbs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfenezbs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfenezbs %v18,%v16,%v17 + jno .Lfound + j .Lloop + +.Llt16_1: + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ +.Llt16_2: + clr %r0,%r1 /* Compare logical. */ + locrh %r0,%r1 /* Compute minimum of bytes loaded. */ + algfr %r5,%r0 /* Add smallest loaded bytes to current_len. */ + clgrj %r5,%r4,10,.Llastcmp /* If current_len >= n -> last compare. */ + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */ + vlgvb %r1,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r1,%r0,.Lfound /* Jump away if miscompare is within + loaded bytes (index < loaded-bytes) */ + j .Lloop + +.Llastcmp: + /* Use comparison result only if located within first n characters. + %r0: loaded byte count in vreg; + %r5: current_len; + %r4: n; + (current_len - n): [0...16[ + First ignored match index: loaded bytes - (current_len-n): ]0...16] + */ + slgr %r5,%r4 /* %r5 = current_len - n. */ + slr %r0,%r5 /* %r0 = first ignored match index. */ + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */ + vlgvb %r1,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r1,%r0,.Lfound /* Jump away if miscompare is within + loaded bytes and below n bytes. */ + j .Lend_equal /* Miscompare after n-bytes -> end equal. */ + +.Lfound: + /* Difference or end of string. */ + je .Lend_equal /* Equal up to the terminating zero. */ + lghi %r2,1 /* Default result: 1. 
*/ + lghi %r1,-1 + locgrl %r2,%r1 /* Result -1 if the condition code is low. */ + br %r14 +.Lend_equal: + lghi %r2,0 /* Result 0: equal within n characters. */ + br %r14 +END(__strncmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strncmp.c b/sysdeps/s390/multiarch/strncmp.c new file mode 100644 index 0000000000..9a72c79bfd --- /dev/null +++ b/sysdeps/s390/multiarch/strncmp.c @@ -0,0 +1,30 @@ +/* Multiple versions of strncmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + + +# undef strcmp /* NOTE(review): this is the strncmp wrapper; probably meant strncmp - confirm. */ +extern __typeof (strncmp) __strncmp; +s390_vx_libc_ifunc2 (__strncmp, strncmp) + +#else +# include <string/strncmp.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strncpy-vx.S b/sysdeps/s390/multiarch/strncpy-vx.S new file mode 100644 index 0000000000..08a0b29e8b --- /dev/null +++ b/sysdeps/s390/multiarch/strncpy-vx.S @@ -0,0 +1,207 @@ +/* Vector optimized 32/64 bit S/390 version of strncpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * strncpy (char *dest, const char *src, size_t n) + Copy at most n characters of string src to dest, zero-filling the rest. + + Register usage: + -r0=dest pointer for return + -r1=tmp, zero byte index + -r2=dest + -r3=src + -r4=n + -r5=current_len + -r6=tmp, loaded bytes + -r7=tmp, border + -v16=part of src + -v17=index of zero + -v18=part of src + -v31=register save area for r6, r7 +*/ +ENTRY(__strncpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0; %r2 still holds dest. */ + lgr %r0,%r2 /* Save destination pointer for return. */ + vlvgp %v31,%r6,%r7 /* Save registers. */ + + vlbb %v16,0(%r3),6 /* Load src until next 4k-byte boundary. */ + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + clgrjle %r4,%r6,.Lremaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. 
*/ + clrjl %r1,%r6,.Lfound_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align src to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3, i.e. %r3 & 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Now we are 16byte aligned, so we can load + a full vreg without page fault. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load src. */ + clgijl %r4,17,.Lremaining_v16 /* If n <= 16, process remaining + bytes. */ +.Llt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + clgrjhe %r5,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v18 + vfenezbs %v17,%v18,%v18 + je .Lfound_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v16 + vfenezbs %v17,%v16,%v16 + je .Lfound_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lremaining_v18: + vlr %v16,%v18 +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes; no string-termination is appended here. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + /* Zero in remaining bytes? 
-> jump away (zero-index < max-index) + Do not jump away if zero-index == max-index, + but simply copy zero with vstl below. */ + clrjl %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes without null + termination! */ +.Lend: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + + +.Lfound_v16_32: + aghi %r5,32 + j .Lfound_v16 +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + aghi %r5,16 +.Lfound_v18: + vlr %v16,%v18 +.Lfound_v16: + /* v16 contains a zero. Store remaining bytes up to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no support for index-register. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + /* Fill remaining bytes with zero - remaining count always > 0. */ + algr %r5,%r1 /* Remaining bytes (=%r4) = ... */ + slgr %r4,%r5 /* = n - (current_len + zero_index + 1). */ + la %r2,0(%r1,%r2) /* Pointer to zero. Start filling beyond. */ + aghi %r4,-2 /* mvc with exrl needs count - 1. + (additional -1, see remaining bytes above) */ + srlg %r6,%r4,8 /* Split into 256 byte blocks. */ + ltgr %r6,%r6 + je .Lzero_lt256 +.Lzero_loop256: + mvc 1(256,%r2),0(%r2) /* Fill 256 zeros at once. */ + la %r2,256(%r2) + brctg %r6,.Lzero_loop256 /* Loop until all blocks are processed. */ +.Lzero_lt256: + exrl %r4,.Lmvc_lt256 /* Fill the rest; length taken from %r4. */ + j .Lend +.Lmvc_lt256: + mvc 1(1,%r2),0(%r2) /* Target of the exrl above. */ + +.Lloop64: + vl %v16,0(%r5,%r3) /* Load src. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. 
*/ + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load src. */ + j .Llt64 +END(__strncpy_vx) + +# define strncpy __strncpy_c +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) strong_alias(__strncpy_c, __GI_strncpy) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ + +/* Include strncpy-implementation in s390-32/s390-64 subdirectory. */ +#include <strncpy.S> diff --git a/sysdeps/s390/multiarch/strncpy.c b/sysdeps/s390/multiarch/strncpy.c new file mode 100644 index 0000000000..1464551875 --- /dev/null +++ b/sysdeps/s390/multiarch/strncpy.c @@ -0,0 +1,24 @@ +/* Multiple versions of strncpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strncpy, strncpy) +#endif diff --git a/sysdeps/s390/multiarch/strnlen-c.c b/sysdeps/s390/multiarch/strnlen-c.c new file mode 100644 index 0000000000..99ad65a103 --- /dev/null +++ b/sysdeps/s390/multiarch/strnlen-c.c @@ -0,0 +1,30 @@ +/* Default strnlen implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRNLEN __strnlen_c +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__strnlen_c, __GI_strnlen, __strnlen_c); \ + strong_alias (__strnlen_c, __strnlen_c_1); \ + __hidden_ver1 (__strnlen_c_1, __GI___strnlen, __strnlen_c_1); +# endif /* SHARED */ + +# include <string/strnlen.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strnlen-vx.S b/sysdeps/s390/multiarch/strnlen-vx.S new file mode 100644 index 0000000000..3e3a31dd9c --- /dev/null +++ b/sysdeps/s390/multiarch/strnlen-vx.S @@ -0,0 +1,134 @@ +/* Vector optimized 32/64 bit S/390 version of strnlen. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strnlen (const char *s, size_t maxlen) + Returns the number of characters in s, but at most maxlen. + + Register usage: + -r1=tmp + -r2=address of string + -r3=maxlen (number of characters to be read) + -r4=tmp + -r5=current_len and return_value + -v16=part of s +*/ +ENTRY(__strnlen_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r3,%r3 +# endif /* !defined __s390x__ */ + + clgfi %r3,0 /* If maxlen == 0, return 0. */ + locgre %r2,%r3 + ber %r14 + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r1,%r1 /* Convert 32bit to 64bit. */ + + vfenezb %v16,%v16,%v16 /* Find element not equal with zero search. */ + clgr %r1,%r3 + locgrh %r1,%r3 /* loaded_byte_count + = min (loaded_byte_count, maxlen) */ + + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clr %r5,%r1 /* If found zero within loaded bytes? */ + locgrl %r2,%r5 /* Then copy return value. */ + blr %r14 /* And return. */ + + clgr %r1,%r3 /* If loaded_byte_count == maxlen? */ + locgre %r2,%r3 /* Then copy return value. */ + ber %r14 /* And return. */ + + /* Align s to 16 byte. 
*/ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2, i.e. %r2 & 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r3,.Lloop64 + + /* Find zero in max 64byte with aligned s. */ +.Llt64: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound /* current_len >= maxlen -> end. */ + vl %v16,0(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound + vl %v16,0(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound + vl %v16,0(%r5,%r2) + vfenezbs %v16,%v16,%v16 + j .Lfound /* Zero or not: .Lfound limits the result to maxlen. */ + +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r4,%v16,7 /* Load byte index of zero or 16 if no zero. */ + algr %r5,%r4 + + clgr %r5,%r3 + locgrh %r5,%r3 /* Return min (current_len, maxlen). */ + lgr %r2,%r5 + br %r14 + + /* Find zero in 16 byte aligned loop. */ +.Lloop64: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound16 + vl %v16,32(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound32 + vl %v16,48(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound48 + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r3,.Lloop64 + + j .Llt64 +END(__strnlen_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strnlen.c b/sysdeps/s390/multiarch/strnlen.c new file mode 100644 index 0000000000..48c3bb73e6 --- /dev/null +++ b/sysdeps/s390/multiarch/strnlen.c @@ -0,0 +1,29 @@ +/* Multiple versions of strnlen. 
+ Copyright (C) 2015-2016 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) + +#else +# include <string/strnlen.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strpbrk-c.c b/sysdeps/s390/multiarch/strpbrk-c.c new file mode 100644 index 0000000000..49c5e1258b --- /dev/null +++ b/sysdeps/s390/multiarch/strpbrk-c.c @@ -0,0 +1,28 @@ +/* Default strpbrk implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRPBRK __strpbrk_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strpbrk_c, __GI_strpbrk, __strpbrk_c); +# endif /* SHARED */ + +# include <string/strpbrk.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strpbrk-vx.S b/sysdeps/s390/multiarch/strpbrk-vx.S new file mode 100644 index 0000000000..6a0bbd9d19 --- /dev/null +++ b/sysdeps/s390/multiarch/strpbrk-vx.S @@ -0,0 +1,302 @@ +/* Vector optimized 32/64 bit S/390 version of strpbrk. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strpbrk (const char *s, const char * accept) + The strpbrk() function locates the first occurrence in the string s + of any of the characters in the string accept and returns a pointer + to that character or NULL if not found. + + This method checks the length of accept string. 
If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. + + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string (32bit unsigned) + r4: found byte index (32bit unsigned) + r1: current return len (64bit unsigned) + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__strpbrk_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lghi %r1,0 /* Zero out current len. */ + vlgvb %r0,%v17,0 /* Get first element. */ + clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lays + on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg */ + + +.Lcheck_onbb: + /* Accept lays on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. 
*/ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string is in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes, evaluate the found + element: match -> return pointer, zero -> return null. */ + clrjl %r4,%r0,.Lfast_loop_found2 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to any in accept + and none was zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ +.Lfast_loop_found2: + vlgvb %r0,%v16,0(%r4) /* Get found element.
*/ + clije %r0,0,.Lfast_end_null /* Return null if no accept-char found */ + algfr %r1,%r4 /* Add found index of char to current len. */ + la %r2,0(%r1,%r2) /* And return pointer to first equal char. */ + br %r14 + +.Lfast_end_null: + lghi %r2,0 /* Return null if no character is equal. */ + br %r14 + + + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* accept in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + vone %v24 /* One for checking result of former string. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeeb %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) + or 16 if no match. */ + /* Equal-index < min(zero-index, loaded byte count) + -> return pointer to equal element. 
*/ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> former str-part was last str-part + -> return null */ + clrjl %r6,%r0,.Lslow_end_null + /* All elements are zero (=no match) -> proceed with next str-part. */ + + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vlgvb %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any accepted characters + (all other parts of accept cannot lead to a match before this one) + -> current len is pointing to first element + -> return found */ + clijh %r4,0,.Lslow_end_found + vo %v21,%v21,%v22 /* Global-mask = global-mask | matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part + -> accept is fully processed; + check results of this str-part. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed.
*/ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvb %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_next_str /* Proceed with next string-part, + if first char in this part of accept + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepb %v18,%v17,0 /* Replicate first char across all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_null: + lghi %r1,0 /* Return null if no character is equal. */ + j .Lslow_end + +.Lslow_loop_found: /* NOTE(review): no branch to .Lslow_loop_found is visible in __strpbrk_vx and the preceding instruction is an unconditional jump, so this sequence looks unreachable - confirm before relying on or removing it. */ + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + vlgvb %r0,%v16,0(%r4) /* Get found element. */ + clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ + +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end_found: + la %r1,0(%r1,%r2) /* And return pointer to first equal char. */ + +.Lslow_end: + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + lgr %r2,%r1 + br %r14 +END(__strpbrk_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strpbrk.c b/sysdeps/s390/multiarch/strpbrk.c new file mode 100644 index 0000000000..cdc139929f --- /dev/null +++ b/sysdeps/s390/multiarch/strpbrk.c @@ -0,0 +1,27 @@ +/* Multiple versions of strpbrk.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strpbrk, strpbrk) + +#else +# include <string/strpbrk.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strrchr-c.c b/sysdeps/s390/multiarch/strrchr-c.c new file mode 100644 index 0000000000..2513af956d --- /dev/null +++ b/sysdeps/s390/multiarch/strrchr-c.c @@ -0,0 +1,29 @@ +/* Default strrchr implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRRCHR __strrchr_c +# undef weak_alias +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strrchr_c, __GI_strrchr, __strrchr_c); +# endif /* SHARED */ + +# include <string/strrchr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strrchr-vx.S b/sysdeps/s390/multiarch/strrchr-vx.S new file mode 100644 index 0000000000..175d2cba3c --- /dev/null +++ b/sysdeps/s390/multiarch/strrchr-vx.S @@ -0,0 +1,180 @@ +/* Vector optimized 32/64 bit S/390 version of strrchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strrchr (const char *s, int c) + Locate the last character c in string. + + Register usage: + -r0=loaded bytes in first part of s. + -r1=offset of the part of s with the last occurrence of c, or -1 if c was not found.
+ -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v17=index of found element + -v18=replicated c + -v19=part of s with last occurence of c. + -v20=permute pattern +*/ +ENTRY(__strrchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vlvgb %v18,%r3,0 /* Generate vector whose elements are all c; + if c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + lghi %r1,-1 /* Currently no c found. */ + lghi %r5,0 /* current_len = 0. */ + + vfeezbs %v17,%v16,%v18 /* Find element equal or zero. */ + vlgvb %r4,%v17,7 /* Load byte index of c/zero or 16. */ + clrjl %r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes. */ +.Lalign: + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezbs %v17,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezbs %v17,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezbs %v17,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezbs %v17,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + +.Lfound48: + la %r5,16(%r5) /* Use la since aghi would clobber cc. */ +.Lfound32: + la %r5,16(%r5) +.Lfound16: + la %r5,16(%r5) +.Lfound: + je .Lzero /* Found zero, but no c before that zero. */ + /* Save this part of s to check for further matches after reaching + the end of the complete string. */ + vlr %v19,%v16 + lgr %r1,%r5 + + jh .Lzero /* Found a zero after the found c. */ + aghi %r5,16 /* Start search of next part of s. */ + j .Lloop + +.Lfound_first_part: + /* This code is only executed if the found c/zero is within loaded + bytes.
If no c/zero was found (cc==3) the found index = 16, thus + this code is not called. + Resulting condition code of vector find element equal: + cc==0: no c, found zero + cc==1: c found, no zero + cc==2: c found, found zero after c + cc==3: no c, no zero (this case can be ignored). */ + je .Lzero /* Found zero, but no c before that zero. */ + + locgrne %r1,%r5 /* Mark c as found in first part of s. */ + vlr %v19,%v16 + + jl .Lalign /* No zero (e.g. if vr was fully loaded) + -> Align and loop afterwards. */ + + /* Found a zero in vr. If vr was not fully loaded due to block + boundary, the remaining bytes are filled with zero and we can't + rely on zero indication of condition code here! */ + + vfenezb %v17,%v16,%v16 /* Find zero. */ + vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */ + clrjl %r4,%r0,.Lzero /* Zero within loaded bytes -> end. */ + j .Lalign /* Align and loop afterwards. */ + +.Lend_searched_zero: + vlgvb %r4,%v17,7 /* Load byte index of zero. */ + algr %r5,%r4 + la %r2,0(%r5,%r2) /* Return pointer to zero. */ + br %r14 + +.Lzero: + /* Reached end of string. Check if one c was found before. */ + clije %r3,0,.Lend_searched_zero /* Found zero and c is zero. + NOTE(review): this appears to compare the whole value of c in r3, + whereas v18 only holds the low byte of c - confirm the behavior + for a nonzero c whose low byte is zero (e.g. 256). */ + + cgfi %r1,-1 /* No c found -> return NULL. */ + locghie %r2,0 + ber %r14 + + larl %r3,.Lpermute_mask /* Load permute mask. */ + vl %v20,0(%r3) + + /* c was found and is part of v19. */ + vfenezb %v17,%v19,%v19 /* Find zero. */ + vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */ + + clgfi %r5,0 /* Loaded byte count in v19 is 16, ... */ + lochine %r0,16 /* ... if v19 is not the first part of s. */ + ahi %r0,-1 /* Convert byte count to highest index. */ + + clr %r0,%r4 + locrl %r4,%r0 /* r4 = min (zero-index, highest-index). */ + + /* Right-shift of v19 to mask bytes after zero. */ + clije %r4,15,.Lzero_permute /* No shift is needed if highest index + in vr is 15. */ + lhi %r0,15 + slr %r0,%r4 /* Compute byte count for vector shift right. */ + sll %r0,3 /* Convert to bit count.
*/ + vlvgb %v17,%r0,7 + vsrlb %v19,%v19,%v17 /* Vector shift right by byte by number of bytes + specified in bits 1-4 of byte 7 in v17. */ + + /* Reverse bytes in v19. */ +.Lzero_permute: + vperm %v19,%v19,%v19,%v20 /* Permute v19 to reversed order. */ + + /* Find c in reversed v19. */ + vfeeb %v19,%v19,%v18 /* Find c. */ + la %r2,0(%r1,%r2) /* r2 = s + offset (r1) of the part saved in v19. */ + vlgvb %r3,%v19,7 /* Load byte index of c. */ + + /* Compute index in real s and return. */ + slgr %r4,%r3 + la %r2,0(%r4,%r2) /* Return pointer to last c. */ + br %r14 +.Lpermute_mask: + .byte 0x0F,0x0E,0x0D,0x0C,0x0B,0x0A,0x09,0x08 + .byte 0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00 +END(__strrchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strrchr.c b/sysdeps/s390/multiarch/strrchr.c new file mode 100644 index 0000000000..e515d6b6e6 --- /dev/null +++ b/sysdeps/s390/multiarch/strrchr.c @@ -0,0 +1,28 @@ +/* Multiple versions of strrchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strrchr, strrchr) +weak_alias (strrchr, rindex) + +#else +# include <string/strrchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/strspn-c.c b/sysdeps/s390/multiarch/strspn-c.c new file mode 100644 index 0000000000..8928d3cc24 --- /dev/null +++ b/sysdeps/s390/multiarch/strspn-c.c @@ -0,0 +1,28 @@ +/* Default strspn implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRSPN __strspn_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strspn_c, __GI_strspn, __strspn_c); +# endif /* SHARED */ + +# include <string/strspn.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strspn-vx.S b/sysdeps/s390/multiarch/strspn-vx.S new file mode 100644 index 0000000000..65d295937a --- /dev/null +++ b/sysdeps/s390/multiarch/strspn-vx.S @@ -0,0 +1,256 @@ +/* Vector optimized 32/64 bit S/390 version of strspn. + Copyright (C) 2015-2016 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strspn (const char *s, const char * accept) + The strspn() function calculates the length of the initial segment + of s which consists entirely of characters in accept. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. 
+ + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r4: loaded byte count of vl search-string + r0: found byte index + r1: current return len of s + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__strspn_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lcbb %r4,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lays + on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg. */ + +.Lcheck_onbb: + /* Accept lays on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r0,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r0,%r4,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string is in v17 and remaining bytes are zero. 
*/ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezbs %v16,%v16,%v17,8 /* Find first element in v16 + unequal to any in v17 + or first zero element. */ + vlgvb %r0,%v16,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes (%r0 < %r1), + return with found element index (=equal count). */ + clr %r0,%r1 + locgrl %r2,%r0 + blr %r14 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v16,%v16,%v17,8 /* Find first element in v16 + unequal to any in v17 + or first zero element. */ + jno .Lfast_loop_found + vl %v16,16(%r1,%r2) + vfaezbs %v16,%v16,%v17,8 + jno .Lfast_loop_found16 + vl %v16,32(%r1,%r2) + vfaezbs %v16,%v16,%v17,8 + jno .Lfast_loop_found32 + vl %v16,48(%r1,%r2) + vfaezbs %v16,%v16,%v17,8 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if every element matched accept + and none was zero. */ + + /* Found unequal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r0,%v16,7 /* Load byte index of found element. */ + algrk %r2,%r1,%r0 /* And add it to current len. */ + br %r14 + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + lghi %r1,0 /* current_len = 0. */ + + /* Accept in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + + /* Align s to 16 byte.
*/ + risbg %r0,%r2,60,128+63,0 /* Test if s is aligned and + %r0 = bits 60-63 'and' 15 */ + je .Lslow_loop_str /* If s is aligned, loop aligned */ + lghi %r4,15 + slr %r4,%r0 /* Compute highest index to load (15-x). */ + vll %v16,%r4,0(%r2) /* Load up to 16byte boundary (vll needs + highest index, left bytes are 0). */ + ahi %r4,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 + if there is no zero. */ + clr %r4,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r4 /* Load on cc==1. */ + j .Lslow_loop_acc + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r4 /* Add loaded byte count to current len. */ +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r4,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vo %v21,%v21,%v22 /* global-mask = global- | matching-mask. */ + vfenezb %v18,%v21,%v21 /* Find first zero in global-mask. */ + vlgvb %r0,%v18,7 /* Get first found zero-index + (= first mismatch). */ + clrjl %r0,%r6,.Lslow_next_acc /* Mismatch-index < min(lbc,zero-index) + -> Process this string-part + with next acc-part. 
*/ + clrjhe %r0,%r4,.Lslow_next_str /* Found-index >= loaded byte count + -> All loaded bytes are matching + any accept-character + and are not zero. */ + /* All bytes are matching any characters in accept-string + and search-string is fully processed (found-index == zero-index) */ +.Lslow_add_lbc_end: + algrk %r2,%r1,%r0 /* Add matching characters to current_len. */ + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 + + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_add_lbc_end /* There was a zero in last acc-part + -> Add found index to current len + and end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvb %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_add_lbc_end /* End if zero is first character + in this part of accept-string. */ + /* r8>0 -> zero found in this acc-part. */ + vrepb %v18,%v17,0 /* Replicate first char across all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vr. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + Check for zero is in jump-target.
*/ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ +END(__strspn_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strspn.c b/sysdeps/s390/multiarch/strspn.c new file mode 100644 index 0000000000..7c26af8ced --- /dev/null +++ b/sysdeps/s390/multiarch/strspn.c @@ -0,0 +1,27 @@ +/* Multiple versions of strspn. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strspn, strspn) + +#else +# include <string/strspn.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcpcpy-c.c b/sysdeps/s390/multiarch/wcpcpy-c.c new file mode 100644 index 0000000000..b4849a3321 --- /dev/null +++ b/sysdeps/s390/multiarch/wcpcpy-c.c @@ -0,0 +1,25 @@ +/* Default wcpcpy implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCPCPY __wcpcpy_c + +# include <wchar.h> +extern __typeof (__wcpcpy) __wcpcpy_c; +# include <wcsmbs/wcpcpy.c> +#endif diff --git a/sysdeps/s390/multiarch/wcpcpy-vx.S b/sysdeps/s390/multiarch/wcpcpy-vx.S new file mode 100644 index 0000000000..8a466c6a37 --- /dev/null +++ b/sysdeps/s390/multiarch/wcpcpy-vx.S @@ -0,0 +1,114 @@ +/* Vector optimized 32/64 bit S/390 version of wcpcpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcpcpy (const wchar_t *dest, const wchar_t *src) + Copy string src to dest returning a pointer to its end. + + Register usage: + -r0=border-len for switching to vector-instructions + -r1=tmp + -r2=dest and return value + -r3=src + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__wcpcpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r3,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lfound_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lloop /* No zero found -> loop. 
*/ + +.Lfound_v16_32: + aghi %r5,32 +.Lfound_v16_0: + la %r3,0(%r5,%r2) + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r1,0(%r3) /* Copy characters including zero. */ + lay %r2,-3(%r1,%r3) /* Return pointer to zero. */ + br %r14 + +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + la %r3,16(%r5,%r2) + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v18,%r1,0(%r3) /* Copy characters including zero. */ + lay %r2,-3(%r1,%r3) /* Return pointer to zero. */ + br %r14 + +.Lfound_align: + aghi %r5,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + lay %r2,-3(%r5,%r2) /* Return pointer to zero. */ + br %r14 + +.Lfallback: + jg __wcpcpy_c +END(__wcpcpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcpcpy.c b/sysdeps/s390/multiarch/wcpcpy.c new file mode 100644 index 0000000000..8afd98d7d4 --- /dev/null +++ b/sysdeps/s390/multiarch/wcpcpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcpcpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcpcpy) +weak_alias (__wcpcpy, wcpcpy) + +#else +# include <wcsmbs/wcpcpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcpncpy-c.c b/sysdeps/s390/multiarch/wcpncpy-c.c new file mode 100644 index 0000000000..86db27b525 --- /dev/null +++ b/sysdeps/s390/multiarch/wcpncpy-c.c @@ -0,0 +1,25 @@ +/* Default wcpncpy implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCPNCPY __wcpncpy_c + +# include <wchar.h> +extern __typeof (__wcpncpy) __wcpncpy_c; +# include <wcsmbs/wcpncpy.c> +#endif diff --git a/sysdeps/s390/multiarch/wcpncpy-vx.S b/sysdeps/s390/multiarch/wcpncpy-vx.S new file mode 100644 index 0000000000..ca0203f451 --- /dev/null +++ b/sysdeps/s390/multiarch/wcpncpy-vx.S @@ -0,0 +1,222 @@ +/* Vector optimized 32/64 bit S/390 version of wcpncpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcpncpy (wchar_t *dest, const wchar_t *src, size_t n) + Copies at most n characters of string src to dest + returning a pointer to the first zero written to dest or dest+n + if no zero was found within the first n characters of src. + + Register usage: + -%r0 = return value + -%r1 = zero byte index + -%r2 = curr dst pointer + -%r3 = curr src pointer + -%r4 = n + -%r5 = current_len + -%r6 = loaded bytes + -%r7 = border, tmp +*/ +ENTRY(__wcpncpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + + tmll %r3,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vlvgp %v31,%r6,%r7 /* Save registers. */ + lghi %r5,0 /* current_len = 0. */ + + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. 
*/ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + la %r0,0(%r4,%r2) /* Save destination pointer + n for return. */ + + clgrjle %r4,%r6,.Lremaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + clrjl %r1,%r6,.Lfound_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3 (src 'and' 15). */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Now we are 16byte aligned, so we can load a full vreg + without page fault. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lremaining_v16 /* If n <=16, + process remaining bytes. */ +.Llt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + clgrjhe %r5,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. 
*/ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v18 + vfenezfs %v17,%v18,%v18 + je .Lfound_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v16 + vfenezfs %v17,%v16,%v16 + je .Lfound_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lremaining_v18: + vlr %v16,%v18 /* Continue with the v18 data on the common v16 path. */ +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + /* Zero in remaining bytes? -> jump away (zero-index <= max-index). */ + clrjle %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes without null + termination! */ +.Lend: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lfound_v16_32: + aghi %r5,32 + j .Lfound_v16 +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + aghi %r5,16 +.Lfound_v18: + vlr %v16,%v18 +.Lfound_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no support for index-register. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + /* Fill remaining bytes with zero - remaining byte count always > 0. */ + algr %r5,%r1 /* Remaining bytes (=%r4) = ... */ + slgr %r4,%r5 /* = n - (currlen + zero_index + 1) */ + la %r2,0(%r1,%r2) /* Pointer to zero. start filling beyond. 
*/ + lay %r0,-3(%r2) /* Save return-pointer to found zero. 
*/ + clgije %r4,1,.Lend /* Skip zero-filling, if found-zero is last + possible character. + (1 is subtracted from r4 below!). */ + aghi %r4,-2 /* mvc with exrl needs count - 1. + (additional -1, see remaining bytes above) */ + srlg %r6,%r4,8 /* Split into 256 byte blocks. */ + ltgr %r6,%r6 + je .Lzero_lt256 +.Lzero_loop256: + mvc 1(256,%r2),0(%r2) /* Fill 256 zeros at once. */ + la %r2,256(%r2) + brctg %r6,.Lzero_loop256 /* Loop until all blocks are processed. */ +.Lzero_lt256: + exrl %r4,.Lmvc_lt256 + j .Lend +.Lmvc_lt256: + mvc 1(1,%r2),0(%r2) /* Target of the exrl above; not reached by fall-through (preceded by j .Lend). */ + + /* Find zero in 16byte aligned loop. */ +.Lloop64: + vl %v16,0(%r5,%r3) + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + j .Llt64 + +.Lfallback: + jg __wcpncpy_c +END(__wcpncpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcpncpy.c b/sysdeps/s390/multiarch/wcpncpy.c new file mode 100644 index 0000000000..13bc543a8a --- /dev/null +++ b/sysdeps/s390/multiarch/wcpncpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcpncpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcpncpy) +weak_alias (__wcpncpy, wcpncpy) + +#else +# include <wcsmbs/wcpncpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcscat-c.c b/sysdeps/s390/multiarch/wcscat-c.c new file mode 100644 index 0000000000..bceec55408 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscat-c.c @@ -0,0 +1,25 @@ +/* Default wcscat implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCAT __wcscat_c + +# include <wchar.h> +extern __typeof (__wcscat) __wcscat_c; +# include <wcsmbs/wcscat.c> +#endif diff --git a/sysdeps/s390/multiarch/wcscat-vx.S b/sysdeps/s390/multiarch/wcscat-vx.S new file mode 100644 index 0000000000..8353caafa9 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscat-vx.S @@ -0,0 +1,175 @@ +/* Vector optimized 32/64 bit S/390 version of wcscat. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcscat (wchar_t *dest, const wchar_t *src) + Concatenate two strings. + + Register usage: + -r0=saved dest pointer for return + -r1=tmp + -r2=dest + -r3=src + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__wcscat_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + /* __wcslen_c can handle non 4byte aligned pointers, + but __wcscpy_c not. Thus if either src or dest is + not 4byte aligned, use __wcscat_c. 
*/ + tmll %r2,3 /* Test if dest is 4-byte aligned? 
*/ + jne .Lfallback /* And use common-code variant if not. */ + tmll %r3,3 /* Test if src is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + lgr %r0,%r2 /* Save destination pointer for return. */ + + /* WCSLEN + r1 = loaded bytes (tmp) + r4 = zero byte index (tmp) + r2 = dst + */ + + vfenezf %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Llen_end /* Found zero within loaded bytes, end. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 (dest 'and' 15). */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Llen_loop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Llen_found /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found16 + vl %v16,32(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found32 + vl %v16,48(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found48 + + aghi %r5,64 + j .Llen_loop /* No zero -> loop. */ + +.Llen_found48: + aghi %r5,16 +.Llen_found32: + aghi %r5,16 +.Llen_found16: + aghi %r5,16 +.Llen_found: + vlgvb %r4,%v16,7 /* Load byte index of zero. */ + algr %r5,%r4 + +.Llen_end: + /* WCSCPY + %r1 = loaded bytes (tmp) + %r4 = zero byte index (tmp) + %r3 = curr src pointer + %r2 = curr dst pointer + */ + la %r2,0(%r5,%r2) /* Start wcscpy at end of dst-string. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lcpy_found_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. 
*/ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3 (src 'and' 15). */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. */ +.Lcpy_loop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lcpy_found_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lcpy_loop /* No zero -> loop. */ + +.Lcpy_found_v16_32: + aghi %r5,32 +.Lcpy_found_v16_0: + la %r4,0(%r5,%r2) + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r1,0(%r4) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_v18_48: + aghi %r5,32 +.Lcpy_found_v18_16: + la %r4,16(%r5,%r2) + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v18,%r1,0(%r4) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_align: + aghi %r5,3 /* Also copy remaining bytes of found zero. */ + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. 
*/ + br %r14 +.Lfallback: + jg __wcscat_c +END(__wcscat_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcscat.c b/sysdeps/s390/multiarch/wcscat.c new file mode 100644 index 0000000000..8d71c2f1b9 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscat.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcscat. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcscat) +weak_alias (__wcscat, wcscat) + +#else +# include <wcsmbs/wcscat.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcschr-c.c b/sysdeps/s390/multiarch/wcschr-c.c new file mode 100644 index 0000000000..9ba1d5f861 --- /dev/null +++ b/sysdeps/s390/multiarch/wcschr-c.c @@ -0,0 +1,37 @@ +/* Default wcschr implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCHR __wcschr_c + +# include <wchar.h> +extern __typeof (__wcschr) __wcschr_c; +# undef weak_alias +# define weak_alias(name, alias) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcschr_c, __GI_wcschr, __wcschr_c); \ + strong_alias (__wcschr_c, __wcschr_c_1); \ + __hidden_ver1 (__wcschr_c_1, __GI___wcschr, __wcschr_c_1); +# undef libc_hidden_weak +# define libc_hidden_weak(name) +# endif /* SHARED */ + +# include <wcsmbs/wcschr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcschr-vx.S b/sysdeps/s390/multiarch/wcschr-vx.S new file mode 100644 index 0000000000..ff7d1c4b4e --- /dev/null +++ b/sysdeps/s390/multiarch/wcschr-vx.S @@ -0,0 +1,103 @@ +/* Vector optimized 32/64 bit S/390 version of wcschr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcschr (const wchar_t *s, wchar_t c) + Locate character in string. + + Register usage: + -r1=tmp + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v17=unused (the search result index is written to v16) + -v18=replicated c +*/ +ENTRY(__wcschr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + lghi %r5,0 /* current_len = 0. */ + + vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */ + vrepf %v18,%v18,0 + + vfeezfs %v16,%v16,%v18 /* Find element equal with zero search. */ + vlgvb %r4,%v16,7 /* Load byte index of character or zero. */ + clrjl %r4,%r1,.Lfound /* Return if c/zero is in loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 (s 'and' 15). */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Find c/zero in 16byte aligned loop */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezfs %v16,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). 
*/ + vl %v16,16(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + +.Lfound48: + la %r5,16(%r5) /* Use la since aghi would clobber cc. */ +.Lfound32: + la %r5,16(%r5) +.Lfound16: + la %r5,16(%r5) +.Lfound: + je .Lzero /* Found zero, but no c before that zero. */ + +.Lcharacter: + vlgvb %r4,%v16,7 /* Load byte index of character. */ + algr %r5,%r4 + la %r2,0(%r5,%r2) /* Return pointer to character. */ + br %r14 + +.Lzero: + clije %r3,0,.Lcharacter /* Found zero and c is zero. */ + lghi %r2,0 /* Return null if character not found. */ + br %r14 +.Lfallback: + jg __wcschr_c +END(__wcschr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcschr.c b/sysdeps/s390/multiarch/wcschr.c new file mode 100644 index 0000000000..fb51097cd6 --- /dev/null +++ b/sysdeps/s390/multiarch/wcschr.c @@ -0,0 +1,29 @@ +/* Multiple versions of wcschr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcschr) +weak_alias (__wcschr, wcschr) +libc_hidden_weak (wcschr) + +#else +# include <wcsmbs/wcschr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcschrnul-c.c b/sysdeps/s390/multiarch/wcschrnul-c.c new file mode 100644 index 0000000000..bbee3288fe --- /dev/null +++ b/sysdeps/s390/multiarch/wcschrnul-c.c @@ -0,0 +1,25 @@ +/* Default wcschrnul implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCHRNUL __wcschrnul_c + +# include <wchar.h> +extern __typeof (__wcschrnul) __wcschrnul_c; +# include <wcsmbs/wcschrnul.c> +#endif diff --git a/sysdeps/s390/multiarch/wcschrnul-vx.S b/sysdeps/s390/multiarch/wcschrnul-vx.S new file mode 100644 index 0000000000..e54e48d894 --- /dev/null +++ b/sysdeps/s390/multiarch/wcschrnul-vx.S @@ -0,0 +1,97 @@ +/* Vector optimized 32/64 bit S/390 version of wcschrnul. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t* wcschrnul (const wchar_t *s, wchar_t c) + Returns pointer to first c or to \0 if c not found. + + Register usage: + -r1=tmp + -r2=s and return pointer + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v18=vector with c replicated in every element +*/ +ENTRY(__wcschrnul_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + lghi %r5,0 /* current_len = 0. */ + + vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */ + vrepf %v18,%v18,0 + + vfeezfs %v16,%v16,%v18 /* Find element equal with zero search. */ + vlgvb %r4,%v16,7 /* Load byte index of character or zero. */ + clrjl %r4,%r1,.Lfound /* Return if c/zero is in loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 (s 'and' 15). */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Find c/zero in 16byte aligned loop */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. 
*/ + vfeezfs %v16,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + + /* Found character or zero */ +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r1,%v16,7 /* Load byte index of character. */ + algr %r5,%r1 + la %r2,0(%r5,%r2) /* Return pointer to character. */ + +.Lend: + br %r14 +.Lfallback: + jg __wcschrnul_c +END(__wcschrnul_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcschrnul.c b/sysdeps/s390/multiarch/wcschrnul.c new file mode 100644 index 0000000000..7436a596bd --- /dev/null +++ b/sysdeps/s390/multiarch/wcschrnul.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcschrnul. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcschrnul) /* NOTE(review): macro from ifunc-resolve.h (not visible here); presumably selects __wcschrnul_vx or __wcschrnul_c at load time - confirm. */ +weak_alias (__wcschrnul, wcschrnul) + +#else /* No vector asm support or not in libc: include the wcsmbs source instead. */ +# include <wcsmbs/wcschrnul.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcscmp-c.c b/sysdeps/s390/multiarch/wcscmp-c.c new file mode 100644 index 0000000000..3add8e4095 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscmp-c.c @@ -0,0 +1,32 @@ +/* Default wcscmp implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCMP __wcscmp_c /* NOTE(review): presumably the name wcsmbs/wcscmp.c gives its function - confirm. */ + +# include <wchar.h> +extern __typeof (wcscmp) __wcscmp_c; +# undef weak_alias /* Redefined below to expand to nothing. */ +# define weak_alias(name, alias) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcscmp_c, __GI___wcscmp, __wcscmp_c); +# endif /* SHARED */ +# include <wcsmbs/wcscmp.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcscmp-vx.S b/sysdeps/s390/multiarch/wcscmp-vx.S new file mode 100644 index 0000000000..549ae3c733 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscmp-vx.S @@ -0,0 +1,131 @@ +/* Vector optimized 32/64 bit S/390 version of wcscmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int wcscmp (const wchar_t *s1, const wchar_t *s2) + Compare two wide-character strings; returns -1, 0 or 1. + + Register usage: + -r1=loaded byte count s1 + -r2=s1 and return value + -r3=s2 + -r4=loaded byte count s2, tmp + -r5=current_len (byte offset into s1/s2) + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__wcscmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + lghi %r5,0 /* current_len = 0. 
*/ + +.Lloop: + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */ + lcbb %r1,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vr is not fully loaded. */ + lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */ + jo .Llt16_2 /* Jump away if vr is not fully loaded. */ + /* Both vrs are fully loaded. */ + aghi %r5,16 + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */ + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 /* Second unrolled iteration. */ + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 /* Third unrolled iteration. */ + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 /* Fourth unrolled iteration. */ + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + jno .Lfound + j .Lloop + +.Lcmp_one_char: + /* At least one of both strings is not 4-byte aligned + and there is no full character before next block-boundary. + Compare one character to get over the boundary and + proceed with normal loop! */ + vlef %v16,0(%r5,%r2),0 /* Load one character. */ + vlef %v17,0(%r5,%r3),0 + lghi %r1,4 /* Loaded byte count is 4. */ + j .Llt_cmp /* Proceed with comparison. */ + +.Llt16_1: + lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */ +.Llt16_2: + clr %r1,%r4 + locrh %r1,%r4 /* Get minimum of bytes loaded in s1/2. */ + nill %r1,65532 /* Align bytes loaded to full characters. */ + jz .Lcmp_one_char /* Jump away if no full char is available. */ +.Llt_cmp: + algfr %r5,%r1 /* Add smallest loaded bytes to current_len. */ + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */ + vlgvb %r4,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r4,%r1,.Lfound /* Jump away if miscompare is within loaded + bytes. 
*/ + j .Lloop + +.Lfound: + /* vfenezf found an unequal element or zero. + This instruction compares unsigned words, but wchar_t is signed. + Thus we have to compare the found element again. */ + vlgvb %r4,%v18,7 /* Extract not equal byte-index, */ + srl %r4,2 /* Convert it to character-index. */ + vlgvf %r3,%v16,0(%r4) /* Load character-values. */ + vlgvf %r4,%v17,0(%r4) + cr %r3,%r4 /* Signed compare of both characters. */ + je .Lend_equal /* Same value at found index -> strings are equal. */ + lghi %r2,1 /* Assume s1 > s2. */ + lghi %r1,-1 + locgrl %r2,%r1 /* Return -1 if s1 < s2. */ + br %r14 +.Lend_equal: + lghi %r2,0 + br %r14 +END(__wcscmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcscmp.c b/sysdeps/s390/multiarch/wcscmp.c new file mode 100644 index 0000000000..705ef4596e --- /dev/null +++ b/sysdeps/s390/multiarch/wcscmp.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcscmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcscmp) /* NOTE(review): macro from ifunc-resolve.h (not visible here); presumably selects __wcscmp_vx or __wcscmp_c at load time - confirm. */ +weak_alias (__wcscmp, wcscmp) + +#else /* No vector asm support or not in libc: include the wcsmbs source instead. */ +# include <wcsmbs/wcscmp.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcscpy-c.c b/sysdeps/s390/multiarch/wcscpy-c.c new file mode 100644 index 0000000000..3450c00048 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscpy-c.c @@ -0,0 +1,25 @@ +/* Default wcscpy implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCPY __wcscpy_c /* NOTE(review): presumably the name wcsmbs/wcscpy.c gives its function - confirm. */ + +# include <wchar.h> +extern __typeof (wcscpy) __wcscpy_c; +# include <wcsmbs/wcscpy.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcscpy-vx.S b/sysdeps/s390/multiarch/wcscpy-vx.S new file mode 100644 index 0000000000..2077893130 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscpy-vx.S @@ -0,0 +1,111 @@ +/* Vector optimized 32/64 bit S/390 version of wcscpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcscpy (wchar_t *dest, const wchar_t *src) + Copy string src to dest. Tail-calls __wcscpy_c if src is not 4-byte aligned. + + Register usage: + -r0=unused in this routine + -r1=tmp + -r2=dest and return value (never modified) + -r3=src (reused as store address once the zero is found) + -r4=tmp + -r5=current_len (byte offset) + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__wcscpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r3,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lfound_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. 
*/ +.Lloop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lloop /* No zero found -> loop. */ + +.Lfound_v16_32: + aghi %r5,32 /* Zero is in the part at offset 32. */ +.Lfound_v16_0: + la %r3,0(%r5,%r2) /* %r3 = store address in dest. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r1,0(%r3) /* Copy characters including zero. */ + br %r14 + +.Lfound_v18_48: + aghi %r5,32 /* Zero is in the part at offset 48 (16 more added by la). */ +.Lfound_v18_16: + la %r3,16(%r5,%r2) /* %r3 = store address in dest. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v18,%r1,0(%r3) /* Copy characters including zero. */ + br %r14 + +.Lfound_align: + aghi %r5,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + br %r14 + +.Lfallback: + jg __wcscpy_c /* src is not 4-byte aligned. */ +END(__wcscpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcscpy.c b/sysdeps/s390/multiarch/wcscpy.c new file mode 100644 index 0000000000..8c5f54910b --- /dev/null +++ b/sysdeps/s390/multiarch/wcscpy.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcscpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcscpy, wcscpy) /* NOTE(review): macro from ifunc-resolve.h (not visible here); presumably defines wcscpy as an ifunc choosing __wcscpy_vx or __wcscpy_c - confirm. */ + +#else /* No vector asm support or not in libc: include the wcsmbs source instead. */ +# include <wcsmbs/wcscpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcscspn-c.c b/sysdeps/s390/multiarch/wcscspn-c.c new file mode 100644 index 0000000000..e8fd2a53d9 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscspn-c.c @@ -0,0 +1,26 @@ +/* Default wcscspn implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCSPN __wcscspn_c /* NOTE(review): presumably the name wcsmbs/wcscspn.c gives its function - confirm. */ + +# include <wchar.h> +extern __typeof (wcscspn) __wcscspn_c; + +# include <wcsmbs/wcscspn.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcscspn-vx.S b/sysdeps/s390/multiarch/wcscspn-vx.S new file mode 100644 index 0000000000..b0b1066658 --- /dev/null +++ b/sysdeps/s390/multiarch/wcscspn-vx.S @@ -0,0 +1,293 @@ +/* Vector optimized 32/64 bit S/390 version of wcscspn. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcscspn (const wchar_t *s, const wchar_t * reject) + The wcscspn() function calculates the length of the initial segment + of s which consists entirely of characters not in reject. + + This method checks the length of reject string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of reject-string. Otherwise a slower full + check of reject-string is used. 
+ + register overview: + r3: pointer to start of reject-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string + r4: found byte index + r1: current return len (in bytes until the final shift) + v16: search-string + v17: reject-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first reject-string + v20: zero for preparing reject-vector (v24: all ones, compared with v21) + v21: global mask; 1 indicates a match between + search-string-vreg and any reject-character + v22: current mask; 1 indicates a match between + search-string-vreg and any reject-character in current reject-vreg + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of reject-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former reject-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb reject-string +*/ +ENTRY(__wcscspn_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + /* + Check if reject-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),0 /* Load reject. */ + lcbb %r0,0(%r3),0 /* Get loaded byte count of reject. */ + jo .Lcheck_onbb /* Special case if reject + lies on block-boundary. */ + +.Lcheck_notonbb: + lghi %r1,0 /* Zero out current len. */ + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> reject fits in one vreg. */ + j .Lslow /* No zero -> reject exceeds one vreg. */ + + +.Lcheck_onbb: + /* Reject lies on block-boundary. */ + nill %r0,65532 /* Recognize only fully loaded characters. */ + je .Lcheck_onbb2 /* Reload vr, if we loaded no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Search zero in loaded reject bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Reject fits in one vreg; + Fill with zeros and proceed + with FAST. 
*/ +.Lcheck_onbb2: + vl %v17,0(%r3) /* Load reject, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if reject fits in one vreg. */ + + + /* + Search s for reject in one vreg + ------------------------------- + */ +.Lfast: + /* Complete reject-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 + equal to any in v17 + or first zero element. */ + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + clrjl %r4,%r0,.Lfast_loop_found2 /* If found index is within loaded + bytes, return with found element + index (=byte length of prefix). */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to reject + and none was zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element or zero. */ +.Lfast_loop_found2: + algrk %r2,%r1,%r4 /* Add found index to current len. */ + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + br %r14 + + + + /* + Search s for reject in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. 
*/ + vlvgg %v30,%r6,0 /* v30 = r6. */ + vlvgp %v31,%r8,%r9 /* v31 = r8, r9. */ + + /* Reject in v17 without zero. */ + vlr %v19,%v17 /* Save first reject-part for a fast reload. */ + vzero %v20 /* Zero for preparing reject-vector. */ + vone %v24 /* One for checking result of former + string-part. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero -> return 0. */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeef %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) or 16. */ + /* Equal-index < min(zero-index, loaded byte count) + -> Return length up to equal element. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> Former str-part was last str-part + -> Return length up to the zero */ + clrjl %r6,%r0,.Lslow_end_not_found + + /* All elements are zero (=no match) -> proceed with next str-part. */ + vlr %v17,%v19 /* Load first part of reject (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r0,16 /* Loaded byte count is 16. 
*/ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero (end of string) + -> Return current length. */ + +.Lslow_loop_acc: + vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any rejected character in + this reject-string-part) IN=0, RT=1. */ + vlgvf %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any rejected characters? + (All other parts of reject cannot lead to a match before this one) + -> Return current len, which is pointing to this element. */ + clijh %r4,0,.Lslow_end + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next reject-part until end of reject is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in last reject-part + -> all reject-parts are processed; + check results of this str-part. */ + vlbb %v17,16(%r5,%r3),6 /* Load next reject part. */ + aghi %r5,16 /* Increment current len of reject-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string. */ + jo .Lslow_next_acc_onbb /* Jump away if reject-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in reject-part: fill zeros with first-reject-character. */ + vlgvf %r8,%v17,0 /* Load first element of reject-part. */ + clije %r8,0,.Lslow_next_str /* Process next str-part if first + character in this part of reject + is a zero. */ + /* r8>0 -> zero found in this reject-part. */ + vrepf %v18,%v17,0 /* Replicate first char across all chars. */ + vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). 
*/ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Reject-string part is prepared. */ + +.Lslow_next_acc_onbb: + nill %r9,65532 /* Recognize only fully loaded characters. */ + je .Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t + loaded. */ + vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of reject part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ +.Lslow_next_acc_onbb2: + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of reject, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_not_found: + algfr %r1,%r6 /* Add zero-index to current len. */ + j .Lslow_end +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end: + srlg %r2,%r1,2 /* Convert byte-count to character-count. */ + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 +.Lfallback: + jg __wcscspn_c /* s is not 4-byte aligned. */ +END(__wcscspn_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcscspn.c b/sysdeps/s390/multiarch/wcscspn.c new file mode 100644 index 0000000000..ebd77734ac --- /dev/null +++ b/sysdeps/s390/multiarch/wcscspn.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcscspn. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcscspn, wcscspn) /* NOTE(review): macro from ifunc-resolve.h (not visible here); presumably defines wcscspn as an ifunc choosing __wcscspn_vx or __wcscspn_c - confirm. */ + +#else /* No vector asm support or not in libc: include the wcsmbs source instead. */ +# include <wcsmbs/wcscspn.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcslen-c.c b/sysdeps/s390/multiarch/wcslen-c.c new file mode 100644 index 0000000000..dcbe3094d9 --- /dev/null +++ b/sysdeps/s390/multiarch/wcslen-c.c @@ -0,0 +1,25 @@ +/* Default wcslen implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSLEN __wcslen_c /* NOTE(review): presumably the name wcsmbs/wcslen.c gives its function - confirm. */ + +# include <wchar.h> +extern __typeof (__wcslen) __wcslen_c; +# include <wcsmbs/wcslen.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcslen-vx.S b/sysdeps/s390/multiarch/wcslen-vx.S new file mode 100644 index 0000000000..dafb7b799d --- /dev/null +++ b/sysdeps/s390/multiarch/wcslen-vx.S @@ -0,0 +1,91 @@ +/* Vector optimized 32/64 bit S/390 version of wcslen. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcslen (const wchar_t *s) + Returns length of string s in characters. Tail-calls __wcslen_c if s is not 4-byte aligned. + + Register usage: + -r1=bytes to 4k-byte boundary + -r2=s and return value + -r3=tmp + -r4=tmp + -r5=current_len (byte offset into s) + -v16=part of s +*/ +ENTRY(__wcslen_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vfenezf %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r4,%v16,7 /* Load zero index or 16 if not found. 
*/ + clr %r4,%r1 /* If found zero within loaded bytes? */ + locgrl %r2,%r4 /* Then copy return value. */ + jl .Lend /* And return. */ + + /* Align s to 16 byte. */ + risbgn %r3,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r3 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound16 + vl %v16,32(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound32 + vl %v16,48(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound48 + + aghi %r5,64 + j .Lloop /* No zero found -> loop. */ + +.Lfound48: + aghi %r5,16 /* Labels fall through, each adding 16 to current_len. */ +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r2,%v16,7 /* Load byte index of zero. */ + algr %r2,%r5 /* Byte length = current_len + zero index. */ +.Lend: + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + br %r14 +.Lfallback: + jg __wcslen_c /* s is not 4-byte aligned. */ +END(__wcslen_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcslen.c b/sysdeps/s390/multiarch/wcslen.c new file mode 100644 index 0000000000..540845f70a --- /dev/null +++ b/sysdeps/s390/multiarch/wcslen.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcslen. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcslen) /* NOTE(review): macro from ifunc-resolve.h (not visible here); presumably selects __wcslen_vx or __wcslen_c at load time - confirm. */ +weak_alias (__wcslen, wcslen) + +#else /* No vector asm support or not in libc: include the wcsmbs source instead. */ +# include <wcsmbs/wcslen.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcsncat-c.c b/sysdeps/s390/multiarch/wcsncat-c.c new file mode 100644 index 0000000000..e8cc219eac --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncat-c.c @@ -0,0 +1,25 @@ +/* Default wcsncat implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSNCAT __wcsncat_c /* NOTE(review): presumably the name wcsmbs/wcsncat.c gives its function - confirm. */ + +# include <wchar.h> +extern __typeof (wcsncat) __wcsncat_c; +# include <wcsmbs/wcsncat.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcsncat-vx.S b/sysdeps/s390/multiarch/wcsncat-vx.S new file mode 100644 index 0000000000..4264f6d21d --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncat-vx.S @@ -0,0 +1,265 @@ +/* Vector optimized 32/64 bit S/390 version of wcsncat. + Copyright (C) 2015-2016 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcsncat (wchar_t *dest, const wchar_t *src, size_t n) + Concatenate two strings - at most n characters of src. + + Register usage: + -r0=saved dest pointer for return + -r1=tmp + -r2=dest + -r3=src + -r4=n + -r5=current_len + -r6=tmp + -r7=tmp + -v16=part of src + -v17=index of zero + -v18=part of src + -v31=register save area for r6, r7 +*/ +ENTRY(__wcsncat_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + /* If either src or dest is not 4byte aligned, use __wcsncat_c. */ + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + tmll %r3,3 /* Test if src is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + lgr %r0,%r2 /* Save destination pointer for return. */ + vlvgp %v31,%r6,%r7 /* Save registers. 
*/ + + /* WCSLEN + %r1 = loaded bytes (tmp) + %r5 = zero byte index, afterwards current_len (byte length of dest) + %r2 = dst + */ + vfenezf %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Llen_end /* Found zero within loaded bytes, end. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Llen_loop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Llen_found /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found16 + vl %v16,32(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found32 + vl %v16,48(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found48 + + aghi %r5,64 /* All four vectors were free of zeros. */ + j .Llen_loop /* No zero -> loop. */ + +.Llen_found48: + aghi %r5,16 /* Falls through: 3 * 16 is added in total. */ +.Llen_found32: + aghi %r5,16 /* Falls through: 2 * 16 is added in total. */ +.Llen_found16: + aghi %r5,16 +.Llen_found: + vlgvb %r1,%v16,7 /* Load byte index of zero. */ + algr %r5,%r1 /* current_len += byte index of zero. */ + +.Llen_end: + /* WCSNCPY + %r1 = zero byte index (tmp) + %r6 = loaded bytes (tmp) + %r3 = curr src pointer + %r2 = curr dst pointer + %r7 = border, tmp + */ + la %r2,0(%r5,%r2) /* Advance dst by current_len to its terminating zero; src is copied there. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen (49152 = 0xc000, the two bits the shift below drops). */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen (49152 = 0xc000). */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. 
*/ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + clgrjle %r4,%r6,.Lcpy_remaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r1,%r6,.Lcpy_found_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r3,60,128+63,0 /* %r1 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r1 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and maxlen > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* + Now we are 16byte aligned, so we can load a full vreg + without page fault. + */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lcpy_loop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lcpy_remaining_v16 /* If n <=16, + process remaining bytes. */ +.Lcpy_lt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + clgrjhe %r5,%r7,.Lcpy_remaining_v16 /* If current_len >= border -> process remaining. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lcpy_remaining_v18 /* Same step with v16 and v18 swapped. */ + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lcpy_remaining_v16 + vfenezfs %v17,%v16,%v16 + je .Lcpy_found_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 /* Falls through with the last part in v18. */ + +.Lcpy_remaining_v18: + vlr %v16,%v18 /* Move remaining part to v16 for the common tail. */ +.Lcpy_remaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. 
*/ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + /* Zero-index within remaining-bytes, store up to zero and end. */ + clgrjle %r1,%r7,.Lcpy_found_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes. */ + lghi %r1,0 /* Zero word used as terminating character. */ + st %r1,1(%r7,%r2) /* Store string-null-termination beyond n. */ +.Lcpy_end: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_v16_32: + aghi %r5,32 /* Zero is in the vector loaded from offset 32. */ + j .Lcpy_found_v16 +.Lcpy_found_v18_48: + aghi %r5,32 /* Falls through: 32 + 16 = 48 is added in total. */ +.Lcpy_found_v18_16: + aghi %r5,16 +.Lcpy_found_v18: + vlr %v16,%v18 /* Common tail below expects the part in v16. */ +.Lcpy_found_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ +.Lcpy_found_v16_store: + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + j .Lcpy_end + + /* Find zero in 16byte aligned loop. */ +.Lcpy_loop2: + vl %v16,16(%r5,%r3) /* NOTE(review): no branch to .Lcpy_loop2 is visible in this function and it follows an unconditional jump; these three instructions look unreachable - confirm before removing. */ + vst %v18,0(%r5,%r2) + aghi %r5,16 + +.Lcpy_loop64: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lcpy_found_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 /* 64 bytes without zero were copied. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lcpy_loop64 + + vl %v16,0(%r5,%r3) /* Load s. 
*/ + j .Lcpy_lt64 + +.Lfallback: + jg __wcsncat_c +END(__wcsncat_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcsncat.c b/sysdeps/s390/multiarch/wcsncat.c new file mode 100644 index 0000000000..62073321e8 --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncat.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcsncat. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcsncat, wcsncat) + +#else +# include <wcsmbs/wcsncat.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcsncmp-c.c b/sysdeps/s390/multiarch/wcsncmp-c.c new file mode 100644 index 0000000000..8f2573810d --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncmp-c.c @@ -0,0 +1,25 @@ +/* Default wcsncmp implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSNCMP __wcsncmp_c + +# include <wchar.h> +extern __typeof (wcsncmp) __wcsncmp_c; +# include <wcsmbs/wcsncmp.c> +#endif diff --git a/sysdeps/s390/multiarch/wcsncmp-vx.S b/sysdeps/s390/multiarch/wcsncmp-vx.S new file mode 100644 index 0000000000..e77f17dcaf --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncmp-vx.S @@ -0,0 +1,177 @@ +/* Vector optimized 32/64 bit S/390 version of wcsncmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int wcsncmp (const wchar_t *s1, const wchar_t *s2, size_t n) + Compare at most n characters of two strings. + + Register usage: + -r0=tmp + -r1=tmp + -r2=s1 + -r3=s2 + -r4=n + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__wcsncmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgije %r4,0,.Lend_equal /* Nothing to do if n == 0. */ + + /* Check range of n and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + /* Check first character without vector load. */ + lghi %r5,4 /* current_len = 4 bytes. */ + /* Check s1/2[0]. */ + lt %r0,0(%r2) + l %r1,0(%r3) + je .Lend_cmp_one_char + crjne %r0,%r1,.Lend_cmp_one_char + +.Lloop: + vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */ + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + lcbb %r0,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vector not fully loaded. */ + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ + jo .Llt16_2 /* Jump away if vector not fully loaded. */ + aghi %r5,16 /* Both vectors are fully loaded. */ + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */ + clgrjhe %r5,%r4,.Llastcmp /* If current_len >= n ->last compare. 
*/ + jno .Lfound /* Mismatch or zero found within n. */ + + vlbb %v17,0(%r5,%r3),6 /* Unrolled repetition of the step at .Lloop: load s2. */ + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + lcbb %r0,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vector not fully loaded. */ + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ + jo .Llt16_2 /* Jump away if vector not fully loaded. */ + aghi %r5,16 /* Both vectors are fully loaded. */ + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */ + clgrjhe %r5,%r4,.Llastcmp /* If current_len >= n -> last compare. */ + jno .Lfound + + vlbb %v17,0(%r5,%r3),6 /* Unrolled repetition of the step at .Lloop. */ + vlbb %v16,0(%r5,%r2),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + clgrjhe %r5,%r4,.Llastcmp + jno .Lfound + + vlbb %v17,0(%r5,%r3),6 /* Unrolled repetition of the step at .Lloop. */ + vlbb %v16,0(%r5,%r2),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + clgrjhe %r5,%r4,.Llastcmp + jno .Lfound + + j .Lloop + +.Llt16_1: + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ +.Llt16_2: + clr %r0,%r1 /* Compare loaded byte counts of s1 and s2. */ + locrh %r0,%r1 /* Compute minimum of bytes loaded. */ + nill %r0,65532 /* Align bytes loaded to full characters (65532 = 0xfffc). */ + jz .Lcmp_one_char /* Jump away if no full char is available. */ +.Llt_cmp: + algfr %r5,%r0 /* Add smallest loaded bytes to current_len. */ + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */ + clgrj %r5,%r4,10,.Llastcmp /* If current_len >= n -> last compare (mask 10 = high or equal). */ + vlgvb %r1,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r1,%r0,.Lfound /* Jump away if miscompare is within + loaded bytes; (index < loaded-bytes) */ + j .Lloop + +.Lcmp_one_char: + /* At least one of both strings is not 4-byte aligned + and there is no full character before next block-boundary. + Compare one character to get over the boundary and + proceed with normal loop! */ + vlef %v16,0(%r5,%r2),0 /* Load one character. */ + lghi %r0,4 /* Loaded byte count is 4. */ + vlef %v17,0(%r5,%r3),0 /* Load one character of s2. */ + j .Llt_cmp /* Proceed with comparison. */ + +.Llastcmp: + /* Use comparision result only if located within first n characters. 
+ %r0: loaded byte count in vreg; + %r5: current_len; + %r4: n; + (current_len - n): [0...16[ + First ignored match index: loaded bytes - (current_len-n): ]0...16] + */ + slgr %r5,%r4 /* %r5 = current_len - n. */ + slr %r0,%r5 /* %r0 = first ignored match index. */ + vlgvb %r4,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r4,%r0,.Lfound2 /* Jump away if miscompare is within + loaded bytes and below n bytes. */ +.Lend_equal: + lghi %r2,0 + br %r14 + +.Lfound: + /* Difference or end of string. */ + /* vfenezf found an unequal element or zero. + This instruction compares unsigned words, but wchar_t is signed. + Thus we have to compare the found element again. */ + vlgvb %r4,%v18,7 /* Extract not equal byte-index. */ +.Lfound2: + srl %r4,2 /* And convert it to character-index. */ + vlgvf %r0,%v16,0(%r4) /* Load character-values. */ + vlgvf %r1,%v17,0(%r4) +.Lend_cmp_one_char: + cr %r0,%r1 + je .Lend_equal + lghi %r2,1 + lghi %r1,-1 + locgrl %r2,%r1 + br %r14 +END(__wcsncmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcsncmp.c b/sysdeps/s390/multiarch/wcsncmp.c new file mode 100644 index 0000000000..3482d90e4e --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncmp.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcsncmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcsncmp, wcsncmp) + +#else +# include <wcsmbs/wcsncmp.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcsncpy-c.c b/sysdeps/s390/multiarch/wcsncpy-c.c new file mode 100644 index 0000000000..b63d86ef5f --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncpy-c.c @@ -0,0 +1,25 @@ +/* Default wcsncpy implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSNCPY __wcsncpy_c + +# include <wchar.h> +extern __typeof (__wcsncpy) __wcsncpy_c; +# include <wcsmbs/wcsncpy.c> +#endif diff --git a/sysdeps/s390/multiarch/wcsncpy-vx.S b/sysdeps/s390/multiarch/wcsncpy-vx.S new file mode 100644 index 0000000000..33cc33f28b --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncpy-vx.S @@ -0,0 +1,223 @@ +/* Vector optimized 32/64 bit S/390 version of wcsncpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcsncpy (wchar_t *dest, const wchar_t *src, size_t n) + Copy at most n characters of string src to dest. + + Register usage: + -r0=dest pointer for return + -r1=tmp, zero byte index + -r2=dest + -r3=src + -r4=n + -r5=current_len + -r6=tmp, loaded bytes + -r7=tmp, border + -v16=part of src + -v17=index of zero + -v18=part of src + -v31=register save area for r6, r7 +*/ +ENTRY(__wcsncpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Zero-extend 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + + clgfi %r4,0 /* Compare n with zero. */ + ber %r14 /* Nothing to do, if n == 0. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + + tmll %r3,3 /* Test if src is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vlvgp %v31,%r6,%r7 /* Save registers. */ + lgr %r0,%r2 /* Save destination pointer for return. */ + + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of n (49152 = 0xc000). */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. 
*/ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of n (49152 = 0xc000). */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + clgrjle %r4,%r6,.Lremaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + clrjl %r1,%r6,.Lfound_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Now we are 16byte aligned, so we can load + a full vreg without page fault. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lremaining_v16 /* If n <=16, process remaining + bytes. */ +.Llt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = maxlen - 16. */ + + clgrjhe %r5,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. 
*/ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v18 + vfenezfs %v17,%v18,%v18 + je .Lfound_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v16 + vfenezfs %v17,%v16,%v16 + je .Lfound_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lremaining_v18: + vlr %v16,%v18 +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + /* Zero in remaining bytes? -> jump away (zero-index < max-index) + Do not jump away if zero-index == max-index, + but simply copy zero with vstl below. */ + clrjl %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes without null + termination!. */ +.Lend: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lfound_v16_32: + aghi %r5,32 + j .Lfound_v16 +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + aghi %r5,16 +.Lfound_v18: + vlr %v16,%v18 +.Lfound_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no support for index-register. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + /* Fill remaining bytes with zero - remaining count always > 0. */ + algr %r5,%r1 /* Remaining bytes (=%r4) = ... */ + slgr %r4,%r5 /* = maxlen - (currlen + zero_index + 1). */ + la %r2,0(%r1,%r2) /* Pointer to zero. start filling beyond. 
*/ + aghi %r4,-2 /* mvc with exrl needs count - 1. + (additional -1, see remaining bytes above) */ + srlg %r6,%r4,8 /* Split into 256 byte blocks. */ + ltgr %r6,%r6 + je .Lzero_lt256 +.Lzero_loop256: + mvc 1(256,%r2),0(%r2) /* Fill 256 zeros at once. */ + la %r2,256(%r2) + brctg %r6,.Lzero_loop256 /* Loop until all blocks are processed. */ +.Lzero_lt256: + exrl %r4,.Lmvc_lt256 + j .Lend +.Lmvc_lt256: + mvc 1(1,%r2),0(%r2) + + /* Find zero in 16byte aligned loop. */ +.Lloop64: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + j .Llt64 + +.Lfallback: + jg __wcsncpy_c +END(__wcsncpy_vx) + +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcsncpy.c b/sysdeps/s390/multiarch/wcsncpy.c new file mode 100644 index 0000000000..eb225a97b4 --- /dev/null +++ b/sysdeps/s390/multiarch/wcsncpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcsncpy. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcsncpy) +weak_alias (__wcsncpy, wcsncpy) + +#else +# include <wcsmbs/wcsncpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcsnlen-c.c b/sysdeps/s390/multiarch/wcsnlen-c.c new file mode 100644 index 0000000000..89984e9f18 --- /dev/null +++ b/sysdeps/s390/multiarch/wcsnlen-c.c @@ -0,0 +1,25 @@ +/* Default wcsnlen implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSNLEN __wcsnlen_c + +# include <wchar.h> +extern __typeof (__wcsnlen) __wcsnlen_c; +# include <wcsmbs/wcsnlen.c> +#endif diff --git a/sysdeps/s390/multiarch/wcsnlen-vx.S b/sysdeps/s390/multiarch/wcsnlen-vx.S new file mode 100644 index 0000000000..1ba00c3cae --- /dev/null +++ b/sysdeps/s390/multiarch/wcsnlen-vx.S @@ -0,0 +1,151 @@ +/* Vector optimized 32/64 bit S/390 version of wcsnlen. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcsnlen (const wchar_t *s, size_t maxlen) + Returns the number of characters in s or at most maxlen. + + Register usage: + -r1=tmp + -r2=address of string + -r3=maxlen (number of characters to be read) + -r4=tmp + -r5=current_len and return_value + -v16=part of s +*/ +ENTRY(__wcsnlen_vx) + + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r3,%r3 +# endif /* !defined __s390x__ */ + + clgfi %r3,0 /* if maxlen == 0, return 0. */ + locgre %r2,%r3 + ber %r14 + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. 
*/ + llgfr %r1,%r1 /* Convert 32bit to 64bit. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r3,49152 /* Test bit 0 or 1 of maxlen (49152 = 0xc000). */ + lghi %r4,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r3,49152 /* Test bit 0 or 1 of maxlen (49152 = 0xc000). */ + llilf %r4,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r3,%r3,2 /* Convert character-count to byte-count. */ + locgrne %r3,%r4 /* Use max byte-count, if bit 0/1 was one. */ + + vfenezf %v16,%v16,%v16 /* Find element not equal with zero search. */ + clgr %r1,%r3 /* Compare loaded_byte_count with maxlen. */ + locgrh %r1,%r3 /* loaded_byte_count + = min (loaded_byte_count, maxlen) */ + + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lend /* Found zero within loaded bytes -> return. */ + + clgr %r1,%r3 /* If loaded_byte_count == maxlen -> end. */ + locgre %r5,%r3 /* current_len = maxlen in that case. */ + je .Lend + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r3,.Lloop64 + + /* Find zero in max 64byte with aligned s. */ +.Llt64: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound /* If current_len >= maxlen -> end. */ + vl %v16,0(%r5,%r2) /* Same step for the next 16 bytes. */ + vfenezfs %v16,%v16,%v16 + je .Lfound + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound + vl %v16,0(%r5,%r2) /* Same step for the next 16 bytes. */ + vfenezfs %v16,%v16,%v16 + je .Lfound + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound + vl %v16,0(%r5,%r2) /* Last of the four vectors handled here. */ + vfenezfs %v16,%v16,%v16 + j .Lfound /* Unconditional: .Lfound adds the zero index or 16. */ + +.Lfound48: + aghi %r5,16 /* Falls through: 3 * 16 is added in total. */ +.Lfound32: + aghi %r5,16 /* Falls through: 2 * 16 is added in total. */ +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r4,%v16,7 /* Load byte index of zero or 16 if no zero. 
*/ + algr %r5,%r4 + + clgr %r5,%r3 + locgrh %r5,%r3 /* Return min (current_len, maxlen). */ +.Lend: + srlg %r2,%r5,2 /* Convert byte-count to character-count. */ + br %r14 + + /* Find zero in 16byte aligned loop. */ +.Lloop64: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound16 + vl %v16,32(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound32 + vl %v16,48(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound48 + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r3,.Lloop64 + + j .Llt64 + +.Lfallback: + jg __wcsnlen_c +END(__wcsnlen_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcsnlen.c b/sysdeps/s390/multiarch/wcsnlen.c new file mode 100644 index 0000000000..4308472a81 --- /dev/null +++ b/sysdeps/s390/multiarch/wcsnlen.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcsnlen. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcsnlen) +weak_alias (__wcsnlen, wcsnlen) + +#else +# include <wcsmbs/wcsnlen.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcspbrk-c.c b/sysdeps/s390/multiarch/wcspbrk-c.c new file mode 100644 index 0000000000..8b74eaf017 --- /dev/null +++ b/sysdeps/s390/multiarch/wcspbrk-c.c @@ -0,0 +1,31 @@ +/* Default wcspbrk implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSPBRK __wcspbrk_c + +# include <wchar.h> +extern __typeof (wcspbrk) __wcspbrk_c; +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcspbrk_c, __GI_wcspbrk, __wcspbrk_c); +# endif /* SHARED */ + +# include <wcsmbs/wcspbrk.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcspbrk-vx.S b/sysdeps/s390/multiarch/wcspbrk-vx.S new file mode 100644 index 0000000000..3e28e9aa90 --- /dev/null +++ b/sysdeps/s390/multiarch/wcspbrk-vx.S @@ -0,0 +1,315 @@ +/* Vector optimized 32/64 bit S/390 version of wcspbrk. 
+ Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcspbrk (const wchar_t *s, const wchar_t * accept) + The wcspbrk() function locates the first occurrence in the string s + of any of the characters in the string accept and returns a pointer + to that character or NULL if not found. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. 
+ + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string (32bit unsigned) + r4: found byte index (32bit unsigned) + r1: current return len (64bit unsigned) + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__wcspbrk_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lies + on block-boundary. */ + +.Lcheck_notonbb: + lghi %r1,0 /* Zero out current len. */ + vlgvf %r0,%v17,0 /* Get first element. */ + clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ + + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg */ + + +.Lcheck_onbb: + /* Accept lies on block-boundary. */ + nill %r0,65532 /* Recognize only fully loaded characters. */ + je .Lcheck_onbb2 /* Reload vr, if we loaded no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Search zero in loaded accept bytes. 
*/ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ +.Lcheck_onbb2: + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes, return with found + element index (=equal count). */ + clrjl %r4,%r0,.Lfast_loop_found2 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to any + accept character and no zero was found. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ +.Lfast_loop_found2: + srlg %r5,%r4,2 /* Convert byte-index to character-index. 
*/ + vlgvf %r0,%v16,0(%r5) /* Get found element. 
*/ + clije %r0,0,.Lfast_end_null /* Return null if no accept-char found */ + algfr %r1,%r4 /* Add found index of char to current len. */ + la %r2,0(%r1,%r2) /* And return pointer to first equal char. */ + br %r14 + +.Lfast_end_null: + lghi %r2,0 /* Return null if no character is equal. */ + br %r14 + + + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* Accept in v17 without zero */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + vone %v24 /* One for checking result of former string. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16byte boundary; + needs highest index, left bytes are 0. */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeef %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) + or 16 if no match. */ + /* Equal-index < min(zero-index, loaded byte count) + -> return pointer to equal element. 
*/ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> former str-part was last str-part + -> return null */ + clrjl %r6,%r0,.Lslow_end_null + /* All elements are zero (=no match) -> proceed with next str-part. */ + + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null. */ + +.Lslow_loop_acc: + vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vlgvf %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any accepted characters + (all other parts of accept cannot lead to a match before this one) + -> current len is pointing to first element + -> return found */ + clijh %r4,0,.Lslow_end_found + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part + -> accept is fully processed; check + the results of this str-part. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. 
*/ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvf %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_next_str /* Proceed with next string-part, + If first char in this part of accept + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepf %v18,%v17,0 /* Replicate first char across all chars. */ + vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + nill %r9,65532 /* Recognize only fully loaded characters. */ + je .Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ +.Lslow_next_acc_onbb2: + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_null: + lghi %r1,0 /* Return null if no character is equal. */ + j .Lslow_end + +.Lslow_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + srlg %r5,%r4,2 /* Convert byte-index to character-index. */ + vlgvf %r0,%v16,0(%r5) /* Get found element. */ + clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ + +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end_found: + la %r1,0(%r1,%r2) /* And return pointer to first equal char. */ + +.Lslow_end: + /* Restore registers. 
*/ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + lgr %r2,%r1 + br %r14 +.Lfallback: + jg __wcspbrk_c +END(__wcspbrk_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcspbrk.c b/sysdeps/s390/multiarch/wcspbrk.c new file mode 100644 index 0000000000..198144d2c5 --- /dev/null +++ b/sysdeps/s390/multiarch/wcspbrk.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcspbrk. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcspbrk, wcspbrk) + +#else +# include <wcsmbs/wcspbrk.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcsrchr-c.c b/sysdeps/s390/multiarch/wcsrchr-c.c new file mode 100644 index 0000000000..eac588b79e --- /dev/null +++ b/sysdeps/s390/multiarch/wcsrchr-c.c @@ -0,0 +1,25 @@ +/* Default wcsrchr implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSRCHR __wcsrchr_c + +# include <wchar.h> +extern __typeof (wcsrchr) __wcsrchr_c; +# include <wcsmbs/wcsrchr.c> +#endif diff --git a/sysdeps/s390/multiarch/wcsrchr-vx.S b/sysdeps/s390/multiarch/wcsrchr-vx.S new file mode 100644 index 0000000000..0b99edc7a5 --- /dev/null +++ b/sysdeps/s390/multiarch/wcsrchr-vx.S @@ -0,0 +1,190 @@ +/* Vector optimized 32/64 bit S/390 version of wcsrchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcsrchr (const wchar_t *s, wchar_t c) + Locate the last character c in string. + + Register usage: + -r0=loaded bytes in first part of s. + -r1=pointer to last occurrence of c or NULL if not found. + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v17=index of found element + -v18=replicated c + -v19=part of s with last occurrence of c. + -v20=permute pattern +*/ +ENTRY(__wcsrchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */ + vrepf %v18,%v18,0 + + lghi %r1,-1 /* Currently no c found. */ + lghi %r5,0 /* current_len = 0. */ + + vfeezfs %v17,%v16,%v18 /* Find element equal or zero. */ + vlgvb %r4,%v17,7 /* Load byte index of c/zero or 16. */ + clrjl %r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes. */ +.Lalign: + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezfs %v17,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezfs %v17,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezfs %v17,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezfs %v17,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + +.Lfound48: + la %r5,16(%r5) /* Use la since aghi would clobber cc. */ +.Lfound32: + la %r5,16(%r5) +.Lfound16: + la %r5,16(%r5) +.Lfound: + je .Lzero /* Found zero, but no c before that zero. 
*/ + /* Save this part of s to check for further matches after reaching + the end of the complete string. */ + vlr %v19,%v16 + lgr %r1,%r5 + + jh .Lzero /* Found a zero after the found c. */ + aghi %r5,16 /* Start search of next part of s. */ + j .Lloop + +.Lfound_first_part: + /* This code is only executed if the found c/zero is within loaded + bytes. If no c/zero was found (cc==3) the found index = 16, thus + this code is not called. + Resulting condition code of vector find element equal: + cc==0: no c, found zero + cc==1: c found, no zero + cc==2: c found, found zero after c + cc==3: no c, no zero (this case can be ignored). */ + je .Lzero /* Found zero, but no c before that zero. */ + + locgrne %r1,%r5 /* Mark c as found in first part of s. */ + vlr %v19,%v16 + + jl .Lalign /* No zero (e.g. if vr was fully loaded) + -> Align and loop afterwards. */ + + /* Found a zero in vr. If vr was not fully loaded due to block + boundary, the remaining bytes are filled with zero and we can't + rely on zero indication of condition code here! */ + + vfenezf %v17,%v16,%v16 + vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */ + clrjl %r4,%r0,.Lzero /* Zero within loaded bytes -> end. */ + j .Lalign /* Align and loop afterwards. */ + +.Lend_searched_zero: + vlgvb %r4,%v17,7 /* Load byte index of zero. */ + algr %r5,%r4 + la %r2,0(%r5,%r2) /* Return pointer to zero. */ + br %r14 + +.Lzero: + /* Reached end of string. Check if one c was found before. */ + clije %r3,0,.Lend_searched_zero /* Found zero and c is zero. */ + + cgfi %r1,-1 /* No c found -> return NULL. */ + locghie %r2,0 + ber %r14 + + larl %r3,.Lpermute_mask /* Load permute mask. */ + vl %v20,0(%r3) + + /* c was found and is part of v19. */ + vfenezf %v17,%v19,%v19 /* Find zero. */ + vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */ + ahi %r4,3 /* Found zero index is first byte, + thus highest byte index is last byte of + wchar_t zero. */ + + clgfi %r5,0 /* Loaded byte count in v19 is 16, ... 
*/ + lochine %r0,16 /* ... if v19 is not the first part of s. */ + ahi %r0,-1 /* Convert byte count to highest index. */ + + clr %r0,%r4 + locrl %r4,%r0 /* r4 = min (zero-index, highest-index). */ + + /* Right-shift of v19 to mask bytes after zero. */ + clije %r4,15,.Lzero_permute /* No shift is needed if highest index + in vr is 15. */ + lhi %r0,15 + slr %r0,%r4 /* Compute byte count for vector shift left. */ + sll %r0,3 /* Convert to bit count. */ + vlvgb %v17,%r0,7 + vsrlb %v19,%v19,%v17 /* Vector shift right by byte by number of bytes + specified in bits 1-4 of byte 7 in v17. */ + + /* Reverse bytes in v19. */ +.Lzero_permute: + vperm %v19,%v19,%v19,%v20 /* Permute v19 to reversed order. */ + + /* Find c in reversed v19. */ + vfeef %v19,%v19,%v18 /* Find c. */ + la %r2,0(%r1,%r2) + vlgvb %r3,%v19,7 /* Load byte index of c. */ + + /* Compute index in real s and return. */ + slgr %r4,%r3 + lay %r2,-3(%r4,%r2) /* Return pointer to last c. -3 is needed, + because the found byte index is reversed in + vector-register. Thus point to first byte of + wchar_t. */ + br %r14 +.Lpermute_mask: + .byte 0x0C,0x0D,0x0E,0x0F,0x08,0x09,0x0A,0x0B + .byte 0x04,0x05,0x06,0x07,0x00,0x01,0x02,0x03 +.Lfallback: + jg __wcsrchr_c +END(__wcsrchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcsrchr.c b/sysdeps/s390/multiarch/wcsrchr.c new file mode 100644 index 0000000000..9281e12898 --- /dev/null +++ b/sysdeps/s390/multiarch/wcsrchr.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcsrchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcsrchr, wcsrchr) + +#else +# include <wcsmbs/wcsrchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcsspn-c.c b/sysdeps/s390/multiarch/wcsspn-c.c new file mode 100644 index 0000000000..54c2698bd8 --- /dev/null +++ b/sysdeps/s390/multiarch/wcsspn-c.c @@ -0,0 +1,31 @@ +/* Default wcsspn implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSSPN __wcsspn_c + +# include <wchar.h> +extern __typeof (wcsspn) __wcsspn_c; +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcsspn_c, __GI_wcsspn, __wcsspn_c); +# endif /* SHARED */ + +# include <wcsmbs/wcsspn.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcsspn-vx.S b/sysdeps/s390/multiarch/wcsspn-vx.S new file mode 100644 index 0000000000..e1785ea7cf --- /dev/null +++ b/sysdeps/s390/multiarch/wcsspn-vx.S @@ -0,0 +1,270 @@ +/* Vector optimized 32/64 bit S/390 version of wcsspn. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcsspn (const wchar_t *s, const wchar_t * accept) + The wcsspn() function calculates the length of the initial segment + of s which consists entirely of characters in accept. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. 
+ + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r4: loaded byte count of vl search-string + r0: found byte index + r1: current return len of s + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__wcsspn_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lcbb %r4,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lays + on block-boundary. */ +.Lcheck_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg. */ + +.Lcheck_onbb: + /* Accept lays on block-boundary. */ + nill %r4,65532 /* Recognize only fully loaded characters. */ + je .Lcheck_onbb2 /* Reload vr if no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r0,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r0,%r4,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ +.Lcheck_onbb2: + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. 
*/ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezfs %v16,%v16,%v17,8 /* Find first element in v16 + unequal to any in v17 + or first zero element. */ + + vlgvb %r0,%v16,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes (%r0 < %r1), + return with found element index (=equal count). */ + clr %r0,%r1 + srlg %r0,%r0,2 /* Convert byte-count to character-count. */ + locgrl %r2,%r0 + blr %r14 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezfs %v16,%v16,%v17,8 /* Find first element in v16 + unequal to any in v17 + or first zero element. */ + jno .Lfast_loop_found + vl %v16,16(%r1,%r2) + vfaezfs %v16,%v16,%v17,8 + jno .Lfast_loop_found16 + vl %v16,32(%r1,%r2) + vfaezfs %v16,%v16,%v17,8 + jno .Lfast_loop_found32 + vl %v16,48(%r1,%r2) + vfaezfs %v16,%v16,%v17,8 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was unequal to accept + and not zero. */ + + /* Found unequal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r0,%v16,7 /* Load byte index of found element. */ + algrk %r2,%r1,%r0 /* And add it to current len. */ + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + br %r14 + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + lghi %r1,0 /* Zero out current len. 
*/ + + /* accept in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + + /* Align s to 16 byte. */ + risbg %r0,%r2,60,128+63,0 /* Test if s is aligned and + %r0 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned */ + lghi %r4,15 + slr %r4,%r0 /* Compute highest index to load (15-x). */ + vll %v16,%r4,0(%r2) /* Load up to 16byte boundary (vll needs + highest index, remaining bytes are 0). */ + aghi %r4,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 + if there is no zero. */ + clr %r4,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r4 /* Load on cc==1. */ + j .Lslow_loop_acc + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r4 /* Add loaded byte count to current len. */ +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r4,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + +.Lslow_loop_acc: + vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vo %v21,%v21,%v22 /* global-mask = global- | matching-mask. */ + vfenezf %v18,%v21,%v21 /* Find first zero in global-mask. */ + vlgvb %r0,%v18,7 /* Get first found zero-index + (= first mismatch). 
*/ + clrjl %r0,%r6,.Lslow_next_acc /* Mismatch-index < min(lbc,zero-index) + -> Process this string-part + with next acc-part. */ + clrjhe %r0,%r4,.Lslow_next_str /* Found-index >= loaded byte count + -> All loaded bytes are matching + any accept-character + and are not zero. */ + /* All bytes are matching any characters in accept-string + and search-string is fully processed (found-index == zero-index). */ +.Lslow_add_lbc_end: + algrk %r2,%r1,%r0 /* Add matching characters to current len. */ + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 + +.Lslow_next_acc: + clijh %r8,0,.Lslow_add_lbc_end /* There was a zero in last acc-part + -> Add found index to current len + and end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvf %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_add_lbc_end /* End if zero is first character + in this part of accept-string. */ + /* r8>0 -> zero found in this acc-part. */ + vrepf %v18,%v17,0 /* Replicate first char accross all chars. */ + vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + nill %r9,65532 /* Recognize only fully loaded characters. */ + je .Lslow_next_acc_onbb2 /* Reload vr, if we loaded no full + wchar_t. 
*/ + vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ +.Lslow_next_acc_onbb2: + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ +.Lfallback: + jg __wcsspn_c +END(__wcsspn_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcsspn.c b/sysdeps/s390/multiarch/wcsspn.c new file mode 100644 index 0000000000..167a881d13 --- /dev/null +++ b/sysdeps/s390/multiarch/wcsspn.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcsspn. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcsspn, wcsspn) /* NOTE(review): macro is defined in ifunc-resolve.h (not visible here); presumably it makes wcsspn an IFUNC that selects __wcsspn_vx or __wcsspn_c - confirm. */ + +#else +# include <wcsmbs/wcsspn.c> /* No vector asm support or not in libc: use the common-code wcsspn as is. */ +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wmemchr-c.c b/sysdeps/s390/multiarch/wmemchr-c.c new file mode 100644 index 0000000000..32dddc6c3d --- /dev/null +++ b/sysdeps/s390/multiarch/wmemchr-c.c @@ -0,0 +1,37 @@ +/* Default wmemchr implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WMEMCHR __wmemchr_c + +# include <wchar.h> +extern __typeof (wmemchr) __wmemchr_c; +# undef weak_alias +# define weak_alias(name, alias) /* Expands to nothing, so no alias is emitted from this file. */ +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wmemchr_c, __GI___wmemchr, __wmemchr_c); +# undef libc_hidden_weak +# define libc_hidden_weak(name) \ + strong_alias (__wmemchr_c, __wmemchr_c_1); \ + __hidden_ver1 (__wmemchr_c_1, __GI_wmemchr, __wmemchr_c_1); +# endif /* SHARED */ + +# include <wcsmbs/wmemchr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wmemchr-vx.S b/sysdeps/s390/multiarch/wmemchr-vx.S new file mode 100644 index 0000000000..a729681341 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemchr-vx.S @@ -0,0 +1,166 @@ +/* Vector optimized 32/64 bit S/390 version of wmemchr. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wmemchr (const wchar_t *s, wchar_t c, size_t n) + Scans the first n characters of s for character c + and returns pointer to first c, or NULL if c was not found.
+ + Register usage: + -r0=tmp + -r1=tmp + -r2=s + -r3=c + -r4=n (converted to byte-count) + -r5=current_len + -v16=part of s + -v17=index of found c + -v18=c replicated +*/ +ENTRY(__wmemchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Convert 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + + clgije %r4,0,.Lnf_end /* If len == 0 then exit. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r0,%r0 /* Convert 32bit to 64bit. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant (__wmemchr_c) if not. */ + + vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */ + vrepf %v18,%v18,0 + lghi %r5,16 /* current_len = 16. */ + + /* Check range of maxlen and convert to byte-count. If one of the two topmost bits of maxlen is set, the shift below would lose it, so the max byte-count is used instead. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + clgrjhe %r0,%r4,.Llastcmp /* If (bytes to boundary) >= n, + jump to lastcmp. */ + + vfeefs %v17,%v16,%v18 /* Find c. */ + vlgvb %r1,%v17,7 /* Load byte index of c. */ + clgrjl %r1,%r0,.Lfound2 /* Found c is within loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 'and' 15. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + lgr %r0,%r5 /* If %r5 + 64 < n? -> loop64. */ + aghi %r0,64 + clgrjl %r0,%r4,.Lloop64 +.Llt64: + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp /* Do last compare if curr-len >= n. */ + vfeefs %v17,%v16,%v18 /* Find c. */ + jl .Lfound /* Jump away if c was found.
*/ + + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfeefs %v17,%v16,%v18 + jl .Lfound + + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfeefs %v17,%v16,%v18 + jl .Lfound + + vl %v16,0(%r5,%r2) + aghi %r5,16 /* Fall through: .Llt64 is only entered with %r5 + 64 >= n, so current_len >= n holds here. */ + +.Llastcmp: + /* Use comparison result only if located within first n characters. + %r5: current_len; + %r4: n; + (current_len - n): [0...16[ + first ignored match index = vr-width - (current_len - n) ]0...16] + */ + vfeefs %v17,%v16,%v18 /* Find c. */ + slgrk %r4,%r5,%r4 /* %r4 = current_len - n. */ + lghi %r0,16 /* Register width = 16. */ + vlgvb %r1,%v17,7 /* Extract found index or 16 if c was not found. */ + slr %r0,%r4 /* %r0 = first ignored match index. */ + clrjl %r1,%r0,.Lfound2 /* Go away if found index is below n bytes. */ + /* c not found within n-bytes. */ +.Lnf_end: + lghi %r2,0 /* Return null. */ + br %r14 + +.Lfound48: + aghi %r5,16 /* Each of these labels adds 16, so %r5 ends up 16 bytes past the block containing c. */ +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound0: + aghi %r5,16 +.Lfound: + vlgvb %r1,%v17,7 /* Load byte index of c. */ +.Lfound2: + slgfi %r5,16 /* current_len -=16 */ + algr %r5,%r1 /* Byte index of c is added to current len. */ + la %r2,0(%r5,%r2) /* Return pointer to c. */ + br %r14 + +.Lloop64: + vl %v16,0(%r5,%r2) + vfeefs %v17,%v16,%v18 /* Find c. */ + jl .Lfound0 /* Jump away if c was found. */ + vl %v16,16(%r5,%r2) + vfeefs %v17,%v16,%v18 + jl .Lfound16 + vl %v16,32(%r5,%r2) + vfeefs %v17,%v16,%v18 + jl .Lfound32 + vl %v16,48(%r5,%r2) + vfeefs %v17,%v16,%v18 + jl .Lfound48 + + aghi %r5,64 + lgr %r0,%r5 /* If %r5 + 64 < n? -> loop64. */ + aghi %r0,64 + clgrjl %r0,%r4,.Lloop64 + + j .Llt64 +.Lfallback: + jg __wmemchr_c +END(__wmemchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wmemchr.c b/sysdeps/s390/multiarch/wmemchr.c new file mode 100644 index 0000000000..f2bfe3c7a5 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemchr.c @@ -0,0 +1,29 @@ +/* Multiple versions of wmemchr.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wmemchr) /* NOTE(review): macro is defined in ifunc-resolve.h (not visible here); presumably it makes __wmemchr an IFUNC that selects __wmemchr_vx or __wmemchr_c - confirm. */ +weak_alias (__wmemchr, wmemchr) /* Public name wmemchr is a weak alias of __wmemchr. */ +libc_hidden_weak (wmemchr) + +#else +# include <wcsmbs/wmemchr.c> /* No vector asm support or not in libc: use the common-code wmemchr as is. */ +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wmemcmp-c.c b/sysdeps/s390/multiarch/wmemcmp-c.c new file mode 100644 index 0000000000..683385431e --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp-c.c @@ -0,0 +1,26 @@ +/* Default wmemcmp implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WMEMCMP __wmemcmp_c + +# include <wchar.h> +extern __typeof (wmemcmp) __wmemcmp_c; + +# include <wcsmbs/wmemcmp.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wmemcmp-vx.S b/sysdeps/s390/multiarch/wmemcmp-vx.S new file mode 100644 index 0000000000..761cc17771 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp-vx.S @@ -0,0 +1,149 @@ +/* Vector Optimized 32/64 bit S/390 version of wmemcmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n) + Compare at most n characters of two wchar_t-arrays; returns 0, 1 or -1. + + Register usage: + -r0=tmp + -r1=number of blocks or tmp + -r2=s1 + -r3=s2 + -r4=n (converted to byte-count) + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__wmemcmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Convert 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + clgije %r4,0,.Lend_equal /* Nothing to do if n == 0.
*/ + + /* Check range of maxlen and convert to byte-count. If one of the two topmost bits of maxlen is set, the shift below would lose it, so the max byte-count is used instead. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + lghi %r5,0 /* current_len = 0. */ + + clgijh %r4,16,.Lgt16 + +.Lremaining: + aghi %r4,-1 /* vll needs highest index. */ + vll %v16,%r4,0(%r2) + vll %v17,%r4,0(%r3) + vfenef %v18,%v16,%v17 /* Compare not equal. */ + vlgvb %r1,%v18,7 /* Load unequal index or 16 if not found. */ + clrj %r1,%r4,12,.Lfound2 /* r1 <= r4 -> unequal within loaded + bytes. */ + +.Lend_equal: + lghi %r2,0 /* Return 0: no difference within n characters. */ + br %r14 + +.Lfound: + /* vfenefs found an unequal element. + This instruction compares unsigned words, but wchar_t is signed. + Thus we have to compare the found element again. */ + vlgvb %r1,%v18,7 /* Extract not equal byte-index. */ +.Lfound2: + srl %r1,2 /* And convert it to character-index. */ + vlgvf %r0,%v16,0(%r1) /* Load character-values. */ + vlgvf %r1,%v17,0(%r1) + cr %r0,%r1 /* Signed compare of s1-character with s2-character. */ + je .Lend_equal + lghi %r2,1 /* Return 1 if s1-character is greater ... */ + lghi %r1,-1 + locgrl %r2,%r1 /* ... or -1 if it is lower. */ + br %r14 + +.Lgt16: + clgijh %r4,64,.Lpreloop64 + +.Lpreloop16: + srlg %r1,%r4,4 /* Split into 16byte blocks */ +.Lloop16: + vl %v16,0(%r5,%r2) + vl %v17,0(%r5,%r3) + aghi %r5,16 + vfenefs %v18,%v16,%v17 /* Compare not equal. */ + jno .Lfound + brctg %r1,.Lloop16 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,15 /* Get remaining bytes */ + locgre %r2,%r4 /* None remaining: return 0 (equal). */ + ber %r14 + la %r2,0(%r5,%r2) /* Advance s1 and s2 by current_len for the final partial load. */ + la %r3,0(%r5,%r3) + j .Lremaining + +.Lpreloop64: + srlg %r1,%r4,6 /* Split into 64byte blocks */ +.Lloop64: + vl %v16,0(%r5,%r2) + vl %v17,0(%r5,%r3) + vfenefs %v18,%v16,%v17 /* Compare not equal.
*/ + jno .Lfound + + vl %v16,16(%r5,%r2) + vl %v17,16(%r5,%r3) + vfenefs %v18,%v16,%v17 + jno .Lfound + + vl %v16,32(%r5,%r2) + vl %v17,32(%r5,%r3) + vfenefs %v18,%v16,%v17 + jno .Lfound + + vl %v16,48(%r5,%r2) + vl %v17,48(%r5,%r3) + aghi %r5,64 + vfenefs %v18,%v16,%v17 + jno .Lfound + + brctg %r1,.Lloop64 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,63 /* Get remaining bytes */ + locgre %r2,%r4 /* None remaining: return 0 (equal). */ + ber %r14 + clgijh %r4,16,.Lpreloop16 /* More than 16 bytes remaining: continue with 16byte blocks. */ + la %r2,0(%r5,%r2) /* Advance s1 and s2 by current_len for the final partial load. */ + la %r3,0(%r5,%r3) + j .Lremaining +END(__wmemcmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wmemcmp.c b/sysdeps/s390/multiarch/wmemcmp.c new file mode 100644 index 0000000000..95106fcaf9 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemcmp.c @@ -0,0 +1,27 @@ +/* Multiple versions of wmemcmp. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wmemcmp, wmemcmp) /* NOTE(review): macro is defined in ifunc-resolve.h (not visible here); presumably it makes wmemcmp an IFUNC that selects __wmemcmp_vx or __wmemcmp_c - confirm. */ + +#else +# include <wcsmbs/wmemcmp.c> /* No vector asm support or not in libc: use the common-code wmemcmp as is. */ +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wmemset-c.c b/sysdeps/s390/multiarch/wmemset-c.c new file mode 100644 index 0000000000..61ccd8fc09 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemset-c.c @@ -0,0 +1,37 @@ +/* Default wmemset implementation for S/390. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WMEMSET __wmemset_c + +# include <wchar.h> +extern __typeof (__wmemset) __wmemset_c; +# undef weak_alias +# define weak_alias(name, alias) /* Expands to nothing, so no alias is emitted from this file. */ +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wmemset_c, __GI___wmemset, __wmemset_c); +# undef libc_hidden_weak +# define libc_hidden_weak(name) \ + strong_alias (__wmemset_c, __wmemset_c_1); \ + __hidden_ver1 (__wmemset_c_1, __GI_wmemset, __wmemset_c_1); +# endif /* SHARED */ + +# include <wcsmbs/wmemset.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wmemset-vx.S b/sysdeps/s390/multiarch/wmemset-vx.S new file mode 100644 index 0000000000..7a28bb4ca6 --- /dev/null +++ b/sysdeps/s390/multiarch/wmemset-vx.S @@ -0,0 +1,142 @@ +/* Vector Optimized 32/64 bit S/390 version of wmemset. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wmemset(wchar_t *dest, wchar_t wc, size_t n) + Fills an array of n wide-characters with a constant wide character + and returns dest.
+ + Register usage: + -r0=tmp + -r1=tmp + -r2=dest or current-pointer + -r3=wc + -r4=n (converted to byte-count) + -r5=tmp + -v16=replicated wc + -v17,v18,v19=copy of v16 for vstm + -v31=saved dest for return +*/ +ENTRY(__wmemset_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Convert 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + + vlvgg %v31,%r2,0 /* Save destination pointer for return. */ + clgije %r4,0,.Lend + + vlvgf %v16,%r3,0 /* Generate vector with wchar_t wc. */ + vrepf %v16,%v16,0 + + /* Check range of maxlen and convert to byte-count. If one of the two topmost bits of maxlen is set, the shift below would lose it, so the max byte-count is used instead. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r5,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r5,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r5 /* Use max byte-count, if bit 0/1 was one. */ + + /* Align dest to 16 byte. */ + risbg %r0,%r2,60,128+63,0 /* Test if dest is aligned and + %r0 = bits 60-63 'and' 15. */ + je .Lpreloop /* If dest is aligned, loop aligned. */ + tmll %r2,3 /* Test if dest is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant (__wmemset_c) if not. */ + lghi %r1,16 + slr %r1,%r0 /* Compute byte count to store (16-x). */ + clgr %r1,%r4 + locgrh %r1,%r4 /* min (byte count, n) */ + aghik %r5,%r1,-1 /* vstl needs highest index. */ + vstl %v16,%r5,0(%r2) /* Store bytes up to the 16-byte boundary (at most n). */ + clgrje %r1,%r4,.Lend /* Return if n bytes were set. */ + slgr %r4,%r1 /* Compute remaining byte count. */ + la %r2,0(%r1,%r2) + +.Lpreloop: + /* Now we are 16-byte aligned. */ + clgijl %r4,17,.Lremaining + srlg %r1,%r4,8 /* Split into 256byte blocks */ + clgije %r1,0,.Lpreloop64 + vlr %v17,%v16 + vlr %v18,%v16 + vlr %v19,%v16 + +.Lloop256: + vstm %v16,%v19,0(%r2) /* Each vstm stores v16-v19, i.e. 64 bytes. */ + vstm %v16,%v19,64(%r2) + vstm %v16,%v19,128(%r2) + vstm %v16,%v19,192(%r2) + la %r2,256(%r2) + brctg %r1,.Lloop256 /* Loop until all blocks are processed.
*/ + + llgfr %r4,%r4 + nilf %r4,255 /* Get remaining bytes */ + je .Lend /* Skip store remaining bytes if zero. */ + +.Lpreloop64: + clgijl %r4,17,.Lremaining + clgijl %r4,33,.Lpreloop16 + srlg %r1,%r4,5 /* Split into 32byte blocks (despite the label name, the loop below stores 32 bytes per iteration). */ + +.Lloop32: + vst %v16,0(%r2) + vst %v16,16(%r2) + la %r2,32(%r2) + brctg %r1,.Lloop32 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,31 /* Get remaining bytes */ + je .Lend /* Skip store remaining bytes if zero. */ + +.Lpreloop16: + clgijl %r4,17,.Lremaining + srlg %r1,%r4,4 /* Split into 16byte blocks */ + +.Lloop16: + vst %v16,0(%r2) + la %r2,16(%r2) + brctg %r1,.Lloop16 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,15 /* Get remaining bytes */ + je .Lend /* Skip store remaining bytes if zero. */ + +.Lremaining: + aghi %r4,-1 /* vstl needs highest index. */ + vstl %v16,%r4,0(%r2) + +.Lend: + vlgvg %r2,%v31,0 /* Load saved dest for return value. */ + br %r14 +.Lfallback: + srlg %r4,%r4,2 /* Convert byte-count back to character-count for __wmemset_c; %r2 and %r3 are still unmodified here. */ + jg __wmemset_c +END(__wmemset_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wmemset.c b/sysdeps/s390/multiarch/wmemset.c new file mode 100644 index 0000000000..e9e695fc0a --- /dev/null +++ b/sysdeps/s390/multiarch/wmemset.c @@ -0,0 +1,29 @@ +/* Multiple versions of wmemset. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wmemset) /* NOTE(review): macro is defined in ifunc-resolve.h (not visible here); presumably it makes __wmemset an IFUNC that selects __wmemset_vx or __wmemset_c - confirm. */ +weak_alias (__wmemset, wmemset) /* Public name wmemset is a weak alias of __wmemset. */ +libc_hidden_weak (wmemset) + +#else +# include <wcsmbs/wmemset.c> /* No vector asm support or not in libc: use the common-code wmemset as is. */ +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ |