diff options
Diffstat (limited to 'sysdeps/sparc/sparc64/multiarch')
49 files changed, 1866 insertions, 602 deletions
diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile index 55b757f9ad..eaf758e7aa 100644 --- a/sysdeps/sparc/sparc64/multiarch/Makefile +++ b/sysdeps/sparc/sparc64/multiarch/Makefile @@ -8,11 +8,15 @@ endif ifeq ($(subdir),string) sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ - memset-niagara1 memcpy-niagara4 memset-niagara4 + memset-niagara1 memcpy-niagara4 memset-niagara4 \ + memcpy-ultra1 memset-ultra1 memcpy-memmove-niagara7 \ + memmove-ultra1 memset-niagara7 endif ifeq ($(subdir),stdlib) -sysdep_routines += mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 sub_n-vis3 +sysdep_routines += mul_1-vis3 mul_1-generic addmul_1-vis3 addmul_1-generic \ + submul_1-vis3 submul_1-generic add_n-vis3 add_n-generic \ + sub_n-vis3 sub_n-generic endif ifeq ($(subdir),math) diff --git a/sysdeps/sparc/sparc64/multiarch/add_n-generic.S b/sysdeps/sparc/sparc64/multiarch/add_n-generic.S new file mode 100644 index 0000000000..a16e7091b4 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/add_n-generic.S @@ -0,0 +1,2 @@ +#define __mpn_add_n __mpn_add_n_generic +#include <sysdeps/sparc/sparc64/add_n.S> diff --git a/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S b/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S index 0fda45a208..b4f1ef5181 100644 --- a/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S +++ b/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S @@ -1,7 +1,7 @@ ! SPARC v9 64-bit VIS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and ! store sum in a third limb vector. ! -! Copyright (C) 2013-2016 Free Software Foundation, Inc. +! Copyright (C) 2013-2018 Free Software Foundation, Inc. ! This file is part of the GNU C Library. ! Contributed by David S. Miller <davem@davemloft.net> ! 
diff --git a/sysdeps/sparc/sparc64/multiarch/add_n.S b/sysdeps/sparc/sparc64/multiarch/add_n.S deleted file mode 100644 index 8e67d75921..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/add_n.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of add_n - - Copyright (C) 2013-2016 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - -ENTRY(__mpn_add_n) - .type __mpn_add_n, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_add_n_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_add_n_vis3), %o1 -# else - set __mpn_add_n_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_add_n_generic), %o1 - xor %o1, %gdop_lox10(__mpn_add_n_generic), %o1 -# else - set __mpn_add_n_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_add_n) - -#define __mpn_add_n __mpn_add_n_generic -#include "../add_n.S" diff --git a/sysdeps/sparc/sparc64/multiarch/add_n.c b/sysdeps/sparc/sparc64/multiarch/add_n.c new file mode 100644 index 0000000000..47b0d0e3bc --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/add_n.c @@ -0,0 +1,28 @@ +/* __mpn_add_n ifunc resolver, Linux/sparc64 version. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <gmp.h> +#include <sparc-ifunc.h> + +extern __typeof (mpn_add_n) __mpn_add_n_vis3 attribute_hidden; +extern __typeof (mpn_add_n) __mpn_add_n_generic attribute_hidden; + +sparc_libm_ifunc (__mpn_add_n, + hwcap & HWCAP_SPARC_VIS3 + ? 
__mpn_add_n_vis3 + : __mpn_add_n_generic) diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1-generic.S b/sysdeps/sparc/sparc64/multiarch/addmul_1-generic.S new file mode 100644 index 0000000000..5bf1da7fde --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/addmul_1-generic.S @@ -0,0 +1,2 @@ +#define __mpn_addmul_1 __mpn_addmul_1_generic +#include <sysdeps/sparc/sparc64/addmul_1.S> diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S index 9a2f6acd9c..b5e808bddd 100644 --- a/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S +++ b/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S @@ -1,7 +1,7 @@ ! SPARC v9 64-bit VIS3 __mpn_addmul_1 -- Multiply a limb vector with a ! limb and add the result to a second limb vector. ! -! Copyright (C) 2013-2016 Free Software Foundation, Inc. +! Copyright (C) 2013-2018 Free Software Foundation, Inc. ! This file is part of the GNU C Library. ! Contributed by David S. Miller <davem@davemloft.net> ! diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1.S b/sysdeps/sparc/sparc64/multiarch/addmul_1.S deleted file mode 100644 index 4763edd457..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/addmul_1.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of addmul_1 - - Copyright (C) 2013-2016 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -ENTRY(__mpn_addmul_1) - .type __mpn_addmul_1, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_addmul_1_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_addmul_1_vis3), %o1 -# else - set __mpn_addmul_1_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_addmul_1_generic), %o1 - xor %o1, %gdop_lox10(__mpn_addmul_1_generic), %o1 -# else - set __mpn_addmul_1_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_addmul_1) - -#define __mpn_addmul_1 __mpn_addmul_1_generic -#include "../addmul_1.S" diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1.c b/sysdeps/sparc/sparc64/multiarch/addmul_1.c new file mode 100644 index 0000000000..afaeca870d --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/addmul_1.c @@ -0,0 +1,28 @@ +/* __mpn_addmul_1 ifunc resolver, Linux/sparc64 version. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <gmp.h> +#include <sparc-ifunc.h> + +extern __typeof (mpn_addmul_1) __mpn_addmul_1_vis3 attribute_hidden; +extern __typeof (mpn_addmul_1) __mpn_addmul_1_generic attribute_hidden; + +sparc_libm_ifunc (__mpn_addmul_1, + hwcap & HWCAP_SPARC_VIS3 + ? __mpn_addmul_1_vis3 + : __mpn_addmul_1_generic) diff --git a/sysdeps/sparc/sparc64/multiarch/bzero.c b/sysdeps/sparc/sparc64/multiarch/bzero.c new file mode 100644 index 0000000000..3af2ff3d47 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/bzero.c @@ -0,0 +1,33 @@ +/* Multiple versions of bzero. SPARC64/Linux version. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# define bzero __redirect_bzero +# include <string.h> +# undef bzero + +# include <sparc-ifunc.h> + +# define SYMBOL_NAME bzero +# include "ifunc-memset.h" + +sparc_libc_ifunc_redirected (__redirect_bzero, __bzero, IFUNC_SELECTOR) +weak_alias (__bzero, bzero) + +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c b/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c index e52eeb0650..91c6565c7a 100644 --- a/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c @@ -1,5 +1,5 @@ /* Enumerate available IFUNC implementations of a function. sparc version. - Copyright (C) 2012-2016 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -36,6 +36,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, hwcap = GLRO(dl_hwcap); IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_ADP, + __memcpy_niagara7) IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_CRYPTO, __memcpy_niagara4) IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_N2, @@ -47,6 +49,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ultra1)); IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_ADP, + __mempcpy_niagara7) IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_CRYPTO, __mempcpy_niagara4) IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_N2, @@ -58,6 +62,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ultra1)); IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_ADP, + __bzero_niagara7) IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_CRYPTO, __bzero_niagara4) IFUNC_IMPL_ADD (array, i, 
bzero, hwcap & HWCAP_SPARC_BLKINIT, @@ -65,11 +71,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ultra1)); IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_ADP, + __memset_niagara7) IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_CRYPTO, __memset_niagara4) IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_BLKINIT, __memset_niagara1) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ultra1)); + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, hwcap & HWCAP_SPARC_ADP, + __memmove_niagara7) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ultra1)); + return i; } diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-memcpy.h b/sysdeps/sparc/sparc64/multiarch/ifunc-memcpy.h new file mode 100644 index 0000000000..73ea15297a --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/ifunc-memcpy.h @@ -0,0 +1,43 @@ +/* Common definition for memcpy and mempcpy implementation. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <ifunc-init.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara7) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara4) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara1) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (ultra3) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (ultra1) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (int hwcap) +{ + if (hwcap & HWCAP_SPARC_ADP) + return OPTIMIZE (niagara7); + if (hwcap & HWCAP_SPARC_CRYPTO) + return OPTIMIZE (niagara4); + if (hwcap & HWCAP_SPARC_N2) + return OPTIMIZE (niagara2); + if (hwcap & HWCAP_SPARC_BLKINIT) + return OPTIMIZE (niagara1); + if (hwcap & HWCAP_SPARC_ULTRA3) + return OPTIMIZE (ultra3); + return OPTIMIZE (ultra1); +} diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-memmove.h b/sysdeps/sparc/sparc64/multiarch/ifunc-memmove.h new file mode 100644 index 0000000000..4b89ff4baf --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/ifunc-memmove.h @@ -0,0 +1,31 @@ +/* Common definition for memmove implementation. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <ifunc-init.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara7) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (ultra1) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (int hwcap) +{ + if (hwcap & HWCAP_SPARC_ADP) + return OPTIMIZE (niagara7); + return OPTIMIZE (ultra1); +} diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-memset.h b/sysdeps/sparc/sparc64/multiarch/ifunc-memset.h new file mode 100644 index 0000000000..d554638bd6 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/ifunc-memset.h @@ -0,0 +1,37 @@ +/* Common definition for memset/bzero implementation. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <ifunc-init.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara7) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara4) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara1) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (ultra1) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (int hwcap) +{ + if (hwcap & HWCAP_SPARC_ADP) + return OPTIMIZE (niagara7); + if (hwcap & HWCAP_SPARC_CRYPTO) + return OPTIMIZE (niagara4); + if (hwcap & HWCAP_SPARC_BLKINIT) + return OPTIMIZE (niagara1); + return OPTIMIZE (ultra1); +} diff --git a/sysdeps/sparc/sparc64/multiarch/md5-crop.S b/sysdeps/sparc/sparc64/multiarch/md5-crop.S index de1ba6df2f..764a8aae48 100644 --- a/sysdeps/sparc/sparc64/multiarch/md5-crop.S +++ b/sysdeps/sparc/sparc64/multiarch/md5-crop.S @@ -1,5 +1,5 @@ /* MD5 using sparc crypto opcodes. - Copyright (C) 2012-2016 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by David S. Miller (davem@davemloft.net) diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-memmove-niagara7.S b/sysdeps/sparc/sparc64/multiarch/memcpy-memmove-niagara7.S new file mode 100644 index 0000000000..61ba1ed408 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-memmove-niagara7.S @@ -0,0 +1,980 @@ +/* Copy SIZE bytes from SRC to DEST. For SUN4V M7. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef XCC +# define XCC xcc +#endif + .register %g2,#scratch + .register %g3,#scratch + .register %g6,#scratch + +#define FPRS_FEF 0x04 + +/* + * ASI_STBI_P marks the cache line as "least recently used" + * which means if many threads are active, it has a high chance + * of being pushed out of the cache between the first initializing + * store and the final stores. + * Thus, in this algorithm we use ASI_STBIMRU_P which marks the + * cache line as "most recently used" for all but the last cache + * line. + */ + +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define ASI_ST_BLK_INIT_MRU_P 0xf2 + +#define ASI_STBI_P ASI_BLK_INIT_QUAD_LDD_P +#define ASI_STBIMRU_P ASI_ST_BLK_INIT_MRU_P + +#define BLOCK_SIZE 64 /* L2 data cache line size */ +#define SHORTCOPY 3 +#define SHORTCHECK 14 +#define SHORT_LONG 64 /* max copy for short longword-aligned case */ + /* must be at least 64 */ +#define SMALL_MAX 255 /* max small copy for word/long aligned */ +#define SMALL_UMAX 128 /* max small copy for unaligned case */ +#define MED_WMAX 1023 /* max copy for medium word-aligned case */ +#define MED_MAX 511 /* max copy for medium longword-aligned case */ +#define ST_CHUNK 20 /* ST_CHUNK - block of values for BIS Store */ +/* on T4, prefetch 20 is a strong read prefetch to L1 and L2 data cache + * prefetch 20 can cause inst pipeline to delay if data is in memory + * prefetch 21 is a strong read prefetch to L2 data cache, not L1 data cache */ +#define ALIGN_PRE 20 /* distance for aligned prefetch loop */ + +#define EX_ST(x) x +#define EX_RETVAL(x) x +#define STORE_ASI(src,addr) stxa src, [addr] ASI_STBIMRU_P +#define STORE_INIT(src,addr) stxa src, [addr] ASI_STBI_P + +#if IS_IN (libc) + + .text + +ENTRY(__memmove_niagara7) + /* %o0=dst, %o1=src, 
%o2=len */ + cmp %o1, %o0 /* if from address is >= to use forward copy */ + bgeu,pn %XCC, .Lforcpy /* else use backward if ... */ + sub %o0, %o1, %o4 /* get difference of two addresses */ + cmp %o2, %o4 /* compare size and difference of addresses */ + bleu,pn %XCC, .Lforcpy /* if size is bigger, do overlapped copy */ + add %o1, %o2, %o5 /* get to end of source space */ + +/* an overlapped copy that must be done "backwards" */ +.Lchksize: + cmp %o2, 8 /* less than 8 byte do byte copy */ + blu,pn %XCC, 2f /* else continue */ + +/* Now size is bigger than 8 */ +.Ldbalign: + add %o0, %o2, %g1 /* get to end of dest space */ + andcc %g1, 7, %o3 /* %o3 has cnt til dst 8 byte align */ + bz,a,pn %XCC, .Ldbbck /* skip if dst is 8 byte aligned */ + andn %o2, 7, %o3 /* force %o3 cnt to multiple of 8 */ + sub %o2, %o3, %o2 /* update o2 with new count */ + +1: dec %o5 /* decrement source */ + ldub [%o5], %g1 /* load one byte */ + deccc %o3 /* decrement count */ + bgu,pt %XCC, 1b /* if not done keep copying */ + stb %g1, [%o5+%o4] /* store one byte into dest */ + andncc %o2, 7, %o3 /* force %o3 cnt to multiple of 8 */ + bz,pn %XCC, 2f /* if size < 8, move to byte copy */ + +/* Now Destination is 8 byte aligned */ +.Ldbbck: + andcc %o5, 7, %o0 /* %o0 has src offset */ + bz,a,pn %XCC, .Ldbcopybc /* if src is aligned do fast memmove */ + sub %o2, %o3, %o2 /* Residue bytes in %o2 */ + +.Lcpy_dbwdbc: /* alignment of src is needed */ + sub %o2, 8, %o2 /* set size one loop ahead */ + sll %o0, 3, %g1 /* %g1 is left shift */ + mov 64, %g5 /* init %g5 to be 64 */ + sub %g5, %g1, %g5 /* %g5 rightshift = (64 - leftshift) */ + sub %o5, %o0, %o5 /* align the src at 8 bytes. 
*/ + add %o4, %o0, %o4 /* increase diff between src & dst */ + ldx [%o5], %o1 /* load first 8 bytes */ + srlx %o1, %g5, %o1 +1: sub %o5, 8, %o5 /* subtract 8 from src */ + ldx [%o5], %o0 /* load 8 byte */ + sllx %o0, %g1, %o3 /* shift loaded val left to tmp reg */ + or %o1, %o3, %o3 /* align data */ + stx %o3, [%o5+%o4] /* store 8 byte */ + subcc %o2, 8, %o2 /* subtract 8 byte from size */ + bg,pt %XCC, 1b /* if size > 0 continue */ + srlx %o0, %g5, %o1 /* move extra byte for the next use */ + + srl %g1, 3, %o0 /* restore %o0 value for alignment */ + add %o5, %o0, %o5 /* restore src alignment */ + sub %o4, %o0, %o4 /* restore diff between src & dest */ + + ba 2f /* branch to the trailing byte copy */ + add %o2, 8, %o2 /* restore size value */ + +.Ldbcopybc: /* alignment of src is not needed */ +1: sub %o5, 8, %o5 /* subtract from src */ + ldx [%o5], %g1 /* load 8 bytes */ + subcc %o3, 8, %o3 /* subtract from size */ + bgu,pt %XCC, 1b /* if size is bigger 0 continue */ + stx %g1, [%o5+%o4] /* store 8 bytes to destination */ + + ba 2f + nop + +.Lbcbyte: +1: ldub [%o5], %g1 /* load one byte */ + stb %g1, [%o5+%o4] /* store one byte */ +2: deccc %o2 /* decrement size */ + bgeu,a,pt %XCC, 1b /* if size is >= 0 continue */ + dec %o5 /* decrement from address */ + +.Lexitbc: /* exit from backward copy */ + retl + add %o5, %o4, %o0 /* restore dest addr */ + + +/* Check to see if memmove is large aligned copy + * If so, use special version of copy that avoids + * use of block store init. */ +.Lforcpy: + cmp %o2, SMALL_MAX /* check for not small case */ + blt,pn %XCC, .Lmv_short /* merge with memcpy */ + mov %o0, %g1 /* save %o0 */ + neg %o0, %o5 + andcc %o5, 7, %o5 /* bytes till DST 8 byte aligned */ + brz,pt %o5, .Lmv_dst_aligned_on_8 + +/* %o5 has the bytes to be written in partial store. 
*/ + sub %o2, %o5, %o2 + sub %o1, %o0, %o1 /* %o1 gets the difference */ +7: /* dst aligning loop */ + ldub [%o1+%o0], %o4 /* load one byte */ + subcc %o5, 1, %o5 + stb %o4, [%o0] + bgu,pt %XCC, 7b + add %o0, 1, %o0 /* advance dst */ + add %o1, %o0, %o1 /* restore %o1 */ +.Lmv_dst_aligned_on_8: + andcc %o1, 7, %o5 + brnz,pn %o5, .Lsrc_dst_unaligned_on_8 + prefetch [%o1 + (1 * BLOCK_SIZE)], 20 + +.Lmv_src_dst_aligned_on_8: +/* check if we are copying MED_MAX or more bytes */ + cmp %o2, MED_MAX /* limit to store buffer size */ + bleu,pt %XCC, .Lmedlong + prefetch [%o1 + (2 * BLOCK_SIZE)], 20 + +/* The mv_align loop below mimics the memcpy code for large aligned copies, + * but does not use the ASI_STBI_P (block initializing store) performance + * optimization. This is used when memcpy is incorrectly invoked with + * overlapping buffers. */ + +.Lmv_large_align8_copy: /* Src and dst share 8 byte align */ + /* align dst to 64 byte boundary */ + andcc %o0, 0x3f, %o3 /* check for dst 64 byte aligned */ + brz,pn %o3, .Lmv_aligned_on_64 + sub %o3, 64, %o3 /* %o3 has negative bytes to move */ + add %o2, %o3, %o2 /* adjust remaining count */ +.Lmv_align_to_64: + ldx [%o1], %o4 + add %o1, 8, %o1 /* increment src ptr */ + addcc %o3, 8, %o3 + stx %o4, [%o0] + brnz,pt %o3, .Lmv_align_to_64 + add %o0, 8, %o0 /* increment dst ptr */ + +.Lmv_aligned_on_64: + andn %o2, 0x3f, %o5 /* %o5 is multiple of block size */ + and %o2, 0x3f, %o2 /* residue bytes in %o2 */ +.Lmv_align_loop: + ldx [%o1],%o4 + stx %o4,[%o0] + prefetch [%o0 + (10 * BLOCK_SIZE)], 22 + prefetch [%o1 + (10 * BLOCK_SIZE)], 21 + subcc %o5, 64, %o5 + ldx [%o1+8],%o4 + stx %o4,[%o0+8] + ldx [%o1+16],%o4 + stx %o4,[%o0+16] + ldx [%o1+24],%o4 + stx %o4,[%o0+24] + ldx [%o1+32],%o4 + stx %o4,[%o0+32] + ldx [%o1+40],%o4 + stx %o4,[%o0+40] + ldx [%o1+48],%o4 + add %o1, 64, %o1 + stx %o4,[%o0+48] + add %o0, 64, %o0 + ldx [%o1-8],%o4 + bgt,pt %XCC, .Lmv_align_loop + stx %o4,[%o0-8] + + ba .Lmedlong + nop +END(__memmove_niagara7) 
+ +ENTRY(__mempcpy_niagara7) + /* %o0=dst, %o1=src, %o2=len */ + ba,pt %icc, 101f + add %o0, %o2, %g1 /* save dst + len */ +END(__mempcpy_niagara7) + + .align 32 +ENTRY(__memcpy_niagara7) +100: /* %o0=dst, %o1=src, %o2=len */ + mov %o0, %g1 /* save %o0 */ +101: +#ifndef __arch64__ + srl %o2, 0, %o2 +#endif + cmp %o2, SMALL_MAX /* check for not small case */ + bgeu,pn %XCC, .Lmedium /* go to larger cases */ +.Lmv_short: + cmp %o2, SHORTCOPY /* check for really short case */ + ble,pn %XCC, .Lsmallfin + or %o0, %o1, %o4 /* prepare alignment check */ + andcc %o4, 0x3, %o5 /* test for word alignment */ + bnz,pn %XCC, .Lsmallunalign /* branch to non-word aligned case */ + nop + subcc %o2, 7, %o2 /* adjust count */ + ble,pn %XCC, .Lsmallwordx + andcc %o4, 0x7, %o5 /* test for long alignment */ +/* 8 or more bytes, src and dest start on word boundary + * %o4 contains or %o0, %o1 */ +.Lsmalllong: + bnz,pn %XCC, .Lsmallwords /* branch to word aligned case */ + cmp %o2, SHORT_LONG-7 + bge,a %XCC, .Lmedl64 /* if we branch */ + sub %o2,56,%o2 /* adjust %o2 to -63 off count */ + +/* slightly unroll the small_long_loop to improve very short copies */ + cmp %o2, 32-7 + blt,a,pn %XCC, .Lsmall_long_l + sub %o1, %o0, %o1 /* %o1 gets the difference */ + + ldx [%o1], %o5 + ldx [%o1+8], %o4 + ldx [%o1+16], %o3 + + subcc %o2, 24, %o2 + sub %o1, %o0, %o1 /* %o1 gets the difference */ + + stx %o5, [%o0] /* write word */ + stx %o4, [%o0+8] /* write word */ + stx %o3, [%o0+16] /* write word */ + + add %o0, 24, %o0 + +/* end loop unroll */ + +.Lsmall_long_l: + ldx [%o1+%o0], %o3 + subcc %o2, 8, %o2 + add %o0, 8, %o0 + bgu,pn %XCC, .Lsmall_long_l /* loop until done */ + stx %o3, [%o0-8] /* write word */ + addcc %o2, 7, %o2 /* restore %o2 to correct count */ + bnz,pn %XCC, .Lsmall_long_x /* check for completion */ + add %o1, %o0, %o1 /* restore %o1 */ + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ +.Lsmall_long_x: + cmp %o2, 4 /* check for 4 or more bytes left */ + blt,pn %XCC, .Lsmallleft3 
/* if not, go to finish up */ + nop + lduw [%o1], %o3 + add %o1, 4, %o1 + subcc %o2, 4, %o2 + stw %o3, [%o0] + bnz,pn %XCC, .Lsmallleft3 + add %o0, 4, %o0 + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ + + .align 32 +/* src and dest start on word boundary; 7 or fewer bytes */ +.Lsmallwordx: + lduw [%o1], %o3 /* read word */ + addcc %o2, 3, %o2 /* restore count */ + bz,pt %XCC, .Lsmallexit + stw %o3, [%o0] /* write word */ + deccc %o2 /* reduce count for cc test */ + ldub [%o1+4], %o3 /* load one byte */ + bz,pt %XCC, .Lsmallexit + stb %o3, [%o0+4] /* store one byte */ + ldub [%o1+5], %o3 /* load second byte */ + deccc %o2 + bz,pt %XCC, .Lsmallexit + stb %o3, [%o0+5] /* store second byte */ + ldub [%o1+6], %o3 /* load third byte */ + stb %o3, [%o0+6] /* store third byte */ +.Lsmallexit: + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ + + .align 32 +.Lsmallunalign: + cmp %o2, SHORTCHECK + ble,pn %XCC, .Lsmallrest + cmp %o2, SMALL_UMAX + bge,pt %XCC, .Lmedium_join + andcc %o1, 0x3, %o5 /* is src word aligned */ + bz,pn %XCC, .Laldst + cmp %o5, 2 /* is src half-word aligned */ + be,pt %XCC, .Ls2algn + cmp %o5, 3 /* src is byte aligned */ +.Ls1algn: + ldub [%o1], %o3 /* move 1 or 3 bytes to align it */ + inc 1, %o1 + stb %o3, [%o0] /* move a byte to align src */ + inc 1, %o0 + bne,pt %XCC, .Ls2algn + dec %o2 + b .Lald /* now go align dest */ + andcc %o0, 0x3, %o5 + +.Ls2algn: + lduh [%o1], %o3 /* know src is 2 byte aligned */ + inc 2, %o1 + srl %o3, 8, %o4 + stb %o4, [%o0] /* have to do bytes, */ + stb %o3, [%o0 + 1] /* do not know dst alignment */ + inc 2, %o0 + dec 2, %o2 + +.Laldst: + andcc %o0, 0x3, %o5 /* align the destination address */ +.Lald: + bz,pn %XCC, .Lw4cp + cmp %o5, 2 + be,pn %XCC, .Lw2cp + cmp %o5, 3 +.Lw3cp: lduw [%o1], %o4 + inc 4, %o1 + srl %o4, 24, %o5 + stb %o5, [%o0] + bne,pt %XCC, .Lw1cp + inc %o0 + dec 1, %o2 + andn %o2, 3, %o3 /* %o3 is aligned word count */ + dec 4, %o3 /* avoid reading beyond tail of src */ + sub %o1, %o0, %o1 /* %o1 
gets the difference */ + +1: sll %o4, 8, %g5 /* save residual bytes */ + lduw [%o1+%o0], %o4 + deccc 4, %o3 + srl %o4, 24, %o5 /* merge with residual */ + or %o5, %g5, %g5 + st %g5, [%o0] + bnz,pt %XCC, 1b + inc 4, %o0 + sub %o1, 3, %o1 /* used one byte of last word read */ + and %o2, 3, %o2 + b 7f + inc 4, %o2 + +.Lw1cp: srl %o4, 8, %o5 + sth %o5, [%o0] + inc 2, %o0 + dec 3, %o2 + andn %o2, 3, %o3 /* %o3 is aligned word count */ + dec 4, %o3 /* avoid reading beyond tail of src */ + sub %o1, %o0, %o1 /* %o1 gets the difference */ + +2: sll %o4, 24, %g5 /* save residual bytes */ + lduw [%o1+%o0], %o4 + deccc 4, %o3 + srl %o4, 8, %o5 /* merge with residual */ + or %o5, %g5, %g5 + st %g5, [%o0] + bnz,pt %XCC, 2b + inc 4, %o0 + sub %o1, 1, %o1 /* used 3 bytes of last word read */ + and %o2, 3, %o2 + b 7f + inc 4, %o2 + +.Lw2cp: lduw [%o1], %o4 + inc 4, %o1 + srl %o4, 16, %o5 + sth %o5, [%o0] + inc 2, %o0 + dec 2, %o2 + andn %o2, 3, %o3 /* %o3 is aligned word count */ + dec 4, %o3 /* avoid reading beyond tail of src */ + sub %o1, %o0, %o1 /* %o1 gets the difference */ + +3: sll %o4, 16, %g5 /* save residual bytes */ + lduw [%o1+%o0], %o4 + deccc 4, %o3 + srl %o4, 16, %o5 /* merge with residual */ + or %o5, %g5, %g5 + st %g5, [%o0] + bnz,pt %XCC, 3b + inc 4, %o0 + sub %o1, 2, %o1 /* used two bytes of last word read */ + and %o2, 3, %o2 + b 7f + inc 4, %o2 + +.Lw4cp: andn %o2, 3, %o3 /* %o3 is aligned word count */ + sub %o1, %o0, %o1 /* %o1 gets the difference */ + +1: lduw [%o1+%o0], %o4 /* read from address */ + deccc 4, %o3 /* decrement count */ + st %o4, [%o0] /* write at destination address */ + bgu,pt %XCC, 1b + inc 4, %o0 /* increment to address */ + and %o2, 3, %o2 /* number of leftover bytes, if any */ + + /* simple finish up byte copy, works with any alignment */ +7: + add %o1, %o0, %o1 /* restore %o1 */ +.Lsmallrest: + tst %o2 + bz,pt %XCC, .Lsmallx + cmp %o2, 4 + blt,pn %XCC, .Lsmallleft3 + nop + sub %o2, 3, %o2 +.Lsmallnotalign4: + ldub [%o1], %o3 /* read 
byte */ + subcc %o2, 4, %o2 /* reduce count by 4 */ + stb %o3, [%o0] /* write byte */ + ldub [%o1+1], %o3 /* repeat for total of 4 bytes */ + add %o1, 4, %o1 /* advance SRC by 4 */ + stb %o3, [%o0+1] + ldub [%o1-2], %o3 + add %o0, 4, %o0 /* advance DST by 4 */ + stb %o3, [%o0-2] + ldub [%o1-1], %o3 + bgu,pt %XCC, .Lsmallnotalign4 /* loop til 3 or fewer bytes remain */ + stb %o3, [%o0-1] + addcc %o2, 3, %o2 /* restore count */ + bz,pt %XCC, .Lsmallx +.Lsmallleft3: /* 1, 2, or 3 bytes remain */ + subcc %o2, 1, %o2 + ldub [%o1], %o3 /* load one byte */ + bz,pt %XCC, .Lsmallx + stb %o3, [%o0] /* store one byte */ + ldub [%o1+1], %o3 /* load second byte */ + subcc %o2, 1, %o2 + bz,pt %XCC, .Lsmallx + stb %o3, [%o0+1] /* store second byte */ + ldub [%o1+2], %o3 /* load third byte */ + stb %o3, [%o0+2] /* store third byte */ +.Lsmallx: + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ + +.Lsmallfin: + tst %o2 + bnz,pn %XCC, .Lsmallleft3 + nop + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ + + .align 16 +.Lsmallwords: + lduw [%o1], %o3 /* read word */ + subcc %o2, 8, %o2 /* update count */ + stw %o3, [%o0] /* write word */ + add %o1, 8, %o1 /* update SRC */ + lduw [%o1-4], %o3 /* read word */ + add %o0, 8, %o0 /* update DST */ + bgu,pt %XCC, .Lsmallwords /* loop until done */ + stw %o3, [%o0-4] /* write word */ + addcc %o2, 7, %o2 /* restore count */ + bz,pt %XCC, .Lsmallexit /* check for completion */ + cmp %o2, 4 /* check for 4 or more bytes left */ + blt,pt %XCC, .Lsmallleft3 /* if not, go to finish up */ + nop + lduw [%o1], %o3 + add %o1, 4, %o1 + subcc %o2, 4, %o2 + add %o0, 4, %o0 + bnz,pn %XCC, .Lsmallleft3 + stw %o3, [%o0-4] + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ + + .align 16 +.Lmedium: +.Lmedium_join: + neg %o0, %o5 + andcc %o5, 7, %o5 /* bytes till DST 8 byte aligned */ + brz,pt %o5, .Ldst_aligned_on_8 + + /* %o5 has the bytes to be written in partial store. 
*/ + sub %o2, %o5, %o2 + sub %o1, %o0, %o1 /* %o1 gets the difference */ +7: /* dst aligning loop */ + ldub [%o1+%o0], %o4 /* load one byte */ + subcc %o5, 1, %o5 + stb %o4, [%o0] + bgu,pt %XCC, 7b + add %o0, 1, %o0 /* advance dst */ + add %o1, %o0, %o1 /* restore %o1 */ +.Ldst_aligned_on_8: + andcc %o1, 7, %o5 + brnz,pt %o5, .Lsrc_dst_unaligned_on_8 + nop + +.Lsrc_dst_aligned_on_8: + /* check if we are copying MED_MAX or more bytes */ + cmp %o2, MED_MAX /* limit to store buffer size */ + bgu,pn %XCC, .Llarge_align8_copy + nop +/* + * Special case for handling when src and dest are both long word aligned + * and total data to move is less than MED_MAX bytes + */ +.Lmedlong: + subcc %o2, 63, %o2 /* adjust length to allow cc test */ + ble,pn %XCC, .Lmedl63 /* skip big loop if < 64 bytes */ + nop +.Lmedl64: + ldx [%o1], %o4 /* load */ + subcc %o2, 64, %o2 /* decrement length count */ + stx %o4, [%o0] /* and store */ + ldx [%o1+8], %o3 /* a block of 64 bytes */ + stx %o3, [%o0+8] + ldx [%o1+16], %o4 + stx %o4, [%o0+16] + ldx [%o1+24], %o3 + stx %o3, [%o0+24] + ldx [%o1+32], %o4 /* load */ + stx %o4, [%o0+32] /* and store */ + ldx [%o1+40], %o3 /* a block of 64 bytes */ + add %o1, 64, %o1 /* increase src ptr by 64 */ + stx %o3, [%o0+40] + ldx [%o1-16], %o4 + add %o0, 64, %o0 /* increase dst ptr by 64 */ + stx %o4, [%o0-16] + ldx [%o1-8], %o3 + bgu,pt %XCC, .Lmedl64 /* repeat if at least 64 bytes left */ + stx %o3, [%o0-8] +.Lmedl63: + addcc %o2, 32, %o2 /* adjust remaining count */ + ble,pt %XCC, .Lmedl31 /* to skip if 31 or fewer bytes left */ + nop + ldx [%o1], %o4 /* load */ + sub %o2, 32, %o2 /* decrement length count */ + stx %o4, [%o0] /* and store */ + ldx [%o1+8], %o3 /* a block of 32 bytes */ + add %o1, 32, %o1 /* increase src ptr by 32 */ + stx %o3, [%o0+8] + ldx [%o1-16], %o4 + add %o0, 32, %o0 /* increase dst ptr by 32 */ + stx %o4, [%o0-16] + ldx [%o1-8], %o3 + stx %o3, [%o0-8] +.Lmedl31: + addcc %o2, 16, %o2 /* adjust remaining count */ + ble,pt %XCC, 
.Lmedl15 /* skip if 15 or fewer bytes left */ + nop + ldx [%o1], %o4 /* load and store 16 bytes */ + add %o1, 16, %o1 /* increase src ptr by 16 */ + stx %o4, [%o0] + sub %o2, 16, %o2 /* decrease count by 16 */ + ldx [%o1-8], %o3 + add %o0, 16, %o0 /* increase dst ptr by 16 */ + stx %o3, [%o0-8] +.Lmedl15: + addcc %o2, 15, %o2 /* restore count */ + bz,pt %XCC, .Lsmallexit /* exit if finished */ + cmp %o2, 8 + blt,pt %XCC, .Lmedw7 /* skip if 7 or fewer bytes left */ + tst %o2 + ldx [%o1], %o4 /* load 8 bytes */ + add %o1, 8, %o1 /* increase src ptr by 8 */ + add %o0, 8, %o0 /* increase dst ptr by 8 */ + subcc %o2, 8, %o2 /* decrease count by 8 */ + bnz,pn %XCC, .Lmedw7 + stx %o4, [%o0-8] /* and store 8 bytes */ + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ + + .align 16 +.Lsrc_dst_unaligned_on_8: + /* DST is 8-byte aligned, src is not */ + andcc %o1, 0x3, %o5 /* test word alignment */ + bnz,pt %XCC, .Lunalignsetup /* branch if not word aligned */ + nop + +/* + * Handle all cases where src and dest are aligned on word + * boundaries. Use unrolled loops for better performance. + * This option wins over standard large data move when + * source and destination is in cache for medium + * to short data moves. 
+ */ + cmp %o2, MED_WMAX /* limit to store buffer size */ + bge,pt %XCC, .Lunalignrejoin /* otherwise rejoin main loop */ + nop + + subcc %o2, 31, %o2 /* adjust length to allow cc test */ + /* for end of loop */ + ble,pt %XCC, .Lmedw31 /* skip big loop if less than 32 */ +.Lmedw32: + ld [%o1], %o4 /* move a block of 32 bytes */ + sllx %o4, 32, %o5 + ld [%o1+4], %o4 + or %o4, %o5, %o5 + stx %o5, [%o0] + subcc %o2, 32, %o2 /* decrement length count */ + ld [%o1+8], %o4 + sllx %o4, 32, %o5 + ld [%o1+12], %o4 + or %o4, %o5, %o5 + stx %o5, [%o0+8] + add %o1, 32, %o1 /* increase src ptr by 32 */ + ld [%o1-16], %o4 + sllx %o4, 32, %o5 + ld [%o1-12], %o4 + or %o4, %o5, %o5 + stx %o5, [%o0+16] + add %o0, 32, %o0 /* increase dst ptr by 32 */ + ld [%o1-8], %o4 + sllx %o4, 32, %o5 + ld [%o1-4], %o4 + or %o4, %o5, %o5 + bgu,pt %XCC, .Lmedw32 /* repeat if at least 32 bytes left */ + stx %o5, [%o0-8] +.Lmedw31: + addcc %o2, 31, %o2 /* restore count */ + bz,pt %XCC, .Lsmallexit /* exit if finished */ + cmp %o2, 16 + blt,pt %XCC, .Lmedw15 + nop + ld [%o1], %o4 /* move a block of 16 bytes */ + sllx %o4, 32, %o5 + subcc %o2, 16, %o2 /* decrement length count */ + ld [%o1+4], %o4 + or %o4, %o5, %o5 + stx %o5, [%o0] + add %o1, 16, %o1 /* increase src ptr by 16 */ + ld [%o1-8], %o4 + add %o0, 16, %o0 /* increase dst ptr by 16 */ + sllx %o4, 32, %o5 + ld [%o1-4], %o4 + or %o4, %o5, %o5 + stx %o5, [%o0-8] +.Lmedw15: + bz,pt %XCC, .Lsmallexit /* exit if finished */ + cmp %o2, 8 + blt,pn %XCC, .Lmedw7 /* skip if 7 or fewer bytes left */ + tst %o2 + ld [%o1], %o4 /* load 4 bytes */ + subcc %o2, 8, %o2 /* decrease count by 8 */ + stw %o4, [%o0] /* and store 4 bytes */ + add %o1, 8, %o1 /* increase src ptr by 8 */ + ld [%o1-4], %o3 /* load 4 bytes */ + add %o0, 8, %o0 /* increase dst ptr by 8 */ + stw %o3, [%o0-4] /* and store 4 bytes */ + bz,pt %XCC, .Lsmallexit /* exit if finished */ +.Lmedw7: /* count is ge 1, less than 8 */ + cmp %o2, 4 /* check for 4 bytes left */ + blt,pn %XCC, 
.Lsmallleft3 /* skip if 3 or fewer bytes left */ + nop + ld [%o1], %o4 /* load 4 bytes */ + add %o1, 4, %o1 /* increase src ptr by 4 */ + add %o0, 4, %o0 /* increase dst ptr by 4 */ + subcc %o2, 4, %o2 /* decrease count by 4 */ + bnz,pt %XCC, .Lsmallleft3 + stw %o4, [%o0-4] /* and store 4 bytes */ + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ + + .align 16 +.Llarge_align8_copy: /* Src and dst 8 byte aligned */ + /* align dst to 64 byte boundary */ + andcc %o0, 0x3f, %o3 /* check for dst 64 byte aligned */ + brz,pn %o3, .Laligned_to_64 + andcc %o0, 8, %o3 /* odd long words to move? */ + brz,pt %o3, .Laligned_to_16 + nop + ldx [%o1], %o4 + sub %o2, 8, %o2 + add %o1, 8, %o1 /* increment src ptr */ + add %o0, 8, %o0 /* increment dst ptr */ + stx %o4, [%o0-8] +.Laligned_to_16: + andcc %o0, 16, %o3 /* pair of long words to move? */ + brz,pt %o3, .Laligned_to_32 + nop + ldx [%o1], %o4 + sub %o2, 16, %o2 + stx %o4, [%o0] + add %o1, 16, %o1 /* increment src ptr */ + ldx [%o1-8], %o4 + add %o0, 16, %o0 /* increment dst ptr */ + stx %o4, [%o0-8] +.Laligned_to_32: + andcc %o0, 32, %o3 /* four long words to move? */ + brz,pt %o3, .Laligned_to_64 + nop + ldx [%o1], %o4 + sub %o2, 32, %o2 + stx %o4, [%o0] + ldx [%o1+8], %o4 + stx %o4, [%o0+8] + ldx [%o1+16], %o4 + stx %o4, [%o0+16] + add %o1, 32, %o1 /* increment src ptr */ + ldx [%o1-8], %o4 + add %o0, 32, %o0 /* increment dst ptr */ + stx %o4, [%o0-8] +.Laligned_to_64: +/* Following test is included to avoid issues where existing executables + * incorrectly call memcpy with overlapping src and dest instead of memmove + * + * if ( (src ge dst) and (dst+len > src)) go to overlap case + * if ( (src lt dst) and (src+len > dst)) go to overlap case + */ + cmp %o1,%o0 + bge,pt %XCC, 1f + nop +/* src+len > dst? */ + add %o1, %o2, %o4 + cmp %o4, %o0 + bgt,pt %XCC, .Lmv_aligned_on_64 + nop + ba 2f + nop +1: +/* dst+len > src? 
*/ + add %o0, %o2, %o4 + cmp %o4, %o1 + bgt,pt %XCC, .Lmv_aligned_on_64 + nop +2: +/* handle non-overlapped copies + * + * Using block init store (BIS) instructions to avoid fetching cache + * lines from memory. Use ST_CHUNK stores to first element of each cache + * line (similar to prefetching) to avoid overfilling STQ or miss buffers. + * Gives existing cache lines time to be moved out of L1/L2/L3 cache. + */ + andn %o2, 0x3f, %o5 /* %o5 is multiple of block size */ + and %o2, 0x3f, %o2 /* residue bytes in %o2 */ + +/* We use ASI_STBIMRU_P for the first store to each cache line + * followed by ASI_STBI_P (mark as LRU) for the last store. That + * mixed approach reduces the chances the cache line is removed + * before we finish setting it, while minimizing the effects on + * other cached values during a large memcpy. + * + * Intermediate stores can be normal since first BIS activates the + * cache line in the L2 cache. + * + * ST_CHUNK batches up initial BIS operations for several cache lines + * to allow multiple requests to not be blocked by overflowing the + * store miss buffer. Then the matching stores for all those + * BIS operations are executed. 
+ */ + +.Lalign_loop: + cmp %o5, ST_CHUNK*64 + blu,pt %XCC, .Lalign_short + mov ST_CHUNK, %o3 + sllx %o3, 6, %g5 /* ST_CHUNK*64 */ + +.Lalign_loop_start: + prefetch [%o1 + (ALIGN_PRE * BLOCK_SIZE)], 21 + subcc %o3, 2, %o3 + ldx [%o1], %o4 + add %o1, 128, %o1 + EX_ST(STORE_ASI(%o4, %o0)) + add %o0, 64, %o0 + ldx [%o1-64], %o4 + EX_ST(STORE_ASI(%o4, %o0)) + add %o0, 64, %o0 + bgu,pt %XCC, .Lalign_loop_start + prefetch [%o1 + ((ALIGN_PRE-1) * BLOCK_SIZE)], 21 + + mov ST_CHUNK, %o3 + sub %o1, %g5, %o1 /* reset %o1 */ + sub %o0, %g5, %o0 /* reset %o0 */ + + sub %o0, 8, %o0 /* adjust %o0 for ASI alignment */ +.Lalign_loop_rest: + ldx [%o1+8],%o4 + add %o0, 64, %o0 + stx %o4, [%o0-48] + subcc %o3, 1, %o3 + ldx [%o1+16],%o4 + stx %o4, [%o0-40] + sub %o5, 64, %o5 + ldx [%o1+24],%o4 + stx %o4, [%o0-32] + ldx [%o1+32],%o4 + stx %o4, [%o0-24] + ldx [%o1+40],%o4 + stx %o4, [%o0-16] + ldx [%o1+48],%o4 + stx %o4, [%o0-8] + add %o1, 64, %o1 + ldx [%o1-8],%o4 + bgu,pt %XCC, .Lalign_loop_rest + EX_ST(STORE_INIT(%o4,%o0)) /* mark cache line as LRU */ + + mov ST_CHUNK, %o3 + cmp %o5, ST_CHUNK*64 + bgu,pt %XCC, .Lalign_loop_start + add %o0, 8, %o0 /* restore %o0 from ASI alignment */ + + cmp %o5, 0 + beq,pt %XCC, .Lalign_done + +/* no prefetches needed in these loops + * since we are within ALIGN_PRE of the end */ +.Lalign_short: + srl %o5, 6, %o3 +.Lalign_loop_short: + subcc %o3, 1, %o3 + ldx [%o1], %o4 + add %o1, 64, %o1 + EX_ST(STORE_ASI(%o4, %o0)) + bgu,pt %XCC, .Lalign_loop_short + add %o0, 64, %o0 + + sub %o1, %o5, %o1 /* reset %o1 */ + sub %o0, %o5, %o0 /* reset %o0 */ + + sub %o0, 8, %o0 /* adjust %o0 for ASI alignment */ +.Lalign_short_rest: + ldx [%o1+8],%o4 + add %o0, 64, %o0 + stx %o4, [%o0-48] + ldx [%o1+16],%o4 + subcc %o5, 64, %o5 + stx %o4, [%o0-40] + ldx [%o1+24],%o4 + stx %o4, [%o0-32] + ldx [%o1+32],%o4 + stx %o4, [%o0-24] + ldx [%o1+40],%o4 + stx %o4, [%o0-16] + ldx [%o1+48],%o4 + stx %o4, [%o0-8] + add %o1, 64, %o1 + ldx [%o1-8],%o4 + bgu,pt %XCC, 
.Lalign_short_rest + EX_ST(STORE_INIT(%o4,%o0)) /* mark cache line as LRU */ + + add %o0, 8, %o0 /* restore %o0 from ASI alignment */ + +.Lalign_done: + cmp %o2, 0 + membar #StoreStore + bne,pt %XCC, .Lmedl63 + subcc %o2, 63, %o2 /* adjust length to allow cc test */ + retl + mov EX_RETVAL(%g1), %o0 /* restore %o0 */ + + .align 16 + /* Dst is on 8 byte boundary; src is not; remaining cnt > SMALL_MAX */ + /* Since block load/store and BIS are not in use for unaligned data, + * no need to align dst on 64 byte cache line boundary */ +.Lunalignsetup: +.Lunalignrejoin: + rd %fprs, %g5 /* check for unused fp */ + /* if fprs.fef == 0, set it. + * Setting it when already set costs more than checking */ + andcc %g5, FPRS_FEF, %g5 /* test FEF, fprs.du = fprs.dl = 0 */ + bz,a %XCC, 1f + wr %g0, FPRS_FEF, %fprs /* fprs.fef = 1 */ +1: + andn %o2, 0x3f, %o5 /* %o5 is multiple of block size */ + and %o2, 0x3f, %o2 /* residue bytes in %o2 */ + cmp %o2, 8 /* Insure we do not load beyond */ + bgt,pt %XCC, .Lunalign_adjust /* end of source buffer */ + andn %o1, 0x7, %o4 /* %o4 has 8 byte aligned src addr */ + add %o2, 64, %o2 /* adjust to leave loop */ + sub %o5, 64, %o5 /* early if necessary */ +.Lunalign_adjust: + alignaddr %o1, %g0, %g0 /* generate %gsr */ + add %o1, %o5, %o1 /* advance %o1 to after blocks */ + ldd [%o4], %f0 +.Lunalign_loop: + prefetch [%o0 + (9 * BLOCK_SIZE)], 20 + ldd [%o4+8], %f2 + faligndata %f0, %f2, %f16 + ldd [%o4+16], %f4 + subcc %o5, BLOCK_SIZE, %o5 + std %f16, [%o0] + faligndata %f2, %f4, %f18 + ldd [%o4+24], %f6 + std %f18, [%o0+8] + faligndata %f4, %f6, %f20 + ldd [%o4+32], %f8 + std %f20, [%o0+16] + faligndata %f6, %f8, %f22 + ldd [%o4+40], %f10 + std %f22, [%o0+24] + faligndata %f8, %f10, %f24 + ldd [%o4+48], %f12 + std %f24, [%o0+32] + faligndata %f10, %f12, %f26 + ldd [%o4+56], %f14 + add %o4, BLOCK_SIZE, %o4 + std %f26, [%o0+40] + faligndata %f12, %f14, %f28 + ldd [%o4], %f0 + std %f28, [%o0+48] + faligndata %f14, %f0, %f30 + std %f30, [%o0+56] + 
add %o0, BLOCK_SIZE, %o0 + bgu,pt %XCC, .Lunalign_loop + prefetch [%o4 + (11 * BLOCK_SIZE)], 20 + + /* Handle trailing bytes, 64 to 127 + * Dest long word aligned, Src not long word aligned */ + cmp %o2, 15 + bleu,pt %XCC, .Lunalign_short + + andn %o2, 0x7, %o5 /* %o5 is multiple of 8 */ + and %o2, 0x7, %o2 /* residue bytes in %o2 */ + add %o2, 8, %o2 + sub %o5, 8, %o5 /* do not load past end of src */ + andn %o1, 0x7, %o4 /* %o4 has 8 byte aligned src addr */ + add %o1, %o5, %o1 /* move %o1 to after multiple of 8 */ + ldd [%o4], %f0 /* fetch partial word */ +.Lunalign_by8: + ldd [%o4+8], %f2 + add %o4, 8, %o4 + faligndata %f0, %f2, %f16 + subcc %o5, 8, %o5 + std %f16, [%o0] + fsrc2 %f2, %f0 + bgu,pt %XCC, .Lunalign_by8 + add %o0, 8, %o0 + +.Lunalign_short: /* restore fprs state */ + brnz,pt %g5, .Lsmallrest + nop + ba .Lsmallrest + wr %g5, %g0, %fprs +END(__memcpy_niagara7) + +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S index a3b69f9ef1..50b37af104 100644 --- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S @@ -1,5 +1,5 @@ /* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara. - Copyright (C) 2006-2016 Free Software Foundation, Inc. + Copyright (C) 2006-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by David S. Miller (davem@davemloft.net) diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S index 9b3e1651b1..91d9eb3221 100644 --- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S @@ -1,5 +1,5 @@ /* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara-2. - Copyright (C) 2007-2016 Free Software Foundation, Inc. + Copyright (C) 2007-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by David S. 
Miller (davem@davemloft.net) diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S index 7234a7bf75..096a11cfd8 100644 --- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S @@ -1,5 +1,5 @@ /* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara-4. - Copyright (C) 2012-2016 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by David S. Miller (davem@davemloft.net) diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra1.S new file mode 100644 index 0000000000..8e0b3e2d48 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra1.S @@ -0,0 +1,33 @@ +/* Default SPARC64 memcpy implementation. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# include <sysdep.h> + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +# undef weak_alias +# define weak_alias(x, y) +# undef libc_hidden_def +# define libc_hidden_def(name) + +# define memcpy __memcpy_ultra1 +# define __memcpy_large __memcpy_large_ultra1 +# define __mempcpy __mempcpy_ultra1 +# include <sysdeps/sparc/sparc64/memcpy.S> +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S index 5b00c35d44..41cd606f59 100644 --- a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S @@ -1,6 +1,6 @@ /* Copy SIZE bytes from SRC to DEST. For UltraSPARC-III. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by David S. Miller (davem@redhat.com) diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S deleted file mode 100644 index 328f62152b..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memcpy.S +++ /dev/null @@ -1,167 +0,0 @@ -/* Multiple versions of memcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2016 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#if IS_IN (libc) - .text -ENTRY(memcpy) - .type memcpy, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_CRYPTO, %o1 - andcc %o0, %o1, %g0 - be 1f - andcc %o0, HWCAP_SPARC_N2, %g0 -# ifdef SHARED - sethi %gdop_hix22(__memcpy_niagara4), %o1 - xor %o1, %gdop_lox10(__memcpy_niagara4), %o1 -# else - set __memcpy_niagara4, %o1 -# endif - ba 10f - nop -1: be 1f - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 -# ifdef SHARED - sethi %gdop_hix22(__memcpy_niagara2), %o1 - xor %o1, %gdop_lox10(__memcpy_niagara2), %o1 -# else - set __memcpy_niagara2, %o1 -# endif - ba 10f - nop -1: be 1f - andcc %o0, HWCAP_SPARC_ULTRA3, %g0 -# ifdef SHARED - sethi %gdop_hix22(__memcpy_niagara1), %o1 - xor %o1, %gdop_lox10(__memcpy_niagara1), %o1 -# else - set __memcpy_niagara1, %o1 -# endif - ba 10f - nop -1: be 9f - nop -# ifdef SHARED - sethi %gdop_hix22(__memcpy_ultra3), %o1 - xor %o1, %gdop_lox10(__memcpy_ultra3), %o1 -# else - set __memcpy_ultra3, %o1 -# endif - ba 10f - nop -9: -# ifdef SHARED - sethi %gdop_hix22(__memcpy_ultra1), %o1 - xor %o1, %gdop_lox10(__memcpy_ultra1), %o1 -# else - set __memcpy_ultra1, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(memcpy) - -ENTRY(__mempcpy) - .type __mempcpy, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_CRYPTO, %o1 - andcc %o0, %o1, %g0 - be 1f - andcc %o0, HWCAP_SPARC_N2, %g0 -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_niagara4), %o1 - xor %o1, %gdop_lox10(__mempcpy_niagara4), %o1 -# else - set __mempcpy_niagara4, %o1 -# endif - ba 10f - nop -1: be 1f - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_niagara2), %o1 - xor %o1, %gdop_lox10(__mempcpy_niagara2), %o1 -# else - set __mempcpy_niagara2, %o1 
-# endif - ba 10f - nop -1: be 1f - andcc %o0, HWCAP_SPARC_ULTRA3, %g0 -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_niagara1), %o1 - xor %o1, %gdop_lox10(__mempcpy_niagara1), %o1 -# else - set __mempcpy_niagara1, %o1 -# endif - ba 10f - nop -1: be 9f - nop -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_ultra3), %o1 - xor %o1, %gdop_lox10(__mempcpy_ultra3), %o1 -# else - set __mempcpy_ultra3, %o1 -# endif - ba 10f - nop -9: -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_ultra1), %o1 - xor %o1, %gdop_lox10(__mempcpy_ultra1), %o1 -# else - set __mempcpy_ultra1, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mempcpy) - -libc_hidden_builtin_def (memcpy) - -libc_hidden_def (__mempcpy) -weak_alias (__mempcpy, mempcpy) -libc_hidden_builtin_def (mempcpy) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) -#undef weak_alias -#define weak_alias(x, y) -#undef libc_hidden_def -#define libc_hidden_def(name) - -#define memcpy __memcpy_ultra1 -#define __mempcpy __mempcpy_ultra1 - -#endif - -#include "../memcpy.S" diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.c b/sysdeps/sparc/sparc64/multiarch/memcpy.c new file mode 100644 index 0000000000..7adb2936c7 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memcpy.c @@ -0,0 +1,33 @@ +/* Multiple versions of memcpy. SPARC64/Linux version. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define memcpy __redirect_memcpy +# include <string.h> +# undef memcpy + +# include <sparc-ifunc.h> + +# define SYMBOL_NAME memcpy +# include "ifunc-memcpy.h" + +sparc_libc_ifunc_redirected (__redirect_memcpy, memcpy, IFUNC_SELECTOR) + +sparc_ifunc_redirected_hidden_def (__redirect_memcpy, memcpy) +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memmove-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memmove-ultra1.S new file mode 100644 index 0000000000..2ed85d92e6 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memmove-ultra1.S @@ -0,0 +1,4 @@ +#define memmove __memmove_ultra1 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#include <sysdeps/sparc/sparc64/memmove.S> diff --git a/sysdeps/sparc/sparc64/multiarch/memmove.c b/sysdeps/sparc/sparc64/multiarch/memmove.c new file mode 100644 index 0000000000..878d532cea --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memmove.c @@ -0,0 +1,33 @@ +/* Multiple versions of memmove. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define memmove __redirect_memmove +# include <string.h> +# undef memmove + +# include <sparc-ifunc.h> + +# define SYMBOL_NAME memmove +# include "ifunc-memmove.h" + +sparc_libc_ifunc_redirected (__redirect_memmove, memmove, IFUNC_SELECTOR); + +sparc_ifunc_redirected_hidden_def (__redirect_memmove, memmove) +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/mempcpy.c b/sysdeps/sparc/sparc64/multiarch/mempcpy.c new file mode 100644 index 0000000000..ab398c6b08 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/mempcpy.c @@ -0,0 +1,39 @@ +/* Multiple versions of mempcpy. SPARC64/Linux version. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +# define NO_MEMPCPY_STPCPY_REDIRECT +# define __NO_STRING_INLINES +# include <string.h> +# undef mempcpy +# undef __mempcpy + +# include <sparc-ifunc.h> + +# define SYMBOL_NAME mempcpy +# include "ifunc-memcpy.h" + +sparc_libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR) + +sparc_ifunc_redirected_hidden_def (__redirect___mempcpy, __mempcpy) +weak_alias (__mempcpy, mempcpy) +sparc_ifunc_redirected_hidden_def (__redirect_mempcpy, mempcpy) +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S index fe3e09df73..8752b16f4a 100644 --- a/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S +++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S @@ -1,5 +1,5 @@ /* Set a block of memory to some byte value. For SUN4V Niagara. - Copyright (C) 2006-2016 Free Software Foundation, Inc. + Copyright (C) 2006-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by David S. Miller (davem@davemloft.net) diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S index 85ab05485f..2198463a27 100644 --- a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S +++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S @@ -1,5 +1,5 @@ /* Set a block of memory to some byte value. For SUN4V Niagara-4. - Copyright (C) 2012-2016 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by David S. Miller (davem@davemloft.net) diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara7.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara7.S new file mode 100644 index 0000000000..77910c7b62 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara7.S @@ -0,0 +1,334 @@ +/* Set a block of memory to some byte value. For SUN4V M7. 
+ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef XCC +# define XCC xcc +#endif + .register %g2, #scratch + .register %g3, #scratch + +/* The algorithm is as follows: + * + * For small stores of 7 or fewer bytes, byte stores are used. + * + * For stores of fewer than 32 bytes, align the address on a 4 byte boundary. + * Then store as many 4-byte chunks as possible, followed by trailing bytes. + * + * For sizes of 32 bytes or more, align the address on an 8 byte boundary. + * if (count >= 64) { + * store 8-byte chunks to align the address on a 64 byte boundary + * if (value to be set is zero && count >= MIN_ZERO) { + * Using BIS stores, set the first long word of each + * 64-byte cache line to zero which will also clear the + * other seven long words of the cache line. + * } + * else if (count >= MIN_LOOP) { + * Using BIS stores, set the first long word of each of + * ST_CHUNK cache lines (64 bytes each) before the main + * loop is entered. + * In the main loop, continue pre-setting the first long + * word of each cache line ST_CHUNK lines in advance while + * setting the other seven long words (56 bytes) of each + * cache line until fewer than ST_CHUNK*64 bytes remain. 
+ * Then set the remaining seven long words of each cache + * line that has already had its first long word set. + * } + * store remaining data in 64-byte chunks until fewer than + * 64 bytes remain. + * } + * Store as many 8-byte chunks as possible, followed by trailing bytes. + * + * + * BIS = Block Init Store + * Doing the advance store of the first element of the cache line + * initiates the displacement of a cache line while only using a single + * instruction in the pipeline. That avoids various pipeline delays, + * such as filling the miss buffer. The performance effect is + * similar to prefetching for normal stores. + * The special case for zero fills runs faster and uses fewer instruction + * cycles than the normal memset loop. + * + * We only use BIS for memset of greater than MIN_LOOP bytes because a sequence of + * BIS stores must be followed by a membar #StoreStore. The benefit of + * the BIS store must be balanced against the cost of the membar operation. + */ + +/* + * ASI_STBI_P marks the cache line as "least recently used" + * which means if many threads are active, it has a high chance + * of being pushed out of the cache between the first initializing + * store and the final stores. + * Thus, we use ASI_STBIMRU_P which marks the cache line as + * "most recently used" for all but the last store to the cache line. 
+ */ + +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define ASI_ST_BLK_INIT_MRU_P 0xf2 + +#define ASI_STBI_P ASI_BLK_INIT_QUAD_LDD_P +#define ASI_STBIMRU_P ASI_ST_BLK_INIT_MRU_P + +#define ST_CHUNK 24 /* multiple of 4 due to loop unrolling */ +#define MIN_LOOP (ST_CHUNK)*64 +#define MIN_ZERO 256 + +#define EX_ST(x) x +#define EX_RETVAL(x) x +#define STORE_ASI(src,addr) stxa src, [addr] ASI_STBIMRU_P +#define STORE_INIT(src,addr) stxa src, [addr] ASI_STBI_P + +#if IS_IN (libc) + + .text + .align 32 + +ENTRY(__bzero_niagara7) + /* bzero (dst, size) */ + mov %o1, %o2 + mov 0, %o1 + /* fall through into memset code */ +END(__bzero_niagara7) + +ENTRY(__memset_niagara7) + /* memset (src, c, size) */ + mov %o0, %o5 /* copy sp1 before using it */ + cmp %o2, 7 /* if small counts, just write bytes */ + bleu,pn %XCC, .Lwrchar + and %o1, 0xff, %o1 /* o1 is (char)c */ + + sll %o1, 8, %o3 + or %o1, %o3, %o1 /* now o1 has 2 bytes of c */ + sll %o1, 16, %o3 + cmp %o2, 32 + blu,pn %XCC, .Lwdalign + or %o1, %o3, %o1 /* now o1 has 4 bytes of c */ + + sllx %o1, 32, %o3 + or %o1, %o3, %o1 /* now o1 has 8 bytes of c */ + +.Ldbalign: + andcc %o5, 7, %o3 /* is sp1 aligned on a 8 byte bound? */ + bz,pt %XCC, .Lblkalign /* already long word aligned */ + sub %o3, 8, %o3 /* -(bytes till long word aligned) */ + + add %o2, %o3, %o2 /* update o2 with new count */ + /* Set -(%o3) bytes till sp1 long word aligned */ +1: stb %o1, [%o5] /* there is at least 1 byte to set */ + inccc %o3 /* byte clearing loop */ + bl,pt %XCC, 1b + inc %o5 + + /* Now sp1 is long word aligned (sp1 is found in %o5) */ +.Lblkalign: + cmp %o2, 64 /* check if there are 64 bytes to set */ + blu,pn %XCC, .Lwrshort + mov %o2, %o3 + + andcc %o5, 63, %o3 /* is sp1 block aligned? */ + bz,pt %XCC, .Lblkwr /* now block aligned */ + sub %o3, 64, %o3 /* o3 is -(bytes till block aligned) */ + add %o2, %o3, %o2 /* o2 is the remainder */ + + /* Store -(%o3) bytes till dst is block (64 byte) aligned. */ + /* Use long word stores. 
*/ + /* Recall that dst is already long word aligned */ +1: + addcc %o3, 8, %o3 + stx %o1, [%o5] + bl,pt %XCC, 1b + add %o5, 8, %o5 + + /* Now sp1 is block aligned */ +.Lblkwr: + andn %o2, 63, %o4 /* calculate size of blocks in bytes */ + brz,pn %o1, .Lwrzero /* special case if c == 0 */ + and %o2, 63, %o3 /* %o3 = bytes left after blk stores */ + + cmp %o4, MIN_LOOP /* check for enough bytes to set */ + blu,pn %XCC, .Lshort_set /* to justify cost of membar */ + nop /* must be > pre-cleared lines */ + + /* initial cache-clearing stores */ + /* get store pipeline moving */ + +/* Primary memset loop for large memsets */ +.Lwr_loop: + mov ST_CHUNK, %g1 +.Lwr_loop_start: + subcc %g1, 4, %g1 + EX_ST(STORE_ASI(%o1,%o5)) + add %o5, 64, %o5 + EX_ST(STORE_ASI(%o1,%o5)) + add %o5, 64, %o5 + EX_ST(STORE_ASI(%o1,%o5)) + add %o5, 64, %o5 + EX_ST(STORE_ASI(%o1,%o5)) + bgu %XCC, .Lwr_loop_start + add %o5, 64, %o5 + + sub %o5, ST_CHUNK*64, %o5 /* reset %o5 */ + mov ST_CHUNK, %g1 + sub %o5, 8, %o5 /* adjust %o5 for ASI store */ + +.Lwr_loop_rest: + stx %o1,[%o5+8+8] + sub %o4, 64, %o4 + stx %o1,[%o5+16+8] + subcc %g1, 1, %g1 + stx %o1,[%o5+24+8] + stx %o1,[%o5+32+8] + stx %o1,[%o5+40+8] + add %o5, 64, %o5 + stx %o1,[%o5-8] + bgu %XCC, .Lwr_loop_rest + EX_ST(STORE_INIT(%o1,%o5)) + + add %o5, 8, %o5 /* restore %o5 offset */ + + /* If more than ST_CHUNK*64 bytes remain to set, continue */ + /* setting the first long word of each cache line in advance */ + /* to keep the store pipeline moving. 
*/ + + cmp %o4, ST_CHUNK*64 + bge,pt %XCC, .Lwr_loop_start + mov ST_CHUNK, %g1 + + brz,a,pn %o4, .Lasi_done + nop + + sub %o5, 8, %o5 /* adjust %o5 for ASI store */ +.Lwr_loop_small: + add %o5, 8, %o5 /* adjust %o5 for ASI store */ + EX_ST(STORE_ASI(%o1,%o5)) + stx %o1,[%o5+8] + stx %o1,[%o5+16] + stx %o1,[%o5+24] + stx %o1,[%o5+32] + subcc %o4, 64, %o4 + stx %o1,[%o5+40] + add %o5, 56, %o5 + stx %o1,[%o5-8] + bgu,pt %XCC, .Lwr_loop_small + EX_ST(STORE_INIT(%o1,%o5)) + + ba .Lasi_done + add %o5, 8, %o5 /* restore %o5 offset */ + +/* Special case loop for zero fill memsets */ +/* For each 64 byte cache line, single STBI to first element */ +/* clears line */ +.Lwrzero: + cmp %o4, MIN_ZERO /* check if enough bytes to set */ + /* to pay %asi + membar cost */ + blu %XCC, .Lshort_set + nop + sub %o4, 256, %o4 + +.Lwrzero_loop: + mov 64, %g3 + EX_ST(STORE_INIT(%o1,%o5)) + subcc %o4, 256, %o4 + EX_ST(STORE_INIT(%o1,%o5+%g3)) + add %o5, 256, %o5 + sub %g3, 192, %g3 + EX_ST(STORE_INIT(%o1,%o5+%g3)) + add %g3, 64, %g3 + bge,pt %XCC, .Lwrzero_loop + EX_ST(STORE_INIT(%o1,%o5+%g3)) + add %o4, 256, %o4 + + brz,pn %o4, .Lbsi_done + nop +.Lwrzero_small: + EX_ST(STORE_INIT(%o1,%o5)) + subcc %o4, 64, %o4 + bgu,pt %XCC, .Lwrzero_small + add %o5, 64, %o5 + +.Lasi_done: +.Lbsi_done: + membar #StoreStore /* required by use of BSI */ + +.Lshort_set: + cmp %o4, 64 /* check if 64 bytes to set */ + blu %XCC, 5f + nop +4: /* set final blocks of 64 bytes */ + stx %o1, [%o5] + stx %o1, [%o5+8] + stx %o1, [%o5+16] + stx %o1, [%o5+24] + subcc %o4, 64, %o4 + stx %o1, [%o5+32] + stx %o1, [%o5+40] + add %o5, 64, %o5 + stx %o1, [%o5-16] + bgu,pt %XCC, 4b + stx %o1, [%o5-8] + +5: + /* Set the remaining long words */ +.Lwrshort: + subcc %o3, 8, %o3 /* Can we store any long words? 
*/ + blu,pn %XCC, .Lwrchars + and %o2, 7, %o2 /* calc bytes left after long words */ +6: + subcc %o3, 8, %o3 + stx %o1, [%o5] /* store the long words */ + bgeu,pt %XCC, 6b + add %o5, 8, %o5 + +.Lwrchars: /* check for extra chars */ + brnz %o2, .Lwrfin + nop + retl + nop + +.Lwdalign: + andcc %o5, 3, %o3 /* is sp1 aligned on a word boundary */ + bz,pn %XCC, .Lwrword + andn %o2, 3, %o3 /* create word sized count in %o3 */ + + dec %o2 /* decrement count */ + stb %o1, [%o5] /* clear a byte */ + b .Lwdalign + inc %o5 /* next byte */ + +.Lwrword: + subcc %o3, 4, %o3 + st %o1, [%o5] /* 4-byte writing loop */ + bnz,pt %XCC, .Lwrword + add %o5, 4, %o5 + and %o2, 3, %o2 /* leftover count, if any */ + +.Lwrchar: + /* Set the remaining bytes, if any */ + brz %o2, .Lexit + nop +.Lwrfin: + deccc %o2 + stb %o1, [%o5] + bgu,pt %XCC, .Lwrfin + inc %o5 +.Lexit: + retl /* %o0 was preserved */ + nop +END(__memset_niagara7) +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S new file mode 100644 index 0000000000..dd9d2c17cd --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S @@ -0,0 +1,30 @@ +/* Default SPARC64 memset implementation. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# include <sysdep.h> + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +# undef weak_alias +# define weak_alias(x, y) + +# define memset __memset_ultra1 +# define __bzero __bzero_ultra1 +# include <sysdeps/sparc/sparc64/memset.S> +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset.S b/sysdeps/sparc/sparc64/multiarch/memset.S deleted file mode 100644 index bd0e160d70..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memset.S +++ /dev/null @@ -1,124 +0,0 @@ -/* Multiple versions of memset and bzero - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2016 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - -#if IS_IN (libc) - .text -ENTRY(memset) - .type memset, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_CRYPTO, %o1 - andcc %o0, %o1, %g0 - be 1f - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 -# ifdef SHARED - sethi %gdop_hix22(__memset_niagara4), %o1 - xor %o1, %gdop_lox10(__memset_niagara4), %o1 -# else - set __memset_niagara4, %o1 -# endif - ba 10f - nop -1: be 9f - nop -# ifdef SHARED - sethi %gdop_hix22(__memset_niagara1), %o1 - xor %o1, %gdop_lox10(__memset_niagara1), %o1 -# else - set __memset_niagara1, %o1 -# endif - ba 10f - nop -9: -# ifdef SHARED - sethi %gdop_hix22(__memset_ultra1), %o1 - xor %o1, %gdop_lox10(__memset_ultra1), %o1 -# else - set __memset_ultra1, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(memset) - -ENTRY(__bzero) - .type bzero, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_CRYPTO, %o1 - andcc %o0, %o1, %g0 - be 1f - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 -# ifdef SHARED - sethi %gdop_hix22(__bzero_niagara4), %o1 - xor %o1, %gdop_lox10(__bzero_niagara4), %o1 -# else - set __bzero_niagara4, %o1 -# endif - ba 10f - nop -1: be 9f - nop -# ifdef SHARED - sethi %gdop_hix22(__bzero_niagara1), %o1 - xor %o1, %gdop_lox10(__bzero_niagara1), %o1 -# else - set __bzero_niagara1, %o1 -# endif - ba 10f - nop -9: -# ifdef SHARED - sethi %gdop_hix22(__bzero_ultra1), %o1 - xor %o1, %gdop_lox10(__bzero_ultra1), %o1 -# else - set __bzero_ultra1, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__bzero) - -weak_alias (__bzero, bzero) - -# undef weak_alias -# define weak_alias(a, b) - -libc_hidden_builtin_def (memset) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#define memset __memset_ultra1 -#define __bzero __bzero_ultra1 - -#endif - -#include "../memset.S" diff --git a/sysdeps/sparc/sparc64/multiarch/memset.c 
b/sysdeps/sparc/sparc64/multiarch/memset.c new file mode 100644 index 0000000000..c2920c7df7 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memset.c @@ -0,0 +1,33 @@ +/* Multiple versions of memset. SPARC64/Linux version. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define memset __redirect_memset +# include <string.h> +# undef memset + +# include <sparc-ifunc.h> + +# define SYMBOL_NAME memset +# include "ifunc-memset.h" + +sparc_libc_ifunc_redirected (__redirect_memset, memset, IFUNC_SELECTOR) +sparc_ifunc_redirected_hidden_def (__redirect_memset, memset) + +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1-generic.S b/sysdeps/sparc/sparc64/multiarch/mul_1-generic.S new file mode 100644 index 0000000000..f1b7e6026a --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/mul_1-generic.S @@ -0,0 +1,2 @@ +#define __mpn_mul_1 __mpn_mul_1_generic +#include <sysdeps/sparc/sparc64/mul_1.S> diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S index d2ddd110b0..79452919cc 100644 --- a/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S +++ b/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S @@ -1,7 +1,7 @@ ! 
SPARC v9 64-bit VIS3 __mpn_mul_1 -- Multiply a limb vector with a single ! limb and store the product in a second limb vector. ! -! Copyright (C) 2013-2016 Free Software Foundation, Inc. +! Copyright (C) 2013-2018 Free Software Foundation, Inc. ! This file is part of the GNU C Library. ! Contributed by David S. Miller <davem@davemloft.net> ! diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1.S b/sysdeps/sparc/sparc64/multiarch/mul_1.S deleted file mode 100644 index 0a1fbdbc51..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/mul_1.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of mul_1 - - Copyright (C) 2013-2016 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - -ENTRY(__mpn_mul_1) - .type __mpn_mul_1, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_mul_1_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_mul_1_vis3), %o1 -# else - set __mpn_mul_1_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_mul_1_generic), %o1 - xor %o1, %gdop_lox10(__mpn_mul_1_generic), %o1 -# else - set __mpn_mul_1_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_mul_1) - -#define __mpn_mul_1 __mpn_mul_1_generic -#include "../mul_1.S" diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1.c b/sysdeps/sparc/sparc64/multiarch/mul_1.c new file mode 100644 index 0000000000..0b8d0cf0da --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/mul_1.c @@ -0,0 +1,28 @@ +/* __mpn_mul_1 ifunc resolver, Linux/sparc64 version. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <gmp.h> +#include <sparc-ifunc.h> + +extern __typeof (mpn_mul_1) __mpn_mul_1_vis3 attribute_hidden; +extern __typeof (mpn_mul_1) __mpn_mul_1_generic attribute_hidden; + +sparc_libm_ifunc (__mpn_mul_1, + hwcap & HWCAP_SPARC_VIS3 + ? 
__mpn_mul_1_vis3 + : __mpn_mul_1_generic) diff --git a/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c b/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c new file mode 100644 index 0000000000..e6d9a5c686 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c @@ -0,0 +1 @@ +#include <sysdeps/sparc/sparc64/rtld-memmove.c> diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-block.c b/sysdeps/sparc/sparc64/multiarch/sha256-block.c index 79966b93d7..9d65315a5a 100644 --- a/sysdeps/sparc/sparc64/multiarch/sha256-block.c +++ b/sysdeps/sparc/sparc64/multiarch/sha256-block.c @@ -1,12 +1,12 @@ #include <sparc-ifunc.h> -#define sha256_process_block sha256_process_block_generic -extern void sha256_process_block_generic (const void *buffer, size_t len, - struct sha256_ctx *ctx); +#define __sha256_process_block __sha256_process_block_generic +extern void __sha256_process_block_generic (const void *buffer, size_t len, + struct sha256_ctx *ctx); #include <crypt/sha256-block.c> -#undef sha256_process_block +#undef __sha256_process_block extern void __sha256_process_block_crop (const void *buffer, size_t len, struct sha256_ctx *ctx); @@ -25,6 +25,8 @@ static bool cpu_supports_sha256(int hwcap) return false; } -extern void sha256_process_block (const void *buffer, size_t len, - struct sha256_ctx *ctx); -sparc_libc_ifunc(sha256_process_block, cpu_supports_sha256(hwcap) ? __sha256_process_block_crop : sha256_process_block_generic); +extern void __sha256_process_block (const void *buffer, size_t len, + struct sha256_ctx *ctx); +sparc_libc_ifunc (__sha256_process_block, + cpu_supports_sha256(hwcap) ? __sha256_process_block_crop + : __sha256_process_block_generic); diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-crop.S b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S index 55186780eb..0f07b8d8a2 100644 --- a/sysdeps/sparc/sparc64/multiarch/sha256-crop.S +++ b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S @@ -1,5 +1,5 @@ /* SHA256 using sparc crypto opcodes. 
- Copyright (C) 2012-2016 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by David S. Miller (davem@davemloft.net) diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-block.c b/sysdeps/sparc/sparc64/multiarch/sha512-block.c index 0d1c3dd6d8..2863e05d09 100644 --- a/sysdeps/sparc/sparc64/multiarch/sha512-block.c +++ b/sysdeps/sparc/sparc64/multiarch/sha512-block.c @@ -1,12 +1,12 @@ #include <sparc-ifunc.h> -#define sha512_process_block sha512_process_block_generic -extern void sha512_process_block_generic (const void *buffer, size_t len, - struct sha512_ctx *ctx); +#define __sha512_process_block __sha512_process_block_generic +extern void __sha512_process_block_generic (const void *buffer, size_t len, + struct sha512_ctx *ctx); #include <crypt/sha512-block.c> -#undef sha512_process_block +#undef __sha512_process_block extern void __sha512_process_block_crop (const void *buffer, size_t len, struct sha512_ctx *ctx); @@ -25,6 +25,8 @@ static bool cpu_supports_sha512(int hwcap) return false; } -extern void sha512_process_block (const void *buffer, size_t len, - struct sha512_ctx *ctx); -sparc_libc_ifunc(sha512_process_block, cpu_supports_sha512(hwcap) ? __sha512_process_block_crop : sha512_process_block_generic); +extern void __sha512_process_block (const void *buffer, size_t len, + struct sha512_ctx *ctx); +sparc_libc_ifunc (__sha512_process_block, + cpu_supports_sha512(hwcap) ? __sha512_process_block_crop + : __sha512_process_block_generic); diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-crop.S b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S index c08a580e0c..f22eef3206 100644 --- a/sysdeps/sparc/sparc64/multiarch/sha512-crop.S +++ b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S @@ -1,5 +1,5 @@ /* SHA512 using sparc crypto opcodes. - Copyright (C) 2012-2016 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. 
This file is part of the GNU C Library. Contributed by David S. Miller (davem@davemloft.net) diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n-generic.S b/sysdeps/sparc/sparc64/multiarch/sub_n-generic.S new file mode 100644 index 0000000000..7cece934a7 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/sub_n-generic.S @@ -0,0 +1,2 @@ +#define __mpn_sub_n __mpn_sub_n_generic +#include <sysdeps/sparc/sparc64/sub_n.S> diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S b/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S index cc659ed7f4..b71c93d36d 100644 --- a/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S +++ b/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S @@ -1,7 +1,7 @@ ! SPARC v9 64-bit VIS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 ! and store difference in a third limb vector. ! -! Copyright (C) 2013-2016 Free Software Foundation, Inc. +! Copyright (C) 2013-2018 Free Software Foundation, Inc. ! This file is part of the GNU C Library. ! Contributed by David S. Miller <davem@davemloft.net> ! diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n.S b/sysdeps/sparc/sparc64/multiarch/sub_n.S deleted file mode 100644 index f69d909614..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/sub_n.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of sub_n - - Copyright (C) 2013-2016 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -ENTRY(__mpn_sub_n) - .type __mpn_sub_n, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_sub_n_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_sub_n_vis3), %o1 -# else - set __mpn_sub_n_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_sub_n_generic), %o1 - xor %o1, %gdop_lox10(__mpn_sub_n_generic), %o1 -# else - set __mpn_sub_n_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_sub_n) - -#define __mpn_sub_n __mpn_sub_n_generic -#include "../sub_n.S" diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n.c b/sysdeps/sparc/sparc64/multiarch/sub_n.c new file mode 100644 index 0000000000..2c1f428932 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/sub_n.c @@ -0,0 +1,28 @@ +/* __mpn_sub_n ifunc resolver, Linux/sparc64 version. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <gmp.h> +#include <sparc-ifunc.h> + +extern __typeof (mpn_sub_n) __mpn_sub_n_vis3 attribute_hidden; +extern __typeof (mpn_sub_n) __mpn_sub_n_generic attribute_hidden; + +sparc_libm_ifunc (__mpn_sub_n, + hwcap & HWCAP_SPARC_VIS3 + ? __mpn_sub_n_vis3 + : __mpn_sub_n_generic) diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1-generic.S b/sysdeps/sparc/sparc64/multiarch/submul_1-generic.S new file mode 100644 index 0000000000..4c1536023d --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/submul_1-generic.S @@ -0,0 +1,2 @@ +#define __mpn_submul_1 __mpn_submul_1_generic +#include <sysdeps/sparc/sparc64/submul_1.S> diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S index e92c73e912..823f90afdd 100644 --- a/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S +++ b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S @@ -1,7 +1,7 @@ ! SPARC v9 64-bit VIS3 __mpn_submul_1 -- Multiply a limb vector with a ! limb and subtract the result from a second limb vector. ! -! Copyright (C) 2013-2016 Free Software Foundation, Inc. +! Copyright (C) 2013-2018 Free Software Foundation, Inc. ! This file is part of the GNU C Library. ! Contributed by David S. Miller <davem@davemloft.net> ! diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1.S b/sysdeps/sparc/sparc64/multiarch/submul_1.S deleted file mode 100644 index f0d9f2ffe3..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/submul_1.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of submul_1 - - Copyright (C) 2013-2016 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -ENTRY(__mpn_submul_1) - .type __mpn_submul_1, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_submul_1_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_submul_1_vis3), %o1 -# else - set __mpn_submul_1_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_submul_1_generic), %o1 - xor %o1, %gdop_lox10(__mpn_submul_1_generic), %o1 -# else - set __mpn_submul_1_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_submul_1) - -#define __mpn_submul_1 __mpn_submul_1_generic -#include "../submul_1.S" diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1.c b/sysdeps/sparc/sparc64/multiarch/submul_1.c new file mode 100644 index 0000000000..6934c6915a --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/submul_1.c @@ -0,0 +1,28 @@ +/* __mpn_submul_1 ifunc resolver, Linux/sparc64 version. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <gmp.h> +#include <sparc-ifunc.h> + +extern __typeof (mpn_submul_1) __mpn_submul_1_vis3 attribute_hidden; +extern __typeof (mpn_submul_1) __mpn_submul_1_generic attribute_hidden; + +sparc_libm_ifunc (__mpn_submul_1, + hwcap & HWCAP_SPARC_VIS3 + ? __mpn_submul_1_vis3 + : __mpn_submul_1_generic) |