summary | refs | log | tree | commit | diff
path: root/sysdeps/sparc/sparc64/multiarch
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/sparc/sparc64/multiarch')
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/Makefile | 8
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/add_n-generic.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/add_n-vis3.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/add_n.S | 56
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/add_n.c | 28
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/addmul_1-generic.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/addmul_1.S | 56
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/addmul_1.c | 28
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/bzero.c | 33
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c | 15
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/ifunc-memcpy.h | 43
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/ifunc-memmove.h | 31
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/ifunc-memset.h | 37
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/md5-crop.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy-memmove-niagara7.S | 980
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy-ultra1.S | 33
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy.S | 167
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy.c | 33
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memmove-ultra1.S | 4
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memmove.c | 33
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/mempcpy.c | 39
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memset-niagara1.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memset-niagara4.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memset-niagara7.S | 334
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memset-ultra1.S | 30
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memset.S | 124
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memset.c | 33
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/mul_1-generic.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/mul_1.S | 56
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/mul_1.c | 28
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/rtld-memmove.c | 1
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/sha256-block.c | 16
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/sha256-crop.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/sha512-block.c | 16
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/sha512-crop.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/sub_n-generic.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/sub_n.S | 56
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/sub_n.c | 28
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/submul_1-generic.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S | 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/submul_1.S | 56
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/submul_1.c | 28
49 files changed, 1866 insertions, 602 deletions
diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile
index 55b757f9ad..eaf758e7aa 100644
--- a/sysdeps/sparc/sparc64/multiarch/Makefile
+++ b/sysdeps/sparc/sparc64/multiarch/Makefile
@@ -8,11 +8,15 @@ endif
ifeq ($(subdir),string)
sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
- memset-niagara1 memcpy-niagara4 memset-niagara4
+ memset-niagara1 memcpy-niagara4 memset-niagara4 \
+ memcpy-ultra1 memset-ultra1 memcpy-memmove-niagara7 \
+ memmove-ultra1 memset-niagara7
endif
ifeq ($(subdir),stdlib)
-sysdep_routines += mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 sub_n-vis3
+sysdep_routines += mul_1-vis3 mul_1-generic addmul_1-vis3 addmul_1-generic \
+ submul_1-vis3 submul_1-generic add_n-vis3 add_n-generic \
+ sub_n-vis3 sub_n-generic
endif
ifeq ($(subdir),math)
diff --git a/sysdeps/sparc/sparc64/multiarch/add_n-generic.S b/sysdeps/sparc/sparc64/multiarch/add_n-generic.S
new file mode 100644
index 0000000000..a16e7091b4
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/add_n-generic.S
@@ -0,0 +1,2 @@
+#define __mpn_add_n __mpn_add_n_generic
+#include <sysdeps/sparc/sparc64/add_n.S>
diff --git a/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S b/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S
index 0fda45a208..b4f1ef5181 100644
--- a/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S
+++ b/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S
@@ -1,7 +1,7 @@
! SPARC v9 64-bit VIS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and
! store sum in a third limb vector.
!
-! Copyright (C) 2013-2016 Free Software Foundation, Inc.
+! Copyright (C) 2013-2018 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
! Contributed by David S. Miller <davem@davemloft.net>
!
diff --git a/sysdeps/sparc/sparc64/multiarch/add_n.S b/sysdeps/sparc/sparc64/multiarch/add_n.S
deleted file mode 100644
index 8e67d75921..0000000000
--- a/sysdeps/sparc/sparc64/multiarch/add_n.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of add_n
-
- Copyright (C) 2013-2016 Free Software Foundation, Inc.
- Contributed by David S. Miller (davem@davemloft.net)
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-ENTRY(__mpn_add_n)
- .type __mpn_add_n, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_VIS3, %o1
- andcc %o0, %o1, %g0
- be 1f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_add_n_vis3), %o1
- xor %o1, %gdop_lox10(__mpn_add_n_vis3), %o1
-# else
- set __mpn_add_n_vis3, %o1
-# endif
- ba 10f
- nop
-1:
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_add_n_generic), %o1
- xor %o1, %gdop_lox10(__mpn_add_n_generic), %o1
-# else
- set __mpn_add_n_generic, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(__mpn_add_n)
-
-#define __mpn_add_n __mpn_add_n_generic
-#include "../add_n.S"
diff --git a/sysdeps/sparc/sparc64/multiarch/add_n.c b/sysdeps/sparc/sparc64/multiarch/add_n.c
new file mode 100644
index 0000000000..47b0d0e3bc
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/add_n.c
@@ -0,0 +1,28 @@
+/* __mpn_add_n ifunc resolver, Linux/sparc64 version.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <gmp.h>
+#include <sparc-ifunc.h>
+
+extern __typeof (mpn_add_n) __mpn_add_n_vis3 attribute_hidden;
+extern __typeof (mpn_add_n) __mpn_add_n_generic attribute_hidden;
+/* Resolve __mpn_add_n to the VIS3 variant when HWCAP_SPARC_VIS3 is set in hwcap, otherwise to the generic variant.  */
+sparc_libm_ifunc (__mpn_add_n,
+ hwcap & HWCAP_SPARC_VIS3
+ ? __mpn_add_n_vis3
+ : __mpn_add_n_generic)
diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1-generic.S b/sysdeps/sparc/sparc64/multiarch/addmul_1-generic.S
new file mode 100644
index 0000000000..5bf1da7fde
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/addmul_1-generic.S
@@ -0,0 +1,2 @@
+#define __mpn_addmul_1 __mpn_addmul_1_generic
+#include <sysdeps/sparc/sparc64/addmul_1.S>
diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S
index 9a2f6acd9c..b5e808bddd 100644
--- a/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S
+++ b/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S
@@ -1,7 +1,7 @@
! SPARC v9 64-bit VIS3 __mpn_addmul_1 -- Multiply a limb vector with a
! limb and add the result to a second limb vector.
!
-! Copyright (C) 2013-2016 Free Software Foundation, Inc.
+! Copyright (C) 2013-2018 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
! Contributed by David S. Miller <davem@davemloft.net>
!
diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1.S b/sysdeps/sparc/sparc64/multiarch/addmul_1.S
deleted file mode 100644
index 4763edd457..0000000000
--- a/sysdeps/sparc/sparc64/multiarch/addmul_1.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of addmul_1
-
- Copyright (C) 2013-2016 Free Software Foundation, Inc.
- Contributed by David S. Miller (davem@davemloft.net)
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-ENTRY(__mpn_addmul_1)
- .type __mpn_addmul_1, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_VIS3, %o1
- andcc %o0, %o1, %g0
- be 1f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_addmul_1_vis3), %o1
- xor %o1, %gdop_lox10(__mpn_addmul_1_vis3), %o1
-# else
- set __mpn_addmul_1_vis3, %o1
-# endif
- ba 10f
- nop
-1:
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_addmul_1_generic), %o1
- xor %o1, %gdop_lox10(__mpn_addmul_1_generic), %o1
-# else
- set __mpn_addmul_1_generic, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(__mpn_addmul_1)
-
-#define __mpn_addmul_1 __mpn_addmul_1_generic
-#include "../addmul_1.S"
diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1.c b/sysdeps/sparc/sparc64/multiarch/addmul_1.c
new file mode 100644
index 0000000000..afaeca870d
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/addmul_1.c
@@ -0,0 +1,28 @@
+/* __mpn_addmul_1 ifunc resolver, Linux/sparc64 version.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <gmp.h>
+#include <sparc-ifunc.h>
+
+extern __typeof (mpn_addmul_1) __mpn_addmul_1_vis3 attribute_hidden;
+extern __typeof (mpn_addmul_1) __mpn_addmul_1_generic attribute_hidden;
+/* Resolve __mpn_addmul_1 to the VIS3 variant when HWCAP_SPARC_VIS3 is set in hwcap, otherwise to the generic variant.  */
+sparc_libm_ifunc (__mpn_addmul_1,
+ hwcap & HWCAP_SPARC_VIS3
+ ? __mpn_addmul_1_vis3
+ : __mpn_addmul_1_generic)
diff --git a/sysdeps/sparc/sparc64/multiarch/bzero.c b/sysdeps/sparc/sparc64/multiarch/bzero.c
new file mode 100644
index 0000000000..3af2ff3d47
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/bzero.c
@@ -0,0 +1,33 @@
+/* Multiple versions of bzero. SPARC64/Linux version.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define bzero __redirect_bzero
+# include <string.h>
+# undef bzero
+/* <string.h> was included with bzero renamed to __redirect_bzero; the rename is undone before bzero itself is defined below.  */
+# include <sparc-ifunc.h>
+
+# define SYMBOL_NAME bzero
+# include "ifunc-memset.h"
+/* __bzero is the ifunc symbol, resolved through the IFUNC_SELECTOR provided by ifunc-memset.h; bzero is a weak alias of it.  */
+sparc_libc_ifunc_redirected (__redirect_bzero, __bzero, IFUNC_SELECTOR)
+weak_alias (__bzero, bzero)
+
+#endif /* IS_IN (libc) */
diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c b/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c
index e52eeb0650..91c6565c7a 100644
--- a/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c
@@ -1,5 +1,5 @@
/* Enumerate available IFUNC implementations of a function. sparc version.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -36,6 +36,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
hwcap = GLRO(dl_hwcap);
IFUNC_IMPL (i, name, memcpy,
+ IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_ADP,
+ __memcpy_niagara7)
IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_CRYPTO,
__memcpy_niagara4)
IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_N2,
@@ -47,6 +49,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ultra1));
IFUNC_IMPL (i, name, mempcpy,
+ IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_ADP,
+ __mempcpy_niagara7)
IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_CRYPTO,
__mempcpy_niagara4)
IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_N2,
@@ -58,6 +62,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ultra1));
IFUNC_IMPL (i, name, bzero,
+ IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_ADP,
+ __bzero_niagara7)
IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_CRYPTO,
__bzero_niagara4)
IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_BLKINIT,
@@ -65,11 +71,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ultra1));
IFUNC_IMPL (i, name, memset,
+ IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_ADP,
+ __memset_niagara7)
IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_CRYPTO,
__memset_niagara4)
IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_BLKINIT,
__memset_niagara1)
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ultra1));
+ IFUNC_IMPL (i, name, memmove,
+ IFUNC_IMPL_ADD (array, i, memmove, hwcap & HWCAP_SPARC_ADP,
+ __memmove_niagara7)
+ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ultra1));
+
return i;
}
diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-memcpy.h b/sysdeps/sparc/sparc64/multiarch/ifunc-memcpy.h
new file mode 100644
index 0000000000..73ea15297a
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/ifunc-memcpy.h
@@ -0,0 +1,43 @@
+/* Common definition for memcpy and mempcpy implementation.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <ifunc-init.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara7) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara4) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara1) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ultra3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ultra1) attribute_hidden;
+/* Return the implementation for the first capability bit found in HWCAP, testing ADP, CRYPTO, N2, BLKINIT and ULTRA3 in that order; ultra1 is the fallback.  */
+static inline void *
+IFUNC_SELECTOR (int hwcap)
+{
+ if (hwcap & HWCAP_SPARC_ADP)
+ return OPTIMIZE (niagara7);
+ if (hwcap & HWCAP_SPARC_CRYPTO)
+ return OPTIMIZE (niagara4);
+ if (hwcap & HWCAP_SPARC_N2)
+ return OPTIMIZE (niagara2);
+ if (hwcap & HWCAP_SPARC_BLKINIT)
+ return OPTIMIZE (niagara1);
+ if (hwcap & HWCAP_SPARC_ULTRA3)
+ return OPTIMIZE (ultra3);
+ return OPTIMIZE (ultra1); /* no matching capability bit */
+}
diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-memmove.h b/sysdeps/sparc/sparc64/multiarch/ifunc-memmove.h
new file mode 100644
index 0000000000..4b89ff4baf
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/ifunc-memmove.h
@@ -0,0 +1,31 @@
+/* Common definition for memmove implementation.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <ifunc-init.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara7) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ultra1) attribute_hidden;
+/* Return the niagara7 implementation when HWCAP_SPARC_ADP is set in HWCAP, otherwise the ultra1 implementation.  */
+static inline void *
+IFUNC_SELECTOR (int hwcap)
+{
+ if (hwcap & HWCAP_SPARC_ADP)
+ return OPTIMIZE (niagara7);
+ return OPTIMIZE (ultra1); /* no matching capability bit */
+}
diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-memset.h b/sysdeps/sparc/sparc64/multiarch/ifunc-memset.h
new file mode 100644
index 0000000000..d554638bd6
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/ifunc-memset.h
@@ -0,0 +1,37 @@
+/* Common definition for memset/bzero implementation.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <ifunc-init.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara7) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara4) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (niagara1) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ultra1) attribute_hidden;
+/* Return the implementation for the first capability bit found in HWCAP, testing ADP, CRYPTO and BLKINIT in that order; ultra1 is the fallback.  */
+static inline void *
+IFUNC_SELECTOR (int hwcap)
+{
+ if (hwcap & HWCAP_SPARC_ADP)
+ return OPTIMIZE (niagara7);
+ if (hwcap & HWCAP_SPARC_CRYPTO)
+ return OPTIMIZE (niagara4);
+ if (hwcap & HWCAP_SPARC_BLKINIT)
+ return OPTIMIZE (niagara1);
+ return OPTIMIZE (ultra1); /* no matching capability bit */
+}
diff --git a/sysdeps/sparc/sparc64/multiarch/md5-crop.S b/sysdeps/sparc/sparc64/multiarch/md5-crop.S
index de1ba6df2f..764a8aae48 100644
--- a/sysdeps/sparc/sparc64/multiarch/md5-crop.S
+++ b/sysdeps/sparc/sparc64/multiarch/md5-crop.S
@@ -1,5 +1,5 @@
/* MD5 using sparc crypto opcodes.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-memmove-niagara7.S b/sysdeps/sparc/sparc64/multiarch/memcpy-memmove-niagara7.S
new file mode 100644
index 0000000000..61ba1ed408
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-memmove-niagara7.S
@@ -0,0 +1,980 @@
+/* Copy SIZE bytes from SRC to DEST. For SUN4V M7.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#ifndef XCC
+# define XCC xcc
+#endif
+ .register %g2,#scratch
+ .register %g3,#scratch
+ .register %g6,#scratch
+
+#define FPRS_FEF 0x04
+
+/*
+ * ASI_STBI_P marks the cache line as "least recently used"
+ * which means if many threads are active, it has a high chance
+ * of being pushed out of the cache between the first initializing
+ * store and the final stores.
+ * Thus, in this algorithm we use ASI_STBIMRU_P which marks the
+ * cache line as "most recently used" for all but the last cache
+ * line.
+ */
+
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define ASI_ST_BLK_INIT_MRU_P 0xf2
+
+#define ASI_STBI_P ASI_BLK_INIT_QUAD_LDD_P
+#define ASI_STBIMRU_P ASI_ST_BLK_INIT_MRU_P
+
+#define BLOCK_SIZE 64 /* L2 data cache line size */
+#define SHORTCOPY 3
+#define SHORTCHECK 14
+#define SHORT_LONG 64 /* max copy for short longword-aligned case */
+ /* must be at least 64 */
+#define SMALL_MAX 255 /* max small copy for word/long aligned */
+#define SMALL_UMAX 128 /* max small copy for unaligned case */
+#define MED_WMAX 1023 /* max copy for medium word-aligned case */
+#define MED_MAX 511 /* max copy for medium longword-aligned case */
+#define ST_CHUNK 20 /* ST_CHUNK - block of values for BIS Store */
+/* on T4, prefetch 20 is a strong read prefetch to L1 and L2 data cache
+ * prefetch 20 can cause inst pipeline to delay if data is in memory
+ * prefetch 21 is a strong read prefetch to L2 data cache, not L1 data cache */
+#define ALIGN_PRE 20 /* distance for aligned prefetch loop */
+
+#define EX_ST(x) x
+#define EX_RETVAL(x) x
+#define STORE_ASI(src,addr) stxa src, [addr] ASI_STBIMRU_P
+#define STORE_INIT(src,addr) stxa src, [addr] ASI_STBI_P
+
+#if IS_IN (libc)
+
+ .text
+
+ENTRY(__memmove_niagara7)
+ /* memmove: %o0=dst, %o1=src, %o2=len. Backward copy is used only when src < dst < src+len; every other case goes to .Lforcpy. */
+ cmp %o1, %o0 /* if src address is >= dst, use forward copy */
+ bgeu,pn %XCC, .Lforcpy /* else use backward if ... */
+ sub %o0, %o1, %o4 /* delay slot: %o4 = dst - src */
+ cmp %o2, %o4 /* compare size and difference of addresses */
+ bleu,pn %XCC, .Lforcpy /* ... size is bigger than the difference; else forward */
+ add %o1, %o2, %o5 /* delay slot: %o5 = end of source space */
+
+/* an overlapped copy that must be done "backwards" */
+.Lchksize:
+ cmp %o2, 8 /* fewer than 8 bytes: do byte copy */
+ blu,pn %XCC, 2f /* else continue */
+
+/* Now size is at least 8 */
+.Ldbalign:
+ add %o0, %o2, %g1 /* get to end of dest space (also the delay slot of the blu above) */
+ andcc %g1, 7, %o3 /* %o3 = byte count until dst end is 8 byte aligned */
+ bz,a,pn %XCC, .Ldbbck /* skip if dst is 8 byte aligned */
+ andn %o2, 7, %o3 /* force %o3 cnt to multiple of 8 */
+ sub %o2, %o3, %o2 /* update o2 with new count */
+
+1: dec %o5 /* decrement source */
+ ldub [%o5], %g1 /* load one byte */
+ deccc %o3 /* decrement count */
+ bgu,pt %XCC, 1b /* if not done keep copying */
+ stb %g1, [%o5+%o4] /* store one byte into dest (%o4 = dst - src) */
+ andncc %o2, 7, %o3 /* force %o3 cnt to multiple of 8 */
+ bz,pn %XCC, 2f /* if size < 8, move to byte copy */
+
+/* Now Destination is 8 byte aligned */
+.Ldbbck:
+ andcc %o5, 7, %o0 /* %o0 has src offset (also the delay slot of the bz above) */
+ bz,a,pn %XCC, .Ldbcopybc /* if src is aligned do fast memmove */
+ sub %o2, %o3, %o2 /* Residue bytes in %o2 */
+
+.Lcpy_dbwdbc: /* alignment of src is needed */
+ sub %o2, 8, %o2 /* set size one loop ahead */
+ sll %o0, 3, %g1 /* %g1 is left shift in bits (src offset * 8) */
+ mov 64, %g5 /* init %g5 to be 64 */
+ sub %g5, %g1, %g5 /* %g5 rightshift = (64 - leftshift) */
+ sub %o5, %o0, %o5 /* align the src at 8 bytes. */
+ add %o4, %o0, %o4 /* increase diff between src & dst */
+ ldx [%o5], %o1 /* load first 8 bytes */
+ srlx %o1, %g5, %o1
+1: sub %o5, 8, %o5 /* subtract 8 from src */
+ ldx [%o5], %o0 /* load 8 bytes */
+ sllx %o0, %g1, %o3 /* shift loaded val left to tmp reg */
+ or %o1, %o3, %o3 /* align data */
+ stx %o3, [%o5+%o4] /* store 8 bytes */
+ subcc %o2, 8, %o2 /* subtract 8 bytes from size */
+ bg,pt %XCC, 1b /* if size > 0 continue */
+ srlx %o0, %g5, %o1 /* keep the leftover bytes for the next iteration */
+
+ srl %g1, 3, %o0 /* restore %o0 value for alignment */
+ add %o5, %o0, %o5 /* restore src alignment */
+ sub %o4, %o0, %o4 /* restore diff between src & dest */
+
+ ba 2f /* branch to the trailing byte copy */
+ add %o2, 8, %o2 /* restore size value */
+
+.Ldbcopybc: /* alignment of src is not needed */
+1: sub %o5, 8, %o5 /* subtract from src */
+ ldx [%o5], %g1 /* load 8 bytes */
+ subcc %o3, 8, %o3 /* subtract from size */
+ bgu,pt %XCC, 1b /* if size is bigger than 0 continue */
+ stx %g1, [%o5+%o4] /* store 8 bytes to destination */
+
+ ba 2f
+ nop
+
+.Lbcbyte:
+1: ldub [%o5], %g1 /* load one byte */
+ stb %g1, [%o5+%o4] /* store one byte */
+2: deccc %o2 /* decrement size */
+ bgeu,a,pt %XCC, 1b /* continue while the decrement did not borrow */
+ dec %o5 /* decrement from address (annulled when the branch is not taken) */
+
+.Lexitbc: /* exit from backward copy */
+ retl
+ add %o5, %o4, %o0 /* restore dest addr: src pointer + (dst - src) */
+
+
+/* Check to see if memmove is large aligned copy
+ * If so, use special version of copy that avoids
+ * use of block store init. */
+.Lforcpy:
+ cmp %o2, SMALL_MAX /* check for not small case */
+ blt,pn %XCC, .Lmv_short /* merge with memcpy */
+ mov %o0, %g1 /* save %o0 */
+ neg %o0, %o5
+ andcc %o5, 7, %o5 /* bytes till DST 8 byte aligned */
+ brz,pt %o5, .Lmv_dst_aligned_on_8
+
+/* %o5 has the bytes to be written in partial store. */
+ sub %o2, %o5, %o2
+ sub %o1, %o0, %o1 /* %o1 gets the difference (src - dst) */
+7: /* dst aligning loop */
+ ldub [%o1+%o0], %o4 /* load one byte */
+ subcc %o5, 1, %o5
+ stb %o4, [%o0]
+ bgu,pt %XCC, 7b
+ add %o0, 1, %o0 /* advance dst */
+ add %o1, %o0, %o1 /* restore %o1 */
+.Lmv_dst_aligned_on_8:
+ andcc %o1, 7, %o5
+ brnz,pn %o5, .Lsrc_dst_unaligned_on_8
+ prefetch [%o1 + (1 * BLOCK_SIZE)], 20
+
+.Lmv_src_dst_aligned_on_8:
+/* check if we are copying more than MED_MAX bytes */
+ cmp %o2, MED_MAX /* limit to store buffer size */
+ bleu,pt %XCC, .Lmedlong
+ prefetch [%o1 + (2 * BLOCK_SIZE)], 20
+
+/* The mv_align loop below mimics the memcpy code for large aligned copies,
+ * but does not use the ASI_STBI_P (block initializing store) performance
+ * optimization. This is used when memcpy is incorrectly invoked with
+ * overlapping buffers. */
+
+.Lmv_large_align8_copy: /* Src and dst share 8 byte align */
+ /* align dst to 64 byte boundary */
+ andcc %o0, 0x3f, %o3 /* check for dst 64 byte aligned */
+ brz,pn %o3, .Lmv_aligned_on_64
+ sub %o3, 64, %o3 /* %o3 has negative bytes to move */
+ add %o2, %o3, %o2 /* adjust remaining count */
+.Lmv_align_to_64:
+ ldx [%o1], %o4
+ add %o1, 8, %o1 /* increment src ptr */
+ addcc %o3, 8, %o3
+ stx %o4, [%o0]
+ brnz,pt %o3, .Lmv_align_to_64
+ add %o0, 8, %o0 /* increment dst ptr */
+
+.Lmv_aligned_on_64:
+ andn %o2, 0x3f, %o5 /* %o5 is multiple of block size */
+ and %o2, 0x3f, %o2 /* residue bytes in %o2 */
+.Lmv_align_loop:
+ ldx [%o1],%o4
+ stx %o4,[%o0]
+ prefetch [%o0 + (10 * BLOCK_SIZE)], 22 /* prefetch dst 10 blocks ahead */
+ prefetch [%o1 + (10 * BLOCK_SIZE)], 21 /* prefetch src 10 blocks ahead */
+ subcc %o5, 64, %o5
+ ldx [%o1+8],%o4
+ stx %o4,[%o0+8]
+ ldx [%o1+16],%o4
+ stx %o4,[%o0+16]
+ ldx [%o1+24],%o4
+ stx %o4,[%o0+24]
+ ldx [%o1+32],%o4
+ stx %o4,[%o0+32]
+ ldx [%o1+40],%o4
+ stx %o4,[%o0+40]
+ ldx [%o1+48],%o4
+ add %o1, 64, %o1
+ stx %o4,[%o0+48]
+ add %o0, 64, %o0
+ ldx [%o1-8],%o4
+ bgt,pt %XCC, .Lmv_align_loop
+ stx %o4,[%o0-8]
+
+ ba .Lmedlong /* continue at .Lmedlong with the residue count in %o2 */
+ nop
+END(__memmove_niagara7)
+
+ENTRY(__mempcpy_niagara7)
+ /* mempcpy: %o0=dst, %o1=src, %o2=len */
+ ba,pt %icc, 101f /* join the memcpy body at 101, past its own %g1 setup */
+ add %o0, %o2, %g1 /* delay slot: save dst + len in %g1 */
+END(__mempcpy_niagara7)
+
+ .align 32
+ENTRY(__memcpy_niagara7)
+100: /* %o0=dst, %o1=src, %o2=len */
+ mov %o0, %g1 /* save %o0; %g1 feeds EX_RETVAL at every retl */
+101:
+#ifndef __arch64__
+ srl %o2, 0, %o2 /* not __arch64__: clear the upper 32 bits of len */
+#endif
+ cmp %o2, SMALL_MAX /* check for not small case */
+ bgeu,pn %XCC, .Lmedium /* go to larger cases */
+.Lmv_short:
+ cmp %o2, SHORTCOPY /* (delay slot of bgeu) check for really short case */
+ ble,pn %XCC, .Lsmallfin
+ or %o0, %o1, %o4 /* (delay slot) prepare alignment check */
+ andcc %o4, 0x3, %o5 /* test for word alignment */
+ bnz,pn %XCC, .Lsmallunalign /* branch to non-word aligned case */
+ nop
+ subcc %o2, 7, %o2 /* adjust count: %o2 = count-7 from here on */
+ ble,pn %XCC, .Lsmallwordx
+ andcc %o4, 0x7, %o5 /* (delay slot) test for long alignment; cc read at .Lsmalllong */
+/* 8 or more bytes, src and dest start on word boundary
+ * %o4 contains or %o0, %o1 */
+.Lsmalllong:
+ bnz,pn %XCC, .Lsmallwords /* branch to word aligned case */
+ cmp %o2, SHORT_LONG-7 /* (delay slot) %o2 is count-7, hence the -7 */
+ bge,a %XCC, .Lmedl64 /* if we branch */
+ sub %o2,56,%o2 /* (annulled unless taken) adjust %o2 to -63 off count */
+
+/* slightly unroll the small_long_loop to improve very short copies */
+ cmp %o2, 32-7 /* fewer than 32 bytes to copy? */
+ blt,a,pn %XCC, .Lsmall_long_l
+ sub %o1, %o0, %o1 /* (annulled unless taken) %o1 gets the difference */
+
+ ldx [%o1], %o5
+ ldx [%o1+8], %o4
+ ldx [%o1+16], %o3
+
+ subcc %o2, 24, %o2 /* 24 bytes handled by the unrolled part */
+ sub %o1, %o0, %o1 /* %o1 gets the difference */
+
+ stx %o5, [%o0] /* write 8 bytes */
+ stx %o4, [%o0+8] /* write 8 bytes */
+ stx %o3, [%o0+16] /* write 8 bytes */
+
+ add %o0, 24, %o0
+
+/* end loop unroll */
+
+.Lsmall_long_l:
+ ldx [%o1+%o0], %o3 /* %o1 holds src-dst, so this reads the src byte matching %o0 */
+ subcc %o2, 8, %o2
+ add %o0, 8, %o0
+ bgu,pn %XCC, .Lsmall_long_l /* loop until done */
+ stx %o3, [%o0-8] /* (delay slot) write 8 bytes */
+ addcc %o2, 7, %o2 /* restore %o2 to correct count */
+ bnz,pn %XCC, .Lsmall_long_x /* check for completion */
+ add %o1, %o0, %o1 /* (delay slot) restore %o1 */
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+.Lsmall_long_x:
+ cmp %o2, 4 /* check for 4 or more bytes left */
+ blt,pn %XCC, .Lsmallleft3 /* if not, go to finish up */
+ nop
+ lduw [%o1], %o3
+ add %o1, 4, %o1
+ subcc %o2, 4, %o2
+ stw %o3, [%o0]
+ bnz,pn %XCC, .Lsmallleft3 /* 1 to 3 bytes still to copy */
+ add %o0, 4, %o0 /* (delay slot) */
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+
+ .align 32
+/* src and dest start on word boundary; 7 or fewer bytes */
+.Lsmallwordx:
+ lduw [%o1], %o3 /* read word */
+ addcc %o2, 3, %o2 /* from .Lmv_short %o2 is count-7; now count-4, bytes left after the word */
+ bz,pt %XCC, .Lsmallexit
+ stw %o3, [%o0] /* (delay slot) write word */
+ deccc %o2 /* reduce count for cc test */
+ ldub [%o1+4], %o3 /* load one byte */
+ bz,pt %XCC, .Lsmallexit
+ stb %o3, [%o0+4] /* (delay slot) store one byte */
+ ldub [%o1+5], %o3 /* load second byte */
+ deccc %o2
+ bz,pt %XCC, .Lsmallexit
+ stb %o3, [%o0+5] /* (delay slot) store second byte */
+ ldub [%o1+6], %o3 /* load third byte */
+ stb %o3, [%o0+6] /* store third byte */
+.Lsmallexit:
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+
+ .align 32
+.Lsmallunalign:
+ cmp %o2, SHORTCHECK
+ ble,pn %XCC, .Lsmallrest /* short enough for the plain byte copy */
+ cmp %o2, SMALL_UMAX /* (delay slot) cc read by the bge below */
+ bge,pt %XCC, .Lmedium_join
+ andcc %o1, 0x3, %o5 /* (delay slot) is src word aligned */
+ bz,pn %XCC, .Laldst
+ cmp %o5, 2 /* (delay slot) is src half-word aligned */
+ be,pt %XCC, .Ls2algn
+ cmp %o5, 3 /* (delay slot) src is byte aligned; cc read by bne below */
+.Ls1algn:
+ ldub [%o1], %o3 /* move 1 or 3 bytes to align it */
+ inc 1, %o1
+ stb %o3, [%o0] /* move a byte to align src */
+ inc 1, %o0
+ bne,pt %XCC, .Ls2algn /* src&3 was 1: two more bytes needed */
+ dec %o2 /* (delay slot) one byte copied */
+ b .Lald /* now go align dest */
+ andcc %o0, 0x3, %o5 /* (delay slot) same test as at .Laldst */
+
+.Ls2algn:
+ lduh [%o1], %o3 /* know src is 2 byte aligned */
+ inc 2, %o1
+ srl %o3, 8, %o4 /* upper byte of the halfword goes out first */
+ stb %o4, [%o0] /* have to do bytes, */
+ stb %o3, [%o0 + 1] /* do not know dst alignment */
+ inc 2, %o0
+ dec 2, %o2
+
+.Laldst:
+ andcc %o0, 0x3, %o5 /* align the destination address */
+.Lald:
+ bz,pn %XCC, .Lw4cp /* dst already word aligned */
+ cmp %o5, 2 /* (delay slot) */
+ be,pn %XCC, .Lw2cp /* dst is half-word aligned */
+ cmp %o5, 3 /* (delay slot) cc read by bne in .Lw3cp */
+.Lw3cp: lduw [%o1], %o4 /* dst&3 is 1 or 3 here */
+ inc 4, %o1
+ srl %o4, 24, %o5 /* top byte of the word just read */
+ stb %o5, [%o0]
+ bne,pt %XCC, .Lw1cp /* cc still from cmp %o5,3: dst&3 was 1 */
+ inc %o0 /* (delay slot) */
+ dec 1, %o2
+ andn %o2, 3, %o3 /* %o3 is aligned word count */
+ dec 4, %o3 /* avoid reading beyond tail of src */
+ sub %o1, %o0, %o1 /* %o1 gets the difference */
+
+1: sll %o4, 8, %g5 /* save residual bytes */
+ lduw [%o1+%o0], %o4
+ deccc 4, %o3
+ srl %o4, 24, %o5 /* merge with residual */
+ or %o5, %g5, %g5
+ st %g5, [%o0]
+ bnz,pt %XCC, 1b
+ inc 4, %o0 /* (delay slot) */
+ sub %o1, 3, %o1 /* used one byte of last word read */
+ and %o2, 3, %o2
+ b 7f
+ inc 4, %o2 /* (delay slot) add back the 4 bytes held out of %o3 */
+
+.Lw1cp: srl %o4, 8, %o5 /* next two bytes of the word read in .Lw3cp */
+ sth %o5, [%o0]
+ inc 2, %o0
+ dec 3, %o2 /* one byte plus one halfword stored so far */
+ andn %o2, 3, %o3 /* %o3 is aligned word count */
+ dec 4, %o3 /* avoid reading beyond tail of src */
+ sub %o1, %o0, %o1 /* %o1 gets the difference */
+
+2: sll %o4, 24, %g5 /* save residual bytes */
+ lduw [%o1+%o0], %o4
+ deccc 4, %o3
+ srl %o4, 8, %o5 /* merge with residual */
+ or %o5, %g5, %g5
+ st %g5, [%o0]
+ bnz,pt %XCC, 2b
+ inc 4, %o0 /* (delay slot) */
+ sub %o1, 1, %o1 /* used 3 bytes of last word read */
+ and %o2, 3, %o2
+ b 7f
+ inc 4, %o2 /* (delay slot) add back the 4 bytes held out of %o3 */
+
+.Lw2cp: lduw [%o1], %o4
+ inc 4, %o1
+ srl %o4, 16, %o5 /* upper half of the word */
+ sth %o5, [%o0]
+ inc 2, %o0
+ dec 2, %o2
+ andn %o2, 3, %o3 /* %o3 is aligned word count */
+ dec 4, %o3 /* avoid reading beyond tail of src */
+ sub %o1, %o0, %o1 /* %o1 gets the difference */
+
+3: sll %o4, 16, %g5 /* save residual bytes */
+ lduw [%o1+%o0], %o4
+ deccc 4, %o3
+ srl %o4, 16, %o5 /* merge with residual */
+ or %o5, %g5, %g5
+ st %g5, [%o0]
+ bnz,pt %XCC, 3b
+ inc 4, %o0 /* (delay slot) */
+ sub %o1, 2, %o1 /* used two bytes of last word read */
+ and %o2, 3, %o2
+ b 7f
+ inc 4, %o2 /* (delay slot) add back the 4 bytes held out of %o3 */
+
+.Lw4cp: andn %o2, 3, %o3 /* %o3 is aligned word count */
+ sub %o1, %o0, %o1 /* %o1 gets the difference */
+
+1: lduw [%o1+%o0], %o4 /* read from address */
+ deccc 4, %o3 /* decrement count */
+ st %o4, [%o0] /* write at destination address */
+ bgu,pt %XCC, 1b
+ inc 4, %o0 /* (delay slot) increment to address */
+ and %o2, 3, %o2 /* number of leftover bytes, if any */
+
+ /* simple finish up byte copy, works with any alignment */
+7:
+ add %o1, %o0, %o1 /* restore %o1 */
+.Lsmallrest:
+ tst %o2
+ bz,pt %XCC, .Lsmallx /* nothing left */
+ cmp %o2, 4 /* (delay slot) */
+ blt,pn %XCC, .Lsmallleft3
+ nop
+ sub %o2, 3, %o2 /* bias by -3 so the loop leaves 3 or fewer bytes */
+.Lsmallnotalign4:
+ ldub [%o1], %o3 /* read byte */
+ subcc %o2, 4, %o2 /* reduce count by 4 */
+ stb %o3, [%o0] /* write byte */
+ ldub [%o1+1], %o3 /* repeat for total of 4 bytes */
+ add %o1, 4, %o1 /* advance SRC by 4 */
+ stb %o3, [%o0+1]
+ ldub [%o1-2], %o3
+ add %o0, 4, %o0 /* advance DST by 4 */
+ stb %o3, [%o0-2]
+ ldub [%o1-1], %o3
+ bgu,pt %XCC, .Lsmallnotalign4 /* loop til 3 or fewer bytes remain */
+ stb %o3, [%o0-1] /* (delay slot) */
+ addcc %o2, 3, %o2 /* restore count */
+ bz,pt %XCC, .Lsmallx /* its delay slot is the subcc below */
+.Lsmallleft3: /* 1, 2, or 3 bytes remain */
+ subcc %o2, 1, %o2
+ ldub [%o1], %o3 /* load one byte */
+ bz,pt %XCC, .Lsmallx
+ stb %o3, [%o0] /* (delay slot) store one byte */
+ ldub [%o1+1], %o3 /* load second byte */
+ subcc %o2, 1, %o2
+ bz,pt %XCC, .Lsmallx
+ stb %o3, [%o0+1] /* (delay slot) store second byte */
+ ldub [%o1+2], %o3 /* load third byte */
+ stb %o3, [%o0+2] /* store third byte */
+.Lsmallx:
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+
+.Lsmallfin:
+ tst %o2 /* zero-length copy? */
+ bnz,pn %XCC, .Lsmallleft3
+ nop
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+
+ .align 16
+.Lsmallwords: /* from .Lsmalllong: %o2 is count-7 */
+ lduw [%o1], %o3 /* read word */
+ subcc %o2, 8, %o2 /* update count */
+ stw %o3, [%o0] /* write word */
+ add %o1, 8, %o1 /* update SRC */
+ lduw [%o1-4], %o3 /* read word */
+ add %o0, 8, %o0 /* update DST */
+ bgu,pt %XCC, .Lsmallwords /* loop until done */
+ stw %o3, [%o0-4] /* (delay slot) write word */
+ addcc %o2, 7, %o2 /* restore count */
+ bz,pt %XCC, .Lsmallexit /* check for completion */
+ cmp %o2, 4 /* (delay slot) check for 4 or more bytes left */
+ blt,pt %XCC, .Lsmallleft3 /* if not, go to finish up */
+ nop
+ lduw [%o1], %o3
+ add %o1, 4, %o1
+ subcc %o2, 4, %o2
+ add %o0, 4, %o0
+ bnz,pn %XCC, .Lsmallleft3 /* 1 to 3 bytes still to copy */
+ stw %o3, [%o0-4] /* (delay slot) */
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+
+ .align 16
+.Lmedium:
+.Lmedium_join:
+ neg %o0, %o5
+ andcc %o5, 7, %o5 /* bytes till DST 8 byte aligned */
+ brz,pt %o5, .Ldst_aligned_on_8
+
+ /* %o5 has the bytes to be written in partial store. */
+ sub %o2, %o5, %o2 /* delay slot of brz; changes nothing when %o5 is 0 */
+ sub %o1, %o0, %o1 /* %o1 gets the difference */
+7: /* dst aligning loop */
+ ldub [%o1+%o0], %o4 /* load one byte */
+ subcc %o5, 1, %o5
+ stb %o4, [%o0]
+ bgu,pt %XCC, 7b
+ add %o0, 1, %o0 /* (delay slot) advance dst */
+ add %o1, %o0, %o1 /* restore %o1 */
+.Ldst_aligned_on_8:
+ andcc %o1, 7, %o5 /* is src 8 byte aligned as well? */
+ brnz,pt %o5, .Lsrc_dst_unaligned_on_8
+ nop
+
+.Lsrc_dst_aligned_on_8:
+ /* check if we are copying more than MED_MAX bytes */
+ cmp %o2, MED_MAX /* limit to store buffer size */
+ bgu,pn %XCC, .Llarge_align8_copy
+ nop
+/*
+ * Special case for handling when src and dest are both long word aligned
+ * and total data to move is at most MED_MAX bytes
+ */
+.Lmedlong:
+ subcc %o2, 63, %o2 /* adjust length to allow cc test */
+ ble,pn %XCC, .Lmedl63 /* skip big loop if < 64 bytes */
+ nop
+.Lmedl64: /* entered with %o2 = bytes left - 63 */
+ ldx [%o1], %o4 /* load */
+ subcc %o2, 64, %o2 /* decrement length count */
+ stx %o4, [%o0] /* and store */
+ ldx [%o1+8], %o3 /* a block of 64 bytes */
+ stx %o3, [%o0+8]
+ ldx [%o1+16], %o4
+ stx %o4, [%o0+16]
+ ldx [%o1+24], %o3
+ stx %o3, [%o0+24]
+ ldx [%o1+32], %o4 /* load */
+ stx %o4, [%o0+32] /* and store */
+ ldx [%o1+40], %o3 /* a block of 64 bytes */
+ add %o1, 64, %o1 /* increase src ptr by 64 */
+ stx %o3, [%o0+40]
+ ldx [%o1-16], %o4
+ add %o0, 64, %o0 /* increase dst ptr by 64 */
+ stx %o4, [%o0-16]
+ ldx [%o1-8], %o3
+ bgu,pt %XCC, .Lmedl64 /* repeat if at least 64 bytes left */
+ stx %o3, [%o0-8] /* (delay slot) */
+.Lmedl63: /* entered with %o2 = bytes left - 63 */
+ addcc %o2, 32, %o2 /* adjust remaining count */
+ ble,pt %XCC, .Lmedl31 /* to skip if 31 or fewer bytes left */
+ nop
+ ldx [%o1], %o4 /* load */
+ sub %o2, 32, %o2 /* decrement length count */
+ stx %o4, [%o0] /* and store */
+ ldx [%o1+8], %o3 /* a block of 32 bytes */
+ add %o1, 32, %o1 /* increase src ptr by 32 */
+ stx %o3, [%o0+8]
+ ldx [%o1-16], %o4
+ add %o0, 32, %o0 /* increase dst ptr by 32 */
+ stx %o4, [%o0-16]
+ ldx [%o1-8], %o3
+ stx %o3, [%o0-8]
+.Lmedl31: /* %o2 = bytes left - 31 */
+ addcc %o2, 16, %o2 /* adjust remaining count */
+ ble,pt %XCC, .Lmedl15 /* skip if 15 or fewer bytes left */
+ nop
+ ldx [%o1], %o4 /* load and store 16 bytes */
+ add %o1, 16, %o1 /* increase src ptr by 16 */
+ stx %o4, [%o0]
+ sub %o2, 16, %o2 /* decrease count by 16 */
+ ldx [%o1-8], %o3
+ add %o0, 16, %o0 /* increase dst ptr by 16 */
+ stx %o3, [%o0-8]
+.Lmedl15: /* %o2 = bytes left - 15 */
+ addcc %o2, 15, %o2 /* restore count */
+ bz,pt %XCC, .Lsmallexit /* exit if finished */
+ cmp %o2, 8 /* (delay slot) */
+ blt,pt %XCC, .Lmedw7 /* skip if 7 or fewer bytes left */
+ tst %o2 /* (delay slot) */
+ ldx [%o1], %o4 /* load 8 bytes */
+ add %o1, 8, %o1 /* increase src ptr by 8 */
+ add %o0, 8, %o0 /* increase dst ptr by 8 */
+ subcc %o2, 8, %o2 /* decrease count by 8 */
+ bnz,pn %XCC, .Lmedw7
+ stx %o4, [%o0-8] /* (delay slot) and store 8 bytes */
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+
+ .align 16
+.Lsrc_dst_unaligned_on_8:
+ /* DST is 8-byte aligned, src is not */
+ andcc %o1, 0x3, %o5 /* test word alignment */
+ bnz,pt %XCC, .Lunalignsetup /* branch if not word aligned */
+ nop
+
+/*
+ * Handle all cases where src and dest are aligned on word
+ * boundaries. Use unrolled loops for better performance.
+ * This option wins over standard large data move when
+ * source and destination is in cache for medium
+ * to short data moves.
+ */
+ cmp %o2, MED_WMAX /* limit to store buffer size */
+ bge,pt %XCC, .Lunalignrejoin /* otherwise rejoin main loop */
+ nop
+
+ subcc %o2, 31, %o2 /* adjust length to allow cc test */
+ /* for end of loop */
+ ble,pt %XCC, .Lmedw31 /* skip big loop if less than 32 */
+.Lmedw32:
+ ld [%o1], %o4 /* move a block of 32 bytes; also the delay slot of the ble above, where %o4 is reloaded before use */
+ sllx %o4, 32, %o5 /* first word becomes the upper half */
+ ld [%o1+4], %o4
+ or %o4, %o5, %o5 /* second word becomes the lower half */
+ stx %o5, [%o0]
+ subcc %o2, 32, %o2 /* decrement length count */
+ ld [%o1+8], %o4
+ sllx %o4, 32, %o5
+ ld [%o1+12], %o4
+ or %o4, %o5, %o5
+ stx %o5, [%o0+8]
+ add %o1, 32, %o1 /* increase src ptr by 32 */
+ ld [%o1-16], %o4
+ sllx %o4, 32, %o5
+ ld [%o1-12], %o4
+ or %o4, %o5, %o5
+ stx %o5, [%o0+16]
+ add %o0, 32, %o0 /* increase dst ptr by 32 */
+ ld [%o1-8], %o4
+ sllx %o4, 32, %o5
+ ld [%o1-4], %o4
+ or %o4, %o5, %o5
+ bgu,pt %XCC, .Lmedw32 /* repeat if at least 32 bytes left */
+ stx %o5, [%o0-8] /* (delay slot) */
+.Lmedw31:
+ addcc %o2, 31, %o2 /* restore count */
+ bz,pt %XCC, .Lsmallexit /* exit if finished */
+ cmp %o2, 16 /* (delay slot) */
+ blt,pt %XCC, .Lmedw15
+ nop
+ ld [%o1], %o4 /* move a block of 16 bytes */
+ sllx %o4, 32, %o5
+ subcc %o2, 16, %o2 /* decrement length count */
+ ld [%o1+4], %o4
+ or %o4, %o5, %o5
+ stx %o5, [%o0]
+ add %o1, 16, %o1 /* increase src ptr by 16 */
+ ld [%o1-8], %o4
+ add %o0, 16, %o0 /* increase dst ptr by 16 */
+ sllx %o4, 32, %o5
+ ld [%o1-4], %o4
+ or %o4, %o5, %o5
+ stx %o5, [%o0-8]
+.Lmedw15: /* cc comes from the cmp %o2,16 or the subcc %o2,16 above */
+ bz,pt %XCC, .Lsmallexit /* exit if finished */
+ cmp %o2, 8 /* (delay slot) */
+ blt,pn %XCC, .Lmedw7 /* skip if 7 or fewer bytes left */
+ tst %o2 /* (delay slot) */
+ ld [%o1], %o4 /* load 4 bytes */
+ subcc %o2, 8, %o2 /* decrease count by 8 */
+ stw %o4, [%o0] /* and store 4 bytes */
+ add %o1, 8, %o1 /* increase src ptr by 8 */
+ ld [%o1-4], %o3 /* load 4 bytes */
+ add %o0, 8, %o0 /* increase dst ptr by 8 */
+ stw %o3, [%o0-4] /* and store 4 bytes */
+ bz,pt %XCC, .Lsmallexit /* exit if finished; its delay slot is the cmp below */
+.Lmedw7: /* count is ge 1, less than 8 */
+ cmp %o2, 4 /* check for 4 bytes left */
+ blt,pn %XCC, .Lsmallleft3 /* skip if 3 or fewer bytes left */
+ nop
+ ld [%o1], %o4 /* load 4 bytes */
+ add %o1, 4, %o1 /* increase src ptr by 4 */
+ add %o0, 4, %o0 /* increase dst ptr by 4 */
+ subcc %o2, 4, %o2 /* decrease count by 4 */
+ bnz,pt %XCC, .Lsmallleft3
+ stw %o4, [%o0-4] /* (delay slot) and store 4 bytes */
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+
+ .align 16
+.Llarge_align8_copy: /* Src and dst 8 byte aligned */
+ /* align dst to 64 byte boundary */
+ andcc %o0, 0x3f, %o3 /* check for dst 64 byte aligned */
+ brz,pn %o3, .Laligned_to_64
+ andcc %o0, 8, %o3 /* (delay slot) odd long words to move? */
+ brz,pt %o3, .Laligned_to_16
+ nop
+ ldx [%o1], %o4 /* copy 8 bytes so dst reaches a 16 byte boundary */
+ sub %o2, 8, %o2
+ add %o1, 8, %o1 /* increment src ptr */
+ add %o0, 8, %o0 /* increment dst ptr */
+ stx %o4, [%o0-8]
+.Laligned_to_16:
+ andcc %o0, 16, %o3 /* pair of long words to move? */
+ brz,pt %o3, .Laligned_to_32
+ nop
+ ldx [%o1], %o4 /* copy 16 bytes so dst reaches a 32 byte boundary */
+ sub %o2, 16, %o2
+ stx %o4, [%o0]
+ add %o1, 16, %o1 /* increment src ptr */
+ ldx [%o1-8], %o4
+ add %o0, 16, %o0 /* increment dst ptr */
+ stx %o4, [%o0-8]
+.Laligned_to_32:
+ andcc %o0, 32, %o3 /* four long words to move? */
+ brz,pt %o3, .Laligned_to_64
+ nop
+ ldx [%o1], %o4 /* copy 32 bytes so dst reaches a 64 byte boundary */
+ sub %o2, 32, %o2
+ stx %o4, [%o0]
+ ldx [%o1+8], %o4
+ stx %o4, [%o0+8]
+ ldx [%o1+16], %o4
+ stx %o4, [%o0+16]
+ add %o1, 32, %o1 /* increment src ptr */
+ ldx [%o1-8], %o4
+ add %o0, 32, %o0 /* increment dst ptr */
+ stx %o4, [%o0-8]
+.Laligned_to_64:
+/* Following test is included to avoid issues where existing executables
+ * incorrectly call memcpy with overlapping src and dest instead of memmove
+ *
+ * if ( (src ge dst) and (dst+len > src)) go to overlap case
+ * if ( (src lt dst) and (src+len > dst)) go to overlap case
+ *
+ * Addresses are compared as unsigned values (bgeu/bgu). With the signed
+ * forms, a build that compares only 32 bits orders buffers on either side
+ * of 0x80000000 the wrong way round and misses their overlap.
+ */
+ cmp %o1,%o0 /* src vs dst */
+ bgeu,pt %XCC, 1f /* src >= dst, unsigned */
+ nop
+/* src+len > dst? */
+ add %o1, %o2, %o4 /* %o4 = end of src */
+ cmp %o4, %o0
+ bgu,pt %XCC, .Lmv_aligned_on_64 /* overlap: take the memmove path */
+ nop
+ ba 2f
+ nop
+1:
+/* dst+len > src? */
+ add %o0, %o2, %o4 /* %o4 = end of dst */
+ cmp %o4, %o1
+ bgu,pt %XCC, .Lmv_aligned_on_64 /* overlap: take the memmove path */
+ nop
+2:
+/* handle non-overlapped copies
+ *
+ * Using block init store (BIS) instructions to avoid fetching cache
+ * lines from memory. Use ST_CHUNK stores to first element of each cache
+ * line (similar to prefetching) to avoid overfilling STQ or miss buffers.
+ * Gives existing cache lines time to be moved out of L1/L2/L3 cache.
+ */
+ andn %o2, 0x3f, %o5 /* %o5 is multiple of block size */
+ and %o2, 0x3f, %o2 /* residue bytes in %o2 */
+
+/* We use ASI_STBIMRU_P for the first store to each cache line
+ * followed by ASI_STBI_P (mark as LRU) for the last store. That
+ * mixed approach reduces the chances the cache line is removed
+ * before we finish setting it, while minimizing the effects on
+ * other cached values during a large memcpy
+ *
+ * Intermediate stores can be normal since first BIS activates the
+ * cache line in the L2 cache.
+ *
+ * ST_CHUNK batches up initial BIS operations for several cache lines
+ * to allow multiple requests to not be blocked by overflowing
+ * the store miss buffer. Then the matching stores for all those
+ * BIS operations are executed.
+ */
+
+.Lalign_loop:
+ cmp %o5, ST_CHUNK*64
+ blu,pt %XCC, .Lalign_short /* fewer than ST_CHUNK lines left */
+ mov ST_CHUNK, %o3 /* (delay slot) */
+ sllx %o3, 6, %g5 /* ST_CHUNK*64 */
+
+.Lalign_loop_start: /* first 8 bytes of two cache lines per pass */
+ prefetch [%o1 + (ALIGN_PRE * BLOCK_SIZE)], 21
+ subcc %o3, 2, %o3 /* two lines handled per pass */
+ ldx [%o1], %o4
+ add %o1, 128, %o1
+ EX_ST(STORE_ASI(%o4, %o0))
+ add %o0, 64, %o0
+ ldx [%o1-64], %o4
+ EX_ST(STORE_ASI(%o4, %o0))
+ add %o0, 64, %o0
+ bgu,pt %XCC, .Lalign_loop_start
+ prefetch [%o1 + ((ALIGN_PRE-1) * BLOCK_SIZE)], 21
+
+ mov ST_CHUNK, %o3
+ sub %o1, %g5, %o1 /* reset %o1 */
+ sub %o0, %g5, %o0 /* reset %o0 */
+
+ sub %o0, 8, %o0 /* adjust %o0 for ASI alignment */
+.Lalign_loop_rest: /* remaining 56 bytes of each line started above */
+ ldx [%o1+8],%o4
+ add %o0, 64, %o0
+ stx %o4, [%o0-48] /* with the -8 bias this is offset 8 of the line */
+ subcc %o3, 1, %o3
+ ldx [%o1+16],%o4
+ stx %o4, [%o0-40]
+ sub %o5, 64, %o5 /* one block done */
+ ldx [%o1+24],%o4
+ stx %o4, [%o0-32]
+ ldx [%o1+32],%o4
+ stx %o4, [%o0-24]
+ ldx [%o1+40],%o4
+ stx %o4, [%o0-16]
+ ldx [%o1+48],%o4
+ stx %o4, [%o0-8]
+ add %o1, 64, %o1
+ ldx [%o1-8],%o4
+ bgu,pt %XCC, .Lalign_loop_rest
+ EX_ST(STORE_INIT(%o4,%o0)) /* mark cache line as LRU */
+
+ mov ST_CHUNK, %o3
+ cmp %o5, ST_CHUNK*64
+ bgu,pt %XCC, .Lalign_loop_start
+ add %o0, 8, %o0 /* restore %o0 from ASI alignment */
+
+ cmp %o5, 0
+ beq,pt %XCC, .Lalign_done /* its delay slot is the srl below */
+
+/* no prefetches needed in these loops
+ * since we are within ALIGN_PRE of the end */
+.Lalign_short:
+ srl %o5, 6, %o3 /* %o3 = number of 64 byte blocks left */
+.Lalign_loop_short:
+ subcc %o3, 1, %o3
+ ldx [%o1], %o4
+ add %o1, 64, %o1
+ EX_ST(STORE_ASI(%o4, %o0))
+ bgu,pt %XCC, .Lalign_loop_short
+ add %o0, 64, %o0
+
+ sub %o1, %o5, %o1 /* reset %o1 */
+ sub %o0, %o5, %o0 /* reset %o0 */
+
+ sub %o0, 8, %o0 /* adjust %o0 for ASI alignment */
+.Lalign_short_rest:
+ ldx [%o1+8],%o4
+ add %o0, 64, %o0
+ stx %o4, [%o0-48]
+ ldx [%o1+16],%o4
+ subcc %o5, 64, %o5 /* loop ends when no blocks are left */
+ stx %o4, [%o0-40]
+ ldx [%o1+24],%o4
+ stx %o4, [%o0-32]
+ ldx [%o1+32],%o4
+ stx %o4, [%o0-24]
+ ldx [%o1+40],%o4
+ stx %o4, [%o0-16]
+ ldx [%o1+48],%o4
+ stx %o4, [%o0-8]
+ add %o1, 64, %o1
+ ldx [%o1-8],%o4
+ bgu,pt %XCC, .Lalign_short_rest
+ EX_ST(STORE_INIT(%o4,%o0)) /* mark cache line as LRU */
+
+ add %o0, 8, %o0 /* restore %o0 from ASI alignment */
+
+.Lalign_done:
+ cmp %o2, 0 /* any residue bytes? */
+ membar #StoreStore
+ bne,pt %XCC, .Lmedl63
+ subcc %o2, 63, %o2 /* adjust length to allow cc test */
+ retl
+ mov EX_RETVAL(%g1), %o0 /* restore %o0 */
+
+ .align 16
+ /* Dst is on 8 byte boundary; src is not; remaining cnt > SMALL_MAX */
+ /* Since block load/store and BIS are not in use for unaligned data,
+ * no need to align dst on 64 byte cache line boundary */
+.Lunalignsetup:
+.Lunalignrejoin:
+ rd %fprs, %g5 /* check for unused fp */
+ /* if fprs.fef == 0, set it.
+ * Setting it when already set costs more than checking */
+ andcc %g5, FPRS_FEF, %g5 /* test FEF, fprs.du = fprs.dl = 0 */
+ bz,a %XCC, 1f
+ wr %g0, FPRS_FEF, %fprs /* (annulled unless taken) fprs.fef = 1 */
+1:
+ andn %o2, 0x3f, %o5 /* %o5 is multiple of block size */
+ and %o2, 0x3f, %o2 /* residue bytes in %o2 */
+ cmp %o2, 8 /* Ensure we do not load beyond */
+ bgt,pt %XCC, .Lunalign_adjust /* end of source buffer */
+ andn %o1, 0x7, %o4 /* %o4 has 8 byte aligned src addr */
+ add %o2, 64, %o2 /* adjust to leave loop */
+ sub %o5, 64, %o5 /* early if necessary */
+.Lunalign_adjust:
+ alignaddr %o1, %g0, %g0 /* generate %gsr */
+ add %o1, %o5, %o1 /* advance %o1 to after blocks */
+ ldd [%o4], %f0 /* first aligned doubleword */
+.Lunalign_loop: /* 64 bytes per pass; %f0 carries over between passes */
+ prefetch [%o0 + (9 * BLOCK_SIZE)], 20
+ ldd [%o4+8], %f2
+ faligndata %f0, %f2, %f16
+ ldd [%o4+16], %f4
+ subcc %o5, BLOCK_SIZE, %o5
+ std %f16, [%o0]
+ faligndata %f2, %f4, %f18
+ ldd [%o4+24], %f6
+ std %f18, [%o0+8]
+ faligndata %f4, %f6, %f20
+ ldd [%o4+32], %f8
+ std %f20, [%o0+16]
+ faligndata %f6, %f8, %f22
+ ldd [%o4+40], %f10
+ std %f22, [%o0+24]
+ faligndata %f8, %f10, %f24
+ ldd [%o4+48], %f12
+ std %f24, [%o0+32]
+ faligndata %f10, %f12, %f26
+ ldd [%o4+56], %f14
+ add %o4, BLOCK_SIZE, %o4
+ std %f26, [%o0+40]
+ faligndata %f12, %f14, %f28
+ ldd [%o4], %f0 /* first doubleword of the next block */
+ std %f28, [%o0+48]
+ faligndata %f14, %f0, %f30
+ std %f30, [%o0+56]
+ add %o0, BLOCK_SIZE, %o0
+ bgu,pt %XCC, .Lunalign_loop
+ prefetch [%o4 + (11 * BLOCK_SIZE)], 20
+
+ /* Handle trailing bytes, 64 to 127
+ * Dest long word aligned, Src not long word aligned */
+ cmp %o2, 15
+ bleu,pt %XCC, .Lunalign_short
+
+ andn %o2, 0x7, %o5 /* (delay slot of bleu) %o5 is multiple of 8 */
+ and %o2, 0x7, %o2 /* residue bytes in %o2 */
+ add %o2, 8, %o2
+ sub %o5, 8, %o5 /* do not load past end of src */
+ andn %o1, 0x7, %o4 /* %o4 has 8 byte aligned src addr */
+ add %o1, %o5, %o1 /* move %o1 to after multiple of 8 */
+ ldd [%o4], %f0 /* fetch partial word */
+.Lunalign_by8:
+ ldd [%o4+8], %f2
+ add %o4, 8, %o4
+ faligndata %f0, %f2, %f16
+ subcc %o5, 8, %o5
+ std %f16, [%o0]
+ fsrc2 %f2, %f0 /* keep the last doubleword for the next pass */
+ bgu,pt %XCC, .Lunalign_by8
+ add %o0, 8, %o0
+
+.Lunalign_short: /* restore fprs state */
+ brnz,pt %g5, .Lsmallrest /* FEF was already set on entry: leave %fprs alone */
+ nop
+ ba .Lsmallrest
+ wr %g5, %g0, %fprs /* (delay slot) %g5 is 0 here, so FEF is cleared again */
+END(__memcpy_niagara7)
+
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
index a3b69f9ef1..50b37af104 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
@@ -1,5 +1,5 @@
/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara.
- Copyright (C) 2006-2016 Free Software Foundation, Inc.
+ Copyright (C) 2006-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
index 9b3e1651b1..91d9eb3221 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
@@ -1,5 +1,5 @@
/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara-2.
- Copyright (C) 2007-2016 Free Software Foundation, Inc.
+ Copyright (C) 2007-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
index 7234a7bf75..096a11cfd8 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
@@ -1,5 +1,5 @@
/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara-4.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra1.S
new file mode 100644
index 0000000000..8e0b3e2d48
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra1.S
@@ -0,0 +1,33 @@
+/* Default SPARC64 memcpy implementation.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) /* expands to nothing in the code included below */
+# undef weak_alias
+# define weak_alias(x, y) /* expands to nothing in the code included below */
+# undef libc_hidden_def
+# define libc_hidden_def(name) /* expands to nothing in the code included below */
+
+# define memcpy __memcpy_ultra1 /* rename the symbols used by the included file */
+# define __memcpy_large __memcpy_large_ultra1
+# define __mempcpy __mempcpy_ultra1
+# include <sysdeps/sparc/sparc64/memcpy.S> /* assembled under the names above */
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
index 5b00c35d44..41cd606f59 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
@@ -1,6 +1,6 @@
/* Copy SIZE bytes from SRC to DEST.
For UltraSPARC-III.
- Copyright (C) 2001-2016 Free Software Foundation, Inc.
+ Copyright (C) 2001-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@redhat.com)
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S
deleted file mode 100644
index 328f62152b..0000000000
--- a/sysdeps/sparc/sparc64/multiarch/memcpy.S
+++ /dev/null
@@ -1,167 +0,0 @@
-/* Multiple versions of memcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
- Contributed by David S. Miller (davem@davemloft.net)
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(memcpy)
- .type memcpy, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_CRYPTO, %o1
- andcc %o0, %o1, %g0
- be 1f
- andcc %o0, HWCAP_SPARC_N2, %g0
-# ifdef SHARED
- sethi %gdop_hix22(__memcpy_niagara4), %o1
- xor %o1, %gdop_lox10(__memcpy_niagara4), %o1
-# else
- set __memcpy_niagara4, %o1
-# endif
- ba 10f
- nop
-1: be 1f
- andcc %o0, HWCAP_SPARC_BLKINIT, %g0
-# ifdef SHARED
- sethi %gdop_hix22(__memcpy_niagara2), %o1
- xor %o1, %gdop_lox10(__memcpy_niagara2), %o1
-# else
- set __memcpy_niagara2, %o1
-# endif
- ba 10f
- nop
-1: be 1f
- andcc %o0, HWCAP_SPARC_ULTRA3, %g0
-# ifdef SHARED
- sethi %gdop_hix22(__memcpy_niagara1), %o1
- xor %o1, %gdop_lox10(__memcpy_niagara1), %o1
-# else
- set __memcpy_niagara1, %o1
-# endif
- ba 10f
- nop
-1: be 9f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__memcpy_ultra3), %o1
- xor %o1, %gdop_lox10(__memcpy_ultra3), %o1
-# else
- set __memcpy_ultra3, %o1
-# endif
- ba 10f
- nop
-9:
-# ifdef SHARED
- sethi %gdop_hix22(__memcpy_ultra1), %o1
- xor %o1, %gdop_lox10(__memcpy_ultra1), %o1
-# else
- set __memcpy_ultra1, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(memcpy)
-
-ENTRY(__mempcpy)
- .type __mempcpy, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_CRYPTO, %o1
- andcc %o0, %o1, %g0
- be 1f
- andcc %o0, HWCAP_SPARC_N2, %g0
-# ifdef SHARED
- sethi %gdop_hix22(__mempcpy_niagara4), %o1
- xor %o1, %gdop_lox10(__mempcpy_niagara4), %o1
-# else
- set __mempcpy_niagara4, %o1
-# endif
- ba 10f
- nop
-1: be 1f
- andcc %o0, HWCAP_SPARC_BLKINIT, %g0
-# ifdef SHARED
- sethi %gdop_hix22(__mempcpy_niagara2), %o1
- xor %o1, %gdop_lox10(__mempcpy_niagara2), %o1
-# else
- set __mempcpy_niagara2, %o1
-# endif
- ba 10f
- nop
-1: be 1f
- andcc %o0, HWCAP_SPARC_ULTRA3, %g0
-# ifdef SHARED
- sethi %gdop_hix22(__mempcpy_niagara1), %o1
- xor %o1, %gdop_lox10(__mempcpy_niagara1), %o1
-# else
- set __mempcpy_niagara1, %o1
-# endif
- ba 10f
- nop
-1: be 9f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__mempcpy_ultra3), %o1
- xor %o1, %gdop_lox10(__mempcpy_ultra3), %o1
-# else
- set __mempcpy_ultra3, %o1
-# endif
- ba 10f
- nop
-9:
-# ifdef SHARED
- sethi %gdop_hix22(__mempcpy_ultra1), %o1
- xor %o1, %gdop_lox10(__mempcpy_ultra1), %o1
-# else
- set __mempcpy_ultra1, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(__mempcpy)
-
-libc_hidden_builtin_def (memcpy)
-
-libc_hidden_def (__mempcpy)
-weak_alias (__mempcpy, mempcpy)
-libc_hidden_builtin_def (mempcpy)
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-#undef weak_alias
-#define weak_alias(x, y)
-#undef libc_hidden_def
-#define libc_hidden_def(name)
-
-#define memcpy __memcpy_ultra1
-#define __mempcpy __mempcpy_ultra1
-
-#endif
-
-#include "../memcpy.S"
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.c b/sysdeps/sparc/sparc64/multiarch/memcpy.c
new file mode 100644
index 0000000000..7adb2936c7
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy.c
@@ -0,0 +1,33 @@
+/* Multiple versions of memcpy. SPARC64/Linux version.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define memcpy __redirect_memcpy /* <string.h> names memcpy __redirect_memcpy while it is read */
+# include <string.h>
+# undef memcpy
+
+# include <sparc-ifunc.h>
+
+# define SYMBOL_NAME memcpy /* set before the include below; presumably read there (not visible here) */
+# include "ifunc-memcpy.h"
+
+sparc_libc_ifunc_redirected (__redirect_memcpy, memcpy, IFUNC_SELECTOR)
+
+sparc_ifunc_redirected_hidden_def (__redirect_memcpy, memcpy)
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/memmove-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memmove-ultra1.S
new file mode 100644
index 0000000000..2ed85d92e6
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memmove-ultra1.S
@@ -0,0 +1,4 @@
+#define memmove __memmove_ultra1 /* rename the symbol used by the included file */
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name) /* expands to nothing in the code included below */
+#include <sysdeps/sparc/sparc64/memmove.S> /* assembled under the name above */
diff --git a/sysdeps/sparc/sparc64/multiarch/memmove.c b/sysdeps/sparc/sparc64/multiarch/memmove.c
new file mode 100644
index 0000000000..878d532cea
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memmove.c
@@ -0,0 +1,33 @@
+/* Multiple versions of memmove.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define memmove __redirect_memmove /* <string.h> names memmove __redirect_memmove while it is read */
+# include <string.h>
+# undef memmove
+
+# include <sparc-ifunc.h>
+
+# define SYMBOL_NAME memmove /* set before the include below; presumably read there (not visible here) */
+# include "ifunc-memmove.h"
+
+/* No trailing ';' here, matching the same macro call in memcpy.c and
+   mempcpy.c.  */
+sparc_libc_ifunc_redirected (__redirect_memmove, memmove, IFUNC_SELECTOR)
+
+sparc_ifunc_redirected_hidden_def (__redirect_memmove, memmove)
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/mempcpy.c b/sysdeps/sparc/sparc64/multiarch/mempcpy.c
new file mode 100644
index 0000000000..ab398c6b08
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/mempcpy.c
@@ -0,0 +1,39 @@
+/* Multiple versions of mempcpy. SPARC64/Linux version.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define mempcpy __redirect_mempcpy /* <string.h> sees both names renamed while it is read */
+# define __mempcpy __redirect___mempcpy
+# define NO_MEMPCPY_STPCPY_REDIRECT
+# define __NO_STRING_INLINES
+# include <string.h>
+# undef mempcpy
+# undef __mempcpy
+
+# include <sparc-ifunc.h>
+
+# define SYMBOL_NAME mempcpy /* set before the include below; presumably read there (not visible here) */
+# include "ifunc-memcpy.h"
+
+sparc_libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR)
+
+sparc_ifunc_redirected_hidden_def (__redirect___mempcpy, __mempcpy)
+weak_alias (__mempcpy, mempcpy) /* mempcpy is a weak alias of __mempcpy */
+sparc_ifunc_redirected_hidden_def (__redirect_mempcpy, mempcpy)
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S
index fe3e09df73..8752b16f4a 100644
--- a/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S
+++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S
@@ -1,5 +1,5 @@
/* Set a block of memory to some byte value. For SUN4V Niagara.
- Copyright (C) 2006-2016 Free Software Foundation, Inc.
+ Copyright (C) 2006-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
index 85ab05485f..2198463a27 100644
--- a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
+++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
@@ -1,5 +1,5 @@
/* Set a block of memory to some byte value. For SUN4V Niagara-4.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara7.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara7.S
new file mode 100644
index 0000000000..77910c7b62
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara7.S
@@ -0,0 +1,334 @@
+/* Set a block of memory to some byte value. For SUN4V M7.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#ifndef XCC
+# define XCC xcc
+#endif
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+/* The algorithm is as follows :
+ *
+ * For small stores of 7 or fewer bytes, individual bytes will be stored.
+ *
+ * For stores of fewer than 32 bytes, align the address on a 4-byte boundary.
+ * Then store as many 4-byte chunks as possible, followed by trailing bytes.
+ *
+ * For sizes of 32 bytes or more, align the address on an 8-byte boundary.
+ * if (count >= 64) {
+ * store 8-bytes chunks to align the address on 64 byte boundary
+ * if (value to be set is zero && count >= MIN_ZERO) {
+ * Using BIS stores, set the first long word of each
+ * 64-byte cache line to zero which will also clear the
+ * other seven long words of the cache line.
+ * }
+ * else if (count >= MIN_LOOP) {
+ * Using BIS stores, set the first long word of each of
+ * ST_CHUNK cache lines (64 bytes each) before the main
+ * loop is entered.
+ * In the main loop, continue pre-setting the first long
+ * word of each cache line ST_CHUNK lines in advance while
+ * setting the other seven long words (56 bytes) of each
+ * cache line until fewer than ST_CHUNK*64 bytes remain.
+ * Then set the remaining seven long words of each cache
+ * line that has already had its first long word set.
+ * }
+ * store remaining data in 64-byte chunks until less than
+ * 64 bytes remain.
+ * }
+ * Store as many 8-byte chunks as possible, followed by trailing bytes.
+ *
+ *
+ * BIS = Block Init Store
+ * Doing the advance store of the first element of the cache line
+ * initiates the displacement of a cache line while only using a single
+ * instruction in the pipeline. That avoids various pipeline delays,
+ * such as filling the miss buffer. The performance effect is
+ * similar to prefetching for normal stores.
+ * The special case for zero fills runs faster and uses fewer instruction
+ * cycles than the normal memset loop.
+ *
+ * We only use BIS for memsets of at least MIN_LOOP bytes because a sequence of
+ * BIS stores must be followed by a membar #StoreStore. The benefit of
+ * the BIS store must be balanced against the cost of the membar operation.
+ */
+
+/*
+ * ASI_STBI_P marks the cache line as "least recently used"
+ * which means if many threads are active, it has a high chance
+ * of being pushed out of the cache between the first initializing
+ * store and the final stores.
+ * Thus, we use ASI_STBIMRU_P which marks the cache line as
+ * "most recently used" for all but the last store to the cache line.
+ */
+
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define ASI_ST_BLK_INIT_MRU_P 0xf2
+
+#define ASI_STBI_P ASI_BLK_INIT_QUAD_LDD_P
+#define ASI_STBIMRU_P ASI_ST_BLK_INIT_MRU_P
+
+#define ST_CHUNK 24 /* multiple of 4 due to loop unrolling */
+#define MIN_LOOP (ST_CHUNK)*64
+#define MIN_ZERO 256
+
+#define EX_ST(x) x
+#define EX_RETVAL(x) x
+#define STORE_ASI(src,addr) stxa src, [addr] ASI_STBIMRU_P
+#define STORE_INIT(src,addr) stxa src, [addr] ASI_STBI_P
+
+#if IS_IN (libc)
+
+ .text
+ .align 32
+
+ENTRY(__bzero_niagara7)
+ /* bzero (dst, size): rearrange the arguments into memset (dst, 0, size) form */
+ mov %o1, %o2 /* size moves to %o2, the register the memset code uses as its count */
+ mov 0, %o1 /* fill value is zero */
+ /* fall through into memset code; this relies on __memset_niagara7 following immediately */
+END(__bzero_niagara7)
+
+ENTRY(__memset_niagara7)
+ /* memset (dst, c, size): %o0 = dst, %o1 = c, %o2 = size. %o5 is the running store pointer; %o0 is never written, so it is returned unchanged. */
+ mov %o0, %o5 /* copy sp1 before using it */
+ cmp %o2, 7 /* if small counts, just write bytes */
+ bleu,pn %XCC, .Lwrchar
+ and %o1, 0xff, %o1 /* o1 is (char)c */
+
+ sll %o1, 8, %o3
+ or %o1, %o3, %o1 /* now o1 has 2 bytes of c */
+ sll %o1, 16, %o3
+ cmp %o2, 32 /* fewer than 32 bytes takes the 4-byte store path */
+ blu,pn %XCC, .Lwdalign
+ or %o1, %o3, %o1 /* now o1 has 4 bytes of c */
+
+ sllx %o1, 32, %o3
+ or %o1, %o3, %o1 /* now o1 has 8 bytes of c */
+
+.Ldbalign:
+ andcc %o5, 7, %o3 /* is sp1 aligned on a 8 byte bound? */
+ bz,pt %XCC, .Lblkalign /* already long word aligned */
+ sub %o3, 8, %o3 /* -(bytes till long word aligned) */
+
+ add %o2, %o3, %o2 /* update o2 with new count */
+ /* Set -(%o3) bytes till sp1 long word aligned */
+1: stb %o1, [%o5] /* there is at least 1 byte to set */
+ inccc %o3 /* byte clearing loop */
+ bl,pt %XCC, 1b /* loop while the negative counter has not reached zero */
+ inc %o5 /* delay slot: advance the store pointer */
+
+ /* Now sp1 is long word aligned (sp1 is found in %o5) */
+.Lblkalign:
+ cmp %o2, 64 /* check if there are 64 bytes to set */
+ blu,pn %XCC, .Lwrshort
+ mov %o2, %o3 /* delay slot: %o3 = byte count consumed by .Lwrshort */
+
+ andcc %o5, 63, %o3 /* is sp1 block aligned? */
+ bz,pt %XCC, .Lblkwr /* now block aligned */
+ sub %o3, 64, %o3 /* o3 is -(bytes till block aligned) */
+ add %o2, %o3, %o2 /* o2 is the remainder */
+
+ /* Store -(%o3) bytes till dst is block (64 byte) aligned. */
+ /* Use long word stores. */
+ /* Recall that dst is already long word aligned */
+1:
+ addcc %o3, 8, %o3 /* count up towards zero, 8 bytes per store */
+ stx %o1, [%o5]
+ bl,pt %XCC, 1b
+ add %o5, 8, %o5 /* delay slot: advance the store pointer */
+
+ /* Now sp1 is block aligned */
+.Lblkwr:
+ andn %o2, 63, %o4 /* calculate size of blocks in bytes */
+ brz,pn %o1, .Lwrzero /* special case if c == 0 */
+ and %o2, 63, %o3 /* %o3 = bytes left after blk stores */
+
+ cmp %o4, MIN_LOOP /* check for enough bytes to set */
+ blu,pn %XCC, .Lshort_set /* to justify cost of membar */
+ nop /* must be > pre-cleared lines */
+
+ /* initial cache-clearing stores */
+ /* get store pipeline moving */
+
+/* Primary memset loop for large memsets */
+.Lwr_loop:
+ mov ST_CHUNK, %g1 /* %g1 = number of cache lines to pre-set */
+.Lwr_loop_start:
+ subcc %g1, 4, %g1 /* four lines are handled per iteration */
+ EX_ST(STORE_ASI(%o1,%o5)) /* MRU block-init store to the first long word of a line */
+ add %o5, 64, %o5
+ EX_ST(STORE_ASI(%o1,%o5))
+ add %o5, 64, %o5
+ EX_ST(STORE_ASI(%o1,%o5))
+ add %o5, 64, %o5
+ EX_ST(STORE_ASI(%o1,%o5))
+ bgu %XCC, .Lwr_loop_start
+ add %o5, 64, %o5 /* delay slot: step to the next line */
+
+ sub %o5, ST_CHUNK*64, %o5 /* reset %o5 */
+ mov ST_CHUNK, %g1 /* %g1 = lines whose remaining seven long words must be filled */
+ sub %o5, 8, %o5 /* adjust %o5 for ASI store */
+
+.Lwr_loop_rest:
+ stx %o1,[%o5+8+8] /* %o5 is biased by -8, so this is line offset 8 */
+ sub %o4, 64, %o4 /* one more 64-byte block accounted for */
+ stx %o1,[%o5+16+8] /* line offset 16 */
+ subcc %g1, 1, %g1
+ stx %o1,[%o5+24+8] /* line offset 24 */
+ stx %o1,[%o5+32+8] /* line offset 32 */
+ stx %o1,[%o5+40+8] /* line offset 40 */
+ add %o5, 64, %o5
+ stx %o1,[%o5-8] /* line offset 48 of the line just filled */
+ bgu %XCC, .Lwr_loop_rest
+ EX_ST(STORE_INIT(%o1,%o5)) /* delay slot: last long word (offset 56) uses the ASI_STBI_P store */
+
+ add %o5, 8, %o5 /* restore %o5 offset */
+
+ /* If more than ST_CHUNK*64 bytes remain to set, continue */
+ /* setting the first long word of each cache line in advance */
+ /* to keep the store pipeline moving. */
+
+ cmp %o4, ST_CHUNK*64
+ bge,pt %XCC, .Lwr_loop_start /* signed compare on the remaining block byte count */
+ mov ST_CHUNK, %g1 /* delay slot: reload the line counter */
+
+ brz,a,pn %o4, .Lasi_done /* no whole blocks left */
+ nop
+
+ sub %o5, 8, %o5 /* adjust %o5 for ASI store */
+.Lwr_loop_small:
+ add %o5, 8, %o5 /* adjust %o5 for ASI store */
+ EX_ST(STORE_ASI(%o1,%o5)) /* first long word of the line */
+ stx %o1,[%o5+8]
+ stx %o1,[%o5+16]
+ stx %o1,[%o5+24]
+ stx %o1,[%o5+32]
+ subcc %o4, 64, %o4
+ stx %o1,[%o5+40]
+ add %o5, 56, %o5 /* %o5 now points at the last long word of the line */
+ stx %o1,[%o5-8] /* line offset 48 */
+ bgu,pt %XCC, .Lwr_loop_small
+ EX_ST(STORE_INIT(%o1,%o5)) /* delay slot: last long word (offset 56) */
+
+ ba .Lasi_done
+ add %o5, 8, %o5 /* restore %o5 offset */
+
+/* Special case loop for zero fill memsets */
+/* For each 64 byte cache line, single STBI to first element */
+/* clears line */
+.Lwrzero:
+ cmp %o4, MIN_ZERO /* check if enough bytes to set */
+ /* to pay %asi + membar cost */
+ blu %XCC, .Lshort_set
+ nop
+ sub %o4, 256, %o4 /* bias the count by one 256-byte iteration for the bge loop test */
+
+.Lwrzero_loop:
+ mov 64, %g3 /* %g3 = offset of the second line in this 256-byte group */
+ EX_ST(STORE_INIT(%o1,%o5)) /* line 0 */
+ subcc %o4, 256, %o4
+ EX_ST(STORE_INIT(%o1,%o5+%g3)) /* line 1 (offset 64) */
+ add %o5, 256, %o5
+ sub %g3, 192, %g3 /* %g3 = -128 relative to the advanced %o5 */
+ EX_ST(STORE_INIT(%o1,%o5+%g3)) /* line 2 (offset 128 of the group) */
+ add %g3, 64, %g3 /* %g3 = -64 */
+ bge,pt %XCC, .Lwrzero_loop
+ EX_ST(STORE_INIT(%o1,%o5+%g3)) /* delay slot: line 3 (offset 192 of the group) */
+ add %o4, 256, %o4 /* undo the bias applied before the loop */
+
+ brz,pn %o4, .Lbsi_done
+ nop
+.Lwrzero_small:
+ EX_ST(STORE_INIT(%o1,%o5)) /* one block-init store per remaining 64-byte line */
+ subcc %o4, 64, %o4
+ bgu,pt %XCC, .Lwrzero_small
+ add %o5, 64, %o5 /* delay slot: step to the next line */
+
+.Lasi_done:
+.Lbsi_done:
+ membar #StoreStore /* required after the block-init (BIS) stores above */
+
+.Lshort_set:
+ cmp %o4, 64 /* check if 64 bytes to set */
+ blu %XCC, 5f
+ nop
+4: /* set final blocks of 64 bytes */
+ stx %o1, [%o5]
+ stx %o1, [%o5+8]
+ stx %o1, [%o5+16]
+ stx %o1, [%o5+24]
+ subcc %o4, 64, %o4
+ stx %o1, [%o5+32]
+ stx %o1, [%o5+40]
+ add %o5, 64, %o5
+ stx %o1, [%o5-16] /* line offset 48, addressed from the advanced pointer */
+ bgu,pt %XCC, 4b
+ stx %o1, [%o5-8] /* delay slot: line offset 56 */
+
+5:
+ /* Set the remaining long words */
+.Lwrshort:
+ subcc %o3, 8, %o3 /* Can we store any long words? */
+ blu,pn %XCC, .Lwrchars
+ and %o2, 7, %o2 /* calc bytes left after long words */
+6:
+ subcc %o3, 8, %o3
+ stx %o1, [%o5] /* store the long words */
+ bgeu,pt %XCC, 6b
+ add %o5, 8, %o5 /* delay slot: advance the store pointer */
+
+.Lwrchars: /* check for extra chars */
+ brnz %o2, .Lwrfin
+ nop
+ retl /* nothing left to store */
+ nop
+
+.Lwdalign:
+ andcc %o5, 3, %o3 /* is sp1 aligned on a word boundary */
+ bz,pn %XCC, .Lwrword
+ andn %o2, 3, %o3 /* create word sized count in %o3 */
+
+ dec %o2 /* decrement count */
+ stb %o1, [%o5] /* clear a byte */
+ b .Lwdalign /* re-test alignment after each byte */
+ inc %o5 /* next byte */
+
+.Lwrword:
+ subcc %o3, 4, %o3
+ st %o1, [%o5] /* 4-byte writing loop */
+ bnz,pt %XCC, .Lwrword
+ add %o5, 4, %o5 /* delay slot: advance the store pointer */
+ and %o2, 3, %o2 /* leftover count, if any */
+
+.Lwrchar:
+ /* Set the remaining bytes, if any */
+ brz %o2, .Lexit
+ nop
+.Lwrfin:
+ deccc %o2
+ stb %o1, [%o5]
+ bgu,pt %XCC, .Lwrfin
+ inc %o5 /* delay slot: advance the store pointer */
+.Lexit:
+ retl /* %o0 was preserved */
+ nop
+END(__memset_niagara7)
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
new file mode 100644
index 0000000000..dd9d2c17cd
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
@@ -0,0 +1,30 @@
+/* Default SPARC64 memset implementation.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name)
+# undef weak_alias
+# define weak_alias(x, y)
+
+# define memset __memset_ultra1
+# define __bzero __bzero_ultra1
+# include <sysdeps/sparc/sparc64/memset.S>
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/memset.S b/sysdeps/sparc/sparc64/multiarch/memset.S
deleted file mode 100644
index bd0e160d70..0000000000
--- a/sysdeps/sparc/sparc64/multiarch/memset.S
+++ /dev/null
@@ -1,124 +0,0 @@
-/* Multiple versions of memset and bzero
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
- Contributed by David S. Miller (davem@davemloft.net)
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(memset)
- .type memset, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_CRYPTO, %o1
- andcc %o0, %o1, %g0
- be 1f
- andcc %o0, HWCAP_SPARC_BLKINIT, %g0
-# ifdef SHARED
- sethi %gdop_hix22(__memset_niagara4), %o1
- xor %o1, %gdop_lox10(__memset_niagara4), %o1
-# else
- set __memset_niagara4, %o1
-# endif
- ba 10f
- nop
-1: be 9f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__memset_niagara1), %o1
- xor %o1, %gdop_lox10(__memset_niagara1), %o1
-# else
- set __memset_niagara1, %o1
-# endif
- ba 10f
- nop
-9:
-# ifdef SHARED
- sethi %gdop_hix22(__memset_ultra1), %o1
- xor %o1, %gdop_lox10(__memset_ultra1), %o1
-# else
- set __memset_ultra1, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(memset)
-
-ENTRY(__bzero)
- .type bzero, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_CRYPTO, %o1
- andcc %o0, %o1, %g0
- be 1f
- andcc %o0, HWCAP_SPARC_BLKINIT, %g0
-# ifdef SHARED
- sethi %gdop_hix22(__bzero_niagara4), %o1
- xor %o1, %gdop_lox10(__bzero_niagara4), %o1
-# else
- set __bzero_niagara4, %o1
-# endif
- ba 10f
- nop
-1: be 9f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__bzero_niagara1), %o1
- xor %o1, %gdop_lox10(__bzero_niagara1), %o1
-# else
- set __bzero_niagara1, %o1
-# endif
- ba 10f
- nop
-9:
-# ifdef SHARED
- sethi %gdop_hix22(__bzero_ultra1), %o1
- xor %o1, %gdop_lox10(__bzero_ultra1), %o1
-# else
- set __bzero_ultra1, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(__bzero)
-
-weak_alias (__bzero, bzero)
-
-# undef weak_alias
-# define weak_alias(a, b)
-
-libc_hidden_builtin_def (memset)
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#define memset __memset_ultra1
-#define __bzero __bzero_ultra1
-
-#endif
-
-#include "../memset.S"
diff --git a/sysdeps/sparc/sparc64/multiarch/memset.c b/sysdeps/sparc/sparc64/multiarch/memset.c
new file mode 100644
index 0000000000..c2920c7df7
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memset.c
@@ -0,0 +1,33 @@
+/* Multiple versions of memset. SPARC64/Linux version.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define memset __redirect_memset
+# include <string.h>
+# undef memset
+
+# include <sparc-ifunc.h>
+
+# define SYMBOL_NAME memset
+# include "ifunc-memset.h"
+
+sparc_libc_ifunc_redirected (__redirect_memset, memset, IFUNC_SELECTOR)
+sparc_ifunc_redirected_hidden_def (__redirect_memset, memset)
+
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1-generic.S b/sysdeps/sparc/sparc64/multiarch/mul_1-generic.S
new file mode 100644
index 0000000000..f1b7e6026a
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/mul_1-generic.S
@@ -0,0 +1,2 @@
+#define __mpn_mul_1 __mpn_mul_1_generic
+#include <sysdeps/sparc/sparc64/mul_1.S>
diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S
index d2ddd110b0..79452919cc 100644
--- a/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S
+++ b/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S
@@ -1,7 +1,7 @@
! SPARC v9 64-bit VIS3 __mpn_mul_1 -- Multiply a limb vector with a single
! limb and store the product in a second limb vector.
!
-! Copyright (C) 2013-2016 Free Software Foundation, Inc.
+! Copyright (C) 2013-2018 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
! Contributed by David S. Miller <davem@davemloft.net>
!
diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1.S b/sysdeps/sparc/sparc64/multiarch/mul_1.S
deleted file mode 100644
index 0a1fbdbc51..0000000000
--- a/sysdeps/sparc/sparc64/multiarch/mul_1.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of mul_1
-
- Copyright (C) 2013-2016 Free Software Foundation, Inc.
- Contributed by David S. Miller (davem@davemloft.net)
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-ENTRY(__mpn_mul_1)
- .type __mpn_mul_1, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_VIS3, %o1
- andcc %o0, %o1, %g0
- be 1f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_mul_1_vis3), %o1
- xor %o1, %gdop_lox10(__mpn_mul_1_vis3), %o1
-# else
- set __mpn_mul_1_vis3, %o1
-# endif
- ba 10f
- nop
-1:
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_mul_1_generic), %o1
- xor %o1, %gdop_lox10(__mpn_mul_1_generic), %o1
-# else
- set __mpn_mul_1_generic, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(__mpn_mul_1)
-
-#define __mpn_mul_1 __mpn_mul_1_generic
-#include "../mul_1.S"
diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1.c b/sysdeps/sparc/sparc64/multiarch/mul_1.c
new file mode 100644
index 0000000000..0b8d0cf0da
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/mul_1.c
@@ -0,0 +1,28 @@
+/* __mpn_mul_1 ifunc resolver, Linux/sparc64 version.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <gmp.h>
+#include <sparc-ifunc.h>
+
+extern __typeof (mpn_mul_1) __mpn_mul_1_vis3 attribute_hidden; /* VIS3 variant, same prototype as mpn_mul_1 */
+extern __typeof (mpn_mul_1) __mpn_mul_1_generic attribute_hidden; /* generic variant, same prototype as mpn_mul_1 */
+
+sparc_libm_ifunc (__mpn_mul_1, /* hwcap is not declared in this file; presumably supplied by the macro - confirm in sparc-ifunc.h */
+ hwcap & HWCAP_SPARC_VIS3 /* selector: is the VIS3 capability bit set? */
+ ? __mpn_mul_1_vis3 /* yes: pick the VIS3 implementation */
+ : __mpn_mul_1_generic) /* no: pick the generic implementation */
diff --git a/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c b/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c
new file mode 100644
index 0000000000..e6d9a5c686
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/rtld-memmove.c
@@ -0,0 +1 @@
+#include <sysdeps/sparc/sparc64/rtld-memmove.c>
diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-block.c b/sysdeps/sparc/sparc64/multiarch/sha256-block.c
index 79966b93d7..9d65315a5a 100644
--- a/sysdeps/sparc/sparc64/multiarch/sha256-block.c
+++ b/sysdeps/sparc/sparc64/multiarch/sha256-block.c
@@ -1,12 +1,12 @@
#include <sparc-ifunc.h>
-#define sha256_process_block sha256_process_block_generic
-extern void sha256_process_block_generic (const void *buffer, size_t len,
- struct sha256_ctx *ctx);
+#define __sha256_process_block __sha256_process_block_generic
+extern void __sha256_process_block_generic (const void *buffer, size_t len,
+ struct sha256_ctx *ctx);
#include <crypt/sha256-block.c>
-#undef sha256_process_block
+#undef __sha256_process_block
extern void __sha256_process_block_crop (const void *buffer, size_t len,
struct sha256_ctx *ctx);
@@ -25,6 +25,8 @@ static bool cpu_supports_sha256(int hwcap)
return false;
}
-extern void sha256_process_block (const void *buffer, size_t len,
- struct sha256_ctx *ctx);
-sparc_libc_ifunc(sha256_process_block, cpu_supports_sha256(hwcap) ? __sha256_process_block_crop : sha256_process_block_generic);
+extern void __sha256_process_block (const void *buffer, size_t len,
+ struct sha256_ctx *ctx);
+sparc_libc_ifunc (__sha256_process_block,
+ cpu_supports_sha256(hwcap) ? __sha256_process_block_crop
+ : __sha256_process_block_generic);
diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-crop.S b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S
index 55186780eb..0f07b8d8a2 100644
--- a/sysdeps/sparc/sparc64/multiarch/sha256-crop.S
+++ b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S
@@ -1,5 +1,5 @@
/* SHA256 using sparc crypto opcodes.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-block.c b/sysdeps/sparc/sparc64/multiarch/sha512-block.c
index 0d1c3dd6d8..2863e05d09 100644
--- a/sysdeps/sparc/sparc64/multiarch/sha512-block.c
+++ b/sysdeps/sparc/sparc64/multiarch/sha512-block.c
@@ -1,12 +1,12 @@
#include <sparc-ifunc.h>
-#define sha512_process_block sha512_process_block_generic
-extern void sha512_process_block_generic (const void *buffer, size_t len,
- struct sha512_ctx *ctx);
+#define __sha512_process_block __sha512_process_block_generic
+extern void __sha512_process_block_generic (const void *buffer, size_t len,
+ struct sha512_ctx *ctx);
#include <crypt/sha512-block.c>
-#undef sha512_process_block
+#undef __sha512_process_block
extern void __sha512_process_block_crop (const void *buffer, size_t len,
struct sha512_ctx *ctx);
@@ -25,6 +25,8 @@ static bool cpu_supports_sha512(int hwcap)
return false;
}
-extern void sha512_process_block (const void *buffer, size_t len,
- struct sha512_ctx *ctx);
-sparc_libc_ifunc(sha512_process_block, cpu_supports_sha512(hwcap) ? __sha512_process_block_crop : sha512_process_block_generic);
+extern void __sha512_process_block (const void *buffer, size_t len,
+ struct sha512_ctx *ctx);
+sparc_libc_ifunc (__sha512_process_block,
+ cpu_supports_sha512(hwcap) ? __sha512_process_block_crop
+ : __sha512_process_block_generic);
diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-crop.S b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S
index c08a580e0c..f22eef3206 100644
--- a/sysdeps/sparc/sparc64/multiarch/sha512-crop.S
+++ b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S
@@ -1,5 +1,5 @@
/* SHA512 using sparc crypto opcodes.
- Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ Copyright (C) 2012-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n-generic.S b/sysdeps/sparc/sparc64/multiarch/sub_n-generic.S
new file mode 100644
index 0000000000..7cece934a7
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/sub_n-generic.S
@@ -0,0 +1,2 @@
+#define __mpn_sub_n __mpn_sub_n_generic
+#include <sysdeps/sparc/sparc64/sub_n.S>
diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S b/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S
index cc659ed7f4..b71c93d36d 100644
--- a/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S
+++ b/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S
@@ -1,7 +1,7 @@
! SPARC v9 64-bit VIS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0
! and store difference in a third limb vector.
!
-! Copyright (C) 2013-2016 Free Software Foundation, Inc.
+! Copyright (C) 2013-2018 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
! Contributed by David S. Miller <davem@davemloft.net>
!
diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n.S b/sysdeps/sparc/sparc64/multiarch/sub_n.S
deleted file mode 100644
index f69d909614..0000000000
--- a/sysdeps/sparc/sparc64/multiarch/sub_n.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of sub_n
-
- Copyright (C) 2013-2016 Free Software Foundation, Inc.
- Contributed by David S. Miller (davem@davemloft.net)
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-ENTRY(__mpn_sub_n)
- .type __mpn_sub_n, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_VIS3, %o1
- andcc %o0, %o1, %g0
- be 1f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_sub_n_vis3), %o1
- xor %o1, %gdop_lox10(__mpn_sub_n_vis3), %o1
-# else
- set __mpn_sub_n_vis3, %o1
-# endif
- ba 10f
- nop
-1:
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_sub_n_generic), %o1
- xor %o1, %gdop_lox10(__mpn_sub_n_generic), %o1
-# else
- set __mpn_sub_n_generic, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(__mpn_sub_n)
-
-#define __mpn_sub_n __mpn_sub_n_generic
-#include "../sub_n.S"
diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n.c b/sysdeps/sparc/sparc64/multiarch/sub_n.c
new file mode 100644
index 0000000000..2c1f428932
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/sub_n.c
@@ -0,0 +1,28 @@
+/* __mpn_sub_n ifunc resolver, Linux/sparc64 version.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <gmp.h>
+#include <sparc-ifunc.h>
+
+extern __typeof (mpn_sub_n) __mpn_sub_n_vis3 attribute_hidden; /* VIS3 variant, same prototype as mpn_sub_n */
+extern __typeof (mpn_sub_n) __mpn_sub_n_generic attribute_hidden; /* generic variant, same prototype as mpn_sub_n */
+
+sparc_libm_ifunc (__mpn_sub_n, /* hwcap is not declared in this file; presumably supplied by the macro - confirm in sparc-ifunc.h */
+ hwcap & HWCAP_SPARC_VIS3 /* selector: is the VIS3 capability bit set? */
+ ? __mpn_sub_n_vis3 /* yes: pick the VIS3 implementation */
+ : __mpn_sub_n_generic) /* no: pick the generic implementation */
diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1-generic.S b/sysdeps/sparc/sparc64/multiarch/submul_1-generic.S
new file mode 100644
index 0000000000..4c1536023d
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/submul_1-generic.S
@@ -0,0 +1,2 @@
+#define __mpn_submul_1 __mpn_submul_1_generic
+#include <sysdeps/sparc/sparc64/submul_1.S>
diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S
index e92c73e912..823f90afdd 100644
--- a/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S
+++ b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S
@@ -1,7 +1,7 @@
! SPARC v9 64-bit VIS3 __mpn_submul_1 -- Multiply a limb vector with a
! limb and subtract the result from a second limb vector.
!
-! Copyright (C) 2013-2016 Free Software Foundation, Inc.
+! Copyright (C) 2013-2018 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
! Contributed by David S. Miller <davem@davemloft.net>
!
diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1.S b/sysdeps/sparc/sparc64/multiarch/submul_1.S
deleted file mode 100644
index f0d9f2ffe3..0000000000
--- a/sysdeps/sparc/sparc64/multiarch/submul_1.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of submul_1
-
- Copyright (C) 2013-2016 Free Software Foundation, Inc.
- Contributed by David S. Miller (davem@davemloft.net)
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-ENTRY(__mpn_submul_1)
- .type __mpn_submul_1, @gnu_indirect_function
-# ifdef SHARED
- SETUP_PIC_REG_LEAF(o3, o5)
-# endif
- set HWCAP_SPARC_VIS3, %o1
- andcc %o0, %o1, %g0
- be 1f
- nop
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_submul_1_vis3), %o1
- xor %o1, %gdop_lox10(__mpn_submul_1_vis3), %o1
-# else
- set __mpn_submul_1_vis3, %o1
-# endif
- ba 10f
- nop
-1:
-# ifdef SHARED
- sethi %gdop_hix22(__mpn_submul_1_generic), %o1
- xor %o1, %gdop_lox10(__mpn_submul_1_generic), %o1
-# else
- set __mpn_submul_1_generic, %o1
-# endif
-10:
-# ifdef SHARED
- add %o3, %o1, %o1
-# endif
- retl
- mov %o1, %o0
-END(__mpn_submul_1)
-
-#define __mpn_submul_1 __mpn_submul_1_generic
-#include "../submul_1.S"
diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1.c b/sysdeps/sparc/sparc64/multiarch/submul_1.c
new file mode 100644
index 0000000000..6934c6915a
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/submul_1.c
@@ -0,0 +1,28 @@
+/* __mpn_submul_1 ifunc resolver, Linux/sparc64 version.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <gmp.h>
+#include <sparc-ifunc.h>
+
+extern __typeof (mpn_submul_1) __mpn_submul_1_vis3 attribute_hidden; /* VIS3 variant, same prototype as mpn_submul_1 */
+extern __typeof (mpn_submul_1) __mpn_submul_1_generic attribute_hidden; /* generic variant, same prototype as mpn_submul_1 */
+
+sparc_libm_ifunc (__mpn_submul_1, /* hwcap is not declared in this file; presumably supplied by the macro - confirm in sparc-ifunc.h */
+ hwcap & HWCAP_SPARC_VIS3 /* selector: is the VIS3 capability bit set? */
+ ? __mpn_submul_1_vis3 /* yes: pick the VIS3 implementation */
+ : __mpn_submul_1_generic) /* no: pick the generic implementation */