author    Jakub Jelinek <jakub@redhat.com>  2006-03-02 09:06:20 +0000
committer Jakub Jelinek <jakub@redhat.com>  2006-03-02 09:06:20 +0000
commit    4a22fa60cd42eba5ab1931547be33f7764ef6f73 (patch)
tree      bf57a7c7809c17153a9d680ebf8c6e4dd553cd64 /sysdeps
parent    a07552b6e042c28f925c6df06a1849f734975ea2 (diff)
Updated to fedora-glibc-20060302T0855
Diffstat (limited to 'sysdeps')
-rw-r--r--  sysdeps/ia64/memccpy.S                         |  55
-rw-r--r--  sysdeps/mach/hurd/Subdirs                      |  10
-rw-r--r--  sysdeps/rs6000/add_n.s                         |  81
-rw-r--r--  sysdeps/rs6000/addmul_1.s                      | 123
-rw-r--r--  sysdeps/rs6000/ffs.c                           |  42
-rw-r--r--  sysdeps/rs6000/lshift.s                        |  59
-rw-r--r--  sysdeps/rs6000/memcopy.h                       |  86
-rw-r--r--  sysdeps/rs6000/mul_1.s                         | 110
-rw-r--r--  sysdeps/rs6000/rshift.s                        |  57
-rw-r--r--  sysdeps/rs6000/sub_n.s                         |  82
-rw-r--r--  sysdeps/rs6000/submul_1.s                      | 128
-rw-r--r--  sysdeps/sparc/fpu/fraiseexcpt.c                |  34
-rw-r--r--  sysdeps/sparc/sparc32/fpu/libm-test-ulps       |   9
-rw-r--r--  sysdeps/sparc/sparc32/sparcv9v/memcpy.S        |   2
-rw-r--r--  sysdeps/sparc/sparc32/sparcv9v/memset.S        |   2
-rw-r--r--  sysdeps/sparc/sparc64/fpu/libm-test-ulps       |   9
-rw-r--r--  sysdeps/sparc/sparc64/sparcv9v/memcpy.S        | 593
-rw-r--r--  sysdeps/sparc/sparc64/sparcv9v/memset.S        | 127
-rw-r--r--  sysdeps/unix/sysv/linux/i386/fxstatat.c        |   2
-rw-r--r--  sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h  |   2
20 files changed, 826 insertions, 787 deletions
diff --git a/sysdeps/ia64/memccpy.S b/sysdeps/ia64/memccpy.S
index 53c43c512b..dd638d43c8 100644
--- a/sysdeps/ia64/memccpy.S
+++ b/sysdeps/ia64/memccpy.S
@@ -1,6 +1,6 @@
/* Optimized version of the memccpy() function.
This file is part of the GNU C Library.
- Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
+ Copyright (C) 2000,2001,2003,2006 Free Software Foundation, Inc.
Contributed by Dan Pop <Dan.Pop@cern.ch>.
The GNU C Library is free software; you can redistribute it and/or
@@ -183,27 +183,64 @@ ENTRY(memccpy)
br.ret.sptk.many b0
.recovery1:
- adds src = -(MEMLAT + 6 + 1) * 8, asrc
+#if MEMLAT != 6
+# error "MEMLAT must be 6!"
+#endif
+ adds src = -8, asrc
mov loopcnt = ar.lc
- mov tmp = ar.ec ;;
+ mov tmp = ar.ec
+ ;;
+(p[0]) adds src = -8, src
+ ;;
+(p[1]) adds src = -8, src
sub sh1 = (MEMLAT + 6 + 1), tmp
- shr.u sh2 = sh2, 3
- ;;
+ ;;
+(p[2]) adds src = -8, src
+ ;;
+(p[3]) adds src = -8, src
shl loopcnt = loopcnt, 3
- sub src = src, sh2
+ ;;
+(p[4]) adds src = -8, src
+ ;;
+(p[5]) adds src = -8, src
shl sh1 = sh1, 3
+ ;;
+(p[6]) adds src = -8, src
+ ;;
+(p[7]) adds src = -8, src
shl tmp = tmp, 3
;;
+(p[8]) adds src = -8, src
+ ;;
+(p[9]) adds src = -8, src
+ shr.u sh2 = sh2, 3
+ ;;
+(p[10]) adds src = -8, src
+ ;;
+(p[11]) adds src = -8, src
add len = len, loopcnt
- add src = sh1, src ;;
+ ;;
+ sub src = src, sh2
+ ;;
add len = tmp, len
-.back1:
+ add src = sh1, src
br.cond.sptk .cpyfew
.recovery2:
- add tmp = -(MEMLAT + 3) * 8, src
+#if MEMLAT != 6
+# error "MEMLAT must be 6!"
+#endif
+ add tmp = -8, src
(p7) br.cond.spnt .gotit
;;
+(p[0]) add tmp = -8, tmp ;;
+(p[1]) add tmp = -8, tmp ;;
+(p[2]) add tmp = -8, tmp ;;
+(p[3]) add tmp = -8, tmp ;;
+(p[4]) add tmp = -8, tmp ;;
+(p[5]) add tmp = -8, tmp ;;
+(p[6]) add tmp = -8, tmp ;;
+(p[7]) add tmp = -8, tmp ;;
ld8 r[MEMLAT+2] = [tmp] ;;
xor pos0[1] = r[MEMLAT+2], charx8 ;;
czx1.r pos0[1] = pos0[1] ;;
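
The rewrite above replaces the single constant adjustment -(MEMLAT + 6 + 1) * 8 with one unconditional 8-byte step plus one predicated step per still-active stage of the software pipeline, which is why the new code can insist on MEMLAT == 6. A minimal C model of that rollback, where the p[] array stands in for the ia64 rotating predicate registers (illustrative names, not the real registers):

/* Model of the .recovery1 source rollback: one fixed 8-byte step,
   plus one more for every pipeline stage still in flight.  */
static const char *
rollback_src (const char *asrc, const int p[12])
{
  const char *src = asrc - 8;
  for (int i = 0; i < 12; i++)
    if (p[i])
      src -= 8;
  return src;
}
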
diff --git a/sysdeps/mach/hurd/Subdirs b/sysdeps/mach/hurd/Subdirs
index 16b8348437..7a7757582a 100644
--- a/sysdeps/mach/hurd/Subdirs
+++ b/sysdeps/mach/hurd/Subdirs
@@ -1 +1,9 @@
-hurd
+# This file says that the hurd subdirectory should appear before all others.
+# The mach and hurd subdirectories have many generated header files which
+# much of the rest of the library depends on, so it is best to build them
+# first (and mach before hurd, at that). The before-compile additions in
+# sysdeps/{mach,hurd}/Makefile should make it reliably work for these files
+# not to exist when making in other directories, but it will be slower that
+# way with more somewhat expensive `make' invocations.
+
+first hurd
diff --git a/sysdeps/rs6000/add_n.s b/sysdeps/rs6000/add_n.s
deleted file mode 100644
index 216874e7a4..0000000000
--- a/sysdeps/rs6000/add_n.s
+++ /dev/null
@@ -1,81 +0,0 @@
-# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
-
-# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# s2_ptr r5
-# size r6
-
- .toc
- .extern __mpn_add_n[DS]
- .extern .__mpn_add_n
-.csect [PR]
- .align 2
- .globl __mpn_add_n
- .globl .__mpn_add_n
- .csect __mpn_add_n[DS]
-__mpn_add_n:
- .long .__mpn_add_n, TOC[tc0], 0
- .csect [PR]
-.__mpn_add_n:
- andil. 10,6,1 # odd or even number of limbs?
- l 8,0(4) # load least significant s1 limb
- l 0,0(5) # load least significant s2 limb
- cal 3,-4(3) # offset res_ptr, it's updated before it's used
- sri 10,6,1 # count for unrolled loop
- a 7,0,8 # add least significant limbs, set cy
- mtctr 10 # copy count into CTR
- beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
-
-# We have an odd # of limbs. Add the first limbs separately.
- cmpi 1,10,0 # is count for unrolled loop zero?
- bne 1,L1 # branch if not
- st 7,4(3)
- aze 3,10 # use the fact that r10 is zero...
- br # return
-
-# We added least significant limbs. Now reload the next limbs to enter loop.
-L1: lu 8,4(4) # load s1 limb and update s1_ptr
- lu 0,4(5) # load s2 limb and update s2_ptr
- stu 7,4(3)
- ae 7,0,8 # add limbs, set cy
-Leven: lu 9,4(4) # load s1 limb and update s1_ptr
- lu 10,4(5) # load s2 limb and update s2_ptr
- bdz Lend # If done, skip loop
-
-Loop: lu 8,4(4) # load s1 limb and update s1_ptr
- lu 0,4(5) # load s2 limb and update s2_ptr
- ae 11,9,10 # add previous limbs with cy, set cy
- stu 7,4(3) #
- lu 9,4(4) # load s1 limb and update s1_ptr
- lu 10,4(5) # load s2 limb and update s2_ptr
- ae 7,0,8 # add previous limbs with cy, set cy
- stu 11,4(3) #
- bdn Loop # decrement CTR and loop back
-
-Lend: ae 11,9,10 # add limbs with cy, set cy
- st 7,4(3) #
- st 11,8(3) #
- lil 3,0 # load cy into ...
- aze 3,3 # ... return value register
- br
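
The deleted __mpn_add_n adds two equal-length limb vectors with carry propagation; the odd/even split and the paired loads are a two-way unrolling of the carry chain, and sub_n below has the same structure with borrow instead of carry. A portable sketch of the operation the assembly implements, assuming 32-bit limbs:

#include <stddef.h>
#include <stdint.h>

/* Add two SIZE-limb numbers; return the final carry (0 or 1).  */
static uint32_t
mpn_add_n_model (uint32_t *res, const uint32_t *s1,
                 const uint32_t *s2, size_t size)
{
  uint32_t cy = 0;
  for (size_t i = 0; i < size; i++)
    {
      uint64_t sum = (uint64_t) s1[i] + s2[i] + cy;
      res[i] = (uint32_t) sum;        /* low 32 bits */
      cy = (uint32_t) (sum >> 32);    /* carry into the next limb */
    }
  return cy;
}
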
diff --git a/sysdeps/rs6000/addmul_1.s b/sysdeps/rs6000/addmul_1.s
deleted file mode 100644
index 7cd743cede..0000000000
--- a/sysdeps/rs6000/addmul_1.s
+++ /dev/null
@@ -1,123 +0,0 @@
-# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add
-# the result to a second limb vector.
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# size r5
-# s2_limb r6
-
-# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
-# obtain that operation, we have to use the 32x32->64 signed multiplication
-# instruction, and add the appropriate compensation to the high limb of the
-# result. We add the multiplicand if the multiplier has its most significant
-# bit set, and we add the multiplier if the multiplicand has its most
-# significant bit set. We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work). Since the POWER architecture has a branch unit
-# we can branch in zero cycles, so that's how we perform the additions.
-
- .toc
- .csect .__mpn_addmul_1[PR]
- .align 2
- .globl __mpn_addmul_1
- .globl .__mpn_addmul_1
- .csect __mpn_addmul_1[DS]
-__mpn_addmul_1:
- .long .__mpn_addmul_1[PR], TOC[tc0], 0
- .csect .__mpn_addmul_1[PR]
-.__mpn_addmul_1:
-
- cal 3,-4(3)
- l 0,0(4)
- cmpi 0,6,0
- mtctr 5
- mul 9,0,6
- srai 7,0,31
- and 7,7,6
- mfmq 8
- cax 9,9,7
- l 7,4(3)
- a 8,8,7 # add res_limb
- blt Lneg
-Lpos: bdz Lend
-
-Lploop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 0
- ae 8,0,9 # low limb + old_cy_limb + old cy
- l 7,4(3)
- aze 10,10 # propagate cy to new cy_limb
- a 8,8,7 # add res_limb
- bge Lp0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Lp0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 0
- ae 8,0,10
- l 7,4(3)
- aze 9,9
- a 8,8,7
- bge Lp1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Lp1: bdn Lploop
-
- b Lend
-
-Lneg: cax 9,9,0
- bdz Lend
-Lnloop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 7
- ae 8,7,9
- l 7,4(3)
- ae 10,10,0 # propagate cy to new cy_limb
- a 8,8,7 # add res_limb
- bge Ln0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Ln0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 7
- ae 8,7,10
- l 7,4(3)
- ae 9,9,0 # propagate cy to new cy_limb
- a 8,8,7 # add res_limb
- bge Ln1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Ln1: bdn Lnloop
- b Lend
-
-Lend0: cal 9,0(10)
-Lend: st 8,4(3)
- aze 3,9
- br
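
The header comment (repeated in mul_1.s and submul_1.s below) describes synthesizing an unsigned 32x32->64 multiply from POWER's signed mul/mfmq pair: interpreting an operand with its top bit set as signed under-counts the product by 2^32 times the other operand, so that operand is added back into the high limb. A sketch of the identity in C, with the natural wraparound of the 32-bit high limb doing the modular arithmetic:

#include <stdint.h>

/* Unsigned 32x32->64 product built from a signed multiply.  */
static uint64_t
umul32_via_signed (uint32_t a, uint32_t b)
{
  uint64_t prod = (uint64_t) ((int64_t) (int32_t) a * (int32_t) b);
  uint32_t hi = (uint32_t) (prod >> 32);
  if ((int32_t) a < 0)   /* a's MSB set: compensate with b */
    hi += b;
  if ((int32_t) b < 0)   /* b's MSB set: compensate with a */
    hi += a;
  return ((uint64_t) hi << 32) | (uint32_t) prod;
}
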
diff --git a/sysdeps/rs6000/ffs.c b/sysdeps/rs6000/ffs.c
deleted file mode 100644
index 619412cb50..0000000000
--- a/sysdeps/rs6000/ffs.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* ffs -- find first set bit in a word, counted from least significant end.
- For IBM rs6000.
- Copyright (C) 1991, 1992, 1997, 2004, 2005 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Torbjorn Granlund (tege@sics.se).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <string.h>
-
-#undef ffs
-
-#ifdef __GNUC__
-
-int
-__ffs (x)
- int x;
-{
- int cnt;
-
- asm ("cntlz %0,%1" : "=r" (cnt) : "r" (x & -x));
- return 32 - cnt;
-}
-weak_alias (__ffs, ffs)
-libc_hidden_builtin_def (ffs)
-
-#else
-#include <string/ffs.c>
-#endif
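
The deleted function isolates the lowest set bit with x & -x and converts a leading-zero count into a 1-based bit position. The same computation with GCC's builtin in place of cntlz (the hardware instruction returns 32 for a zero input, which the builtin does not guarantee, hence the explicit guard):

/* ffs: position of the least significant set bit, 1-based; 0 if none.  */
static int
ffs_model (int x)
{
  if (x == 0)
    return 0;
  unsigned lsb = (unsigned) x & -(unsigned) x;   /* lowest set bit */
  return 32 - __builtin_clz (lsb);
}
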
diff --git a/sysdeps/rs6000/lshift.s b/sysdeps/rs6000/lshift.s
deleted file mode 100644
index 8ccba7407e..0000000000
--- a/sysdeps/rs6000/lshift.s
+++ /dev/null
@@ -1,59 +0,0 @@
-# IBM POWER __mpn_lshift --
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s_ptr r4
-# size r5
-# cnt r6
-
- .toc
- .extern __mpn_lshift[DS]
- .extern .__mpn_lshift
-.csect [PR]
- .align 2
- .globl __mpn_lshift
- .globl .__mpn_lshift
- .csect __mpn_lshift[DS]
-__mpn_lshift:
- .long .__mpn_lshift, TOC[tc0], 0
- .csect [PR]
-.__mpn_lshift:
- sli 0,5,2
- cax 9,3,0
- cax 4,4,0
- sfi 8,6,32
- mtctr 5 # put limb count in CTR loop register
- lu 0,-4(4) # read most significant limb
- sre 3,0,8 # compute carry out limb, and init MQ register
- bdz Lend2 # if just one limb, skip loop
- lu 0,-4(4) # read 2:nd most significant limb
- sreq 7,0,8 # compute most significant limb of result
- bdz Lend # if just two limb, skip loop
-Loop: lu 0,-4(4) # load next lower limb
- stu 7,-4(9) # store previous result during read latency
- sreq 7,0,8 # compute result limb
- bdn Loop # loop back until CTR is zero
-Lend: stu 7,-4(9) # store 2:nd least significant limb
-Lend2: sle 7,0,6 # compute least significant limb
- st 7,-4(9) # store it" \
- br
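
The deleted __mpn_lshift walks from the most significant limb downward, using the MQ register (via sre/sreq) to splice each limb with bits carried over from the limb below it, and returns the bits shifted out of the top. A portable sketch for 32-bit limbs and 0 < cnt < 32; rshift below mirrors it from the least significant limb upward:

#include <stddef.h>
#include <stdint.h>

/* Shift a SIZE-limb number left by CNT bits; return the carry-out.  */
static uint32_t
mpn_lshift_model (uint32_t *res, const uint32_t *s,
                  size_t size, unsigned cnt)
{
  uint32_t carry_out = s[size - 1] >> (32 - cnt);
  for (size_t i = size - 1; i > 0; i--)
    res[i] = (s[i] << cnt) | (s[i - 1] >> (32 - cnt));
  res[0] = s[0] << cnt;
  return carry_out;
}
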
diff --git a/sysdeps/rs6000/memcopy.h b/sysdeps/rs6000/memcopy.h
deleted file mode 100644
index 8bdb6e9766..0000000000
--- a/sysdeps/rs6000/memcopy.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <sysdeps/generic/memcopy.h>
-
-#undef OP_T_THRES
-#define OP_T_THRES 32
-
-#undef BYTE_COPY_FWD
-#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
- do \
- { \
- size_t __nbytes = nbytes; \
- asm volatile("mtspr 1,%2\n" \
- "lsx 6,0,%1\n" \
- "stsx 6,0,%0" : /* No outputs. */ : \
- "b" (dst_bp), "b" (src_bp), "r" (__nbytes) : \
- "6", "7", "8", "9", "10", "11", "12", "13"); \
- dst_bp += __nbytes; \
- src_bp += __nbytes; \
- } while (0)
-
-#undef BYTE_COPY_BWD
-#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \
- do \
- { \
- size_t __nbytes = (nbytes); \
- dst_ep -= __nbytes; \
- src_ep -= __nbytes; \
- asm volatile("mtspr 1,%2\n" \
- "lsx 6,0,%1\n" \
- "stsx 6,0,%0" : /* No outputs. */ : \
- "b" (dst_ep), "b" (src_ep), "r" (__nbytes) : \
- "6", "7", "8", "9", "10", "11", "12", "13"); \
- } while (0)
-
-#undef WORD_COPY_FWD
-#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
- do \
- { \
- size_t __nblocks = (nbytes) / 32; \
- if (__nblocks != 0) \
- asm volatile("mtctr %4\n" \
- "lsi 6,%1,32\n" \
- "ai %1,%1,32\n" \
- "stsi 6,%0,32\n" \
- "ai %0,%0,32\n" \
- "bdn $-16" : \
- "=b" (dst_bp), "=b" (src_bp) : \
- "0" (dst_bp), "1" (src_bp), "r" (__nblocks) : \
- "6", "7", "8", "9", "10", "11", "12", "13"); \
- (nbytes_left) = (nbytes) % 32; \
- } while (0)
-
-#undef WORD_COPY_BWD
-#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \
- do \
- { \
- size_t __nblocks = (nbytes) / 32; \
- if (__nblocks != 0) \
- asm volatile("mtctr %4\n" \
- "ai %1,%1,-32\n" \
- "lsi 6,%1,32\n" \
- "ai %0,%0,-32\n" \
- "stsi 6,%0,32\n" \
- "bdn $-16" : \
- "=b" (dst_ep), "=b" (src_ep) : \
- "0" (dst_ep), "1" (src_ep), "r" (__nblocks) : \
- "6", "7", "8", "9", "10", "11", "12", "13"); \
- (nbytes_left) = (nbytes) % 32; \
- } while (0)
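
The deleted header overrides the generic memcopy.h primitives with POWER string instructions: lsi/stsi move 32 bytes per iteration through r6-r13, with the byte count loaded into the XER via mtspr 1. The contract each macro must satisfy is simple; here is a portable model of WORD_COPY_FWD under the generic header's semantics (forward copy, so a simple ascending loop is direction-safe):

#include <stddef.h>

/* Copy NBYTES rounded down to 32-byte blocks, advance both pointers,
   and leave the remainder in *NBYTES_LEFT.  */
static void
word_copy_fwd_model (char **dst_bp, const char **src_bp,
                     size_t *nbytes_left, size_t nbytes)
{
  size_t n = nbytes / 32 * 32;
  for (size_t i = 0; i < n; i++)
    (*dst_bp)[i] = (*src_bp)[i];
  *dst_bp += n;
  *src_bp += n;
  *nbytes_left = nbytes % 32;
}
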
diff --git a/sysdeps/rs6000/mul_1.s b/sysdeps/rs6000/mul_1.s
deleted file mode 100644
index c0feef4b72..0000000000
--- a/sysdeps/rs6000/mul_1.s
+++ /dev/null
@@ -1,110 +0,0 @@
-# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store
-# the result in a second limb vector.
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# size r5
-# s2_limb r6
-
-# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
-# obtain that operation, we have to use the 32x32->64 signed multiplication
-# instruction, and add the appropriate compensation to the high limb of the
-# result. We add the multiplicand if the multiplier has its most significant
-# bit set, and we add the multiplier if the multiplicand has its most
-# significant bit set. We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work). Since the POWER architecture has a branch unit
-# we can branch in zero cycles, so that's how we perform the additions.
-
- .toc
- .csect .__mpn_mul_1[PR]
- .align 2
- .globl __mpn_mul_1
- .globl .__mpn_mul_1
- .csect __mpn_mul_1[DS]
-__mpn_mul_1:
- .long .__mpn_mul_1[PR], TOC[tc0], 0
- .csect .__mpn_mul_1[PR]
-.__mpn_mul_1:
-
- cal 3,-4(3)
- l 0,0(4)
- cmpi 0,6,0
- mtctr 5
- mul 9,0,6
- srai 7,0,31
- and 7,7,6
- mfmq 8
- ai 0,0,0 # reset carry
- cax 9,9,7
- blt Lneg
-Lpos: bdz Lend
-Lploop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 0
- ae 8,0,9
- bge Lp0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Lp0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 0
- ae 8,0,10
- bge Lp1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Lp1: bdn Lploop
- b Lend
-
-Lneg: cax 9,9,0
- bdz Lend
-Lnloop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- cax 10,10,0 # adjust high limb for negative s2_limb
- mfmq 0
- ae 8,0,9
- bge Ln0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Ln0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- cax 9,9,0 # adjust high limb for negative s2_limb
- mfmq 0
- ae 8,0,10
- bge Ln1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Ln1: bdn Lnloop
- b Lend
-
-Lend0: cal 9,0(10)
-Lend: st 8,4(3)
- aze 3,9
- br
diff --git a/sysdeps/rs6000/rshift.s b/sysdeps/rs6000/rshift.s
deleted file mode 100644
index 145218fabd..0000000000
--- a/sysdeps/rs6000/rshift.s
+++ /dev/null
@@ -1,57 +0,0 @@
-# IBM POWER __mpn_rshift --
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s_ptr r4
-# size r5
-# cnt r6
-
- .toc
- .extern __mpn_rshift[DS]
- .extern .__mpn_rshift
-.csect [PR]
- .align 2
- .globl __mpn_rshift
- .globl .__mpn_rshift
- .csect __mpn_rshift[DS]
-__mpn_rshift:
- .long .__mpn_rshift, TOC[tc0], 0
- .csect [PR]
-.__mpn_rshift:
- sfi 8,6,32
- mtctr 5 # put limb count in CTR loop register
- l 0,0(4) # read least significant limb
- ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
- sle 3,0,8 # compute carry limb, and init MQ register
- bdz Lend2 # if just one limb, skip loop
- lu 0,4(4) # read 2:nd least significant limb
- sleq 7,0,8 # compute least significant limb of result
- bdz Lend # if just two limb, skip loop
-Loop: lu 0,4(4) # load next higher limb
- stu 7,4(9) # store previous result during read latency
- sleq 7,0,8 # compute result limb
- bdn Loop # loop back until CTR is zero
-Lend: stu 7,4(9) # store 2:nd most significant limb
-Lend2: sre 7,0,6 # compute most significant limb
- st 7,4(9) # store it" \
- br
diff --git a/sysdeps/rs6000/sub_n.s b/sysdeps/rs6000/sub_n.s
deleted file mode 100644
index d931870935..0000000000
--- a/sysdeps/rs6000/sub_n.s
+++ /dev/null
@@ -1,82 +0,0 @@
-# IBM POWER __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
-
-# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# s2_ptr r5
-# size r6
-
- .toc
- .extern __mpn_sub_n[DS]
- .extern .__mpn_sub_n
-.csect [PR]
- .align 2
- .globl __mpn_sub_n
- .globl .__mpn_sub_n
- .csect __mpn_sub_n[DS]
-__mpn_sub_n:
- .long .__mpn_sub_n, TOC[tc0], 0
- .csect [PR]
-.__mpn_sub_n:
- andil. 10,6,1 # odd or even number of limbs?
- l 8,0(4) # load least significant s1 limb
- l 0,0(5) # load least significant s2 limb
- cal 3,-4(3) # offset res_ptr, it's updated before it's used
- sri 10,6,1 # count for unrolled loop
- sf 7,0,8 # subtract least significant limbs, set cy
- mtctr 10 # copy count into CTR
- beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
-
-# We have an odd # of limbs. Add the first limbs separately.
- cmpi 1,10,0 # is count for unrolled loop zero?
- bne 1,L1 # branch if not
- st 7,4(3)
- sfe 3,0,0 # load !cy into ...
- sfi 3,3,0 # ... return value register
- br # return
-
-# We added least significant limbs. Now reload the next limbs to enter loop.
-L1: lu 8,4(4) # load s1 limb and update s1_ptr
- lu 0,4(5) # load s2 limb and update s2_ptr
- stu 7,4(3)
- sfe 7,0,8 # subtract limbs, set cy
-Leven: lu 9,4(4) # load s1 limb and update s1_ptr
- lu 10,4(5) # load s2 limb and update s2_ptr
- bdz Lend # If done, skip loop
-
-Loop: lu 8,4(4) # load s1 limb and update s1_ptr
- lu 0,4(5) # load s2 limb and update s2_ptr
- sfe 11,10,9 # subtract previous limbs with cy, set cy
- stu 7,4(3) #
- lu 9,4(4) # load s1 limb and update s1_ptr
- lu 10,4(5) # load s2 limb and update s2_ptr
- sfe 7,0,8 # subtract previous limbs with cy, set cy
- stu 11,4(3) #
- bdn Loop # decrement CTR and loop back
-
-Lend: sfe 11,10,9 # subtract limbs with cy, set cy
- st 7,4(3) #
- st 11,8(3) #
- sfe 3,0,0 # load !cy into ...
- sfi 3,3,0 # ... return value register
- br
diff --git a/sysdeps/rs6000/submul_1.s b/sysdeps/rs6000/submul_1.s
deleted file mode 100644
index 41095ab001..0000000000
--- a/sysdeps/rs6000/submul_1.s
+++ /dev/null
@@ -1,128 +0,0 @@
-# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
-# the result from a second limb vector.
-
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# size r5
-# s2_limb r6
-
-# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
-# obtain that operation, we have to use the 32x32->64 signed multiplication
-# instruction, and add the appropriate compensation to the high limb of the
-# result. We add the multiplicand if the multiplier has its most significant
-# bit set, and we add the multiplier if the multiplicand has its most
-# significant bit set. We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work). Since the POWER architecture has a branch unit
-# we can branch in zero cycles, so that's how we perform the additions.
-
- .toc
- .csect .__mpn_submul_1[PR]
- .align 2
- .globl __mpn_submul_1
- .globl .__mpn_submul_1
- .csect __mpn_submul_1[DS]
-__mpn_submul_1:
- .long .__mpn_submul_1[PR], TOC[tc0], 0
- .csect .__mpn_submul_1[PR]
-.__mpn_submul_1:
-
- cal 3,-4(3)
- l 0,0(4)
- cmpi 0,6,0
- mtctr 5
- mul 9,0,6
- srai 7,0,31
- and 7,7,6
- mfmq 11
- cax 9,9,7
- l 7,4(3)
- sf 8,11,7 # add res_limb
- a 11,8,11 # invert cy (r11 is junk)
- blt Lneg
-Lpos: bdz Lend
-
-Lploop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 0
- ae 11,0,9 # low limb + old_cy_limb + old cy
- l 7,4(3)
- aze 10,10 # propagate cy to new cy_limb
- sf 8,11,7 # add res_limb
- a 11,8,11 # invert cy (r11 is junk)
- bge Lp0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Lp0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 0
- ae 11,0,10
- l 7,4(3)
- aze 9,9
- sf 8,11,7
- a 11,8,11 # invert cy (r11 is junk)
- bge Lp1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Lp1: bdn Lploop
-
- b Lend
-
-Lneg: cax 9,9,0
- bdz Lend
-Lnloop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 7
- ae 11,7,9
- l 7,4(3)
- ae 10,10,0 # propagate cy to new cy_limb
- sf 8,11,7 # add res_limb
- a 11,8,11 # invert cy (r11 is junk)
- bge Ln0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Ln0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 7
- ae 11,7,10
- l 7,4(3)
- ae 9,9,0 # propagate cy to new cy_limb
- sf 8,11,7 # add res_limb
- a 11,8,11 # invert cy (r11 is junk)
- bge Ln1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Ln1: bdn Lnloop
- b Lend
-
-Lend0: cal 9,0(10)
-Lend: st 8,4(3)
- aze 3,9
- br
diff --git a/sysdeps/sparc/fpu/fraiseexcpt.c b/sysdeps/sparc/fpu/fraiseexcpt.c
index 0d45ec82d2..cbb8be80ec 100644
--- a/sysdeps/sparc/fpu/fraiseexcpt.c
+++ b/sysdeps/sparc/fpu/fraiseexcpt.c
@@ -25,12 +25,12 @@
int
__feraiseexcept (int excepts)
{
- static volatile double sink;
static const struct {
double zero, one, max, min, sixteen, pi;
} c = {
0.0, 1.0, DBL_MAX, DBL_MIN, 16.0, M_PI
};
+ double d;
/* Raise exceptions represented by EXPECTS. But we must raise only
one signal at a time. It is important the if the overflow/underflow
@@ -39,24 +39,44 @@ __feraiseexcept (int excepts)
/* First: invalid exception. */
if ((FE_INVALID & excepts) != 0)
- /* One example of a invalid operation is 0/0. */
- sink = c.zero / c.zero;
+ {
+ /* One example of a invalid operation is 0/0. */
+ __asm ("" : "=e" (d) : "0" (c.zero));
+ d /= c.zero;
+ __asm __volatile ("" : : "e" (d));
+ }
/* Next: division by zero. */
if ((FE_DIVBYZERO & excepts) != 0)
- sink = c.one / c.zero;
+ {
+ __asm ("" : "=e" (d) : "0" (c.one));
+ d /= c.zero;
+ __asm __volatile ("" : : "e" (d));
+ }
/* Next: overflow. */
if ((FE_OVERFLOW & excepts) != 0)
- sink = c.max * c.max;
+ {
+ __asm ("" : "=e" (d) : "0" (c.max));
+ d *= d;
+ __asm __volatile ("" : : "e" (d));
+ }
/* Next: underflow. */
if ((FE_UNDERFLOW & excepts) != 0)
- sink = c.min / c.sixteen;
+ {
+ __asm ("" : "=e" (d) : "0" (c.min));
+ d /= c.sixteen;
+ __asm __volatile ("" : : "e" (d));
+ }
/* Last: inexact. */
if ((FE_INEXACT & excepts) != 0)
- sink = c.one / c.pi;
+ {
+ __asm ("" : "=e" (d) : "0" (c.one));
+ d /= c.pi;
+ __asm __volatile ("" : : "e" (d));
+ }
/* Success. */
return 0;
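
The change replaces stores to a static volatile sink with a pair of empty asm statements bracketing a live floating-point operation: the first hides the constant operand from the optimizer, so 0/0 cannot be folded at compile time, and the volatile second marks the result as used, so the trapping operation cannot be dead-code eliminated; no memory store is needed. The idiom in isolation, using the same sparc "e" FP-register constraint as the patch:

/* Raise FE_DIVBYZERO without a volatile variable.  */
static void
raise_div_by_zero_model (void)
{
  double d;
  __asm ("" : "=e" (d) : "0" (1.0));      /* d = 1.0, opaque to GCC */
  d /= 0.0;                               /* forced runtime divide */
  __asm __volatile ("" : : "e" (d));      /* result counts as used */
}
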
diff --git a/sysdeps/sparc/sparc32/fpu/libm-test-ulps b/sysdeps/sparc/sparc32/fpu/libm-test-ulps
index 40d563971a..ccf53788a6 100644
--- a/sysdeps/sparc/sparc32/fpu/libm-test-ulps
+++ b/sysdeps/sparc/sparc32/fpu/libm-test-ulps
@@ -465,6 +465,11 @@ ifloat: 2
ildouble: 1
ldouble: 1
+# exp2
+Test "exp2 (10) == 1024":
+ildouble: 2
+ldouble: 2
+
# expm1
Test "expm1 (0.75) == 1.11700001661267466854536981983709561":
double: 1
@@ -1192,6 +1197,10 @@ ifloat: 2
ildouble: 1
ldouble: 1
+Function: "exp2":
+ildouble: 2
+ldouble: 2
+
Function: "expm1":
double: 1
float: 1
diff --git a/sysdeps/sparc/sparc32/sparcv9v/memcpy.S b/sysdeps/sparc/sparc32/sparcv9v/memcpy.S
new file mode 100644
index 0000000000..4c05f57bc2
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9v/memcpy.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/sparcv9v/memcpy.S>
diff --git a/sysdeps/sparc/sparc32/sparcv9v/memset.S b/sysdeps/sparc/sparc32/sparcv9v/memset.S
new file mode 100644
index 0000000000..5e46c7489f
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9v/memset.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/sparcv9v/memset.S>
diff --git a/sysdeps/sparc/sparc64/fpu/libm-test-ulps b/sysdeps/sparc/sparc64/fpu/libm-test-ulps
index 5719a7ca54..db5543e9eb 100644
--- a/sysdeps/sparc/sparc64/fpu/libm-test-ulps
+++ b/sysdeps/sparc/sparc64/fpu/libm-test-ulps
@@ -465,6 +465,11 @@ ifloat: 2
ildouble: 1
ldouble: 1
+# exp2
+Test "exp2 (10) == 1024":
+ildouble: 2
+ldouble: 2
+
# expm1
Test "expm1 (0.75) == 1.11700001661267466854536981983709561":
double: 1
@@ -1192,6 +1197,10 @@ ifloat: 2
ildouble: 1
ldouble: 1
+Function: "exp2":
+ildouble: 2
+ldouble: 2
+
Function: "expm1":
double: 1
float: 1
diff --git a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S
new file mode 100644
index 0000000000..05c837fa25
--- /dev/null
+++ b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S
@@ -0,0 +1,593 @@
+/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by David S. Miller (davem@davemloft.net)
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define ASI_P 0x80
+#define ASI_PNF 0x82
+
+#define LOAD(type,addr,dest) type##a [addr] ASI_P, dest
+#define LOAD_TWIN(addr_reg,dest0,dest1) \
+ ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
+
+#define STORE(type,src,addr) type src, [addr]
+#define STORE_INIT(src,addr) stxa src, [addr] %asi
+
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+ .register %g6,#scratch
+
+ .text
+ .align 32
+
+ENTRY(bcopy)
+ sub %o1, %o0, %o4
+ mov %o0, %g4
+ cmp %o4, %o2
+ mov %o1, %o0
+ bgeu,pt %XCC, 100f
+ mov %g4, %o1
+#ifndef USE_BPR
+ srl %o2, 0, %o2
+#endif
+ brnz,pn %o2, 220f
+ add %o0, %o2, %o0
+ retl
+ nop
+END(bcopy)
+
+ .align 32
+ENTRY(memcpy)
+100: /* %o0=dst, %o1=src, %o2=len */
+ mov %o0, %g5
+ cmp %o2, 0
+ be,pn %XCC, 85f
+218: or %o0, %o1, %o3
+ cmp %o2, 16
+ blu,a,pn %XCC, 80f
+ or %o3, %o2, %o3
+
+ /* 2 blocks (128 bytes) is the minimum we can do the block
+ * copy with. We need to ensure that we'll iterate at least
+ * once in the block copy loop. At worst we'll need to align
+ * the destination to a 64-byte boundary which can chew up
+ * to (64 - 1) bytes from the length before we perform the
+ * block copy loop.
+ */
+ cmp %o2, (2 * 64)
+ blu,pt %XCC, 70f
+ andcc %o3, 0x7, %g0
+
+ /* %o0: dst
+ * %o1: src
+ * %o2: len (known to be >= 128)
+ *
+ * The block copy loops will use %o4/%o5,%g2/%g3 as
+ * temporaries while copying the data.
+ */
+
+ LOAD(prefetch, %o1, #one_read)
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+
+ /* Align destination on 64-byte boundary. */
+ andcc %o0, (64 - 1), %o4
+ be,pt %XCC, 2f
+ sub %o4, 64, %o4
+ sub %g0, %o4, %o4 ! bytes to align dst
+ sub %o2, %o4, %o2
+1: subcc %o4, 1, %o4
+ LOAD(ldub, %o1, %g1)
+ STORE(stb, %g1, %o0)
+ add %o1, 1, %o1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+
+ /* If the source is on a 16-byte boundary we can do
+ * the direct block copy loop. If it is 8-byte aligned
+ * we can do the 16-byte loads offset by -8 bytes and the
+ * init stores offset by one register.
+ *
+ * If the source is not even 8-byte aligned, we need to do
+ * shifting and masking (basically integer faligndata).
+ *
+ * The careful bit with init stores is that if we store
+ * to any part of the cache line we have to store the whole
+ * cacheline else we can end up with corrupt L2 cache line
+ * contents. Since the loop works on 64-bytes of 64-byte
+ * aligned store data at a time, this is easy to ensure.
+ */
+2:
+ andcc %o1, (16 - 1), %o4
+ andn %o2, (64 - 1), %g1 ! block copy loop iterator
+ sub %o2, %g1, %o2 ! final sub-block copy bytes
+ be,pt %XCC, 50f
+ cmp %o4, 8
+ be,a,pt %XCC, 10f
+ sub %o1, 0x8, %o1
+
+ /* Neither 8-byte nor 16-byte aligned, shift and mask. */
+ mov %g1, %o4
+ and %o1, 0x7, %g1
+ sll %g1, 3, %g1
+ mov 64, %o3
+ andn %o1, 0x7, %o1
+ LOAD(ldx, %o1, %g2)
+ sub %o3, %g1, %o3
+ sllx %g2, %g1, %g2
+
+#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\
+ LOAD(ldx, SRC, TMP1); \
+ srlx TMP1, PRE_SHIFT, TMP2; \
+ or TMP2, PRE_VAL, TMP2; \
+ STORE_INIT(TMP2, DST); \
+ sllx TMP1, POST_SHIFT, PRE_VAL;
+
+1: add %o1, 0x8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00)
+ add %o1, 0x8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08)
+ add %o1, 0x8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10)
+ add %o1, 0x8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18)
+ add %o1, 32, %o1
+ LOAD(prefetch, %o1, #one_read)
+ sub %o1, 32 - 8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20)
+ add %o1, 8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28)
+ add %o1, 8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30)
+ add %o1, 8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38)
+ subcc %o4, 64, %o4
+ bne,pt %XCC, 1b
+ add %o0, 64, %o0
+
+#undef SWIVEL_ONE_DWORD
+
+ srl %g1, 3, %g1
+ ba,pt %XCC, 60f
+ add %o1, %g1, %o1
+
+10: /* Destination is 64-byte aligned, source was only 8-byte
+ * aligned but it has been subtracted by 8 and we perform
+ * one twin load ahead, then add 8 back into source when
+ * we finish the loop.
+ */
+ LOAD_TWIN(%o1, %o4, %o5)
+1: add %o1, 16, %o1
+ LOAD_TWIN(%o1, %g2, %g3)
+ add %o1, 16 + 32, %o1
+ LOAD(prefetch, %o1, #one_read)
+ sub %o1, 32, %o1
+ STORE_INIT(%o5, %o0 + 0x00) ! initializes cache line
+ STORE_INIT(%g2, %o0 + 0x08)
+ LOAD_TWIN(%o1, %o4, %o5)
+ add %o1, 16, %o1
+ STORE_INIT(%g3, %o0 + 0x10)
+ STORE_INIT(%o4, %o0 + 0x18)
+ LOAD_TWIN(%o1, %g2, %g3)
+ add %o1, 16, %o1
+ STORE_INIT(%o5, %o0 + 0x20)
+ STORE_INIT(%g2, %o0 + 0x28)
+ LOAD_TWIN(%o1, %o4, %o5)
+ STORE_INIT(%g3, %o0 + 0x30)
+ STORE_INIT(%o4, %o0 + 0x38)
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 1b
+ add %o0, 64, %o0
+
+ ba,pt %XCC, 60f
+ add %o1, 0x8, %o1
+
+50: /* Destination is 64-byte aligned, and source is 16-byte
+ * aligned.
+ */
+1: LOAD_TWIN(%o1, %o4, %o5)
+ add %o1, 16, %o1
+ LOAD_TWIN(%o1, %g2, %g3)
+ add %o1, 16 + 32, %o1
+ LOAD(prefetch, %o1, #one_read)
+ sub %o1, 32, %o1
+ STORE_INIT(%o4, %o0 + 0x00) ! initializes cache line
+ STORE_INIT(%o5, %o0 + 0x08)
+ LOAD_TWIN(%o1, %o4, %o5)
+ add %o1, 16, %o1
+ STORE_INIT(%g2, %o0 + 0x10)
+ STORE_INIT(%g3, %o0 + 0x18)
+ LOAD_TWIN(%o1, %g2, %g3)
+ add %o1, 16, %o1
+ STORE_INIT(%o4, %o0 + 0x20)
+ STORE_INIT(%o5, %o0 + 0x28)
+ STORE_INIT(%g2, %o0 + 0x30)
+ STORE_INIT(%g3, %o0 + 0x38)
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 1b
+ add %o0, 64, %o0
+ /* fall through */
+
+60:
+ /* %o2 contains any final bytes still needed to be copied
+ * over. If anything is left, we copy it one byte at a time.
+ */
+ wr %g0, ASI_PNF, %asi
+ brz,pt %o2, 85f
+ sub %o0, %o1, %o3
+ ba,a,pt %XCC, 90f
+
+ .align 64
+70: /* 16 < len <= 64 */
+ bne,pn %XCC, 75f
+ sub %o0, %o1, %o3
+
+72:
+ andn %o2, 0xf, %o4
+ and %o2, 0xf, %o2
+1: subcc %o4, 0x10, %o4
+ LOAD(ldx, %o1, %o5)
+ add %o1, 0x08, %o1
+ LOAD(ldx, %o1, %g1)
+ sub %o1, 0x08, %o1
+ STORE(stx, %o5, %o1 + %o3)
+ add %o1, 0x8, %o1
+ STORE(stx, %g1, %o1 + %o3)
+ bgu,pt %XCC, 1b
+ add %o1, 0x8, %o1
+73: andcc %o2, 0x8, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x8, %o2
+ LOAD(ldx, %o1, %o5)
+ STORE(stx, %o5, %o1 + %o3)
+ add %o1, 0x8, %o1
+1: andcc %o2, 0x4, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x4, %o2
+ LOAD(lduw, %o1, %o5)
+ STORE(stw, %o5, %o1 + %o3)
+ add %o1, 0x4, %o1
+1: cmp %o2, 0
+ be,pt %XCC, 85f
+ nop
+ ba,pt %XCC, 90f
+ nop
+
+75:
+ andcc %o0, 0x7, %g1
+ sub %g1, 0x8, %g1
+ be,pn %icc, 2f
+ sub %g0, %g1, %g1
+ sub %o2, %g1, %o2
+
+1: subcc %g1, 1, %g1
+ LOAD(ldub, %o1, %o5)
+ STORE(stb, %o5, %o1 + %o3)
+ bgu,pt %icc, 1b
+ add %o1, 1, %o1
+
+2: add %o1, %o3, %o0
+ andcc %o1, 0x7, %g1
+ bne,pt %icc, 8f
+ sll %g1, 3, %g1
+
+ cmp %o2, 16
+ bgeu,pt %icc, 72b
+ nop
+ ba,a,pt %XCC, 73b
+
+8: mov 64, %o3
+ andn %o1, 0x7, %o1
+ LOAD(ldx, %o1, %g2)
+ sub %o3, %g1, %o3
+ andn %o2, 0x7, %o4
+ sllx %g2, %g1, %g2
+1: add %o1, 0x8, %o1
+ LOAD(ldx, %o1, %g3)
+ subcc %o4, 0x8, %o4
+ srlx %g3, %o3, %o5
+ or %o5, %g2, %o5
+ STORE(stx, %o5, %o0)
+ add %o0, 0x8, %o0
+ bgu,pt %icc, 1b
+ sllx %g3, %g1, %g2
+
+ srl %g1, 3, %g1
+ andcc %o2, 0x7, %o2
+ be,pn %icc, 85f
+ add %o1, %g1, %o1
+ ba,pt %XCC, 90f
+ sub %o0, %o1, %o3
+
+ .align 64
+80: /* 0 < len <= 16 */
+ andcc %o3, 0x3, %g0
+ bne,pn %XCC, 90f
+ sub %o0, %o1, %o3
+
+1:
+ subcc %o2, 4, %o2
+ LOAD(lduw, %o1, %g1)
+ STORE(stw, %g1, %o1 + %o3)
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+85: retl
+ mov %g5, %o0
+
+ .align 32
+90:
+ subcc %o2, 1, %o2
+ LOAD(ldub, %o1, %g1)
+ STORE(stb, %g1, %o1 + %o3)
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+ mov %g5, %o0
+
+END(memcpy)
+
+#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
+ ldx [%src - offset - 0x20], %t0; \
+ ldx [%src - offset - 0x18], %t1; \
+ ldx [%src - offset - 0x10], %t2; \
+ ldx [%src - offset - 0x08], %t3; \
+ stw %t0, [%dst - offset - 0x1c]; \
+ srlx %t0, 32, %t0; \
+ stw %t0, [%dst - offset - 0x20]; \
+ stw %t1, [%dst - offset - 0x14]; \
+ srlx %t1, 32, %t1; \
+ stw %t1, [%dst - offset - 0x18]; \
+ stw %t2, [%dst - offset - 0x0c]; \
+ srlx %t2, 32, %t2; \
+ stw %t2, [%dst - offset - 0x10]; \
+ stw %t3, [%dst - offset - 0x04]; \
+ srlx %t3, 32, %t3; \
+ stw %t3, [%dst - offset - 0x08];
+
+#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
+ ldx [%src - offset - 0x20], %t0; \
+ ldx [%src - offset - 0x18], %t1; \
+ ldx [%src - offset - 0x10], %t2; \
+ ldx [%src - offset - 0x08], %t3; \
+ stx %t0, [%dst - offset - 0x20]; \
+ stx %t1, [%dst - offset - 0x18]; \
+ stx %t2, [%dst - offset - 0x10]; \
+ stx %t3, [%dst - offset - 0x08]; \
+ ldx [%src - offset - 0x40], %t0; \
+ ldx [%src - offset - 0x38], %t1; \
+ ldx [%src - offset - 0x30], %t2; \
+ ldx [%src - offset - 0x28], %t3; \
+ stx %t0, [%dst - offset - 0x40]; \
+ stx %t1, [%dst - offset - 0x38]; \
+ stx %t2, [%dst - offset - 0x30]; \
+ stx %t3, [%dst - offset - 0x28];
+
+#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
+ ldx [%src + offset + 0x00], %t0; \
+ ldx [%src + offset + 0x08], %t1; \
+ stw %t0, [%dst + offset + 0x04]; \
+ srlx %t0, 32, %t2; \
+ stw %t2, [%dst + offset + 0x00]; \
+ stw %t1, [%dst + offset + 0x0c]; \
+ srlx %t1, 32, %t3; \
+ stw %t3, [%dst + offset + 0x08];
+
+#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
+ ldx [%src + offset + 0x00], %t0; \
+ ldx [%src + offset + 0x08], %t1; \
+ stx %t0, [%dst + offset + 0x00]; \
+ stx %t1, [%dst + offset + 0x08];
+
+ .align 32
+228: andcc %o2, 1, %g0
+ be,pt %icc, 2f+4
+1: ldub [%o1 - 1], %o5
+ sub %o1, 1, %o1
+ sub %o0, 1, %o0
+ subcc %o2, 1, %o2
+ be,pn %xcc, 229f
+ stb %o5, [%o0]
+2: ldub [%o1 - 1], %o5
+ sub %o0, 2, %o0
+ ldub [%o1 - 2], %g5
+ sub %o1, 2, %o1
+ subcc %o2, 2, %o2
+ stb %o5, [%o0 + 1]
+ bne,pt %xcc, 2b
+ stb %g5, [%o0]
+229: retl
+ mov %g4, %o0
+out: retl
+ mov %g5, %o0
+
+ .align 32
+ENTRY(memmove)
+ mov %o0, %g5
+#ifndef USE_BPR
+ srl %o2, 0, %o2
+#endif
+ brz,pn %o2, out
+ sub %o0, %o1, %o4
+ cmp %o4, %o2
+ bgeu,pt %XCC, 218b
+ mov %o0, %g4
+ add %o0, %o2, %o0
+220: add %o1, %o2, %o1
+ cmp %o2, 15
+ bleu,pn %xcc, 228b
+ andcc %o0, 7, %g2
+ sub %o0, %o1, %g5
+ andcc %g5, 3, %o5
+ bne,pn %xcc, 232f
+ andcc %o1, 3, %g0
+ be,a,pt %xcc, 236f
+ andcc %o1, 4, %g0
+ andcc %o1, 1, %g0
+ be,pn %xcc, 4f
+ andcc %o1, 2, %g0
+ ldub [%o1 - 1], %g2
+ sub %o1, 1, %o1
+ sub %o0, 1, %o0
+ sub %o2, 1, %o2
+ be,pn %xcc, 5f
+ stb %g2, [%o0]
+4: lduh [%o1 - 2], %g2
+ sub %o1, 2, %o1
+ sub %o0, 2, %o0
+ sub %o2, 2, %o2
+ sth %g2, [%o0]
+5: andcc %o1, 4, %g0
+236: be,a,pn %xcc, 2f
+ andcc %o2, -128, %g6
+ lduw [%o1 - 4], %g5
+ sub %o1, 4, %o1
+ sub %o0, 4, %o0
+ sub %o2, 4, %o2
+ stw %g5, [%o0]
+ andcc %o2, -128, %g6
+2: be,pn %xcc, 235f
+ andcc %o0, 4, %g0
+ be,pn %xcc, 282f + 4
+5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
+ RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
+ RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
+ RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
+ subcc %g6, 128, %g6
+ sub %o1, 128, %o1
+ bne,pt %xcc, 5b
+ sub %o0, 128, %o0
+235: andcc %o2, 0x70, %g6
+41: be,pn %xcc, 280f
+ andcc %o2, 8, %g0
+
+279: rd %pc, %o5
+ sll %g6, 1, %g5
+ sub %o1, %g6, %o1
+ sub %o5, %g5, %o5
+ jmpl %o5 + %lo(280f - 279b), %g0
+ sub %o0, %g6, %o0
+ RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
+ RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
+ RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
+ RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
+ RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
+ RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
+ RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
+280: be,pt %xcc, 281f
+ andcc %o2, 4, %g0
+ ldx [%o1 - 8], %g2
+ sub %o0, 8, %o0
+ stw %g2, [%o0 + 4]
+ sub %o1, 8, %o1
+ srlx %g2, 32, %g2
+ stw %g2, [%o0]
+281: be,pt %xcc, 1f
+ andcc %o2, 2, %g0
+ lduw [%o1 - 4], %g2
+ sub %o1, 4, %o1
+ stw %g2, [%o0 - 4]
+ sub %o0, 4, %o0
+1: be,pt %xcc, 1f
+ andcc %o2, 1, %g0
+ lduh [%o1 - 2], %g2
+ sub %o1, 2, %o1
+ sth %g2, [%o0 - 2]
+ sub %o0, 2, %o0
+1: be,pt %xcc, 211f
+ nop
+ ldub [%o1 - 1], %g2
+ stb %g2, [%o0 - 1]
+211: retl
+ mov %g4, %o0
+
+282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
+ RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
+ subcc %g6, 128, %g6
+ sub %o1, 128, %o1
+ bne,pt %xcc, 282b
+ sub %o0, 128, %o0
+ andcc %o2, 0x70, %g6
+ be,pn %xcc, 284f
+ andcc %o2, 8, %g0
+
+283: rd %pc, %o5
+ sub %o1, %g6, %o1
+ sub %o5, %g6, %o5
+ jmpl %o5 + %lo(284f - 283b), %g0
+ sub %o0, %g6, %o0
+ RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
+ RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
+ RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
+ RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
+ RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
+ RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
+ RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
+284: be,pt %xcc, 285f
+ andcc %o2, 4, %g0
+ ldx [%o1 - 8], %g2
+ sub %o0, 8, %o0
+ sub %o1, 8, %o1
+ stx %g2, [%o0]
+285: be,pt %xcc, 1f
+ andcc %o2, 2, %g0
+ lduw [%o1 - 4], %g2
+ sub %o0, 4, %o0
+ sub %o1, 4, %o1
+ stw %g2, [%o0]
+1: be,pt %xcc, 1f
+ andcc %o2, 1, %g0
+ lduh [%o1 - 2], %g2
+ sub %o0, 2, %o0
+ sub %o1, 2, %o1
+ sth %g2, [%o0]
+1: be,pt %xcc, 1f
+ nop
+ ldub [%o1 - 1], %g2
+ stb %g2, [%o0 - 1]
+1: retl
+ mov %g4, %o0
+
+232: ldub [%o1 - 1], %g5
+ sub %o1, 1, %o1
+ sub %o0, 1, %o0
+ subcc %o2, 1, %o2
+ bne,pt %xcc, 232b
+ stb %g5, [%o0]
+234: retl
+ mov %g4, %o0
+END(memmove)
+
+#ifdef USE_BPR
+weak_alias (memcpy, __align_cpy_1)
+weak_alias (memcpy, __align_cpy_2)
+weak_alias (memcpy, __align_cpy_4)
+weak_alias (memcpy, __align_cpy_8)
+weak_alias (memcpy, __align_cpy_16)
+#endif
+libc_hidden_builtin_def (memcpy)
+libc_hidden_builtin_def (memmove)
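
The comment before label 2 above describes the hard case, a source that is not even 8-byte aligned, as "integer faligndata": read aligned doublewords and splice adjacent pairs with shifts and ORs so every store is aligned (SWIVEL_ONE_DWORD is one step of that splice). A C sketch, assuming a big-endian machine like sparc and a misalignment strictly between 0 and 8:

#include <stddef.h>
#include <stdint.h>

/* Copy NWORDS aligned 8-byte words to DST from a misaligned SRC.  */
static void
copy_misaligned_words (uint64_t *dst, const unsigned char *src,
                       size_t nwords)
{
  unsigned mis  = (uintptr_t) src & 7;     /* 0 < mis < 8 assumed */
  unsigned pre  = mis * 8;                 /* bits already consumed */
  unsigned post = 64 - pre;
  const uint64_t *asrc = (const uint64_t *) (src - mis);
  uint64_t hold = asrc[0] << pre;          /* usable bits of word 0 */
  for (size_t i = 0; i < nwords; i++)
    {
      uint64_t next = asrc[i + 1];         /* reads one word ahead */
      dst[i] = hold | (next >> post);      /* splice two source words */
      hold = next << pre;
    }
}
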
diff --git a/sysdeps/sparc/sparc64/sparcv9v/memset.S b/sysdeps/sparc/sparc64/sparcv9v/memset.S
new file mode 100644
index 0000000000..7a51ef77dc
--- /dev/null
+++ b/sysdeps/sparc/sparc64/sparcv9v/memset.S
@@ -0,0 +1,127 @@
+/* Set a block of memory to some byte value. For SUN4V Niagara.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by David S. Miller (davem@davemloft.net)
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define ASI_P 0x80
+#define ASI_PNF 0x82
+
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
+ .register %g2,#scratch
+
+ .text
+ .align 32
+
+ENTRY(memset)
+ /* %o0=buf, %o1=pat, %o2=len */
+ and %o1, 0xff, %o3
+ mov %o2, %o1
+ sllx %o3, 8, %g1
+ or %g1, %o3, %o2
+ sllx %o2, 16, %g1
+ or %g1, %o2, %o2
+ sllx %o2, 32, %g1
+ ba,pt %XCC, 1f
+ or %g1, %o2, %o2
+
+ENTRY(__bzero)
+ clr %o2
+1: brz,pn %o1, 90f
+ mov %o0, %o3
+
+ wr %g0, ASI_P, %asi
+
+ cmp %o1, 15
+ bl,pn %icc, 70f
+ andcc %o0, 0x7, %g1
+ be,pt %XCC, 2f
+ mov 8, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: stba %o2, [%o0 + 0x00] %asi
+ subcc %g1, 1, %g1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+2: cmp %o1, 128
+ bl,pn %icc, 60f
+ andcc %o0, (64 - 1), %g1
+ be,pt %XCC, 40f
+ mov 64, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: stxa %o2, [%o0 + 0x00] %asi
+ subcc %g1, 8, %g1
+ bne,pt %XCC, 1b
+ add %o0, 8, %o0
+
+40:
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+ andn %o1, (64 - 1), %g1
+ sub %o1, %g1, %o1
+50:
+ stxa %o2, [%o0 + 0x00] %asi
+ stxa %o2, [%o0 + 0x08] %asi
+ stxa %o2, [%o0 + 0x10] %asi
+ stxa %o2, [%o0 + 0x18] %asi
+ stxa %o2, [%o0 + 0x20] %asi
+ stxa %o2, [%o0 + 0x28] %asi
+ stxa %o2, [%o0 + 0x30] %asi
+ stxa %o2, [%o0 + 0x38] %asi
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 50b
+ add %o0, 64, %o0
+
+ wr %g0, ASI_P, %asi
+ brz,pn %o1, 80f
+60:
+ andncc %o1, 0x7, %g1
+ be,pn %XCC, 2f
+ sub %o1, %g1, %o1
+1: stxa %o2, [%o0 + 0x00] %asi
+ subcc %g1, 8, %g1
+ bne,pt %XCC, 1b
+ add %o0, 8, %o0
+2: brz,pt %o1, 80f
+ nop
+
+70:
+1: stba %o2, [%o0 + 0x00] %asi
+ subcc %o1, 1, %o1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+
+ /* fallthrough */
+
+80:
+ wr %g0, ASI_PNF, %asi
+
+90:
+ retl
+ mov %o3, %o0
+END(__bzero)
+END(memset)
+
+libc_hidden_builtin_def (memset)
+weak_alias (__bzero, bzero)
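
memset's prologue replicates the fill byte across a 64-bit register with three shift/or doubling steps (the sllx/or pairs above); __bzero branches into the same store loops with an all-zero pattern. The replication in C:

#include <stdint.h>

/* Spread one byte across all eight bytes of a 64-bit word.  */
static uint64_t
spread_byte (unsigned char c)
{
  uint64_t p = c;
  p |= p << 8;     /* byte -> 16 bits */
  p |= p << 16;    /* 16 bits -> 32 bits */
  p |= p << 32;    /* 32 bits -> full 64-bit pattern */
  return p;
}
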
diff --git a/sysdeps/unix/sysv/linux/i386/fxstatat.c b/sysdeps/unix/sysv/linux/i386/fxstatat.c
index b077435553..94f6e81186 100644
--- a/sysdeps/unix/sysv/linux/i386/fxstatat.c
+++ b/sysdeps/unix/sysv/linux/i386/fxstatat.c
@@ -172,5 +172,5 @@ libc_hidden_def (__fxstatat)
#ifdef XSTAT_IS_XSTAT64
# undef __fxstatat64
strong_alias (__fxstatat, __fxstatat64);
-libc_hidden_def (__fxstatat64)
+libc_hidden_ver (__fxstatat, __fxstatat64)
#endif
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h b/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
index fc80c9ff86..9ddec8e041 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
@@ -268,7 +268,7 @@
register unsigned long gpr6 asm ("6") = (unsigned long)(arg5);
#define DECLARGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
DECLARGS_5(arg1, arg2, arg3, arg4, arg5) \
- register unsigned long gpr6 asm ("7") = (unsigned long)(arg6);
+ register unsigned long gpr7 asm ("7") = (unsigned long)(arg6);
#define ASMFMT_0
#define ASMFMT_1 , "0" (gpr2)
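
The fix matters because DECLARGS_5 already declares a variable named gpr6 (bound to r6); redeclaring gpr6 for arg6 would not compile, and arg6 must be pinned to r7. A minimal illustration of the register-asm-variable idiom these macros rely on; "d" is the s390 general-register constraint, and the empty asm stands in for the real syscall asm that consumes the registers:

static void
pin_args_model (unsigned long arg5, unsigned long arg6)
{
  register unsigned long gpr6 asm ("6") = arg5;  /* arg5 -> r6 */
  register unsigned long gpr7 asm ("7") = arg6;  /* arg6 -> r7 */
  asm volatile ("" : : "d" (gpr6), "d" (gpr7));  /* keep both live */
}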