summaryrefslogtreecommitdiff
path: root/sysdeps/sparc/sparc32
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/sparc/sparc32')
-rw-r--r--sysdeps/sparc/sparc32/Dist4
-rw-r--r--sysdeps/sparc/sparc32/Implies1
-rw-r--r--sysdeps/sparc/sparc32/Makefile51
-rw-r--r--sysdeps/sparc/sparc32/__longjmp.S59
-rw-r--r--sysdeps/sparc/sparc32/add_n.S226
-rw-r--r--sysdeps/sparc/sparc32/addmul_1.S147
-rw-r--r--sysdeps/sparc/sparc32/alloca.S32
-rw-r--r--sysdeps/sparc/sparc32/bits/endian.h3
-rw-r--r--sysdeps/sparc/sparc32/bits/setjmp.h16
-rw-r--r--sysdeps/sparc/sparc32/bsd-_setjmp.S40
-rw-r--r--sysdeps/sparc/sparc32/bsd-setjmp.S40
-rw-r--r--sysdeps/sparc/sparc32/divrem.m4238
-rw-r--r--sysdeps/sparc/sparc32/dl-machine.h363
-rw-r--r--sysdeps/sparc/sparc32/dotmul.S123
-rw-r--r--sysdeps/sparc/sparc32/e_sqrt.c34
-rw-r--r--sysdeps/sparc/sparc32/elf/Makefile4
-rw-r--r--sysdeps/sparc/sparc32/elf/start.c68
-rw-r--r--sysdeps/sparc/sparc32/fpu/bits/fenv.h76
-rw-r--r--sysdeps/sparc/sparc32/fpu/fpu_control.h69
-rw-r--r--sysdeps/sparc/sparc32/lshift.S95
-rw-r--r--sysdeps/sparc/sparc32/memcopy.h21
-rw-r--r--sysdeps/sparc/sparc32/mul_1.S199
-rw-r--r--sysdeps/sparc/sparc32/rem.S369
-rw-r--r--sysdeps/sparc/sparc32/rshift.S92
-rw-r--r--sysdeps/sparc/sparc32/sdiv.S369
-rw-r--r--sysdeps/sparc/sparc32/setjmp.S54
-rw-r--r--sysdeps/sparc/sparc32/sparcv8/addmul_1.S124
-rw-r--r--sysdeps/sparc/sparc32/sparcv8/mul_1.S99
-rw-r--r--sysdeps/sparc/sparc32/sparcv8/submul_1.S58
-rw-r--r--sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S186
-rw-r--r--sysdeps/sparc/sparc32/sub_n.S311
-rw-r--r--sysdeps/sparc/sparc32/submul_1.S147
-rw-r--r--sysdeps/sparc/sparc32/udiv.S352
-rw-r--r--sysdeps/sparc/sparc32/udiv_qrnnd.S165
-rw-r--r--sysdeps/sparc/sparc32/umul.S153
-rw-r--r--sysdeps/sparc/sparc32/urem.S352
36 files changed, 4740 insertions, 0 deletions
diff --git a/sysdeps/sparc/sparc32/Dist b/sysdeps/sparc/sparc32/Dist
new file mode 100644
index 0000000000..6c3b23baa5
--- /dev/null
+++ b/sysdeps/sparc/sparc32/Dist
@@ -0,0 +1,4 @@
+dotmul.S umul.S
+divrem.m4 sdiv.S udiv.S rem.S urem.S
+alloca.S
+sys/trap.h
diff --git a/sysdeps/sparc/sparc32/Implies b/sysdeps/sparc/sparc32/Implies
new file mode 100644
index 0000000000..39a34c5f57
--- /dev/null
+++ b/sysdeps/sparc/sparc32/Implies
@@ -0,0 +1 @@
+wordsize-32
diff --git a/sysdeps/sparc/sparc32/Makefile b/sysdeps/sparc/sparc32/Makefile
new file mode 100644
index 0000000000..deec2f8145
--- /dev/null
+++ b/sysdeps/sparc/sparc32/Makefile
@@ -0,0 +1,51 @@
+# Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public License
+# as published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Library General Public License for more details.
+
+# You should have received a copy of the GNU Library General Public
+# License along with the GNU C Library; see the file COPYING.LIB. If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+ifeq ($(subdir),gnulib)
+routines = dotmul umul $(divrem) alloca
+endif # gnulib
+
+# We distribute these files, even though they are generated,
+# so as to avoid the need for a functioning m4 to build the library.
+divrem := sdiv udiv rem urem
+
++divrem-NAME-sdiv := div
++divrem-NAME-udiv := udiv
++divrem-NAME-rem := rem
++divrem-NAME-urem := urem
++divrem-NAME = $(+divrem-NAME-$(basename $(notdir $@)))
++divrem-OP-div := div
++divrem-OP-udiv := div
++divrem-OP-rem := rem
++divrem-OP-urem := rem
++divrem-S-div := true
++divrem-S-rem := true
++divrem-S-udiv := false
++divrem-S-urem := false
+$(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4
+ (echo "define(NAME,\`.$(+divrem-NAME)')\
+ define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\
+ define(S,\`$(+divrem-S-$(+divrem-NAME))')\
+ /* This file is generated from divrem.m4; DO NOT EDIT! */"; \
+ cat $<) | $(M4) > $@-tmp
+# Make it unwritable so noone will edit it by mistake.
+ -chmod a-w $@-tmp
+ mv -f $@-tmp $@
+ test ! -d CVS || cvs commit -m'Regenerated from $<' $@
+
+sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/%.S)
diff --git a/sysdeps/sparc/sparc32/__longjmp.S b/sysdeps/sparc/sparc32/__longjmp.S
new file mode 100644
index 0000000000..36e1c170dd
--- /dev/null
+++ b/sysdeps/sparc/sparc32/__longjmp.S
@@ -0,0 +1,59 @@
+/* Copyright (C) 1991, 1993, 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+#define _ASM 1
+#include <bits/setjmp.h>
+#define ENV(reg) [%g1 + (reg * 4)]
+
+ENTRY (__longjmp)
+ /* Store our arguments in global registers so we can still
+ use them while unwinding frames and their register windows. */
+ mov %o0, %g1 /* ENV in %g1 */
+ orcc %o1, %g0, %g6 /* VAL in %g6 */
+ be,a 0f /* Branch if zero; else skip delay slot. */
+ mov 1, %g6 /* Delay slot only hit if zero: VAL = 1. */
+0:
+
+ /* Cache target FP in register %g7. */
+ ld ENV (JB_FP), %g7
+
+ /* Now we will loop, unwinding the register windows up the stack
+ until the restored %fp value matches the target value in %g7. */
+
+loop: cmp %fp, %g7 /* Have we reached the target frame? */
+ bl,a loop /* Loop while current fp is below target. */
+ restore /* Unwind register window in delay slot. */
+ be,a found /* Better have hit it exactly. */
+ ld ENV (JB_SP), %o0 /* Delay slot: extract target SP. */
+
+bogus: /* Get here only if the jmp_buf or stack is clobbered. */
+ call C_SYMBOL_NAME (abort)
+ nop
+ unimp 0
+
+found: /* We have unwound register windows so %fp matches the target. */
+ cmp %o0, %sp /* Check jmp_buf SP vs register window. */
+ bge,a sp_ok /* Saved must not be deeper than register. */
+ mov %o0, %sp /* OK, install new SP. */
+ b,a bogus /* Bogus, we lose. */
+
+sp_ok: ld ENV (JB_PC), %o0 /* Extract target return PC. */
+ jmp %o0 + 8 /* Return there. */
+ mov %g6, %o0 /* Delay slot: set return value. */
diff --git a/sysdeps/sparc/sparc32/add_n.S b/sysdeps/sparc/sparc32/add_n.S
new file mode 100644
index 0000000000..9852c256aa
--- /dev/null
+++ b/sysdeps/sparc/sparc32/add_n.S
@@ -0,0 +1,226 @@
+! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+#define res_ptr %o0
+#define s1_ptr %o1
+#define s2_ptr %o2
+#define size %o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n):
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L1 ! branch if alignment differs
+ nop
+! ** V1a **
+L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
+ be L_v1 ! if no, branch
+ nop
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add size,-1,size
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L_v1: addx %g0,%g0,%o4 ! save cy in register
+ cmp size,2 ! if size < 2 ...
+ bl Lend2 ! ... branch to tail code
+ subcc %g0,%o4,%g0 ! restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc size,-10,size
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt Lfin1
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop1: addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge Loop1
+ subcc %g0,%o4,%g0 ! restore cy
+
+Lfin1: addcc size,8-2,size
+ blt Lend1
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1: addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge Loope1
+ subcc %g0,%o4,%g0 ! restore cy
+Lend1: addxcc %g4,%g2,%o4
+ addxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+
+ andcc size,1,%g0
+ be Lret1
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+Lret1: retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+L1: xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L2
+ nop
+! ** V1b **
+ mov s2_ptr,%g1
+ mov s1_ptr,s2_ptr
+ b L0
+ mov %g1,s1_ptr
+
+! ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+ of s1_ptr and s2_ptr are the same. */
+
+L2: cmp size,1
+ be Ljone
+ nop
+ andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
+ be L_v2 ! if no, branch
+ nop
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add size,-1,size
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L_v2: addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ blt Lfin2
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop2: ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge Loop2
+ subcc %g0,%o4,%g0 ! restore cy
+
+Lfin2: addcc size,8-2,size
+ blt Lend2
+ subcc %g0,%o4,%g0 ! restore cy
+Loope2: ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge Loope2
+ subcc %g0,%o4,%g0 ! restore cy
+Lend2: andcc size,1,%g0
+ be Lret2
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+Ljone: ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+Lret2: retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
diff --git a/sysdeps/sparc/sparc32/addmul_1.S b/sysdeps/sparc/sparc32/addmul_1.S
new file mode 100644
index 0000000000..375d25db6b
--- /dev/null
+++ b/sysdeps/sparc/sparc32/addmul_1.S
@@ -0,0 +1,147 @@
+! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1):
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu Large
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L0
+ add %o4,-4,%o4
+Loop0:
+ addcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L0: wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne Loop0
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+
+Large: ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L1
+ add %o4,-4,%o4
+Loop:
+ addcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L1: wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne Loop
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
diff --git a/sysdeps/sparc/sparc32/alloca.S b/sysdeps/sparc/sparc32/alloca.S
new file mode 100644
index 0000000000..dcbd171163
--- /dev/null
+++ b/sysdeps/sparc/sparc32/alloca.S
@@ -0,0 +1,32 @@
+/* Copyright (C) 1994, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include "sysdep.h"
+
+/* Code produced by Sun's C compiler calls this function with two extra
+ arguments which it makes relocatable symbols but seem always to be
+ the constant 96; I have no idea what they are for. */
+
+#ifndef NO_UNDERSCORES
+#define __builtin_alloca ___builtin_alloca
+#endif
+
+ENTRY (__builtin_alloca)
+ sub %sp, %o0, %sp /* Push some stack space. */
+ retl /* Return; the returned buffer leaves 96 */
+ add %sp, 96, %o0 /* bytes of register save area at the top. */
diff --git a/sysdeps/sparc/sparc32/bits/endian.h b/sysdeps/sparc/sparc32/bits/endian.h
new file mode 100644
index 0000000000..f1a75c0652
--- /dev/null
+++ b/sysdeps/sparc/sparc32/bits/endian.h
@@ -0,0 +1,3 @@
+/* SPARC is big-endian. */
+
+#define __BYTE_ORDER __BIG_ENDIAN
diff --git a/sysdeps/sparc/sparc32/bits/setjmp.h b/sysdeps/sparc/sparc32/bits/setjmp.h
new file mode 100644
index 0000000000..43bae1a496
--- /dev/null
+++ b/sysdeps/sparc/sparc32/bits/setjmp.h
@@ -0,0 +1,16 @@
+/* Define the machine-dependent type `jmp_buf'. SPARC version. */
+
+#if defined (__USE_MISC) || defined (_ASM)
+#define JB_SP 0
+#define JB_FP 1
+#define JB_PC 2
+#endif
+
+#ifndef _ASM
+typedef int __jmp_buf[3];
+#endif
+
+/* Test if longjmp to JMPBUF would unwind the frame
+ containing a local variable at ADDRESS. */
+#define _JMPBUF_UNWINDS(jmpbuf, address) \
+ ((int) (address) < (jmpbuf)[JB_SP])
diff --git a/sysdeps/sparc/sparc32/bsd-_setjmp.S b/sysdeps/sparc/sparc32/bsd-_setjmp.S
new file mode 100644
index 0000000000..bf49765f4d
--- /dev/null
+++ b/sysdeps/sparc/sparc32/bsd-_setjmp.S
@@ -0,0 +1,40 @@
+/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. Sparc version.
+ Copyright (C) 1994, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+ENTRY (_setjmp)
+
+#ifdef PIC
+ save %sp, -64, %sp
+1: call 2f
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
+2: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
+ add %g1, %o7, %g1
+ sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g2
+ restore
+ or %g2, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g2
+ ld [%g1+%g2], %g1
+#else
+ sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g1
+ or %g1, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g1
+#endif
+
+ jmp %g1
+ mov %g0, %o1 /* Pass second argument of zero. */
diff --git a/sysdeps/sparc/sparc32/bsd-setjmp.S b/sysdeps/sparc/sparc32/bsd-setjmp.S
new file mode 100644
index 0000000000..16dc260765
--- /dev/null
+++ b/sysdeps/sparc/sparc32/bsd-setjmp.S
@@ -0,0 +1,40 @@
+/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. Sparc version.
+ Copyright (C) 1994, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+ENTRY (setjmp)
+
+#ifdef PIC
+ save %sp, -64, %sp
+1: call 2f
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
+2: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
+ add %g1, %o7, %g1
+ sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g2
+ restore
+ or %g2, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g2
+ ld [%g1+%g2], %g1
+#else
+ sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g1
+ or %g1, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g1
+#endif
+
+ jmp %g1
+ mov 1, %o1 /* Pass second argument of one. */
diff --git a/sysdeps/sparc/sparc32/divrem.m4 b/sysdeps/sparc/sparc32/divrem.m4
new file mode 100644
index 0000000000..665abf11ae
--- /dev/null
+++ b/sysdeps/sparc/sparc32/divrem.m4
@@ -0,0 +1,238 @@
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * NAME name of function to generate
+ * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1
+ * S S=true => signed; S=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top `decade' of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+define(N, `4')dnl
+define(WORDSIZE, `32')dnl
+define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
+dnl
+define(dividend, `%o0')dnl
+define(divisor, `%o1')dnl
+define(Q, `%o2')dnl
+define(R, `%o3')dnl
+define(ITER, `%o4')dnl
+define(V, `%o5')dnl
+dnl
+dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
+define(T, `%g1')dnl
+define(SC, `%g7')dnl
+ifelse(S, `true', `define(SIGN, `%g6')')dnl
+
+dnl
+dnl This is the recursive definition for developing quotient digits.
+dnl
+dnl Parameters:
+dnl $1 the current depth, 1 <= $1 <= N
+dnl $2 the current accumulation of quotient bits
+dnl N max depth
+dnl
+dnl We add a new bit to $2 and either recurse or insert the bits in
+dnl the quotient. R, Q, and V are inputs and outputs as defined above;
+dnl the condition codes are expected to reflect the input R, and are
+dnl modified to reflect the output R.
+dnl
+define(DEVELOP_QUOTIENT_BITS,
+` ! depth $1, accumulated bits $2
+ bl L.$1.eval(2**N+$2)
+ srl V,1,V
+ ! remainder is positive
+ subcc R,V,R
+ ifelse($1, N,
+ ` b 9f
+ add Q, ($2*2+1), Q
+ ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
+L.$1.eval(2**N+$2):
+ ! remainder is negative
+ addcc R,V,R
+ ifelse($1, N,
+ ` b 9f
+ add Q, ($2*2-1), Q
+ ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
+ ifelse($1, 1, `9:')')dnl
+
+#include "sysdep.h"
+#ifdef __linux__
+#include <asm/traps.h>
+#else
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+#endif
+
+FUNC(NAME)
+ifelse(S, `true',
+` ! compute sign of result; if neither is negative, no problem
+ orcc divisor, dividend, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ifelse(OP, `div',
+` xor divisor, dividend, SIGN ! compute sign in any case',
+` mov dividend, SIGN ! sign of remainder matches dividend')
+ tst divisor
+ bge 1f
+ tst dividend
+ ! divisor is definitely negative; dividend might also be negative
+ bge 2f ! if dividend not negative...
+ sub %g0, divisor, divisor ! in any case, make divisor nonneg
+1: ! dividend is negative, divisor is nonnegative
+ sub %g0, dividend, dividend ! make dividend nonnegative
+2:
+')
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc divisor, %g0, V
+ bne 1f
+ mov dividend, R
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp R, V ! if divisor exceeds dividend, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr Q
+ sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
+ cmp R, T
+ blu Lnot_really_big
+ clr ITER
+
+ ! `Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.'
+ 1:
+ cmp V, T
+ bgeu 3f
+ mov 1, SC
+ sll V, N, V
+ b 1b
+ add ITER, 1, ITER
+
+ ! Now compute SC.
+ 2: addcc V, V, V
+ bcc Lnot_too_big
+ add SC, 1, SC
+
+ ! We get here if the divisor overflowed while shifting.
+ ! This means that R has the high-order bit set.
+ ! Restore V and subtract from R.
+ sll T, TOPBITS, T ! high order bit
+ srl V, 1, V ! rest of V
+ add V, T, V
+ b Ldo_single_div
+ sub SC, 1, SC
+
+ Lnot_too_big:
+ 3: cmp V, R
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! V > R: went too far: back up 1 step
+ ! srl V, 1, V
+ ! dec SC
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that R >= V, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if R >= 0. Because both R and V may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc SC, 1, SC
+ bl Lend_regular_divide
+ nop
+ sub R, V, R
+ mov 1, Q
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll Q, 1, Q
+ bl 1f
+ srl V, 1, V
+ ! R >= 0
+ sub R, V, R
+ b 2f
+ add Q, 1, Q
+ 1: ! R < 0
+ add R, V, R
+ sub Q, 1, Q
+ 2:
+ Lend_single_divloop:
+ subcc SC, 1, SC
+ bge Lsingle_divloop
+ tst R
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll V, N, V
+ cmp V, R
+ bleu 1b
+ addcc ITER, 1, ITER
+ be Lgot_result
+ sub ITER, 1, ITER
+
+ tst R ! set up for initial iteration
+Ldivloop:
+ sll Q, N, Q
+ DEVELOP_QUOTIENT_BITS(1, 0)
+Lend_regular_divide:
+ subcc ITER, 1, ITER
+ bge Ldivloop
+ tst R
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ifelse(OP, `div',
+` sub Q, 1, Q
+', ` add R, divisor, R
+')
+
+Lgot_result:
+ifelse(S, `true',
+` ! check to see if answer should be < 0
+ tst SIGN
+ bl,a 1f
+ ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R')
+1:')
+ retl
+ ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h
new file mode 100644
index 0000000000..f7bcba0c2f
--- /dev/null
+++ b/sysdeps/sparc/sparc32/dl-machine.h
@@ -0,0 +1,363 @@
+/* Machine-dependent ELF dynamic relocation inline functions. SPARC version.
+ Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If
+ not, write to the Free Software Foundation, Inc.,
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#define ELF_MACHINE_NAME "sparc"
+
+#include <assert.h>
+#include <string.h>
+#include <link.h>
+#include <sys/param.h>
+
+
+/* Some SPARC opcodes we need to use for self-modifying code. */
+#define OPCODE_NOP 0x01000000 /* nop */
+#define OPCODE_CALL 0x40000000 /* call ?; add PC-rel word address */
+#define OPCODE_SETHI_G1 0x03000000 /* sethi ?, %g1; add value>>10 */
+#define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */
+#define OPCODE_SAVE_SP64 0x9de3bfc0 /* save %sp, -64, %sp */
+
+
+/* Return nonzero iff E_MACHINE is compatible with the running host. */
+static inline int
+elf_machine_matches_host (Elf32_Half e_machine)
+{
+ return e_machine == EM_SPARC;
+}
+
+
+/* Return the link-time address of _DYNAMIC. Conveniently, this is the
+ first element of the GOT. This must be inlined in a function which
+ uses global data. */
+static inline Elf32_Addr
+elf_machine_dynamic (void)
+{
+ register Elf32_Addr *got asm ("%l7");
+ return *got;
+}
+
+/* Return the run-time load address of the shared object. */
+static inline Elf32_Addr
+elf_machine_load_address (void)
+{
+ register Elf32_Addr pc __asm("%o7"), got;
+
+ /* Utilize the fact that a local .got entry will be partially
+ initialized at startup awaiting its RELATIVE fixup. */
+
+ __asm("sethi %%hi(.Load_address),%1\n"
+ ".Load_address:\n\t"
+ "call 1f\n\t"
+ "or %1,%%lo(.Load_address),%1\n"
+ "1:\tld [%%l7+%1],%1"
+ : "=r"(pc), "=r"(got));
+
+ return pc - got;
+}
+
+ Elf32_Addr addr;
+
+ asm (
+ "add %%fp,0x44,%%o2\n\t" /* o2 = point to argc */
+ "ld [%%o2 - 4],%%o0\n\t" /* o0 = load argc */
+ "sll %%o0, 2, %%o0\n\t" /* o0 = argc * sizeof (int) */
+ "add %%o2,%%o0,%%o2\n\t" /* o2 = skip over argv */
+ "add %%o2,4,%%o2\n\t" /* skip over null after argv */
+
+ /* Now %o2 is pointing to env, skip over that as well. */
+ "1:\n\t"
+ "ld [%%o2],%%o0\n\t"
+ "cmp %%o0,0\n\t"
+ "bnz 1b\n\t"
+ "add %%o2,4,%%o2\n\t"
+
+ /* Note that above, we want to advance the NULL after envp so
+ we always add 4. */
+
+ /* Now, search for the AT_BASE property. */
+ "2:\n\t"
+ "ld [%%o2],%%o0\n\t"
+ "cmp %%o0,0\n\t"
+ "be,a 3f\n\t"
+ "or %%g0,%%g0,%0\n\t"
+ "cmp %%o0,7\n\t" /* AT_BASE = 7 */
+ "be,a 3f\n\t"
+ "ld [%%o2+4],%0\n\t"
+ "b 2b\n\t"
+ "add %%o2,8,%%o2\n\t"
+ /* At this point %0 has the load address for the interpreter */
+ "3:\n\t"
+ : "=r" (addr)
+ : /* no inputs */
+ : "o0", "o2");
+ return addr;
+}
+
+#ifdef RESOLVE
+/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
+ MAP is the object containing the reloc. */
+
+static inline void
+elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
+ const Elf32_Sym *sym, const struct r_found_version *version,
+ Elf32_Addr *const reloc_addr)
+{
+ Elf32_Addr loadbase;
+
+ if (ELF32_R_TYPE (reloc->r_info) == R_SPARC_RELATIVE)
+ {
+#ifndef RTLD_BOOTSTRAP
+ if (map != &_dl_rtld_map) /* Already done in rtld itself. */
+#endif
+ *reloc_addr += map->l_addr + reloc->r_addend;
+ }
+ else
+ {
+ const Elf32_Sym *const refsym = sym;
+ Elf32_Addr value;
+ if (sym->st_shndx != SHN_UNDEF &&
+ ELF32_ST_BIND (sym->st_info) == STB_LOCAL)
+ value = map->l_addr;
+ else
+ {
+ value = RESOLVE (&sym, version, ELF32_R_TYPE (reloc->r_info));
+ if (sym)
+ value += sym->st_value;
+ }
+ value += reloc->r_addend; /* Assume copy relocs have zero addend. */
+
+ switch (ELF32_R_TYPE (reloc->r_info))
+ {
+ case R_SPARC_COPY:
+ if (sym->st_size > refsym->st_size
+ || (_dl_verbose && sym->st_size < refsym->st_size))
+ {
+ extern char **_dl_argv;
+ const char *strtab;
+
+ strtab = ((void *) map->l_addr
+ + map->l_info[DT_STRTAB]->d_un.d_ptr);
+ _dl_sysdep_error (_dl_argv[0] ?: "<program name unknown>",
+ ": Symbol `", strtab + refsym->st_name,
+ "' has different size in shared object, "
+ "consider re-linking\n", NULL);
+ }
+ memcpy (reloc_addr, (void *) value, MIN (sym->st_size,
+ refsym->st_size));
+ break;
+ case R_SPARC_GLOB_DAT:
+ case R_SPARC_32:
+ *reloc_addr = value;
+ break;
+ case R_SPARC_JMP_SLOT:
+ reloc_addr[1] = OPCODE_SETHI_G1 | (value >> 10);
+ reloc_addr[2] = OPCODE_JMP_G1 | (value & 0x3ff);
+ break;
+ case R_SPARC_8:
+ *(char *) reloc_addr = value;
+ break;
+ case R_SPARC_16:
+ *(short *) reloc_addr = value;
+ break;
+ case R_SPARC_DISP8:
+ *(char *) reloc_addr = (value - (Elf32_Addr) reloc_addr);
+ break;
+ case R_SPARC_DISP16:
+ *(short *) reloc_addr = (value - (Elf32_Addr) reloc_addr);
+ break;
+ case R_SPARC_DISP32:
+ *reloc_addr = (value - (Elf32_Addr) reloc_addr);
+ break;
+ case R_SPARC_LO10:
+ *reloc_addr = (*reloc_addr & ~0x3ff) | (value & 0x3ff);
+ break;
+ case R_SPARC_WDISP30:
+ *reloc_addr = ((*reloc_addr & 0xc0000000)
+ | ((value - (unsigned int) reloc_addr) >> 2));
+ break;
+ case R_SPARC_HI22:
+ *reloc_addr = (*reloc_addr & 0xffc00000) | (value >> 10);
+ break;
+ case R_SPARC_NONE: /* Alright, Wilbur. */
+ break;
+ default:
+ assert (! "unexpected dynamic reloc type");
+ break;
+ }
+ }
+}
+
+static inline void
+elf_machine_lazy_rel (struct link_map *map, const Elf32_Rela *reloc)
+{
+ switch (ELF32_R_TYPE (reloc->r_info))
+ {
+ case R_SPARC_NONE:
+ break;
+ case R_SPARC_JMP_SLOT:
+ break;
+ default:
+ assert (! "unexpected PLT reloc type");
+ break;
+ }
+}
+
+#endif /* RESOLVE */
+
+/* Nonzero iff TYPE should not be allowed to resolve to one of
+ the main executable's symbols, as for a COPY reloc. */
+#define elf_machine_lookup_noexec_p(type) ((type) == R_SPARC_COPY)
+
+/* Nonzero iff TYPE describes relocation of a PLT entry, so
+ PLT entries should not be allowed to define the value. */
+#define elf_machine_lookup_noplt_p(type) ((type) == R_SPARC_JMP_SLOT)
+
+/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
+#define ELF_MACHINE_RELOC_NOPLT R_SPARC_JMP_SLOT
+
+/* The SPARC never uses Elf32_Rel relocations. */
+#define ELF_MACHINE_NO_REL 1
+
+/* The SPARC overlaps DT_RELA and DT_PLTREL. */
+#define ELF_MACHINE_PLTREL_OVERLAP 1
+
+/* Set up the loaded object described by L so its unrelocated PLT
+ entries will jump to the on-demand fixup code in dl-runtime.c. */
+
+static inline int
+elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+{
+ Elf32_Addr *plt;
+ extern void _dl_runtime_resolve (Elf32_Word);
+
+ if (l->l_info[DT_JMPREL] && lazy)
+ {
+ /* The entries for functions in the PLT have not yet been filled in.
+ Their initial contents will arrange when called to set the high 22
+ bits of %g1 with an offset into the .rela.plt section and jump to
+ the beginning of the PLT. */
+ plt = (Elf32_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr);
+
+ /* The beginning of the PLT does:
+
+ save %sp, -64, %sp
+ pltpc: call _dl_runtime_resolve
+ nop
+ .word MAP
+
+ This saves the register window containing the arguments, and the
+ PC value (pltpc) implicitly saved in %o7 by the call points near the
+ location where we store the link_map pointer for this object. */
+
+ plt[0] = OPCODE_SAVE_SP64; /* save %sp, -64, %sp */
+ /* Construct PC-relative word address. */
+ plt[1] = OPCODE_CALL | (((Elf32_Addr) &_dl_runtime_resolve -
+ (Elf32_Addr) &plt[1]) >> 2);
+ plt[2] = OPCODE_NOP; /* Fill call delay slot. */
+ plt[3] = (Elf32_Addr *) l;
+ }
+
+ return lazy;
+}
+
+/* This code is used in dl-runtime.c to call the `fixup' function
+ and then redirect to the address it returns. */
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
+# Trampoline for _dl_runtime_resolver
+ .globl _dl_runtime_resolve
+ .type _dl_runtime_resolve, @function
+_dl_runtime_resolve:
+ t 1
+ #call %g0
+ # Pass two args to fixup: the PLT address computed from the PC saved
+ # in the PLT's call insn, and the reloc offset passed in %g1.
+ #ld [%o7 + 8], %o1 | Second arg, loaded from PLTPC[2].
+ #call fixup
+ #shrl %g1, 22, %o0 | First arg, set in delay slot of call.
+ # Jump to the real function.
+ #jmpl %o0, %g0
+ # In the delay slot of that jump, restore the register window
+ # saved by the first insn of the PLT.
+ #restore
+ .size _dl_runtime_resolve, . - _dl_runtime_resolve
+");
+
+/* The PLT uses Elf32_Rela relocs. */
+#define elf_machine_relplt elf_machine_rela
+
+
+/* Mask identifying addresses reserved for the user program,
+ where the dynamic linker should not map anything. */
+#define ELF_MACHINE_USER_ADDRESS_MASK ???
+
+/* Initial entry point code for the dynamic linker.
+ The C function `_dl_start' is the real entry point;
+ its return value is the user program's entry point. */
+
+#define RTLD_START __asm__ ( \
+".text\n\
+ .globl _start\n\
+ .type _start,@function\n\
+_start:\n\
+ /* Pass pointer to argument block to _dl_start. */\n\
+ add %sp,64,%o0\n\
+ call _dl_start\n\
+ nop\n\
+ \n\
+ mov %o0,%l0\n\
+ \n\
+2:\n\
+ call 1f\n\
+ nop\n\
+1:\n\
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-(2b-.)),%l2\n\
+ sethi %hi(_dl_default_scope),%l3\n\
+ or %l2,%lo(_GLOBAL_OFFSET_TABLE_-(2b-.)),%l2\n\
+ or %l3,%lo(_dl_default_scope),%l3\n\
+ add %o7,%l2,%l1\n\
+ # %l1 has the GOT. %l3 has _dl_default_scope GOT offset\n\
+ ld [%l1+%l3],%l4\n\
+ # %l4 has pointer to _dl_default_scope. Now, load _dl_default_scope [2]\n\
+ ld [%l4+8],%l4\n\
+ # %l4 has _dl_default_scope [2]\n\
+ # call _dl_init_next until it returns 0, pass _dl_default_scope [2]\n\
+3:\n\
+ call _dl_init_next\n\
+ mov %l4,%o0\n\
+ cmp %o0,%g0\n\
+ bz,a 4f\n\
+ nop\n\
+ call %o0\n\
+ /* Pass pointer to argument block to this init function */\n\
+ add %sp,64,%o0\n\
+ b,a 3b\n\
+4:\n\
+ # Clear the _dl_starting_up variable and pass _dl_fini in %g1 as per ELF ABI.\n\
+ sethi %hi(_dl_starting_up),%l4\n\
+ sethi %hi(_dl_fini),%l3\n\
+ or %l4,%lo(_dl_starting_up),%l4\n\
+ or %l3,%lo(_dl_fini),%l3\n\
+ # clear _dl_starting_up\n\
+ ld [%l1+%l4],%l5\n\
+ st %g0,[%l5]\n\
+ # load out fini function for atexit in %g1\n\
+ ld [%l3+%l1],%g1\n\
+ # jump to the user program entry point.\n\
+ jmpl %l0,%g0\n\
+ nop\n\
+");
diff --git a/sysdeps/sparc/sparc32/dotmul.S b/sysdeps/sparc/sparc32/dotmul.S
new file mode 100644
index 0000000000..7ce695cb7a
--- /dev/null
+++ b/sysdeps/sparc/sparc32/dotmul.S
@@ -0,0 +1,123 @@
+/*
+ * Signed multiply, from Appendix E of the Sparc Version 8
+ * Architecture Manual.
+ */
+
+/*
+ * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
+ * the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies.
+ */
+
+#include "sysdep.h"
+ENTRY(.mul)
+ mov %o0, %y ! multiplier -> Y
+ andncc %o0, 0xfff, %g0 ! test bits 12..31
+ be Lmul_shortway ! if zero, can do it the short way
+ andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
+
+ /*
+ * Long multiply. 32 steps, followed by a final shift step.
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %o1, %o4 ! 13
+ mulscc %o4, %o1, %o4 ! 14
+ mulscc %o4, %o1, %o4 ! 15
+ mulscc %o4, %o1, %o4 ! 16
+ mulscc %o4, %o1, %o4 ! 17
+ mulscc %o4, %o1, %o4 ! 18
+ mulscc %o4, %o1, %o4 ! 19
+ mulscc %o4, %o1, %o4 ! 20
+ mulscc %o4, %o1, %o4 ! 21
+ mulscc %o4, %o1, %o4 ! 22
+ mulscc %o4, %o1, %o4 ! 23
+ mulscc %o4, %o1, %o4 ! 24
+ mulscc %o4, %o1, %o4 ! 25
+ mulscc %o4, %o1, %o4 ! 26
+ mulscc %o4, %o1, %o4 ! 27
+ mulscc %o4, %o1, %o4 ! 28
+ mulscc %o4, %o1, %o4 ! 29
+ mulscc %o4, %o1, %o4 ! 30
+ mulscc %o4, %o1, %o4 ! 31
+ mulscc %o4, %o1, %o4 ! 32
+ mulscc %o4, %g0, %o4 ! final shift
+
+ ! If %o0 was negative, the result is
+ ! (%o0 * %o1) + (%o1 << 32))
+ ! We fix that here.
+
+#if 0
+ tst %o0
+ bge 1f
+ rd %y, %o0
+
+ ! %o0 was indeed negative; fix upper 32 bits of result by subtracting
+ ! %o1 (i.e., return %o4 - %o1 in %o1).
+ retl
+ sub %o4, %o1, %o1
+
+1:
+ retl
+ mov %o4, %o1
+#else
+ /* Faster code adapted from tege@sics.se's code for umul.S. */
+ sra %o0, 31, %o2 ! make mask from sign bit
+ and %o1, %o2, %o2 ! %o2 = 0 or %o1, depending on sign of %o0
+ rd %y, %o0 ! get lower half of product
+ retl
+ sub %o4, %o2, %o1 ! subtract compensation
+ ! and put upper half in place
+#endif
+
+Lmul_shortway:
+ /*
+ * Short multiply. 12 steps, followed by a final shift step.
+ * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+ * but there is no problem with %o0 being negative (unlike above).
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %g0, %o4 ! final shift
+
+ /*
+ * %o4 has 20 of the bits that should be in the low part of the
+ * result; %y has the bottom 12 (as %y's top 12). That is:
+ *
+ * %o4 %y
+ * +----------------+----------------+
+ * | -12- | -20- | -12- | -20- |
+ * +------(---------+------)---------+
+ * --hi-- ----low-part----
+ *
+ * The upper 12 bits of %o4 should be sign-extended to form the
+ * high part of the product (i.e., highpart = %o4 >> 20).
+ */
+
+ rd %y, %o5
+ sll %o4, 12, %o0 ! shift middle bits left 12
+ srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left
+ or %o5, %o0, %o0 ! construct low part of result
+ retl
+ sra %o4, 20, %o1 ! ... and extract high part of result
diff --git a/sysdeps/sparc/sparc32/e_sqrt.c b/sysdeps/sparc/sparc32/e_sqrt.c
new file mode 100644
index 0000000000..a98ae21ace
--- /dev/null
+++ b/sysdeps/sparc/sparc32/e_sqrt.c
@@ -0,0 +1,34 @@
+/* Copyright (C) 1991, 1992, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <errno.h>
+#include <math.h>
+
+#ifndef __GNUC__
+ #error This file uses GNU C extensions; you must compile with GCC.
+#endif
+
+/* Return the square root of X. */
+double
+__ieee754_sqrt (x)
+ double x;
+{
+ register double result;
+ asm ("fsqrtd %1, %0" : "=f" (result) : "f" (x));
+ return result;
+}
diff --git a/sysdeps/sparc/sparc32/elf/Makefile b/sysdeps/sparc/sparc32/elf/Makefile
new file mode 100644
index 0000000000..319fbdef10
--- /dev/null
+++ b/sysdeps/sparc/sparc32/elf/Makefile
@@ -0,0 +1,4 @@
+# Sparc/ELF specific definitions.
+
+# The assembler on SPARC needs the -fPIC flag even when it's assembler code.
+ASFLAGS-.so = -fPIC
diff --git a/sysdeps/sparc/sparc32/elf/start.c b/sysdeps/sparc/sparc32/elf/start.c
new file mode 100644
index 0000000000..f1e80195ca
--- /dev/null
+++ b/sysdeps/sparc/sparc32/elf/start.c
@@ -0,0 +1,68 @@
+/* Copyright (C) 1991, 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+extern char **__environ;
+
+extern void __libc_init_first __P ((int argc, char **argv, char **envp));
+extern int main __P ((int argc, char **argv, char **envp));
+
+register long int sp asm("%sp"), fp asm("%fp");
+
+void
+_start (void)
+{
+ /* It is important that these be declared `register'.
+ Otherwise, when compiled without optimization, they are put on the
+ stack, which loses completely after we zero the FP. */
+ register int argc;
+ register char **argv, **envp;
+ register long int g1 asm ("%g1");
+ unsigned long int copy_g1 = g1;
+
+ /* Unwind the frame built when we entered the function. */
+ asm("restore");
+ if (copy_g1)
+ atexit (copy_g1);
+
+ /* And clear the frame pointer. */
+ fp = 0;
+
+ /* The argument info starts after one register
+ window (64 bytes) past the SP. */
+ argc = ((int *) sp)[16];
+ argv = (char **) &((int *) sp)[17];
+ envp = &argv[argc + 1];
+ __environ = envp;
+
+ /* Allocate 24 bytes of stack space for the register save area. */
+ sp -= 24;
+ __libc_init_first (argc, argv, envp);
+#ifdef ELF_INIT_FINI
+ {
+ extern void _fini (void);
+ _init ();
+ atexit (_fini);
+ }
+#endif
+ exit (main (argc, argv, envp));
+}
diff --git a/sysdeps/sparc/sparc32/fpu/bits/fenv.h b/sysdeps/sparc/sparc32/fpu/bits/fenv.h
new file mode 100644
index 0000000000..0560dd99b8
--- /dev/null
+++ b/sysdeps/sparc/sparc32/fpu/bits/fenv.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _FENV_H
+#error "Never use <bits/fenv.h> directly; include <fenv.h> instead."
+#endif
+
+
+/* Define bits representing the exception. We use the bit positions
+ of the appropriate accrued exception bits from the FSR. */
+enum
+ {
+ FE_INVALID = (1 << 9),
+#define FE_INVALID FE_INVALID
+ FE_OVERFLOW = (1 << 8),
+#define FE_OVERFLOW FE_OVERFLOW
+ FE_UNDERFLOW = (1 << 7),
+#define FE_UNDERFLOW FE_UNDERFLOW
+ FE_DIVBYZERO = (1 << 6),
+#define FE_DIVBYZERO FE_DIVBYZERO
+ FE_INEXACT = (1 << 5)
+#define FE_INEXACT FE_INEXACT
+ };
+
+#define FE_ALL_EXCEPT \
+ (FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW | FE_INVALID)
+
+/* The Sparc FPU supports all of the four defined rounding modes. We
+ use again the bit positions in the FPU control word as the values
+ for the appropriate macros. */
+enum
+ {
+ FE_TONEAREST = (0U << 30),
+#define FE_TONEAREST FE_TONEAREST
+ FE_TOWARDSZERO = (1U << 30),
+#define FE_TOWARDSZERO FE_TOWARDSZERO
+ FE_UPWARD = (2U << 30),
+#define FE_UPWARD FE_UPWARD
+ FE_DOWNWARD = (3U << 30)
+#define FE_DOWNWARD FE_DOWNWARD
+ };
+
+#define __FE_ROUND_MASK (3U << 30)
+
+/* Type representing exception flags. */
+typedef unsigned int fexcept_t;
+
+/* Type representing floating-point environment. */
+typedef unsigned int fenv_t;
+
+/* If the default argument is used we use this value. */
+#define FE_DFL_ENV ((fenv_t *) -1)
+
+#ifdef __USE_GNU
+/* Floating-point environment where none of the exception is masked. */
+# define FE_NOMASK_ENV ((fenv_t *) -2)
+#endif
+
+/* For internal use only: access the fp state register. */
+#define __fenv_stfsr(X) __asm__("stfsr %0" : "=m"(X))
+#define __fenv_ldfsr(X) __asm__ __volatile__("ldfsr %0" : : "m"(X))
diff --git a/sysdeps/sparc/sparc32/fpu/fpu_control.h b/sysdeps/sparc/sparc32/fpu/fpu_control.h
new file mode 100644
index 0000000000..5ef3824702
--- /dev/null
+++ b/sysdeps/sparc/sparc32/fpu/fpu_control.h
@@ -0,0 +1,69 @@
+/* FPU control word bits. SPARC version.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Miguel de Icaza
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _FPU_CONTROL_H
+#define _FPU_CONTROL_H 1
+
+
+#include <features.h>
+
+/* precision control */
+#define _FPU_EXTENDED 0x00000000 /* RECOMMENDED */
+#define _FPU_DOUBLE 0x20000000
+#define _FPU_80BIT 0x30000000
+#define _FPU_SINGLE 0x10000000 /* DO NOT USE */
+
+/* rounding control / Sparc */
+#define _FPU_RC_DOWN 0xc0000000
+#define _FPU_RC_UP 0x80000000
+#define _FPU_RC_ZERO 0x40000000
+#define _FPU_RC_NEAREST 0x0 /* RECOMMENDED */
+
+#define _FPU_RESERVED 0x30300000 /* Reserved bits in cw */
+
+
+/* Now two recommended cw */
+
+/* Linux default:
+ - extended precision
+ - rounding to nearest
+ - exceptions on overflow, zero divide and NaN */
+#define _FPU_DEFAULT 0x1e
+
+/* IEEE: same as above, but exceptions */
+#define _FPU_IEEE 0x0
+
+/* Type of the control word. */
+typedef unsigned int fpu_control_t;
+
+#define _FPU_GETCW(cw) __asm__ ("st %%fsr,%0" : "=m" (*&cw))
+#define _FPU_SETCW(cw) __asm__ ("ld %0,%%fsr" : : "m" (*&cw))
+
+/* Default control word set at startup. */
+extern fpu_control_t __fpu_control;
+
+__BEGIN_DECLS
+
+/* Called at startup. It can be used to manipulate fpu control register. */
+extern void __setfpucw __P ((fpu_control_t));
+
+__END_DECLS
+
+#endif /* fpu_control.h */
diff --git a/sysdeps/sparc/sparc32/lshift.S b/sysdeps/sparc/sparc32/lshift.S
new file mode 100644
index 0000000000..4f0595f2fb
--- /dev/null
+++ b/sysdeps/sparc/sparc32/lshift.S
@@ -0,0 +1,95 @@
+! sparc __mpn_lshift --
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_lshift)
+C_SYMBOL_NAME(__mpn_lshift):
+ sll %o2,2,%g1
+ add %o1,%g1,%o1 ! make %o1 point at end of src
+ ld [%o1-4],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o0,%g1,%o0 ! make %o0 point at end of res
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 ! number of limbs in first loop
+ srl %g2,%o5,%g1 ! compute function result
+ be L0 ! if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+Loop0: ld [%o1-8],%g3
+ add %o0,-4,%o0
+ add %o1,-4,%o1
+ addcc %g4,-1,%g4
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne Loop0
+ st %o4,[%o0+0]
+
+L0: tst %o2
+ be Lend
+ nop
+
+Loop: ld [%o1-8],%g3
+ add %o0,-16,%o0
+ addcc %o2,-4,%o2
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+
+ ld [%o1-12],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+12]
+ srl %g2,%o5,%g1
+
+ ld [%o1-16],%g3
+ sll %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0+8]
+ srl %g3,%o5,%g1
+
+ ld [%o1-20],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+4]
+ srl %g2,%o5,%g1
+
+ add %o1,-16,%o1
+ or %g4,%g1,%g4
+ bne Loop
+ st %g4,[%o0+0]
+
+Lend: sll %g2,%o3,%g2
+ st %g2,[%o0-4]
+ retl
+ ld [%sp+80],%o0
diff --git a/sysdeps/sparc/sparc32/memcopy.h b/sysdeps/sparc/sparc32/memcopy.h
new file mode 100644
index 0000000000..ff73f453da
--- /dev/null
+++ b/sysdeps/sparc/sparc32/memcopy.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdeps/generic/memcopy.h>
+#undef reg_char
+#define reg_char int
diff --git a/sysdeps/sparc/sparc32/mul_1.S b/sysdeps/sparc/sparc32/mul_1.S
new file mode 100644
index 0000000000..142fd8ba2a
--- /dev/null
+++ b/sysdeps/sparc/sparc32/mul_1.S
@@ -0,0 +1,199 @@
+! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+! ADD CODE FOR SMALL MULTIPLIERS!
+!1: ld
+! st
+!
+!2: ld ,a
+! addxcc a,a,x
+! st x,
+!
+!3_unrolled:
+! ld ,a
+! addxcc a,a,x1 ! 2a + cy
+! addx %g0,%g0,x2
+! addcc a,x1,x ! 3a + c
+! st x,
+!
+! ld ,a
+! addxcc a,a,y1
+! addx %g0,%g0,y2
+! addcc a,y1,x
+! st x,
+!
+!4_unrolled:
+! ld ,a
+! srl a,2,x1 ! 4a
+! addxcc y2,x1,x
+! sll a,30,x2
+! st x,
+!
+! ld ,a
+! srl a,2,y1
+! addxcc x2,y1,y
+! sll a,30,y2
+! st x,
+!
+!5_unrolled:
+! ld ,a
+! srl a,2,x1 ! 4a
+! addxcc a,x1,x ! 5a + c
+! sll a,30,x2
+! addx %g0,x2,x2
+! st x,
+!
+! ld ,a
+! srl a,2,y1
+! addxcc a,y1,x
+! sll a,30,y2
+! addx %g0,y2,y2
+! st x,
+!
+!8_unrolled:
+! ld ,a
+! srl a,3,x1 ! 8a
+! addxcc y2,x1,x
+! sll a,29,x2
+! st x,
+!
+! ld ,a
+! srl a,3,y1
+! addxcc x2,y1,y
+! sll a,29,y2
+! st x,
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu Large
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L0
+ add %o4,-4,%o4
+Loop0:
+ st %g1,[%o4+%o2]
+L0: wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne,a Loop0
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g1,[%o4+%o2]
+
+
+Large: ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L1
+ add %o4,-4,%o4
+Loop:
+ st %g3,[%o4+%o2]
+L1: wr %g0,%o5,%y
+ and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne,a Loop
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g3,[%o4+%o2]
diff --git a/sysdeps/sparc/sparc32/rem.S b/sysdeps/sparc/sparc32/rem.S
new file mode 100644
index 0000000000..d50f1af630
--- /dev/null
+++ b/sysdeps/sparc/sparc32/rem.S
@@ -0,0 +1,369 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .rem name of function to generate
+ * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ * true true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include "sysdep.h"
+#ifdef __linux__
+#include <asm/traps.h>
+#else
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+#endif
+
+ENTRY(.rem)
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ mov %o0, %g6 ! sign of remainder matches %o0
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge 2f ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+2:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g7
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g7.
+ 2: addcc %o5, %o5, %o5
+ bcc Lnot_too_big
+ add %g7, 1, %g7
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b Ldo_single_div
+ sub %g7, 1, %g7
+
+ Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g7
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc %g7, 1, %g7
+ bl Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ Lend_single_divloop:
+ subcc %g7, 1, %g7
+ bge Lsingle_divloop
+ tst %o3
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge Ldivloop
+ tst %o3
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+
+Lgot_result:
+ ! check to see if answer should be < 0
+ tst %g6
+ bl,a 1f
+ sub %g0, %o3, %o3
+1:
+ retl
+ mov %o3, %o0
diff --git a/sysdeps/sparc/sparc32/rshift.S b/sysdeps/sparc/sparc32/rshift.S
new file mode 100644
index 0000000000..fea4f3b926
--- /dev/null
+++ b/sysdeps/sparc/sparc32/rshift.S
@@ -0,0 +1,92 @@
+! sparc __mpn_rshift --
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_rshift)
+C_SYMBOL_NAME(__mpn_rshift):
+ ld [%o1],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 ! number of limbs in first loop
+ sll %g2,%o5,%g1 ! compute function result
+ be L0 ! if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+Loop0: ld [%o1+4],%g3
+ add %o0,4,%o0
+ add %o1,4,%o1
+ addcc %g4,-1,%g4
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne Loop0
+ st %o4,[%o0-4]
+
+L0: tst %o2
+ be Lend
+ nop
+
+Loop: ld [%o1+4],%g3
+ add %o0,16,%o0
+ addcc %o2,-4,%o2
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+
+ ld [%o1+8],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-16]
+ sll %g2,%o5,%g1
+
+ ld [%o1+12],%g3
+ srl %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0-12]
+ sll %g3,%o5,%g1
+
+ ld [%o1+16],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-8]
+ sll %g2,%o5,%g1
+
+ add %o1,16,%o1
+ or %g4,%g1,%g4
+ bne Loop
+ st %g4,[%o0-4]
+
+Lend: srl %g2,%o3,%g2
+ st %g2,[%o0-0]
+ retl
+ ld [%sp+80],%o0
diff --git a/sysdeps/sparc/sparc32/sdiv.S b/sysdeps/sparc/sparc32/sdiv.S
new file mode 100644
index 0000000000..02ed2e973c
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sdiv.S
@@ -0,0 +1,369 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .div name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * true true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include "sysdep.h"
+#ifdef __linux__
+#include <asm/traps.h>
+#else
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+#endif
+
+ENTRY(.div)
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ xor %o1, %o0, %g6 ! compute sign in any case
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge 2f ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+2:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g7
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g7.
+ 2: addcc %o5, %o5, %o5
+ bcc Lnot_too_big
+ add %g7, 1, %g7
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b Ldo_single_div
+ sub %g7, 1, %g7
+
+ Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g7
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc %g7, 1, %g7
+ bl Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ Lend_single_divloop:
+ subcc %g7, 1, %g7
+ bge Lsingle_divloop
+ tst %o3
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge Ldivloop
+ tst %o3
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+Lgot_result:
+ ! check to see if answer should be < 0
+ tst %g6
+ bl,a 1f
+ sub %g0, %o2, %o2
+1:
+ retl
+ mov %o2, %o0
diff --git a/sysdeps/sparc/sparc32/setjmp.S b/sysdeps/sparc/sparc32/setjmp.S
new file mode 100644
index 0000000000..772ec723a3
--- /dev/null
+++ b/sysdeps/sparc/sparc32/setjmp.S
@@ -0,0 +1,54 @@
+/* Copyright (C) 1991, 1993, 1994, 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+#include <sys/trap.h>
+
+#define _ASM 1
+#include <bits/setjmp.h>
+
+ENTRY (__sigsetjmp)
+ /* Save our SP and FP; in the delay slot of the jump, save our
+ return PC. Save the signal mask if requested with a tail-call
+ for simplicity; it always returns zero. */
+ ta ST_FLUSH_WINDOWS
+#ifdef PIC
+ mov %o7,%g1
+2:
+ call 1f
+ nop
+1:
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-(2b-.)),%g2
+ or %g2,%lo(_GLOBAL_OFFSET_TABLE_-(2b-.)),%g2
+ add %g2,%o7,%g2
+ sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g3
+ or %g3,%lo(C_SYMBOL_NAME (__sigjmp_save)), %g3
+ st %sp, [%o0 + (JB_SP * 4)]
+ st %fp, [%o0 + (JB_FP * 4)]
+ mov %g1,%o7
+ ld [%g2+%g3],%g1
+ jmp %g1
+ st %o7, [%o0+(JB_PC*4)]
+#else
+ sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g1
+ st %sp, [%o0 + (JB_SP*4)]
+ or %lo(C_SYMBOL_NAME (__sigjmp_save)), %g1, %g1
+ st %fp, [%o0 + (JB_FP*4)]
+ jmp %g1
+ st %o7, [%o0 + (JB_PC*4)]
+#endif /* PIC */
diff --git a/sysdeps/sparc/sparc32/sparcv8/addmul_1.S b/sysdeps/sparc/sparc32/sparcv8/addmul_1.S
new file mode 100644
index 0000000000..fb9ea7cf0e
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/addmul_1.S
@@ -0,0 +1,124 @@
+! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1):
+ orcc %g0,%g0,%g2
+ ld [%o1+0],%o4 ! 1
+
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+#if PIC
+ mov %o7,%g4 ! Save return address register
+ call 1f
+ add %o7,LL-1f,%g3
+1: mov %g4,%o7 ! Restore return address register
+#else
+ sethi %hi(LL),%g3
+ or %g3,%lo(LL),%g3
+#endif
+ jmp %g3+%g1
+ nop
+LL:
+LL00: add %o0,-4,%o0
+ b Loop00 /* 4, 8, 12, ... */
+ add %o1,-4,%o1
+ nop
+LL01: b Loop01 /* 1, 5, 9, ... */
+ nop
+ nop
+ nop
+LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
+ b Loop10
+ add %o1,4,%o1
+ nop
+LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
+ b Loop11
+ add %o1,-8,%o1
+ nop
+
+1: addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ rd %y,%g2 ! 1
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] ! 1
+Loop00: umul %o4,%o3,%g3 ! 2
+ ld [%o0+4],%g1 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ rd %y,%g2 ! 2
+ addx %g0,%g2,%g2
+ nop
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+4] ! 2
+Loop11: umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ rd %y,%g2 ! 3
+ add %o1,16,%o1
+ addx %g0,%g2,%g2
+ ld [%o0+8],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+8] ! 3
+Loop10: umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+ ld [%o0+12],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+ addx %g0,%g2,%g2
+Loop01: addcc %o2,-4,%o2
+ bg 1b
+ umul %o4,%o3,%g3 ! 1
+
+ addcc %g3,%g2,%g3 ! 4
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] ! 4
+ addx %g0,%g2,%o0
+
+ retl
+ nop
+
+
+! umul, ld, addxcc, rd, st
+
+! umul, ld, addxcc, rd, ld, addcc, st, addx
+
diff --git a/sysdeps/sparc/sparc32/sparcv8/mul_1.S b/sysdeps/sparc/sparc32/sparcv8/mul_1.S
new file mode 100644
index 0000000000..b641feb453
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/mul_1.S
@@ -0,0 +1,99 @@
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 8
+ .global C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+#if PIC
+ mov %o7,%g4 ! Save return address register
+ call 1f
+ add %o7,LL-1f,%g3
+1: mov %g4,%o7 ! Restore return address register
+#else
+ sethi %hi(LL),%g3
+ or %g3,%lo(LL),%g3
+#endif
+ jmp %g3+%g1
+ ld [%o1+0],%o4 ! 1
+LL:
+LL00: add %o0,-4,%o0
+ add %o1,-4,%o1
+ b Loop00 /* 4, 8, 12, ... */
+ orcc %g0,%g0,%g2
+LL01: b Loop01 /* 1, 5, 9, ... */
+ orcc %g0,%g0,%g2
+ nop
+ nop
+LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
+ add %o1,4,%o1
+ b Loop10
+ orcc %g0,%g0,%g2
+ nop
+LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
+ add %o1,-8,%o1
+ b Loop11
+ orcc %g0,%g0,%g2
+
+Loop: addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ st %g3,[%o0+0] ! 1
+ rd %y,%g2 ! 1
+Loop00: umul %o4,%o3,%g3 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ st %g3,[%o0+4] ! 2
+ rd %y,%g2 ! 2
+Loop11: umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ add %o1,16,%o1
+ st %g3,[%o0+8] ! 3
+ rd %y,%g2 ! 3
+Loop10: umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+Loop01: addcc %o2,-4,%o2
+ bg Loop
+ umul %o4,%o3,%g3 ! 1
+
+ addcc %g3,%g2,%g3 ! 4
+ st %g3,[%o0+0] ! 4
+ rd %y,%g2 ! 4
+
+ retl
+ addx %g0,%g2,%o0
diff --git a/sysdeps/sparc/sparc32/sparcv8/submul_1.S b/sysdeps/sparc/sparc32/sparcv8/submul_1.S
new file mode 100644
index 0000000000..e40119d011
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/submul_1.S
@@ -0,0 +1,58 @@
+! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+ sub %g0,%o2,%o2 ! negate ...
+ sll %o2,2,%o2 ! ... and scale size
+ sub %o1,%o2,%o1 ! o1 is offset s1_ptr
+ sub %o0,%o2,%g1 ! g1 is offset res_ptr
+
+ mov 0,%o0 ! clear cy_limb
+
+Loop: ld [%o1+%o2],%o4
+ ld [%g1+%o2],%g2
+ umul %o4,%o3,%o5
+ rd %y,%g3
+ addcc %o5,%o0,%o5
+ addx %g3,0,%o0
+ subcc %g2,%o5,%g2
+ addx %o0,0,%o0
+ st %g2,[%g1+%o2]
+
+ addcc %o2,4,%o2
+ bne Loop
+ nop
+
+ retl
+ nop
diff --git a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S
new file mode 100644
index 0000000000..49c2398806
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S
@@ -0,0 +1,186 @@
+! SPARC __udiv_qrnnd division support, used from longlong.h.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr o0
+! n1 o1
+! n0 o2
+! d o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+ tst %o3
+ bneg Largedivisor
+ mov 8,%g1
+
+ b Lp1
+ addxcc %o2,%o2,%o2
+
+Lplop: bcc Ln1
+ addxcc %o2,%o2,%o2
+Lp1: addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc Ln2
+ addxcc %o2,%o2,%o2
+Lp2: addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc Ln3
+ addxcc %o2,%o2,%o2
+Lp3: addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc Ln4
+ addxcc %o2,%o2,%o2
+Lp4: addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne Lplop
+ subcc %o1,%o3,%o4
+ bcc Ln5
+ addxcc %o2,%o2,%o2
+Lp5: st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+Lnlop: bcc Lp1
+ addxcc %o2,%o2,%o2
+Ln1: addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc Lp2
+ addxcc %o2,%o2,%o2
+Ln2: addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc Lp3
+ addxcc %o2,%o2,%o2
+Ln3: addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc Lp4
+ addxcc %o2,%o2,%o2
+Ln4: addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne Lnlop
+ subcc %o4,%o3,%o1
+ bcc Lp5
+ addxcc %o2,%o2,%o2
+Ln5: st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+Largedivisor:
+ and %o2,1,%o5 ! %o5 = n0 & 1
+
+ srl %o2,1,%o2
+ sll %o1,31,%g2
+ or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1)
+ srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1)
+
+ and %o3,1,%g2
+ srl %o3,1,%g3 ! %g3 = floor(d / 2)
+ add %g3,%g2,%g3 ! %g3 = ceil(d / 2)
+
+ b LLp1
+ addxcc %o2,%o2,%o2
+
+LLplop: bcc LLn1
+ addxcc %o2,%o2,%o2
+LLp1: addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc LLn2
+ addxcc %o2,%o2,%o2
+LLp2: addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc LLn3
+ addxcc %o2,%o2,%o2
+LLp3: addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc LLn4
+ addxcc %o2,%o2,%o2
+LLp4: addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne LLplop
+ subcc %o1,%g3,%o4
+ bcc LLn5
+ addxcc %o2,%o2,%o2
+LLp5: add %o1,%o1,%o1 ! << 1
+ tst %g2
+ bne Oddp
+ add %o5,%o1,%o1
+ st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+LLnlop: bcc LLp1
+ addxcc %o2,%o2,%o2
+LLn1: addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc LLp2
+ addxcc %o2,%o2,%o2
+LLn2: addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc LLp3
+ addxcc %o2,%o2,%o2
+LLn3: addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc LLp4
+ addxcc %o2,%o2,%o2
+LLn4: addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne LLnlop
+ subcc %o4,%g3,%o1
+ bcc LLp5
+ addxcc %o2,%o2,%o2
+LLn5: add %o4,%o4,%o4 ! << 1
+ tst %g2
+ bne Oddn
+ add %o5,%o4,%o4
+ st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+Oddp: xnor %g0,%o2,%o2
+ ! q' in %o2. r' in %o1
+ addcc %o1,%o2,%o1
+ bcc LLp6
+ addx %o2,0,%o2
+ sub %o1,%o3,%o1
+LLp6: subcc %o1,%o3,%g0
+ bcs LLp7
+ subx %o2,-1,%o2
+ sub %o1,%o3,%o1
+LLp7: st %o1,[%o0]
+ retl
+ mov %o2,%o0
+
+Oddn: xnor %g0,%o2,%o2
+ ! q' in %o2. r' in %o4
+ addcc %o4,%o2,%o4
+ bcc LLn6
+ addx %o2,0,%o2
+ sub %o4,%o3,%o4
+LLn6: subcc %o4,%o3,%g0
+ bcs LLn7
+ subx %o2,-1,%o2
+ sub %o4,%o3,%o4
+LLn7: st %o4,[%o0]
+ retl
+ mov %o2,%o0
diff --git a/sysdeps/sparc/sparc32/sub_n.S b/sysdeps/sparc/sparc32/sub_n.S
new file mode 100644
index 0000000000..b7a11958e2
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sub_n.S
@@ -0,0 +1,311 @@
+! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+#define res_ptr %o0
+#define s1_ptr %o1
+#define s2_ptr %o2
+#define size %o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n):
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L1 ! branch if alignment differs
+ nop
+! ** V1a **
+ andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
+ be L_v1 ! if no, branch
+ nop
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add size,-1,size
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L_v1: addx %g0,%g0,%o4 ! save cy in register
+ cmp size,2 ! if size < 2 ...
+ bl Lend2 ! ... branch to tail code
+ subcc %g0,%o4,%g0 ! restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc size,-10,size
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt Lfin1
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop1: subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge Loop1
+ subcc %g0,%o4,%g0 ! restore cy
+
+Lfin1: addcc size,8-2,size
+ blt Lend1
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1: subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge Loope1
+ subcc %g0,%o4,%g0 ! restore cy
+Lend1: subxcc %g4,%g2,%o4
+ subxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+
+ andcc size,1,%g0
+ be Lret1
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+Lret1: retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+L1: xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L2
+ nop
+! ** V1b **
+ andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
+ be L_v1b ! if no, branch
+ nop
+/* Add least significant limb separately to align res_ptr and s1_ptr */
+ ld [s2_ptr],%g4
+ add s2_ptr,4,s2_ptr
+ ld [s1_ptr],%g2
+ add s1_ptr,4,s1_ptr
+ add size,-1,size
+ subcc %g2,%g4,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L_v1b: addx %g0,%g0,%o4 ! save cy in register
+ cmp size,2 ! if size < 2 ...
+ bl Lend2 ! ... branch to tail code
+ subcc %g0,%o4,%g0 ! restore cy
+
+ ld [s2_ptr+0],%g4
+ addcc size,-10,size
+ ld [s2_ptr+4],%g1
+ ldd [s1_ptr+0],%g2
+ blt Lfin1b
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop1b: subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+16],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+20],%g1
+ ldd [s1_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+24],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+28],%g1
+ ldd [s1_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+32],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+36],%g1
+ ldd [s1_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge Loop1b
+ subcc %g0,%o4,%g0 ! restore cy
+
+Lfin1b: addcc size,8-2,size
+ blt Lend1b
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1b:subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge Loope1b
+ subcc %g0,%o4,%g0 ! restore cy
+Lend1b: subxcc %g2,%g4,%o4
+ subxcc %g3,%g1,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+
+ andcc size,1,%g0
+ be Lret1b
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+ ld [s2_ptr+8],%g4
+ ld [s1_ptr+8],%g2
+ subxcc %g2,%g4,%o4
+ st %o4,[res_ptr+8]
+
+Lret1b: retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+! ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+ of s1_ptr and s2_ptr are the same. */
+
+L2: cmp size,1
+ be Ljone
+ nop
+ andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
+ be L_v2 ! if no, branch
+ nop
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add size,-1,size
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L_v2: addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ blt Lfin2
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop2: ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge Loop2
+ subcc %g0,%o4,%g0 ! restore cy
+
+Lfin2: addcc size,8-2,size
+ blt Lend2
+ subcc %g0,%o4,%g0 ! restore cy
+Loope2: ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge Loope2
+ subcc %g0,%o4,%g0 ! restore cy
+Lend2: andcc size,1,%g0
+ be Lret2
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+Ljone: ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+Lret2: retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
diff --git a/sysdeps/sparc/sparc32/submul_1.S b/sysdeps/sparc/sparc32/submul_1.S
new file mode 100644
index 0000000000..a8ebd501a7
--- /dev/null
+++ b/sysdeps/sparc/sparc32/submul_1.S
@@ -0,0 +1,147 @@
+! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+#include "sysdep.h"
+
+.text
+ .align 4
+ .global C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu Large
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L0
+ add %o4,-4,%o4
+Loop0:
+ subcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L0: wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne Loop0
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+
+Large: ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L1
+ add %o4,-4,%o4
+Loop:
+ subcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L1: wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne Loop
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
diff --git a/sysdeps/sparc/sparc32/udiv.S b/sysdeps/sparc/sparc32/udiv.S
new file mode 100644
index 0000000000..4a7d1526b0
--- /dev/null
+++ b/sysdeps/sparc/sparc32/udiv.S
@@ -0,0 +1,352 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .udiv name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * false false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include "sysdep.h"
+#ifdef __linux__
+#include <asm/traps.h>
+#else
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+#endif
+
+ENTRY(.udiv)
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g7
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g7.
+ 2: addcc %o5, %o5, %o5
+ bcc Lnot_too_big
+ add %g7, 1, %g7
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b Ldo_single_div
+ sub %g7, 1, %g7
+
+ Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g7
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc %g7, 1, %g7
+ bl Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ Lend_single_divloop:
+ subcc %g7, 1, %g7
+ bge Lsingle_divloop
+ tst %o3
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge Ldivloop
+ tst %o3
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+Lgot_result:
+
+ retl
+ mov %o2, %o0
diff --git a/sysdeps/sparc/sparc32/udiv_qrnnd.S b/sysdeps/sparc/sparc32/udiv_qrnnd.S
new file mode 100644
index 0000000000..5b67f874c3
--- /dev/null
+++ b/sysdeps/sparc/sparc32/udiv_qrnnd.S
@@ -0,0 +1,165 @@
+! SPARC __udiv_qrnnd division support, used from longlong.h.
+
+! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+! Added PIC support - May/96, Miguel de Icaza
+
+! INPUT PARAMETERS
+! rem_ptr i0
+! n1 i1
+! n0 i2
+! d i3
+
+#include "sysdep.h"
+#undef ret /* Kludge for glibc */
+
+ .text
+ .align 8
+
+ .type two_to_32,@object
+two_to_32:
+ .double 0r4294967296
+ .size two_to_32,8
+
+ .type two_to_31,@object
+two_to_31:
+ .double 0r2147483648
+ .size two_to_31,8
+
+ .align 4
+ .global __udiv_qrnnd
+ .type __udiv_qrnnd,@function
+ENTRY(__udiv_qrnnd)
+ !#PROLOGUE# 0
+ save %sp,-104,%sp
+ !#PROLOGUE# 1
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+#ifdef PIC
+.Lbase: call 1f
+ fitod %f10,%f4
+1: ldd [%o7-(.Lbase-two_to_32)],%f8
+#else
+ sethi %hi(two_to_32),%o7
+ fitod %f10,%f4
+ ldd [%o7+%lo(two_to_32)],%f8
+#endif
+ cmp %i1,0
+ bge L248
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+.L248:
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L249
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+.L249:
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L250
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+.L250:
+ fdivd %f2,%f4,%f2
+#ifdef PIC
+ ldd [%o7-(.Lbase-two_to_31)],%f4
+#else
+ sethi %hi(two_to_31),%o7
+ ldd [%o7+%lo(two_to_31)],%f4
+#endif
+ fcmped %f2,%f4
+ nop
+ fbge,a L251
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L252
+ ld [%fp-8],%i4
+.L251:
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+.L252:
+ wr %g0,%i4,%y
+ sra %i3,31,%g2
+ and %i4,%g2,%g2
+ andcc %g0,0,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,0,%g1
+ add %g1,%g2,%i0
+ rd %y,%g3
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L253
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+.L253:
+ blu L246
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+.L246:
+ st %o7,[%i5]
+ ret
+ restore
+
+ .size __udiv_qrnnd, .-__udiv_qrnnd
diff --git a/sysdeps/sparc/sparc32/umul.S b/sysdeps/sparc/sparc32/umul.S
new file mode 100644
index 0000000000..7a26c295cb
--- /dev/null
+++ b/sysdeps/sparc/sparc32/umul.S
@@ -0,0 +1,153 @@
+/*
+ * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+ * upper 32 bits of the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies. Short
+ * multiplies require 25 instruction cycles, and long ones require
+ * 45 instruction cycles.
+ *
+ * On return, overflow has occurred (%o1 is not zero) if and only if
+ * the Z condition code is clear, allowing, e.g., the following:
+ *
+ * call .umul
+ * nop
+ * bnz overflow (or tnz)
+ */
+
+#include "DEFS.h"
+FUNC(.umul)
+ or %o0, %o1, %o4
+ mov %o0, %y ! multiplier -> Y
+ andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
+ be Lmul_shortway ! if zero, can do it the short way
+ andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
+
+ /*
+ * Long multiply. 32 steps, followed by a final shift step.
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %o1, %o4 ! 13
+ mulscc %o4, %o1, %o4 ! 14
+ mulscc %o4, %o1, %o4 ! 15
+ mulscc %o4, %o1, %o4 ! 16
+ mulscc %o4, %o1, %o4 ! 17
+ mulscc %o4, %o1, %o4 ! 18
+ mulscc %o4, %o1, %o4 ! 19
+ mulscc %o4, %o1, %o4 ! 20
+ mulscc %o4, %o1, %o4 ! 21
+ mulscc %o4, %o1, %o4 ! 22
+ mulscc %o4, %o1, %o4 ! 23
+ mulscc %o4, %o1, %o4 ! 24
+ mulscc %o4, %o1, %o4 ! 25
+ mulscc %o4, %o1, %o4 ! 26
+ mulscc %o4, %o1, %o4 ! 27
+ mulscc %o4, %o1, %o4 ! 28
+ mulscc %o4, %o1, %o4 ! 29
+ mulscc %o4, %o1, %o4 ! 30
+ mulscc %o4, %o1, %o4 ! 31
+ mulscc %o4, %o1, %o4 ! 32
+ mulscc %o4, %g0, %o4 ! final shift
+
+
+ /*
+ * Normally, with the shift-and-add approach, if both numbers are
+ * positive you get the correct result. With 32-bit two's-complement
+ * numbers, -x is represented as
+ *
+ * x 32
+ * ( 2 - ------ ) mod 2 * 2
+ * 32
+ * 2
+ *
+ * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
+ * we can treat this as if the radix point were just to the left
+ * of the sign bit (multiply by 2^32), and get
+ *
+ * -x = (2 - x) mod 2
+ *
+ * Then, ignoring the `mod 2's for convenience:
+ *
+ * x * y = xy
+ * -x * y = 2y - xy
+ * x * -y = 2x - xy
+ * -x * -y = 4 - 2x - 2y + xy
+ *
+ * For signed multiplies, we subtract (x << 32) from the partial
+ * product to fix this problem for negative multipliers (see mul.s).
+ * Because of the way the shift into the partial product is calculated
+ * (N xor V), this term is automatically removed for the multiplicand,
+ * so we don't have to adjust.
+ *
+ * But for unsigned multiplies, the high order bit wasn't a sign bit,
+ * and the correction is wrong. So for unsigned multiplies where the
+ * high order bit is one, we end up with xy - (y << 32). To fix it
+ * we add y << 32.
+ */
+#if 0
+ tst %o1
+ bl,a 1f ! if %o1 < 0 (high order bit = 1),
+ add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
+1: rd %y, %o0 ! get lower half of product
+ retl
+ addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
+#else
+ /* Faster code from tege@sics.se. */
+ sra %o1, 31, %o2 ! make mask from sign bit
+ and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
+ rd %y, %o0 ! get lower half of product
+ retl
+ addcc %o4, %o2, %o1 ! add compensation and put upper half in place
+#endif
+
+Lmul_shortway:
+ /*
+ * Short multiply. 12 steps, followed by a final shift step.
+ * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+ * but there is no problem with %o0 being negative (unlike above),
+ * and overflow is impossible (the answer is at most 24 bits long).
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %g0, %o4 ! final shift
+
+ /*
+ * %o4 has 20 of the bits that should be in the result; %y has
+ * the bottom 12 (as %y's top 12). That is:
+ *
+ * %o4 %y
+ * +----------------+----------------+
+ * | -12- | -20- | -12- | -20- |
+ * +------(---------+------)---------+
+ * -----result-----
+ *
+ * The 12 bits of %o4 left of the `result' area are all zero;
+ * in fact, all top 20 bits of %o4 are zero.
+ */
+
+ rd %y, %o5
+ sll %o4, 12, %o0 ! shift middle bits left 12
+ srl %o5, 20, %o5 ! shift low bits right 20
+ or %o5, %o0, %o0
+ retl
+ addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
diff --git a/sysdeps/sparc/sparc32/urem.S b/sysdeps/sparc/sparc32/urem.S
new file mode 100644
index 0000000000..e72c33dd63
--- /dev/null
+++ b/sysdeps/sparc/sparc32/urem.S
@@ -0,0 +1,352 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .urem name of function to generate
+ * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ * false false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include "sysdep.h"
+#ifdef __linux__
+#include <asm/traps.h>
+#else
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+#endif
+
+ENTRY(.urem)
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g7
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g7.
+ 2: addcc %o5, %o5, %o5
+ bcc Lnot_too_big
+ add %g7, 1, %g7
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b Ldo_single_div
+ sub %g7, 1, %g7
+
+ Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g7
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ Ldo_single_div:
+ subcc %g7, 1, %g7
+ bl Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b Lend_single_divloop
+ nop
+ Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ Lend_single_divloop:
+ subcc %g7, 1, %g7
+ bge Lsingle_divloop
+ tst %o3
+ b,a Lend_regular_divide
+
+Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge Ldivloop
+ tst %o3
+ bl,a Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+
+Lgot_result:
+
+ retl
+ mov %o3, %o0