From b9de2dde539ff09325818fb65391accd2c4a1c79 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Jan 2013 16:15:32 -0800 Subject: Optimize sparc 32-bit V9 GMP multiply routines. * sysdeps/sparc/sparc32/sparcv9/mul_1.S: Properly optimize for 32-bit sparc V9 rather than using V8 code. * sysdeps/sparc/sparc32/sparcv9/addmul_1.S: Likewise. * sysdeps/sparc/sparc32/sparcv9/submul_1.S: Likewise. --- sysdeps/sparc/sparc32/sparcv9/addmul_1.S | 72 ++++++++++++++++++++++++++++++- sysdeps/sparc/sparc32/sparcv9/mul_1.S | 65 +++++++++++++++++++++++++++- sysdeps/sparc/sparc32/sparcv9/submul_1.S | 73 +++++++++++++++++++++++++++++++- 3 files changed, 207 insertions(+), 3 deletions(-) (limited to 'sysdeps/sparc') diff --git a/sysdeps/sparc/sparc32/sparcv9/addmul_1.S b/sysdeps/sparc/sparc32/sparcv9/addmul_1.S index 563bfb1c0a..875a80898e 100644 --- a/sysdeps/sparc/sparc32/sparcv9/addmul_1.S +++ b/sysdeps/sparc/sparc32/sparcv9/addmul_1.S @@ -1 +1,71 @@ -#include +! SPARC v9 32-bit mpn_addmul_1. +! +! Copyright 2010-2013 Free Software Foundation, Inc. +! +! This file is part of the GNU MP Library. +! +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published +! by the Free Software Foundation; either version 3 of the License, or (at +! your option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. +! +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +! INPUT PARAMETERS +! res_ptr %i0 +! s1_ptr %i1 +! size %i2 +! 
s2_limb %i3
+! NOTE(review): the #include below has no header name in this copy (presumably stripped during extraction) - confirm against the upstream commit.
+#include
+
+ENTRY(__mpn_addmul_1)
+        save    %sp, -96, %sp           ! new register window with a 96-byte frame; arguments are now in %i0-%i3
+        srl     %i2, 0, %o4             ! %o4 = size with bits 63:32 cleared
+        srl     %i3, 0, %g1             ! %g1 = s2_limb with bits 63:32 cleared
+        subcc   %o4, 1, %o4             ! %o4 = size - 1; sets the flags tested by "be"
+        be      .Lfinal_one             ! size == 1: skip the two-limb loop
+         clr    %o5                     ! (delay slot) carry limb %o5 = 0
+
+.Ltop:                                  ! main loop: two limbs per iteration; %o5 carries between limbs
+        lduw    [%i1+0], %l0            ! %l0 = s1[0]
+        lduw    [%i0+0], %l2            ! %l2 = res[0]
+        lduw    [%i1+4], %l1            ! %l1 = s1[1]
+        lduw    [%i0+4], %l3            ! %l3 = res[1]
+        mulx    %l0, %g1, %g3           ! %g3 = s1[0] * s2_limb (64-bit product)
+        add     %i1, 8, %i1             ! s1_ptr += 2 limbs (4 bytes each)
+        mulx    %l1, %g1, %o3           ! %o3 = s1[1] * s2_limb (64-bit product)
+        sub     %o4, 2, %o4             ! two limbs consumed
+        add     %i0, 8, %i0             ! res_ptr += 2 limbs; stores below use negative offsets
+        add     %l2, %g3, %g3           ! product 0 + res[0]
+        add     %o5, %g3, %g3           ! ... + incoming carry limb
+        stw     %g3, [%i0-8]            ! res[0] = low 32 bits of the sum
+        srlx    %g3, 32, %o5            ! carry limb = high 32 bits of the sum
+        add     %l3, %o3, %o3           ! product 1 + res[1]
+        add     %o5, %o3, %o3           ! ... + carry from limb 0
+        stw     %o3, [%i0-4]            ! res[1] = low 32 bits of the sum
+        brgz    %o4, .Ltop              ! %o4 > 0: at least two limbs remain, loop again
+         srlx   %o3, 32, %o5            ! (delay slot) carry limb = high 32 bits of the sum
+
+        brlz,pt %o4, .Ldone             ! %o4 < 0: no limb left; %o4 == 0 falls through to the last limb
+         nop                            ! (delay slot)
+
+.Lfinal_one:                            ! handle the single remaining limb
+        lduw    [%i1+0], %l0            ! %l0 = s1[0]
+        lduw    [%i0+0], %l2            ! %l2 = res[0]
+        mulx    %l0, %g1, %g3           ! %g3 = s1[0] * s2_limb (64-bit product)
+        add     %l2, %g3, %g3           ! + res[0]
+        add     %o5, %g3, %g3           ! + incoming carry limb
+        stw     %g3, [%i0+0]            ! res[0] = low 32 bits of the sum
+        srlx    %g3, 32, %o5            ! carry limb = high 32 bits of the sum
+
+.Ldone:                                 ! %o5 holds the final carry limb
+        jmpl    %i7 + 8, %g0            ! return to the caller
+         restore %o5, 0, %o0            ! (delay slot) pop the window; carry limb lands in the caller's %o0
+END(__mpn_addmul_1)
diff --git a/sysdeps/sparc/sparc32/sparcv9/mul_1.S b/sysdeps/sparc/sparc32/sparcv9/mul_1.S
index 42284eada6..af51ee47e6 100644
--- a/sysdeps/sparc/sparc32/sparcv9/mul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv9/mul_1.S
@@ -1 +1,64 @@
-#include
+! SPARC v9 32-bit mpn_mul_1.
+!
+! Copyright 2010-2013 Free Software Foundation, Inc.
+!
+! This file is part of the GNU MP Library.
+!
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published
+! by the Free Software Foundation; either version 3 of the License, or (at
+! your option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! size %o2
+!
s2_limb %o3
+! NOTE(review): the #include below has no header name in this copy (presumably stripped during extraction) - confirm against the upstream commit.
+#include
+
+ENTRY(__mpn_mul_1)
+        srl     %o2, 0, %o2             ! size with bits 63:32 cleared
+        srl     %o3, 0, %o3             ! s2_limb with bits 63:32 cleared
+        subcc   %o2, 1, %o2             ! %o2 = size - 1; sets the flags tested by "be"
+        be      .Lfinal_one             ! size == 1: skip the two-limb loop
+         clr    %o5                     ! (delay slot) carry limb %o5 = 0
+
+.Ltop:                                  ! main loop: two limbs per iteration; %o5 carries between limbs
+        lduw    [%o1+0], %g1            ! %g1 = s1[0]
+        lduw    [%o1+4], %g2            ! %g2 = s1[1]
+        mulx    %g1, %o3, %g3           ! %g3 = s1[0] * s2_limb (64-bit product)
+        add     %o1, 8, %o1             ! s1_ptr += 2 limbs (4 bytes each)
+        mulx    %g2, %o3, %o4           ! %o4 = s1[1] * s2_limb (64-bit product)
+        sub     %o2, 2, %o2             ! two limbs consumed
+        add     %o0, 8, %o0             ! res_ptr += 2 limbs; stores below use negative offsets
+        add     %o5, %g3, %g3           ! product 0 + incoming carry limb
+        stw     %g3, [%o0-8]            ! res[0] = low 32 bits
+        srlx    %g3, 32, %o5            ! carry limb = high 32 bits
+        add     %o5, %o4, %o4           ! product 1 + carry from limb 0
+        stw     %o4, [%o0-4]            ! res[1] = low 32 bits
+        brgz    %o2, .Ltop              ! %o2 > 0: at least two limbs remain, loop again
+         srlx   %o4, 32, %o5            ! (delay slot) carry limb = high 32 bits
+
+        brlz,pt %o2, .Ldone             ! %o2 < 0: no limb left; %o2 == 0 falls through to the last limb
+         nop                            ! (delay slot)
+
+.Lfinal_one:                            ! handle the single remaining limb
+        lduw    [%o1+0], %g1            ! %g1 = s1[0]
+        mulx    %g1, %o3, %g3           ! %g3 = s1[0] * s2_limb (64-bit product)
+        add     %o5, %g3, %g3           ! + incoming carry limb
+        stw     %g3, [%o0+0]            ! res[0] = low 32 bits
+        srlx    %g3, 32, %o5            ! carry limb = high 32 bits
+
+.Ldone:                                 ! %o5 holds the final carry limb
+        retl                            ! return; no register window was allocated by this routine
+         mov    %o5, %o0                ! (delay slot) return value = carry limb
+END(__mpn_mul_1)
diff --git a/sysdeps/sparc/sparc32/sparcv9/submul_1.S b/sysdeps/sparc/sparc32/sparcv9/submul_1.S
index de69533f63..9c89feb5d9 100644
--- a/sysdeps/sparc/sparc32/sparcv9/submul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv9/submul_1.S
@@ -1 +1,72 @@
-#include
+! SPARC v9 32-bit mpn_submul_1.
+!
+! Copyright 2010-2013 Free Software Foundation, Inc.
+!
+! This file is part of the GNU MP Library.
+!
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published
+! by the Free Software Foundation; either version 3 of the License, or (at
+! your option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+! INPUT PARAMETERS
+! res_ptr %i0
+! s1_ptr %i1
+! size %i2
+!
s2_limb %i3
+! NOTE(review): the #include below has no header name in this copy (presumably stripped during extraction) - confirm against the upstream commit.
+#include
+
+ENTRY(__mpn_submul_1)
+        save    %sp, -96, %sp           ! new register window with a 96-byte frame; arguments are now in %i0-%i3
+        srl     %i2, 0, %o4             ! %o4 = size with bits 63:32 cleared
+        srl     %i3, 0, %g1             ! %g1 = s2_limb with bits 63:32 cleared
+        subcc   %o4, 1, %o4             ! %o4 = size - 1; sets the flags tested by "be"
+        be      .Lfinal_one             ! size == 1: skip the two-limb loop
+         subcc  %g0, 0, %o5             ! (delay slot) %o5 = 0, and 0 - 0 leaves the carry flag clear for the first addx
+
+.Ltop:                                  ! main loop: two limbs per iteration; no flag-setting op between a subcc and the next addx
+        lduw    [%i1+0], %l0            ! %l0 = s1[0]
+        lduw    [%i0+0], %l2            ! %l2 = res[0]
+        lduw    [%i1+4], %l1            ! %l1 = s1[1]
+        lduw    [%i0+4], %l3            ! %l3 = res[1]
+        mulx    %l0, %g1, %g3           ! %g3 = s1[0] * s2_limb (64-bit product)
+        add     %i1, 8, %i1             ! s1_ptr += 2 limbs (4 bytes each)
+        mulx    %l1, %g1, %o3           ! %o3 = s1[1] * s2_limb (64-bit product)
+        sub     %o4, 2, %o4             ! two limbs consumed (does not touch the flags)
+        add     %i0, 8, %i0             ! res_ptr += 2 limbs; stores below use negative offsets
+        addx    %o5, %g3, %g3           ! product 0 + carry limb + carry flag (borrow of the previous subcc)
+        srlx    %g3, 32, %o5            ! carry limb = high 32 bits
+        subcc   %l2, %g3, %g3           ! res[0] - low word; borrow is left in the carry flag
+        stw     %g3, [%i0-8]            ! store res[0]
+        addx    %o5, %o3, %o3           ! product 1 + carry limb + borrow from limb 0
+        srlx    %o3, 32, %o5            ! carry limb = high 32 bits
+        subcc   %l3, %o3, %o3           ! res[1] - low word; borrow is left in the carry flag
+        brgz    %o4, .Ltop              ! %o4 > 0: at least two limbs remain (tests the register, flags untouched)
+         stw    %o3, [%i0-4]            ! (delay slot) store res[1]
+
+        brlz,pt %o4, .Ldone             ! %o4 < 0: no limb left; %o4 == 0 falls through to the last limb
+         nop                            ! (delay slot)
+
+.Lfinal_one:                            ! handle the single remaining limb
+        lduw    [%i1+0], %l0            ! %l0 = s1[0]
+        lduw    [%i0+0], %l2            ! %l2 = res[0]
+        mulx    %l0, %g1, %g3           ! %g3 = s1[0] * s2_limb (64-bit product)
+        addx    %o5, %g3, %g3           ! + carry limb + pending borrow flag
+        srlx    %g3, 32, %o5            ! carry limb = high 32 bits
+        subcc   %l2, %g3, %g3           ! res[0] - low word; borrow is left in the carry flag
+        stw     %g3, [%i0+0]            ! store res[0]
+
+.Ldone:                                 ! %o5 = carry limb, carry flag = last borrow
+        addx    %o5, 0, %o5             ! fold the final borrow into the carry limb
+        jmpl    %i7 + 8, %g0            ! return to the caller
+         restore %o5, 0, %o0            ! (delay slot) pop the window; carry limb lands in the caller's %o0
+END(__mpn_submul_1)
-- 
cgit v1.2.3