From 4a22fa60cd42eba5ab1931547be33f7764ef6f73 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 2 Mar 2006 09:06:20 +0000
Subject: Updated to fedora-glibc-20060302T0855

---
 sysdeps/ia64/memccpy.S                        |  55 ++-
 sysdeps/mach/hurd/Subdirs                     |  10 +-
 sysdeps/rs6000/add_n.s                        |  81 ----
 sysdeps/rs6000/addmul_1.s                     | 123 ------
 sysdeps/rs6000/ffs.c                          |  42 --
 sysdeps/rs6000/lshift.s                       |  59 ---
 sysdeps/rs6000/memcopy.h                      |  86 ----
 sysdeps/rs6000/mul_1.s                        | 110 -----
 sysdeps/rs6000/rshift.s                       |  57 ---
 sysdeps/rs6000/sub_n.s                        |  82 ----
 sysdeps/rs6000/submul_1.s                     | 128 ------
 sysdeps/sparc/fpu/fraiseexcpt.c               |  34 +-
 sysdeps/sparc/sparc32/fpu/libm-test-ulps      |   9 +
 sysdeps/sparc/sparc32/sparcv9v/memcpy.S       |   2 +
 sysdeps/sparc/sparc32/sparcv9v/memset.S       |   2 +
 sysdeps/sparc/sparc64/fpu/libm-test-ulps      |   9 +
 sysdeps/sparc/sparc64/sparcv9v/memcpy.S       | 593 ++++++++++++++++++++++++++
 sysdeps/sparc/sparc64/sparcv9v/memset.S       | 127 ++++++
 sysdeps/unix/sysv/linux/i386/fxstatat.c       |   2 +-
 sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h |   2 +-
 20 files changed, 826 insertions(+), 787 deletions(-)
 delete mode 100644 sysdeps/rs6000/add_n.s
 delete mode 100644 sysdeps/rs6000/addmul_1.s
 delete mode 100644 sysdeps/rs6000/ffs.c
 delete mode 100644 sysdeps/rs6000/lshift.s
 delete mode 100644 sysdeps/rs6000/memcopy.h
 delete mode 100644 sysdeps/rs6000/mul_1.s
 delete mode 100644 sysdeps/rs6000/rshift.s
 delete mode 100644 sysdeps/rs6000/sub_n.s
 delete mode 100644 sysdeps/rs6000/submul_1.s
 create mode 100644 sysdeps/sparc/sparc32/sparcv9v/memcpy.S
 create mode 100644 sysdeps/sparc/sparc32/sparcv9v/memset.S
 create mode 100644 sysdeps/sparc/sparc64/sparcv9v/memcpy.S
 create mode 100644 sysdeps/sparc/sparc64/sparcv9v/memset.S

diff --git a/sysdeps/ia64/memccpy.S b/sysdeps/ia64/memccpy.S
index 53c43c512b..dd638d43c8 100644
--- a/sysdeps/ia64/memccpy.S
+++ b/sysdeps/ia64/memccpy.S
@@ -1,6 +1,6 @@
 /* Optimized version of the memccpy() function.
    This file is part of the GNU C Library.
-   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
+   Copyright (C) 2000,2001,2003,2006 Free Software Foundation, Inc.
    Contributed by Dan Pop <Dan.Pop@cern.ch>.

    The GNU C Library is free software; you can redistribute it and/or
@@ -183,27 +183,64 @@ ENTRY(memccpy)
 br.ret.sptk.many b0
 .recovery1:
- adds src = -(MEMLAT + 6 + 1) * 8, asrc
+#if MEMLAT != 6
+# error "MEMLAT must be 6!"
+#endif
+ adds src = -8, asrc
 mov loopcnt = ar.lc
- mov tmp = ar.ec ;;
+ mov tmp = ar.ec
+ ;;
+(p[0]) adds src = -8, src
+ ;;
+(p[1]) adds src = -8, src
 sub sh1 = (MEMLAT + 6 + 1), tmp
- shr.u sh2 = sh2, 3
- ;;
+ ;;
+(p[2]) adds src = -8, src
+ ;;
+(p[3]) adds src = -8, src
 shl loopcnt = loopcnt, 3
- sub src = src, sh2
+ ;;
+(p[4]) adds src = -8, src
+ ;;
+(p[5]) adds src = -8, src
 shl sh1 = sh1, 3
+ ;;
+(p[6]) adds src = -8, src
+ ;;
+(p[7]) adds src = -8, src
 shl tmp = tmp, 3
 ;;
+(p[8]) adds src = -8, src
+ ;;
+(p[9]) adds src = -8, src
+ shr.u sh2 = sh2, 3
+ ;;
+(p[10]) adds src = -8, src
+ ;;
+(p[11]) adds src = -8, src
 add len = len, loopcnt
- add src = sh1, src ;;
+ ;;
+ sub src = src, sh2
+ ;;
 add len = tmp, len
-.back1:
+ add src = sh1, src
 br.cond.sptk .cpyfew

 .recovery2:
- add tmp = -(MEMLAT + 3) * 8, src
+#if MEMLAT != 6
+# error "MEMLAT must be 6!"
+#endif + add tmp = -8, src (p7) br.cond.spnt .gotit ;; +(p[0]) add tmp = -8, tmp ;; +(p[1]) add tmp = -8, tmp ;; +(p[2]) add tmp = -8, tmp ;; +(p[3]) add tmp = -8, tmp ;; +(p[4]) add tmp = -8, tmp ;; +(p[5]) add tmp = -8, tmp ;; +(p[6]) add tmp = -8, tmp ;; +(p[7]) add tmp = -8, tmp ;; ld8 r[MEMLAT+2] = [tmp] ;; xor pos0[1] = r[MEMLAT+2], charx8 ;; czx1.r pos0[1] = pos0[1] ;; diff --git a/sysdeps/mach/hurd/Subdirs b/sysdeps/mach/hurd/Subdirs index 16b8348437..7a7757582a 100644 --- a/sysdeps/mach/hurd/Subdirs +++ b/sysdeps/mach/hurd/Subdirs @@ -1 +1,9 @@ -hurd +# This file says that the hurd subdirectory should appear before all others. +# The mach and hurd subdirectories have many generated header files which +# much of the rest of the library depends on, so it is best to build them +# first (and mach before hurd, at that). The before-compile additions in +# sysdeps/{mach,hurd}/Makefile should make it reliably work for these files +# not to exist when making in other directories, but it will be slower that +# way with more somewhat expensive `make' invocations. + +first hurd diff --git a/sysdeps/rs6000/add_n.s b/sysdeps/rs6000/add_n.s deleted file mode 100644 index 216874e7a4..0000000000 --- a/sysdeps/rs6000/add_n.s +++ /dev/null @@ -1,81 +0,0 @@ -# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length. - -# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# s2_ptr r5 -# size r6 - - .toc - .extern __mpn_add_n[DS] - .extern .__mpn_add_n -.csect [PR] - .align 2 - .globl __mpn_add_n - .globl .__mpn_add_n - .csect __mpn_add_n[DS] -__mpn_add_n: - .long .__mpn_add_n, TOC[tc0], 0 - .csect [PR] -.__mpn_add_n: - andil. 10,6,1 # odd or even number of limbs? - l 8,0(4) # load least significant s1 limb - l 0,0(5) # load least significant s2 limb - cal 3,-4(3) # offset res_ptr, it's updated before it's used - sri 10,6,1 # count for unrolled loop - a 7,0,8 # add least significant limbs, set cy - mtctr 10 # copy count into CTR - beq 0,Leven # branch if even # of limbs (# of limbs >= 2) - -# We have an odd # of limbs. Add the first limbs separately. - cmpi 1,10,0 # is count for unrolled loop zero? - bne 1,L1 # branch if not - st 7,4(3) - aze 3,10 # use the fact that r10 is zero... - br # return - -# We added least significant limbs. Now reload the next limbs to enter loop. 
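For readers following the deleted routine: __mpn_add_n adds two equal-length limb vectors and returns the carry out of the most significant limb. A plain C statement of those semantics (an illustrative sketch with GMP-style naming, not code from this patch) makes the odd/even split above and the two-limb unrolling below easier to check:

#include <stddef.h>
#include <stdint.h>

/* res[i] = s1[i] + s2[i] + carry, for i = 0 .. size-1; returns the
   final carry.  The assembly adds two limbs per loop iteration, which
   is why an odd limb count is peeled off before the main loop.  */
static uint32_t
ref_add_n (uint32_t *res, const uint32_t *s1, const uint32_t *s2,
           size_t size)
{
  uint32_t cy = 0;
  for (size_t i = 0; i < size; i++)
    {
      uint32_t a = s1[i];
      uint32_t sum = a + s2[i] + cy;
      /* A wrap-around means a carry out of this limb.  */
      cy = cy ? (sum <= a) : (sum < a);
      res[i] = sum;
    }
  return cy;
}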
-L1: lu 8,4(4) # load s1 limb and update s1_ptr - lu 0,4(5) # load s2 limb and update s2_ptr - stu 7,4(3) - ae 7,0,8 # add limbs, set cy -Leven: lu 9,4(4) # load s1 limb and update s1_ptr - lu 10,4(5) # load s2 limb and update s2_ptr - bdz Lend # If done, skip loop - -Loop: lu 8,4(4) # load s1 limb and update s1_ptr - lu 0,4(5) # load s2 limb and update s2_ptr - ae 11,9,10 # add previous limbs with cy, set cy - stu 7,4(3) # - lu 9,4(4) # load s1 limb and update s1_ptr - lu 10,4(5) # load s2 limb and update s2_ptr - ae 7,0,8 # add previous limbs with cy, set cy - stu 11,4(3) # - bdn Loop # decrement CTR and loop back - -Lend: ae 11,9,10 # add limbs with cy, set cy - st 7,4(3) # - st 11,8(3) # - lil 3,0 # load cy into ... - aze 3,3 # ... return value register - br diff --git a/sysdeps/rs6000/addmul_1.s b/sysdeps/rs6000/addmul_1.s deleted file mode 100644 index 7cd743cede..0000000000 --- a/sysdeps/rs6000/addmul_1.s +++ /dev/null @@ -1,123 +0,0 @@ -# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add -# the result to a second limb vector. - -# Copyright (C) 1992, 1994 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# size r5 -# s2_limb r6 - -# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To -# obtain that operation, we have to use the 32x32->64 signed multiplication -# instruction, and add the appropriate compensation to the high limb of the -# result. We add the multiplicand if the multiplier has its most significant -# bit set, and we add the multiplier if the multiplicand has its most -# significant bit set. We need to preserve the carry flag between each -# iteration, so we have to compute the compensation carefully (the natural, -# srai+and doesn't work). Since the POWER architecture has a branch unit -# we can branch in zero cycles, so that's how we perform the additions. 
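The compensation described in the comment above can be checked against a small C model (a sketch of the identity only; the function name and the use of 64-bit arithmetic are illustrative, not glibc's):

#include <stdint.h>

/* Unsigned 32x32->64 product built from the signed multiply, the only
   widening multiply POWER provides.  Treating an operand with its top
   bit set as signed makes the product too small by 2^32 times the
   other operand, so the high limb is patched up exactly as the comment
   says: add the multiplicand if the multiplier has its most
   significant bit set, and add the multiplier if the multiplicand
   does.  */
static uint64_t
umul32x32 (uint32_t a, uint32_t b)
{
  int64_t s = (int64_t) (int32_t) a * (int32_t) b;
  uint32_t hi = (uint64_t) s >> 32;
  uint32_t lo = (uint32_t) s;

  if ((int32_t) b < 0)   /* multiplier's MSB set: add the multiplicand */
    hi += a;
  if ((int32_t) a < 0)   /* multiplicand's MSB set: add the multiplier */
    hi += b;

  return ((uint64_t) hi << 32) | lo;
}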
- - .toc - .csect .__mpn_addmul_1[PR] - .align 2 - .globl __mpn_addmul_1 - .globl .__mpn_addmul_1 - .csect __mpn_addmul_1[DS] -__mpn_addmul_1: - .long .__mpn_addmul_1[PR], TOC[tc0], 0 - .csect .__mpn_addmul_1[PR] -.__mpn_addmul_1: - - cal 3,-4(3) - l 0,0(4) - cmpi 0,6,0 - mtctr 5 - mul 9,0,6 - srai 7,0,31 - and 7,7,6 - mfmq 8 - cax 9,9,7 - l 7,4(3) - a 8,8,7 # add res_limb - blt Lneg -Lpos: bdz Lend - -Lploop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 0 - ae 8,0,9 # low limb + old_cy_limb + old cy - l 7,4(3) - aze 10,10 # propagate cy to new cy_limb - a 8,8,7 # add res_limb - bge Lp0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Lp0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 0 - ae 8,0,10 - l 7,4(3) - aze 9,9 - a 8,8,7 - bge Lp1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Lp1: bdn Lploop - - b Lend - -Lneg: cax 9,9,0 - bdz Lend -Lnloop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 7 - ae 8,7,9 - l 7,4(3) - ae 10,10,0 # propagate cy to new cy_limb - a 8,8,7 # add res_limb - bge Ln0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Ln0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 7 - ae 8,7,10 - l 7,4(3) - ae 9,9,0 # propagate cy to new cy_limb - a 8,8,7 # add res_limb - bge Ln1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Ln1: bdn Lnloop - b Lend - -Lend0: cal 9,0(10) -Lend: st 8,4(3) - aze 3,9 - br diff --git a/sysdeps/rs6000/ffs.c b/sysdeps/rs6000/ffs.c deleted file mode 100644 index 619412cb50..0000000000 --- a/sysdeps/rs6000/ffs.c +++ /dev/null @@ -1,42 +0,0 @@ -/* ffs -- find first set bit in a word, counted from least significant end. - For IBM rs6000. - Copyright (C) 1991, 1992, 1997, 2004, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include - -#undef ffs - -#ifdef __GNUC__ - -int -__ffs (x) - int x; -{ - int cnt; - - asm ("cntlz %0,%1" : "=r" (cnt) : "r" (x & -x)); - return 32 - cnt; -} -weak_alias (__ffs, ffs) -libc_hidden_builtin_def (ffs) - -#else -#include -#endif diff --git a/sysdeps/rs6000/lshift.s b/sysdeps/rs6000/lshift.s deleted file mode 100644 index 8ccba7407e..0000000000 --- a/sysdeps/rs6000/lshift.s +++ /dev/null @@ -1,59 +0,0 @@ -# IBM POWER __mpn_lshift -- - -# Copyright (C) 1992, 1994 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. 
- -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s_ptr r4 -# size r5 -# cnt r6 - - .toc - .extern __mpn_lshift[DS] - .extern .__mpn_lshift -.csect [PR] - .align 2 - .globl __mpn_lshift - .globl .__mpn_lshift - .csect __mpn_lshift[DS] -__mpn_lshift: - .long .__mpn_lshift, TOC[tc0], 0 - .csect [PR] -.__mpn_lshift: - sli 0,5,2 - cax 9,3,0 - cax 4,4,0 - sfi 8,6,32 - mtctr 5 # put limb count in CTR loop register - lu 0,-4(4) # read most significant limb - sre 3,0,8 # compute carry out limb, and init MQ register - bdz Lend2 # if just one limb, skip loop - lu 0,-4(4) # read 2:nd most significant limb - sreq 7,0,8 # compute most significant limb of result - bdz Lend # if just two limb, skip loop -Loop: lu 0,-4(4) # load next lower limb - stu 7,-4(9) # store previous result during read latency - sreq 7,0,8 # compute result limb - bdn Loop # loop back until CTR is zero -Lend: stu 7,-4(9) # store 2:nd least significant limb -Lend2: sle 7,0,6 # compute least significant limb - st 7,-4(9) # store it" \ - br diff --git a/sysdeps/rs6000/memcopy.h b/sysdeps/rs6000/memcopy.h deleted file mode 100644 index 8bdb6e9766..0000000000 --- a/sysdeps/rs6000/memcopy.h +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (C) 1991, 1997 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include - -#undef OP_T_THRES -#define OP_T_THRES 32 - -#undef BYTE_COPY_FWD -#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ - do \ - { \ - size_t __nbytes = nbytes; \ - asm volatile("mtspr 1,%2\n" \ - "lsx 6,0,%1\n" \ - "stsx 6,0,%0" : /* No outputs. */ : \ - "b" (dst_bp), "b" (src_bp), "r" (__nbytes) : \ - "6", "7", "8", "9", "10", "11", "12", "13"); \ - dst_bp += __nbytes; \ - src_bp += __nbytes; \ - } while (0) - -#undef BYTE_COPY_BWD -#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \ - do \ - { \ - size_t __nbytes = (nbytes); \ - dst_ep -= __nbytes; \ - src_ep -= __nbytes; \ - asm volatile("mtspr 1,%2\n" \ - "lsx 6,0,%1\n" \ - "stsx 6,0,%0" : /* No outputs. 
*/ : \ - "b" (dst_ep), "b" (src_ep), "r" (__nbytes) : \ - "6", "7", "8", "9", "10", "11", "12", "13"); \ - } while (0) - -#undef WORD_COPY_FWD -#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ - do \ - { \ - size_t __nblocks = (nbytes) / 32; \ - if (__nblocks != 0) \ - asm volatile("mtctr %4\n" \ - "lsi 6,%1,32\n" \ - "ai %1,%1,32\n" \ - "stsi 6,%0,32\n" \ - "ai %0,%0,32\n" \ - "bdn $-16" : \ - "=b" (dst_bp), "=b" (src_bp) : \ - "0" (dst_bp), "1" (src_bp), "r" (__nblocks) : \ - "6", "7", "8", "9", "10", "11", "12", "13"); \ - (nbytes_left) = (nbytes) % 32; \ - } while (0) - -#undef WORD_COPY_BWD -#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \ - do \ - { \ - size_t __nblocks = (nbytes) / 32; \ - if (__nblocks != 0) \ - asm volatile("mtctr %4\n" \ - "ai %1,%1,-32\n" \ - "lsi 6,%1,32\n" \ - "ai %0,%0,-32\n" \ - "stsi 6,%0,32\n" \ - "bdn $-16" : \ - "=b" (dst_ep), "=b" (src_ep) : \ - "0" (dst_ep), "1" (src_ep), "r" (__nblocks) : \ - "6", "7", "8", "9", "10", "11", "12", "13"); \ - (nbytes_left) = (nbytes) % 32; \ - } while (0) diff --git a/sysdeps/rs6000/mul_1.s b/sysdeps/rs6000/mul_1.s deleted file mode 100644 index c0feef4b72..0000000000 --- a/sysdeps/rs6000/mul_1.s +++ /dev/null @@ -1,110 +0,0 @@ -# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store -# the result in a second limb vector. - -# Copyright (C) 1992, 1994 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# size r5 -# s2_limb r6 - -# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To -# obtain that operation, we have to use the 32x32->64 signed multiplication -# instruction, and add the appropriate compensation to the high limb of the -# result. We add the multiplicand if the multiplier has its most significant -# bit set, and we add the multiplier if the multiplicand has its most -# significant bit set. We need to preserve the carry flag between each -# iteration, so we have to compute the compensation carefully (the natural, -# srai+and doesn't work). Since the POWER architecture has a branch unit -# we can branch in zero cycles, so that's how we perform the additions. 
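As with __mpn_addmul_1 above, a C statement of what __mpn_mul_1 computes helps when reading the interleaved assembly below (an illustrative sketch, not code from this patch): each source limb is multiplied by s2_limb, the low half of the product is stored, and the high half is carried into the next limb.

#include <stddef.h>
#include <stdint.h>

/* Returns the carry-out limb, i.e. the high limb of the last product
   plus any carry that propagated into it.  */
static uint32_t
ref_mul_1 (uint32_t *res, const uint32_t *s1, size_t size,
           uint32_t s2_limb)
{
  uint32_t cy_limb = 0;
  for (size_t i = 0; i < size; i++)
    {
      uint64_t prod = (uint64_t) s1[i] * s2_limb + cy_limb;
      res[i] = (uint32_t) prod;             /* low limb */
      cy_limb = (uint32_t) (prod >> 32);    /* carried into next limb */
    }
  return cy_limb;
}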
- - .toc - .csect .__mpn_mul_1[PR] - .align 2 - .globl __mpn_mul_1 - .globl .__mpn_mul_1 - .csect __mpn_mul_1[DS] -__mpn_mul_1: - .long .__mpn_mul_1[PR], TOC[tc0], 0 - .csect .__mpn_mul_1[PR] -.__mpn_mul_1: - - cal 3,-4(3) - l 0,0(4) - cmpi 0,6,0 - mtctr 5 - mul 9,0,6 - srai 7,0,31 - and 7,7,6 - mfmq 8 - ai 0,0,0 # reset carry - cax 9,9,7 - blt Lneg -Lpos: bdz Lend -Lploop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 0 - ae 8,0,9 - bge Lp0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Lp0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 0 - ae 8,0,10 - bge Lp1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Lp1: bdn Lploop - b Lend - -Lneg: cax 9,9,0 - bdz Lend -Lnloop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - cax 10,10,0 # adjust high limb for negative s2_limb - mfmq 0 - ae 8,0,9 - bge Ln0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Ln0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - cax 9,9,0 # adjust high limb for negative s2_limb - mfmq 0 - ae 8,0,10 - bge Ln1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Ln1: bdn Lnloop - b Lend - -Lend0: cal 9,0(10) -Lend: st 8,4(3) - aze 3,9 - br diff --git a/sysdeps/rs6000/rshift.s b/sysdeps/rs6000/rshift.s deleted file mode 100644 index 145218fabd..0000000000 --- a/sysdeps/rs6000/rshift.s +++ /dev/null @@ -1,57 +0,0 @@ -# IBM POWER __mpn_rshift -- - -# Copyright (C) 1992, 1994 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. 
- - -# INPUT PARAMETERS -# res_ptr r3 -# s_ptr r4 -# size r5 -# cnt r6 - - .toc - .extern __mpn_rshift[DS] - .extern .__mpn_rshift -.csect [PR] - .align 2 - .globl __mpn_rshift - .globl .__mpn_rshift - .csect __mpn_rshift[DS] -__mpn_rshift: - .long .__mpn_rshift, TOC[tc0], 0 - .csect [PR] -.__mpn_rshift: - sfi 8,6,32 - mtctr 5 # put limb count in CTR loop register - l 0,0(4) # read least significant limb - ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s - sle 3,0,8 # compute carry limb, and init MQ register - bdz Lend2 # if just one limb, skip loop - lu 0,4(4) # read 2:nd least significant limb - sleq 7,0,8 # compute least significant limb of result - bdz Lend # if just two limb, skip loop -Loop: lu 0,4(4) # load next higher limb - stu 7,4(9) # store previous result during read latency - sleq 7,0,8 # compute result limb - bdn Loop # loop back until CTR is zero -Lend: stu 7,4(9) # store 2:nd most significant limb -Lend2: sre 7,0,6 # compute most significant limb - st 7,4(9) # store it" \ - br diff --git a/sysdeps/rs6000/sub_n.s b/sysdeps/rs6000/sub_n.s deleted file mode 100644 index d931870935..0000000000 --- a/sysdeps/rs6000/sub_n.s +++ /dev/null @@ -1,82 +0,0 @@ -# IBM POWER __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. - -# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# s2_ptr r5 -# size r6 - - .toc - .extern __mpn_sub_n[DS] - .extern .__mpn_sub_n -.csect [PR] - .align 2 - .globl __mpn_sub_n - .globl .__mpn_sub_n - .csect __mpn_sub_n[DS] -__mpn_sub_n: - .long .__mpn_sub_n, TOC[tc0], 0 - .csect [PR] -.__mpn_sub_n: - andil. 10,6,1 # odd or even number of limbs? - l 8,0(4) # load least significant s1 limb - l 0,0(5) # load least significant s2 limb - cal 3,-4(3) # offset res_ptr, it's updated before it's used - sri 10,6,1 # count for unrolled loop - sf 7,0,8 # subtract least significant limbs, set cy - mtctr 10 # copy count into CTR - beq 0,Leven # branch if even # of limbs (# of limbs >= 2) - -# We have an odd # of limbs. Add the first limbs separately. - cmpi 1,10,0 # is count for unrolled loop zero? - bne 1,L1 # branch if not - st 7,4(3) - sfe 3,0,0 # load !cy into ... - sfi 3,3,0 # ... return value register - br # return - -# We added least significant limbs. Now reload the next limbs to enter loop. 
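One subtlety in the deleted routine: POWER's subtract-from instructions record carry = NOT borrow, which is why the return paths above and below go through an sfe/sfi pair to invert the flag before handing the borrow back. The semantics in C are simply (illustrative sketch, not code from this patch):

#include <stddef.h>
#include <stdint.h>

/* res[i] = s1[i] - s2[i] - borrow, for i = 0 .. size-1; returns the
   borrow out of the most significant limb.  */
static uint32_t
ref_sub_n (uint32_t *res, const uint32_t *s1, const uint32_t *s2,
           size_t size)
{
  uint32_t brw = 0;
  for (size_t i = 0; i < size; i++)
    {
      uint32_t a = s1[i], b = s2[i];
      res[i] = a - b - brw;
      /* Borrow whenever the true difference would be negative.  */
      brw = brw ? (a <= b) : (a < b);
    }
  return brw;
}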
-L1: lu 8,4(4) # load s1 limb and update s1_ptr - lu 0,4(5) # load s2 limb and update s2_ptr - stu 7,4(3) - sfe 7,0,8 # subtract limbs, set cy -Leven: lu 9,4(4) # load s1 limb and update s1_ptr - lu 10,4(5) # load s2 limb and update s2_ptr - bdz Lend # If done, skip loop - -Loop: lu 8,4(4) # load s1 limb and update s1_ptr - lu 0,4(5) # load s2 limb and update s2_ptr - sfe 11,10,9 # subtract previous limbs with cy, set cy - stu 7,4(3) # - lu 9,4(4) # load s1 limb and update s1_ptr - lu 10,4(5) # load s2 limb and update s2_ptr - sfe 7,0,8 # subtract previous limbs with cy, set cy - stu 11,4(3) # - bdn Loop # decrement CTR and loop back - -Lend: sfe 11,10,9 # subtract limbs with cy, set cy - st 7,4(3) # - st 11,8(3) # - sfe 3,0,0 # load !cy into ... - sfi 3,3,0 # ... return value register - br diff --git a/sysdeps/rs6000/submul_1.s b/sysdeps/rs6000/submul_1.s deleted file mode 100644 index 41095ab001..0000000000 --- a/sysdeps/rs6000/submul_1.s +++ /dev/null @@ -1,128 +0,0 @@ -# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract -# the result from a second limb vector. - -# Copyright (C) 1992, 1994 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# size r5 -# s2_limb r6 - -# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To -# obtain that operation, we have to use the 32x32->64 signed multiplication -# instruction, and add the appropriate compensation to the high limb of the -# result. We add the multiplicand if the multiplier has its most significant -# bit set, and we add the multiplier if the multiplicand has its most -# significant bit set. We need to preserve the carry flag between each -# iteration, so we have to compute the compensation carefully (the natural, -# srai+and doesn't work). Since the POWER architecture has a branch unit -# we can branch in zero cycles, so that's how we perform the additions. 
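__mpn_submul_1 combines the two previous ideas: it multiplies like __mpn_mul_1 and subtracts the products from the destination, folding the subtraction borrow into the carry limb. A reference sketch in C (illustrative only, not code from this patch):

#include <stddef.h>
#include <stdint.h>

/* res[i] -= s1[i] * s2_limb, propagating carries and borrows upward;
   returns the final carry limb, to be subtracted from the next higher
   limb of the destination.  */
static uint32_t
ref_submul_1 (uint32_t *res, const uint32_t *s1, size_t size,
              uint32_t s2_limb)
{
  uint32_t cy_limb = 0;
  for (size_t i = 0; i < size; i++)
    {
      uint64_t prod = (uint64_t) s1[i] * s2_limb + cy_limb;
      uint32_t lo = (uint32_t) prod;
      uint32_t r = res[i];

      res[i] = r - lo;
      /* The borrow from the subtraction joins the product's high limb;
         this cannot overflow 32 bits.  */
      cy_limb = (uint32_t) (prod >> 32) + (r < lo);
    }
  return cy_limb;
}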
- - .toc - .csect .__mpn_submul_1[PR] - .align 2 - .globl __mpn_submul_1 - .globl .__mpn_submul_1 - .csect __mpn_submul_1[DS] -__mpn_submul_1: - .long .__mpn_submul_1[PR], TOC[tc0], 0 - .csect .__mpn_submul_1[PR] -.__mpn_submul_1: - - cal 3,-4(3) - l 0,0(4) - cmpi 0,6,0 - mtctr 5 - mul 9,0,6 - srai 7,0,31 - and 7,7,6 - mfmq 11 - cax 9,9,7 - l 7,4(3) - sf 8,11,7 # add res_limb - a 11,8,11 # invert cy (r11 is junk) - blt Lneg -Lpos: bdz Lend - -Lploop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 0 - ae 11,0,9 # low limb + old_cy_limb + old cy - l 7,4(3) - aze 10,10 # propagate cy to new cy_limb - sf 8,11,7 # add res_limb - a 11,8,11 # invert cy (r11 is junk) - bge Lp0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Lp0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 0 - ae 11,0,10 - l 7,4(3) - aze 9,9 - sf 8,11,7 - a 11,8,11 # invert cy (r11 is junk) - bge Lp1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Lp1: bdn Lploop - - b Lend - -Lneg: cax 9,9,0 - bdz Lend -Lnloop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 7 - ae 11,7,9 - l 7,4(3) - ae 10,10,0 # propagate cy to new cy_limb - sf 8,11,7 # add res_limb - a 11,8,11 # invert cy (r11 is junk) - bge Ln0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Ln0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 7 - ae 11,7,10 - l 7,4(3) - ae 9,9,0 # propagate cy to new cy_limb - sf 8,11,7 # add res_limb - a 11,8,11 # invert cy (r11 is junk) - bge Ln1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Ln1: bdn Lnloop - b Lend - -Lend0: cal 9,0(10) -Lend: st 8,4(3) - aze 3,9 - br diff --git a/sysdeps/sparc/fpu/fraiseexcpt.c b/sysdeps/sparc/fpu/fraiseexcpt.c index 0d45ec82d2..cbb8be80ec 100644 --- a/sysdeps/sparc/fpu/fraiseexcpt.c +++ b/sysdeps/sparc/fpu/fraiseexcpt.c @@ -25,12 +25,12 @@ int __feraiseexcept (int excepts) { - static volatile double sink; static const struct { double zero, one, max, min, sixteen, pi; } c = { 0.0, 1.0, DBL_MAX, DBL_MIN, 16.0, M_PI }; + double d; /* Raise exceptions represented by EXPECTS. But we must raise only one signal at a time. It is important the if the overflow/underflow @@ -39,24 +39,44 @@ __feraiseexcept (int excepts) /* First: invalid exception. */ if ((FE_INVALID & excepts) != 0) - /* One example of a invalid operation is 0/0. */ - sink = c.zero / c.zero; + { + /* One example of a invalid operation is 0/0. */ + __asm ("" : "=e" (d) : "0" (c.zero)); + d /= c.zero; + __asm __volatile ("" : : "e" (d)); + } /* Next: division by zero. */ if ((FE_DIVBYZERO & excepts) != 0) - sink = c.one / c.zero; + { + __asm ("" : "=e" (d) : "0" (c.one)); + d /= c.zero; + __asm __volatile ("" : : "e" (d)); + } /* Next: overflow. */ if ((FE_OVERFLOW & excepts) != 0) - sink = c.max * c.max; + { + __asm ("" : "=e" (d) : "0" (c.max)); + d *= d; + __asm __volatile ("" : : "e" (d)); + } /* Next: underflow. */ if ((FE_UNDERFLOW & excepts) != 0) - sink = c.min / c.sixteen; + { + __asm ("" : "=e" (d) : "0" (c.min)); + d /= c.sixteen; + __asm __volatile ("" : : "e" (d)); + } /* Last: inexact. */ if ((FE_INEXACT & excepts) != 0) - sink = c.one / c.pi; + { + __asm ("" : "=e" (d) : "0" (c.one)); + d /= c.pi; + __asm __volatile ("" : : "e" (d)); + } /* Success. 
*/
   return 0;
diff --git a/sysdeps/sparc/sparc32/fpu/libm-test-ulps b/sysdeps/sparc/sparc32/fpu/libm-test-ulps
index 40d563971a..ccf53788a6 100644
--- a/sysdeps/sparc/sparc32/fpu/libm-test-ulps
+++ b/sysdeps/sparc/sparc32/fpu/libm-test-ulps
@@ -465,6 +465,11 @@ ifloat: 2
 ildouble: 1
 ldouble: 1

+# exp2
+Test "exp2 (10) == 1024":
+ildouble: 2
+ldouble: 2
+
 # expm1
 Test "expm1 (0.75) == 1.11700001661267466854536981983709561":
 double: 1
@@ -1192,6 +1197,10 @@ ifloat: 2
 ildouble: 1
 ldouble: 1

+Function: "exp2":
+ildouble: 2
+ldouble: 2
+
 Function: "expm1":
 double: 1
 float: 1
diff --git a/sysdeps/sparc/sparc32/sparcv9v/memcpy.S b/sysdeps/sparc/sparc32/sparcv9v/memcpy.S
new file mode 100644
index 0000000000..4c05f57bc2
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9v/memcpy.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/sparcv9v/memcpy.S>
diff --git a/sysdeps/sparc/sparc32/sparcv9v/memset.S b/sysdeps/sparc/sparc32/sparcv9v/memset.S
new file mode 100644
index 0000000000..5e46c7489f
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9v/memset.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/sparcv9v/memset.S>
diff --git a/sysdeps/sparc/sparc64/fpu/libm-test-ulps b/sysdeps/sparc/sparc64/fpu/libm-test-ulps
index 5719a7ca54..db5543e9eb 100644
--- a/sysdeps/sparc/sparc64/fpu/libm-test-ulps
+++ b/sysdeps/sparc/sparc64/fpu/libm-test-ulps
@@ -465,6 +465,11 @@ ifloat: 2
 ildouble: 1
 ldouble: 1

+# exp2
+Test "exp2 (10) == 1024":
+ildouble: 2
+ldouble: 2
+
 # expm1
 Test "expm1 (0.75) == 1.11700001661267466854536981983709561":
 double: 1
@@ -1192,6 +1197,10 @@ ifloat: 2
 ildouble: 1
 ldouble: 1

+Function: "exp2":
+ildouble: 2
+ldouble: 2
+
 Function: "expm1":
 double: 1
 float: 1
diff --git a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S
new file mode 100644
index 0000000000..05c837fa25
--- /dev/null
+++ b/sysdeps/sparc/sparc64/sparcv9v/memcpy.S
@@ -0,0 +1,593 @@
+/* Copy SIZE bytes from SRC to DEST.  For SUN4V Niagara.
+   Copyright (C) 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller (davem@davemloft.net)
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.
*/
+
+#include <sysdep.h>
+
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define ASI_P 0x80
+#define ASI_PNF 0x82
+
+#define LOAD(type,addr,dest) type##a [addr] ASI_P, dest
+#define LOAD_TWIN(addr_reg,dest0,dest1) \
+ ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
+
+#define STORE(type,src,addr) type src, [addr]
+#define STORE_INIT(src,addr) stxa src, [addr] %asi
+
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+ .register %g6,#scratch
+
+ .text
+ .align 32
+
+ENTRY(bcopy)
+ sub %o1, %o0, %o4
+ mov %o0, %g4
+ cmp %o4, %o2
+ mov %o1, %o0
+ bgeu,pt %XCC, 100f
+ mov %g4, %o1
+#ifndef USE_BPR
+ srl %o2, 0, %o2
+#endif
+ brnz,pn %o2, 220f
+ add %o0, %o2, %o0
+ retl
+ nop
+END(bcopy)
+
+ .align 32
+ENTRY(memcpy)
+100: /* %o0=dst, %o1=src, %o2=len */
+ mov %o0, %g5
+ cmp %o2, 0
+ be,pn %XCC, 85f
+218: or %o0, %o1, %o3
+ cmp %o2, 16
+ blu,a,pn %XCC, 80f
+ or %o3, %o2, %o3
+
+ /* 2 blocks (128 bytes) is the minimum we can do the block
+  * copy with.  We need to ensure that we'll iterate at least
+  * once in the block copy loop.  At worst we'll need to align
+  * the destination to a 64-byte boundary which can chew up
+  * to (64 - 1) bytes from the length before we perform the
+  * block copy loop.
+  */
+ cmp %o2, (2 * 64)
+ blu,pt %XCC, 70f
+ andcc %o3, 0x7, %g0
+
+ /* %o0: dst
+  * %o1: src
+  * %o2: len  (known to be >= 128)
+  *
+  * The block copy loops will use %o4/%o5,%g2/%g3 as
+  * temporaries while copying the data.
+  */
+
+ LOAD(prefetch, %o1, #one_read)
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+
+ /* Align destination on 64-byte boundary.  */
+ andcc %o0, (64 - 1), %o4
+ be,pt %XCC, 2f
+ sub %o4, 64, %o4
+ sub %g0, %o4, %o4 ! bytes to align dst
+ sub %o2, %o4, %o2
+1: subcc %o4, 1, %o4
+ LOAD(ldub, %o1, %g1)
+ STORE(stb, %g1, %o0)
+ add %o1, 1, %o1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+
+ /* If the source is on a 16-byte boundary we can do
+  * the direct block copy loop.  If it is 8-byte aligned
+  * we can do the 16-byte loads offset by -8 bytes and the
+  * init stores offset by one register.
+  *
+  * If the source is not even 8-byte aligned, we need to do
+  * shifting and masking (basically integer faligndata).
+  *
+  * The careful bit with init stores is that if we store
+  * to any part of the cache line we have to store the whole
+  * cacheline else we can end up with corrupt L2 cache line
+  * contents.  Since the loop works on 64-bytes of 64-byte
+  * aligned store data at a time, this is easy to ensure.
+  */
+2:
+ andcc %o1, (16 - 1), %o4
+ andn %o2, (64 - 1), %g1 ! block copy loop iterator
+ sub %o2, %g1, %o2 ! final sub-block copy bytes
+ be,pt %XCC, 50f
+ cmp %o4, 8
+ be,a,pt %XCC, 10f
+ sub %o1, 0x8, %o1
+
+ /* Neither 8-byte nor 16-byte aligned, shift and mask.
*/ + mov %g1, %o4 + and %o1, 0x7, %g1 + sll %g1, 3, %g1 + mov 64, %o3 + andn %o1, 0x7, %o1 + LOAD(ldx, %o1, %g2) + sub %o3, %g1, %o3 + sllx %g2, %g1, %g2 + +#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\ + LOAD(ldx, SRC, TMP1); \ + srlx TMP1, PRE_SHIFT, TMP2; \ + or TMP2, PRE_VAL, TMP2; \ + STORE_INIT(TMP2, DST); \ + sllx TMP1, POST_SHIFT, PRE_VAL; + +1: add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18) + add %o1, 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32 - 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38) + subcc %o4, 64, %o4 + bne,pt %XCC, 1b + add %o0, 64, %o0 + +#undef SWIVEL_ONE_DWORD + + srl %g1, 3, %g1 + ba,pt %XCC, 60f + add %o1, %g1, %o1 + +10: /* Destination is 64-byte aligned, source was only 8-byte + * aligned but it has been subtracted by 8 and we perform + * one twin load ahead, then add 8 back into source when + * we finish the loop. + */ + LOAD_TWIN(%o1, %o4, %o5) +1: add %o1, 16, %o1 + LOAD_TWIN(%o1, %g2, %g3) + add %o1, 16 + 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32, %o1 + STORE_INIT(%o5, %o0 + 0x00) ! initializes cache line + STORE_INIT(%g2, %o0 + 0x08) + LOAD_TWIN(%o1, %o4, %o5) + add %o1, 16, %o1 + STORE_INIT(%g3, %o0 + 0x10) + STORE_INIT(%o4, %o0 + 0x18) + LOAD_TWIN(%o1, %g2, %g3) + add %o1, 16, %o1 + STORE_INIT(%o5, %o0 + 0x20) + STORE_INIT(%g2, %o0 + 0x28) + LOAD_TWIN(%o1, %o4, %o5) + STORE_INIT(%g3, %o0 + 0x30) + STORE_INIT(%o4, %o0 + 0x38) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 + + ba,pt %XCC, 60f + add %o1, 0x8, %o1 + +50: /* Destination is 64-byte aligned, and source is 16-byte + * aligned. + */ +1: LOAD_TWIN(%o1, %o4, %o5) + add %o1, 16, %o1 + LOAD_TWIN(%o1, %g2, %g3) + add %o1, 16 + 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32, %o1 + STORE_INIT(%o4, %o0 + 0x00) ! initializes cache line + STORE_INIT(%o5, %o0 + 0x08) + LOAD_TWIN(%o1, %o4, %o5) + add %o1, 16, %o1 + STORE_INIT(%g2, %o0 + 0x10) + STORE_INIT(%g3, %o0 + 0x18) + LOAD_TWIN(%o1, %g2, %g3) + add %o1, 16, %o1 + STORE_INIT(%o4, %o0 + 0x20) + STORE_INIT(%o5, %o0 + 0x28) + STORE_INIT(%g2, %o0 + 0x30) + STORE_INIT(%g3, %o0 + 0x38) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 + /* fall through */ + +60: + /* %o2 contains any final bytes still needed to be copied + * over. If anything is left, we copy it one byte at a time. 
+ */ + wr %g0, ASI_PNF, %asi + brz,pt %o2, 85f + sub %o0, %o1, %o3 + ba,a,pt %XCC, 90f + + .align 64 +70: /* 16 < len <= 64 */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: subcc %o4, 0x10, %o4 + LOAD(ldx, %o1, %o5) + add %o1, 0x08, %o1 + LOAD(ldx, %o1, %g1) + sub %o1, 0x08, %o1 + STORE(stx, %o5, %o1 + %o3) + add %o1, 0x8, %o1 + STORE(stx, %g1, %o1 + %o3) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 + LOAD(ldx, %o1, %o5) + STORE(stx, %o5, %o1 + %o3) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + LOAD(lduw, %o1, %o5) + STORE(stw, %o5, %o1 + %o3) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %XCC, 90f + nop + +75: + andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: subcc %g1, 1, %g1 + LOAD(ldub, %o1, %o5) + STORE(stb, %o5, %o1 + %o3) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 + + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %XCC, 73b + +8: mov 64, %o3 + andn %o1, 0x7, %o1 + LOAD(ldx, %o1, %g2) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: add %o1, 0x8, %o1 + LOAD(ldx, %o1, %g3) + subcc %o4, 0x8, %o4 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 + STORE(stx, %o5, %o0) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 + + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %XCC, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + LOAD(lduw, %o1, %g1) + STORE(stw, %g1, %o1 + %o3) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov %g5, %o0 + + .align 32 +90: + subcc %o2, 1, %o2 + LOAD(ldub, %o1, %g1) + STORE(stb, %g1, %o1 + %o3) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov %g5, %o0 + +END(memcpy) + +#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ + ldx [%src - offset - 0x20], %t0; \ + ldx [%src - offset - 0x18], %t1; \ + ldx [%src - offset - 0x10], %t2; \ + ldx [%src - offset - 0x08], %t3; \ + stw %t0, [%dst - offset - 0x1c]; \ + srlx %t0, 32, %t0; \ + stw %t0, [%dst - offset - 0x20]; \ + stw %t1, [%dst - offset - 0x14]; \ + srlx %t1, 32, %t1; \ + stw %t1, [%dst - offset - 0x18]; \ + stw %t2, [%dst - offset - 0x0c]; \ + srlx %t2, 32, %t2; \ + stw %t2, [%dst - offset - 0x10]; \ + stw %t3, [%dst - offset - 0x04]; \ + srlx %t3, 32, %t3; \ + stw %t3, [%dst - offset - 0x08]; + +#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ + ldx [%src - offset - 0x20], %t0; \ + ldx [%src - offset - 0x18], %t1; \ + ldx [%src - offset - 0x10], %t2; \ + ldx [%src - offset - 0x08], %t3; \ + stx %t0, [%dst - offset - 0x20]; \ + stx %t1, [%dst - offset - 0x18]; \ + stx %t2, [%dst - offset - 0x10]; \ + stx %t3, [%dst - offset - 0x08]; \ + ldx [%src - offset - 0x40], %t0; \ + ldx [%src - offset - 0x38], %t1; \ + ldx [%src - offset - 0x30], %t2; \ + ldx [%src - offset - 0x28], %t3; \ + stx %t0, [%dst - offset - 0x40]; \ + stx %t1, [%dst - offset - 0x38]; \ + stx %t2, [%dst - offset - 0x30]; \ + stx %t3, [%dst - offset - 0x28]; + +#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ + ldx [%src + offset + 0x00], %t0; \ + ldx [%src + offset + 0x08], %t1; \ + stw %t0, [%dst + offset + 0x04]; \ + srlx %t0, 32, %t2; \ + stw %t2, [%dst + offset + 0x00]; \ + stw %t1, [%dst + offset + 0x0c]; \ + srlx %t1, 32, %t3; \ + stw %t3, [%dst + offset + 
0x08]; + +#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ + ldx [%src + offset + 0x00], %t0; \ + ldx [%src + offset + 0x08], %t1; \ + stx %t0, [%dst + offset + 0x00]; \ + stx %t1, [%dst + offset + 0x08]; + + .align 32 +228: andcc %o2, 1, %g0 + be,pt %icc, 2f+4 +1: ldub [%o1 - 1], %o5 + sub %o1, 1, %o1 + sub %o0, 1, %o0 + subcc %o2, 1, %o2 + be,pn %xcc, 229f + stb %o5, [%o0] +2: ldub [%o1 - 1], %o5 + sub %o0, 2, %o0 + ldub [%o1 - 2], %g5 + sub %o1, 2, %o1 + subcc %o2, 2, %o2 + stb %o5, [%o0 + 1] + bne,pt %xcc, 2b + stb %g5, [%o0] +229: retl + mov %g4, %o0 +out: retl + mov %g5, %o0 + + .align 32 +ENTRY(memmove) + mov %o0, %g5 +#ifndef USE_BPR + srl %o2, 0, %o2 +#endif + brz,pn %o2, out + sub %o0, %o1, %o4 + cmp %o4, %o2 + bgeu,pt %XCC, 218b + mov %o0, %g4 + add %o0, %o2, %o0 +220: add %o1, %o2, %o1 + cmp %o2, 15 + bleu,pn %xcc, 228b + andcc %o0, 7, %g2 + sub %o0, %o1, %g5 + andcc %g5, 3, %o5 + bne,pn %xcc, 232f + andcc %o1, 3, %g0 + be,a,pt %xcc, 236f + andcc %o1, 4, %g0 + andcc %o1, 1, %g0 + be,pn %xcc, 4f + andcc %o1, 2, %g0 + ldub [%o1 - 1], %g2 + sub %o1, 1, %o1 + sub %o0, 1, %o0 + sub %o2, 1, %o2 + be,pn %xcc, 5f + stb %g2, [%o0] +4: lduh [%o1 - 2], %g2 + sub %o1, 2, %o1 + sub %o0, 2, %o0 + sub %o2, 2, %o2 + sth %g2, [%o0] +5: andcc %o1, 4, %g0 +236: be,a,pn %xcc, 2f + andcc %o2, -128, %g6 + lduw [%o1 - 4], %g5 + sub %o1, 4, %o1 + sub %o0, 4, %o0 + sub %o2, 4, %o2 + stw %g5, [%o0] + andcc %o2, -128, %g6 +2: be,pn %xcc, 235f + andcc %o0, 4, %g0 + be,pn %xcc, 282f + 4 +5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) + RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) + RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) + RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) + subcc %g6, 128, %g6 + sub %o1, 128, %o1 + bne,pt %xcc, 5b + sub %o0, 128, %o0 +235: andcc %o2, 0x70, %g6 +41: be,pn %xcc, 280f + andcc %o2, 8, %g0 + +279: rd %pc, %o5 + sll %g6, 1, %g5 + sub %o1, %g6, %o1 + sub %o5, %g5, %o5 + jmpl %o5 + %lo(280f - 279b), %g0 + sub %o0, %g6, %o0 + RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) + RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) + RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) + RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) + RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) + RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) + RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) +280: be,pt %xcc, 281f + andcc %o2, 4, %g0 + ldx [%o1 - 8], %g2 + sub %o0, 8, %o0 + stw %g2, [%o0 + 4] + sub %o1, 8, %o1 + srlx %g2, 32, %g2 + stw %g2, [%o0] +281: be,pt %xcc, 1f + andcc %o2, 2, %g0 + lduw [%o1 - 4], %g2 + sub %o1, 4, %o1 + stw %g2, [%o0 - 4] + sub %o0, 4, %o0 +1: be,pt %xcc, 1f + andcc %o2, 1, %g0 + lduh [%o1 - 2], %g2 + sub %o1, 2, %o1 + sth %g2, [%o0 - 2] + sub %o0, 2, %o0 +1: be,pt %xcc, 211f + nop + ldub [%o1 - 1], %g2 + stb %g2, [%o0 - 1] +211: retl + mov %g4, %o0 + +282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) + RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) + subcc %g6, 128, %g6 + sub %o1, 128, %o1 + bne,pt %xcc, 282b + sub %o0, 128, %o0 + andcc %o2, 0x70, %g6 + be,pn %xcc, 284f + andcc %o2, 8, %g0 + +283: rd %pc, %o5 + sub %o1, %g6, %o1 + sub %o5, %g6, %o5 + jmpl %o5 + %lo(284f - 283b), %g0 + sub %o0, %g6, %o0 + RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) + RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) + RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) + RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) + RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) + RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) + RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) +284: be,pt %xcc, 285f + andcc %o2, 4, %g0 + ldx [%o1 - 8], %g2 + 
sub %o0, 8, %o0 + sub %o1, 8, %o1 + stx %g2, [%o0] +285: be,pt %xcc, 1f + andcc %o2, 2, %g0 + lduw [%o1 - 4], %g2 + sub %o0, 4, %o0 + sub %o1, 4, %o1 + stw %g2, [%o0] +1: be,pt %xcc, 1f + andcc %o2, 1, %g0 + lduh [%o1 - 2], %g2 + sub %o0, 2, %o0 + sub %o1, 2, %o1 + sth %g2, [%o0] +1: be,pt %xcc, 1f + nop + ldub [%o1 - 1], %g2 + stb %g2, [%o0 - 1] +1: retl + mov %g4, %o0 + +232: ldub [%o1 - 1], %g5 + sub %o1, 1, %o1 + sub %o0, 1, %o0 + subcc %o2, 1, %o2 + bne,pt %xcc, 232b + stb %g5, [%o0] +234: retl + mov %g4, %o0 +END(memmove) + +#ifdef USE_BPR +weak_alias (memcpy, __align_cpy_1) +weak_alias (memcpy, __align_cpy_2) +weak_alias (memcpy, __align_cpy_4) +weak_alias (memcpy, __align_cpy_8) +weak_alias (memcpy, __align_cpy_16) +#endif +libc_hidden_builtin_def (memcpy) +libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v/memset.S b/sysdeps/sparc/sparc64/sparcv9v/memset.S new file mode 100644 index 0000000000..7a51ef77dc --- /dev/null +++ b/sysdeps/sparc/sparc64/sparcv9v/memset.S @@ -0,0 +1,127 @@ +/* Set a block of memory to some byte value. For SUN4V Niagara. + Copyright (C) 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/
+
+#include <sysdep.h>
+
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define ASI_P 0x80
+#define ASI_PNF 0x82
+
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
+ .register %g2,#scratch
+
+ .text
+ .align 32
+
+ENTRY(memset)
+ /* %o0=buf, %o1=pat, %o2=len */
+ and %o1, 0xff, %o3
+ mov %o2, %o1
+ sllx %o3, 8, %g1
+ or %g1, %o3, %o2
+ sllx %o2, 16, %g1
+ or %g1, %o2, %o2
+ sllx %o2, 32, %g1
+ ba,pt %XCC, 1f
+ or %g1, %o2, %o2
+
+ENTRY(__bzero)
+ clr %o2
+1: brz,pn %o1, 90f
+ mov %o0, %o3
+
+ wr %g0, ASI_P, %asi
+
+ cmp %o1, 15
+ bl,pn %icc, 70f
+ andcc %o0, 0x7, %g1
+ be,pt %XCC, 2f
+ mov 8, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: stba %o2, [%o0 + 0x00] %asi
+ subcc %g1, 1, %g1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+2: cmp %o1, 128
+ bl,pn %icc, 60f
+ andcc %o0, (64 - 1), %g1
+ be,pt %XCC, 40f
+ mov 64, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: stxa %o2, [%o0 + 0x00] %asi
+ subcc %g1, 8, %g1
+ bne,pt %XCC, 1b
+ add %o0, 8, %o0
+
+40:
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+ andn %o1, (64 - 1), %g1
+ sub %o1, %g1, %o1
+50:
+ stxa %o2, [%o0 + 0x00] %asi
+ stxa %o2, [%o0 + 0x08] %asi
+ stxa %o2, [%o0 + 0x10] %asi
+ stxa %o2, [%o0 + 0x18] %asi
+ stxa %o2, [%o0 + 0x20] %asi
+ stxa %o2, [%o0 + 0x28] %asi
+ stxa %o2, [%o0 + 0x30] %asi
+ stxa %o2, [%o0 + 0x38] %asi
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 50b
+ add %o0, 64, %o0
+
+ wr %g0, ASI_P, %asi
+ brz,pn %o1, 80f
+60:
+ andncc %o1, 0x7, %g1
+ be,pn %XCC, 2f
+ sub %o1, %g1, %o1
+1: stxa %o2, [%o0 + 0x00] %asi
+ subcc %g1, 8, %g1
+ bne,pt %XCC, 1b
+ add %o0, 8, %o0
+2: brz,pt %o1, 80f
+ nop
+
+70:
+1: stba %o2, [%o0 + 0x00] %asi
+ subcc %o1, 1, %o1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+
+ /* fallthrough */
+
+80:
+ wr %g0, ASI_PNF, %asi
+
+90:
+ retl
+ mov %o3, %o0
+END(__bzero)
+END(memset)
+
+libc_hidden_builtin_def (memset)
+weak_alias (__bzero, bzero)
diff --git a/sysdeps/unix/sysv/linux/i386/fxstatat.c b/sysdeps/unix/sysv/linux/i386/fxstatat.c
index b077435553..94f6e81186 100644
--- a/sysdeps/unix/sysv/linux/i386/fxstatat.c
+++ b/sysdeps/unix/sysv/linux/i386/fxstatat.c
@@ -172,5 +172,5 @@ libc_hidden_def (__fxstatat)
 #ifdef XSTAT_IS_XSTAT64
 # undef __fxstatat64
 strong_alias (__fxstatat, __fxstatat64);
-libc_hidden_def (__fxstatat64)
+libc_hidden_ver (__fxstatat, __fxstatat64)
 #endif
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h b/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
index fc80c9ff86..9ddec8e041 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
@@ -268,7 +268,7 @@ register unsigned long gpr6 asm ("6") = (unsigned long)(arg5);

 #define DECLARGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
 	DECLARGS_5(arg1, arg2, arg3, arg4, arg5) \
-	register unsigned long gpr6 asm ("7") = (unsigned long)(arg6);
+	register unsigned long gpr7 asm ("7") = (unsigned long)(arg6);

 #define ASMFMT_0
 #define ASMFMT_1 , "0" (gpr2)
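A note on the final s390-64 hunk: DECLARGS_6 expands after DECLARGS_5, which has already declared gpr6 bound to hardware register 6, so the sixth-argument variable had to be renamed gpr7 to match its asm("7") binding rather than redeclaring gpr6. The pattern in play is GCC's explicit register variables; here is a simplified, self-contained sketch (a made-up two-argument helper, not the glibc macros; it assumes s390x Linux, where "svc 0" takes the syscall number in %r1 and arguments starting at %r2):

static inline long
my_syscall2 (long nr, long arg1, long arg2)
{
  register unsigned long gpr1 __asm__ ("1") = nr;    /* syscall number */
  register unsigned long gpr2 __asm__ ("2") = arg1;  /* first argument */
  register unsigned long gpr3 __asm__ ("3") = arg2;  /* second argument */

  /* Each local is pinned to the register the kernel ABI expects; the
     variable names are arbitrary but must be distinct, which is exactly
     what the gpr6 -> gpr7 rename in the hunk above restores.  */
  __asm__ __volatile__ ("svc 0"
                        : "+d" (gpr2)
                        : "d" (gpr1), "d" (gpr3)
                        : "memory");
  return (long) gpr2;   /* result comes back in %r2 */
}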