diff options
author | Jakub Jelinek <jakub@redhat.com> | 2008-04-10 19:21:13 +0000 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2008-04-10 19:21:13 +0000 |
commit | b0c50524f1fb93adbd52e9950f92650e020b0d59 (patch) | |
tree | fd9af5238c7ebb9bb27ffafb3e47be9ed5ca2680 /sysdeps/powerpc/powerpc32 | |
parent | 3a2e541ba348de2bbab7b65328694403e61e5dff (diff) |
Updated to fedora-glibc-20080410T1907
Diffstat (limited to 'sysdeps/powerpc/powerpc32')
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_isnan.S | 56 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_isnan.c | 7 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S | 119 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c | 62 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S | 111 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c | 60 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power4/hp-timing.c | 25 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power4/hp-timing.h | 152 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S | 62 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S | 46 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S | 117 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S | 109 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S | 62 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S | 46 |
14 files changed, 905 insertions, 129 deletions
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_isnan.S b/sysdeps/powerpc/powerpc32/fpu/s_isnan.S new file mode 100644 index 0000000000..6e3f396de9 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/fpu/s_isnan.S @@ -0,0 +1,56 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power4 +EALIGN (__isnan, 4, 0) + mffs fp0 + mtfsb0 4*cr6+lt /* reset_fpscr_bit (FPSCR_VE) */ + fcmpu cr7,fp1,fp1 + mtfsf 255,fp0 + li r3,0 + beqlr+ cr7 /* (x == x) then not a NAN */ + li r3,1 /* else must be a NAN */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. 
*/ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#ifndef IS_IN_libm +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/sysdeps/powerpc/powerpc32/fpu/s_isnan.c b/sysdeps/powerpc/powerpc32/fpu/s_isnan.c deleted file mode 100644 index 397717ba9c..0000000000 --- a/sysdeps/powerpc/powerpc32/fpu/s_isnan.c +++ /dev/null @@ -1,7 +0,0 @@ -#include <sysdeps/powerpc/fpu/s_isnan.c> -#ifndef IS_IN_libm -# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) -compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); -compat_symbol (libc, isnan, isnanl, GLIBC_2_0); -# endif -#endif diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S new file mode 100644 index 0000000000..6aef4e301b --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S @@ -0,0 +1,119 @@ +/* sqrt function. PowerPC32 version. + Copyright (C) 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* double [fp1] sqrt (double x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). + The fsqrt instruction generates the correct value for all inputs and + sets the appropriate floating point exceptions. Extended checking is + only needed to set errno (via __kernel_standard) if the input value + is negative. + + The fsqrt will set FPCC and FU (Floating Point Unordered or NaN) + to indicate that the input value was negative or NaN. Use Move to + Condition Register from FPSCR to copy the FPCC field to cr1. The + branch on summary overflow transfers control to w_sqrt to process + any error conditions. Otherwise we can return the result directly. + + This part of the function is a leaf routine, so no need to stack a + frame or execute prologue/epilogue code. This means it is safe to + transfer directly to w_sqrt as long as the input value (f1) is + preserved. Putting the sqrt result into f2 (double parameter 2) + allows passing both the input value and sqrt result into the extended + wrapper so there is no need to recompute. + + This tactic avoids the overhead of stacking a frame for the normal + (non-error) case. Until gcc supports prologue shrink-wrapping + this is the best we can do. 
*/ + + .section ".text" + .machine power4 +EALIGN (__sqrt, 5, 0) + fsqrt fp2,fp1 + mcrfs cr1,4 + bso- cr1,.Lw_sqrt /* FU (unordered) set -> error path */ + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrt: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 /* keep the fsqrt result; .L4 returns fp12 */ + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) /* CFI offsets are CFA-relative; CFA is r1+16 here */ + cfi_offset(r30,8-16) +#ifdef SHARED +# ifdef HAVE_ASM_PPC_REL16 + bcl 20,31,.LCF1 +.LCF1: + mflr r30 + addis r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +# else + bl _GLOBAL_OFFSET_TABLE_@local-4 + mflr r30 + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +# endif +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x) return z; (x is a NaN) */ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 26) */ + fmr fp2,fp1 + fabs fp0,fp1 + li r3,26 + fcmpu cr7,fp1,fp0 + bne- cr7,.L11 +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 /* return the __kernel_standard result via .L4 */ + b .L4 + END (__sqrt) + +weak_alias (__sqrt, sqrt) + +#ifdef NO_LONG_DOUBLE +weak_alias (__sqrt, sqrtl) +strong_alias (__sqrt, __sqrtl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0) +#endif + diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c deleted file mode 100644 index f59c193934..0000000000 --- a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c +++ /dev/null @@ -1,62 +0,0 @@ -/* Double-precision floating point square root wrapper. - Copyright (C) 2004, 2007 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <math_ldbl_opt.h> -#include "math.h" -#include "math_private.h" -#include <fenv_libc.h> - -#ifdef __STDC__ -double -__sqrt (double x) /* wrapper sqrt */ -#else -double -__sqrt (x) /* wrapper sqrt */ - double x; -#endif -{ - double z; -/* Power4 (ISA V2.0) and above implement sqrt in hardware. */ - __asm __volatile ( - " fsqrt %0,%1\n" - : "=f" (z) - : "f" (x)); -#ifdef _IEEE_LIBM - return z; -#else - if (__builtin_expect (_LIB_VERSION == _IEEE_, 0)) - return z; - - if (__builtin_expect (x != x, 0)) - return z; - - if (__builtin_expect (x < 0.0, 0)) - return __kernel_standard (x, x, 26); /* sqrt(negative) */ - else - return z; -#endif -} - -weak_alias (__sqrt, sqrt) -#ifdef NO_LONG_DOUBLE - strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl) -#endif -#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) -compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0); -#endif diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S new file mode 100644 index 0000000000..e5b8b9d565 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S @@ -0,0 +1,111 @@ +/* sqrtf function. PowerPC32 version. + Copyright (C) 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* float [fp1] sqrtf (float x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). + The fsqrts instruction generates the correct value for all inputs and + sets the appropriate floating point exceptions. Extended checking is + only needed to set errno (via __kernel_standard) if the input value + is negative. + + The fsqrts will set FPCC and FU (Floating Point Unordered or NaN) + to indicate that the input value was negative or NaN. Use Move to + Condition Register from FPSCR to copy the FPCC field to cr1. The + branch on summary overflow transfers control to w_sqrtf to process + any error conditions. Otherwise we can return the result directly. + + This part of the function is a leaf routine, so no need to stack a + frame or execute prologue/epilogue code. This means it is safe to + transfer directly to w_sqrtf as long as the input value (f1) is + preserved. Putting the sqrt result into f2 (float parameter 2) + allows passing both the input value and sqrt result into the extended + wrapper so there is no need to recompute. + + This tactic avoids the overhead of stacking a frame for the normal + (non-error) case. 
Until gcc supports prologue shrink-wrapping + this is the best we can do. */ + + .section ".text" + .machine power4 +EALIGN (__sqrtf, 5, 0) + fsqrts fp2,fp1 + mcrfs cr1,4 + bso- cr1,.Lw_sqrtf /* FU (unordered) set -> error path */ + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrtf: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 /* keep the fsqrts result; .L4 returns fp12 */ + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) /* CFI offsets are CFA-relative; CFA is r1+16 here */ + cfi_offset(r30,8-16) +#ifdef SHARED +# ifdef HAVE_ASM_PPC_REL16 + bcl 20,31,.LCF1 +.LCF1: + mflr r30 + addis r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +# else + bl _GLOBAL_OFFSET_TABLE_@local-4 + mflr r30 + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +# endif +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x) return z; (x is a NaN) */ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 126) */ + fmr fp2,fp1 + fabs fp0,fp1 + li r3,126 + fcmpu cr7,fp1,fp0 + bne- cr7,.L11 +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 /* return the __kernel_standard result via .L4 */ + b .L4 + END (__sqrtf) + +weak_alias (__sqrtf, sqrtf) + diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c deleted file mode 100644 index 4784869f07..0000000000 --- a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c +++ /dev/null @@ -1,60 +0,0 @@ -/* Single-precision floating point square root wrapper. - Copyright (C) 2004, 2007 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include "math.h" -#include "math_private.h" -#include <fenv_libc.h> - -#include <sysdep.h> -#include <ldsodefs.h> - -#ifdef __STDC__ -float -__sqrtf (float x) /* wrapper sqrtf */ -#else -float -__sqrtf (x) /* wrapper sqrtf */ - float x; -#endif -{ -#ifdef _IEEE_LIBM - return __ieee754_sqrtf (x); -#else - float z; -/* Power4 (ISA V2.0) and above implement sqrtf in hardware. */ - __asm __volatile ( - " fsqrts %0,%1\n" - : "=f" (z) - : "f" (x)); - - if (__builtin_expect (_LIB_VERSION == _IEEE_, 0)) - return z; - - if (__builtin_expect (x != x, 0)) - return z; - - if (__builtin_expect (x < 0.0, 0)) - /* sqrtf(negative) */ - return (float) __kernel_standard ((double) x, (double) x, 126); - else - return z; -#endif -} - -weak_alias (__sqrtf, sqrtf) diff --git a/sysdeps/powerpc/powerpc32/power4/hp-timing.c b/sysdeps/powerpc/powerpc32/power4/hp-timing.c new file mode 100644 index 0000000000..332fe8af99 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power4/hp-timing.c @@ -0,0 +1,25 @@ +/* Support for high precision, low overhead timing functions. + powerpc64 version. + Copyright (C) 2005, 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <hp-timing.h> + +/* We have to define the variable for the overhead. */ +hp_timing_t _dl_hp_timing_overhead; diff --git a/sysdeps/powerpc/powerpc32/power4/hp-timing.h b/sysdeps/powerpc/powerpc32/power4/hp-timing.h new file mode 100644 index 0000000000..5f719dd5a1 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power4/hp-timing.h @@ -0,0 +1,152 @@ +/* High precision, low overhead timing functions. powerpc64 version. + Copyright (C) 2005, 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +#include <string.h> +#include <sys/param.h> +#include <stdio-common/_itoa.h> +#include <atomic.h> + +/* The macros defined here use the powerpc 64-bit time base register. + The time base is nominally clocked at 1/8th the CPU clock, but this + can vary. + + The list of macros we need includes the following: + + - HP_TIMING_AVAIL: test for availability. + + - HP_TIMING_INLINE: this macro is non-zero if the functionality is not + implemented using function calls but instead uses some inlined code + which might simply consist of a few assembler instructions. We have to + know this since we might want to use the macros here in places where we + cannot make function calls. + + - hp_timing_t: This is the type for variables used to store the time + values. + + - HP_TIMING_ZERO: clear `hp_timing_t' object. + + - HP_TIMING_NOW: place timestamp for current time in variable given as + parameter. + + - HP_TIMING_DIFF_INIT: do whatever is necessary to be able to use the + HP_TIMING_DIFF macro. + + - HP_TIMING_DIFF: compute difference between two times and store it + in a third. Source and destination might overlap. + + - HP_TIMING_ACCUM: add time difference to another variable. This might + be a bit more complicated to implement for some platforms as the + operation should be thread-safe and 64bit arithmetic on 32bit platforms + is not. + + - HP_TIMING_ACCUM_NT: this is the variant for situations where we know + there are no threads involved. + + - HP_TIMING_PRINT: write decimal representation of the timing value into + the given string. This operation need not be inline even though + HP_TIMING_INLINE is specified. + +*/ + +/* We always assume having the timestamp register. */ +#define HP_TIMING_AVAIL (1) + +/* We indeed have inlined functions. */ +#define HP_TIMING_INLINE (1) + +/* We use 64bit values for the times. */ +typedef unsigned long long int hp_timing_t; + +/* Set timestamp value to zero. 
*/ +#define HP_TIMING_ZERO(Var) (Var) = (0) + +/* That's quite simple. Use the `mftb' instruction. Note that the value + might not be 100% accurate since there might be some more instructions + running in this moment. This could be changed by using a barrier like + 'lwsync' right before the `mftb' instruction. But we are not interested + in accurate clock cycles here so we don't do this. The code below reads SPR 269, then SPR 268, then SPR 269 again, and retries while the two SPR 269 reads differ (presumably the upper and lower time base halves -- confirm against the ISA). */ + +#define HP_TIMING_NOW(Var) \ + do { \ + union { long long ll; long ii[2]; } _var; \ + long tmp; \ + __asm__ __volatile__ ( \ + "1: mfspr %0,269;" \ + " mfspr %1,268;" \ + " mfspr %2,269;" \ + " cmpw %0,%2;" \ + " bne 1b;" \ + : "=r" (_var.ii[0]), "=r" (_var.ii[1]) , "=r" (tmp) \ + : : "cr0" \ + ); \ + Var = _var.ll; \ + } while (0) + + +/* Use two 'mftb' instructions in a row to find out how long it takes. + On current POWER4, POWER5, and 970 processors mftb takes ~10 cycles. */ +#define HP_TIMING_DIFF_INIT() \ + do { \ + if (GLRO(dl_hp_timing_overhead) == 0) \ + { \ + int __cnt = 5; \ + GLRO(dl_hp_timing_overhead) = ~0ull; \ + do \ + { \ + hp_timing_t __t1, __t2; \ + HP_TIMING_NOW (__t1); \ + HP_TIMING_NOW (__t2); \ + if (__t2 - __t1 < GLRO(dl_hp_timing_overhead)) \ + GLRO(dl_hp_timing_overhead) = __t2 - __t1; \ + } \ + while (--__cnt > 0); \ + } \ + } while (0) + +/* It's simple arithmetic in 64-bit. */ +#define HP_TIMING_DIFF(Diff, Start, End) (Diff) = ((End) - (Start)) + +/* We need to ensure that this add is atomic in threaded environments. We use + __arch_atomic_exchange_and_add_64 from atomic.h to get thread safety. */ +#define HP_TIMING_ACCUM(Sum, Diff) \ + do { \ + hp_timing_t __diff = (Diff) - GLRO(dl_hp_timing_overhead); \ + __arch_atomic_exchange_and_add_64 (&(Sum), __diff); \ + } while (0) + +/* No threads, no extra work. */ +#define HP_TIMING_ACCUM_NT(Sum, Diff) (Sum) += (Diff) + +/* Print the time value. 
*/ +#define HP_TIMING_PRINT(Buf, Len, Val) \ + do { \ + char __buf[20]; \ + char *__cp = _itoa (Val, __buf + sizeof (__buf), 10, 0); \ + size_t __len = (Len); \ + char *__dest = (Buf); \ + while (__len-- > 0 && __cp < __buf + sizeof (__buf)) \ + *__dest++ = *__cp++; \ + memcpy (__dest, " ticks", MIN (__len, sizeof (" ticks"))); \ + } while (0) + +#endif /* hp-timing.h */ diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S b/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S new file mode 100644 index 0000000000..91337483d7 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S @@ -0,0 +1,62 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power5 +EALIGN (__isnan, 4, 0) + stwu r1,-32(r1) + cfi_adjust_cfa_offset (32) + ori r1,r1,0 + stfd fp1,24(r1) /* copy FPR to GPR */ + ori r1,r1,0 + lwz r4,24(r1) + lwz r5,28(r1) + lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */ + cmpwi cr6,r5,0 + li r3,0 /* then return 0 */ + addi r1,r1,32 + cfi_adjust_cfa_offset (-32) + bltlr+ cr7 + bgt- cr7,L(NaN) + beqlr+ cr6 +L(NaN): + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#ifndef IS_IN_libm +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S b/sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S new file mode 100644 index 0000000000..9b1e95f35f --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S @@ -0,0 +1,46 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnanf(x) */ + .machine power5 +EALIGN (__isnanf, 4, 0) + stwu r1,-32(r1) + cfi_adjust_cfa_offset (32) + stfs fp1,28(r1) /* copy FPR to GPR */ + nop + nop + lwz r4,28(r1) + lis r0,0x7f80 /* const long r0 0x7f800000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */ + li r3,0 /* then return 0 */ + addi r1,r1,32 + cfi_adjust_cfa_offset (-32) + blelr+ cr7 +L(NaN): + li r3,1 /* else return 1 */ + blr + END (__isnanf) + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S new file mode 100644 index 0000000000..925930bf77 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S @@ -0,0 +1,117 @@ +/* sqrt function. PowerPC32 version. + Copyright (C) 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* double [fp1] sqrt (double x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). + The fsqrt instruction generates the correct value for all inputs and + sets the appropriate floating point exceptions. Extended checking is + only needed to set errno (via __kernel_standard) if the input value + is negative. + + So compare the input value against the absolute value of itself. + This will compare equal unless the value is negative (EDOM) or a NAN, + in which case we branch to the extended wrapper. If equal we can return + the result directly. + + This part of the function looks like a leaf routine, so no need to + stack a frame or execute prologue/epilogue code. It is safe to + branch directly to w_sqrt as long as the input value (f1) is + preserved. Putting the sqrt result into f2 (double parameter 2) + allows passing both the input value and sqrt result into the extended + wrapper so there is no need to recompute. + + This tactic avoids the overhead of stacking a frame for the normal + (non-error) case. Until gcc supports prologue shrink-wrapping + this is the best we can do. 
*/ + + .section ".text" + .machine power4 +EALIGN (__sqrt, 5, 0) + fabs fp0,fp1 + fsqrt fp2,fp1 + fcmpu cr1,fp0,fp1 + bne- cr1,.Lw_sqrt /* fabs(x) != x -> error path */ + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrt: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 /* keep the fsqrt result; .L4 returns fp12 */ + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) /* CFI offsets are CFA-relative; CFA is r1+16 here */ + cfi_offset(r30,8-16) +#ifdef SHARED +# ifdef HAVE_ASM_PPC_REL16 + bcl 20,31,.LCF1 +.LCF1: + mflr r30 + addis r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +# else + bl _GLOBAL_OFFSET_TABLE_@local-4 + mflr r30 + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +# endif +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x) return z; (x is a NaN) */ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 26) */ + fmr fp2,fp1 + li r3,26 + bne- cr1,.L11 /* cr1 still holds the fabs(x) vs x compare from entry */ +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 /* return the __kernel_standard result via .L4 */ + b .L4 + END (__sqrt) + +weak_alias (__sqrt, sqrt) + +#ifdef NO_LONG_DOUBLE +weak_alias (__sqrt, sqrtl) +strong_alias (__sqrt, __sqrtl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0) +#endif + diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S new file mode 100644 index 0000000000..891e69c9c0 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S @@ -0,0 +1,109 @@ +/* sqrtf function. PowerPC32 version. + Copyright (C) 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* float [fp1] sqrtf (float x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). + The fsqrts instruction generates the correct value for all inputs and + sets the appropriate floating point exceptions. Extended checking is + only needed to set errno (via __kernel_standard) if the input value + is negative. + + So compare the input value against the absolute value of itself. + This will compare equal unless the value is negative (EDOM) or a NAN, + in which case we branch to the extended wrapper. If equal we can return + the result directly. + + This part of the function looks like a leaf routine, so no need to + stack a frame or execute prologue/epilogue code. It is safe to + branch directly to w_sqrtf as long as the input value (f1) is + preserved. Putting the sqrt result into f2 (float parameter 2) + allows passing both the input value and sqrt result into the extended + wrapper so there is no need to recompute. + + This tactic avoids the overhead of stacking a frame for the normal + (non-error) case. Until gcc supports prologue shrink-wrapping + this is the best we can do. 
*/ + + .section ".text" + .machine power4 +EALIGN (__sqrtf, 5, 0) + fabs fp0,fp1 + fsqrts fp2,fp1 + fcmpu cr1,fp0,fp1 + bne- cr1,.Lw_sqrtf /* fabs(x) != x -> error path */ + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrtf: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 /* keep the fsqrts result; .L4 returns fp12 */ + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) /* CFI offsets are CFA-relative; CFA is r1+16 here */ + cfi_offset(r30,8-16) +#ifdef SHARED +# ifdef HAVE_ASM_PPC_REL16 + bcl 20,31,.LCF1 +.LCF1: + mflr r30 + addis r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +# else + bl _GLOBAL_OFFSET_TABLE_@local-4 + mflr r30 + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +# endif +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x) return z; (x is a NaN) */ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 126) */ + fmr fp2,fp1 + li r3,126 + bne- cr1,.L11 /* cr1 still holds the fabs(x) vs x compare from entry */ +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 /* return the __kernel_standard result via .L4 */ + b .L4 + END (__sqrtf) + +weak_alias (__sqrtf, sqrtf) + diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S new file mode 100644 index 0000000000..dbaaf1c1a8 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S @@ -0,0 +1,62 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x): r3 = 1 if the double in fp1 is a NaN, else r3 = 0 */ + .machine power6 +EALIGN (__isnan, 4, 0) + stwu r1,-32(r1) /* 32-byte scratch frame for the FPR to GPR transfer */ + cfi_adjust_cfa_offset (32) + ori r1,r1,0 /* no-op (r1 | 0); NOTE(review): presumably POWER6 scheduling padding, confirm */ + stfd fp1,24(r1) /* copy FPR to GPR */ + ori r1,r1,0 /* no-op, as above */ + lwz r4,24(r1) /* word carrying sign and exponent (see the masking below) */ + lwz r5,28(r1) /* remaining 32 bits of the double */ + lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* cr7: first word of fabs(x) vs first word of inf */ + cmpwi cr6,r5,0 /* cr6: is the second word zero? */ + li r3,0 /* then return 0 */ + addi r1,r1,32 /* release the scratch frame before any return */ + cfi_adjust_cfa_offset (-32) + bltlr+ cr7 /* first word below inf: return 0 */ + bgt- cr7,L(NaN) /* first word above inf: NaN */ + beqlr+ cr6 /* first word equals inf and second word is 0: return 0 */ +L(NaN): + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#ifndef IS_IN_libm +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S new file mode 100644 index 0000000000..12bf473a8d --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S @@ -0,0 +1,46 @@ +/* isnanf(). PowerPC32 version. + Copyright (C) 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnanf(x): r3 = 1 if the float in fp1 is a NaN, else r3 = 0 */ + .machine power6 +EALIGN (__isnanf, 4, 0) + stwu r1,-32(r1) /* 32-byte scratch frame for the FPR to GPR transfer */ + cfi_adjust_cfa_offset (32) + ori r1,r1,0 /* no-op (r1 | 0); NOTE(review): presumably POWER6 scheduling padding, confirm */ + stfs fp1,24(r1) /* copy FPR to GPR */ + ori r1,r1,0 /* no-op, as above */ + lwz r4,24(r1) /* single-precision bit pattern of x */ + lis r0,0x7f80 /* const long r0 0x7f800000 */ + clrlwi r4,r4,1 /* x = fabs(x) */ + cmpw cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + addi r1,r1,32 /* release the scratch frame before returning */ + cfi_adjust_cfa_offset (-32) + blelr+ cr7 +L(NaN): + li r3,1 /* else return 1 */ + blr + END (__isnanf) + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + |