diff options
author | Thomas Schwinge <thomas@codesourcery.com> | 2013-12-20 09:31:50 +0100 |
---|---|---|
committer | Thomas Schwinge <thomas@codesourcery.com> | 2013-12-20 09:31:50 +0100 |
commit | d421cfc9bd0275f1d81bd8757b80ff568c7b2826 (patch) | |
tree | 46b47f0c1b7a64e2697516b627be8464d15ab797 /sysdeps/powerpc/powerpc32/fpu | |
parent | 1cbf52cde38c2c2c2d8ffdd55cd73f864d79abda (diff) | |
parent | b4578bab30f72cddd2cf38abfb39f9c8dc892249 (diff) |
Merge commit 'refs/top-bases/t/no-hp-timing' into t/no-hp-timing
Diffstat (limited to 'sysdeps/powerpc/powerpc32/fpu')
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S | 21 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/__longjmp.S | 11 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_copysign.S | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_copysignl.S | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_isnan.S | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_lrint.S | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_lround.S | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_roundf.S | 3 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/setjmp-common.S | 83 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/setjmp.S | 18 |
10 files changed, 80 insertions, 68 deletions
diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S index 9d34cd9165..13611541c2 100644 --- a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S +++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S @@ -17,6 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <stap-probe.h> #define _ASM #ifdef __NO_VMX__ # include <novmxsetjmp.h> @@ -25,7 +26,7 @@ #endif .machine "altivec" -ENTRY (__longjmp) +ENTRY (__longjmp_symbol) #ifndef __NO_VMX__ # ifdef PIC mflr r6 @@ -43,16 +44,16 @@ ENTRY (__longjmp) # endif mtlr r6 cfi_same_value (lr) - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5) + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) # else lwz r5,_dl_hwcap@got(r5) mtlr r6 cfi_same_value (lr) - lwz r5,4(r5) + lwz r5,LOWORD(r5) # endif # else - lis r5,(_dl_hwcap+4)@ha - lwz r5,(_dl_hwcap+4)@l(r5) + lis r5,(_dl_hwcap+LOWORD)@ha + lwz r5,(_dl_hwcap+LOWORD)@l(r5) # endif andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) @@ -143,19 +144,22 @@ L(no_vmx): # endif PTR_DEMANGLE2 (r0, r25) #endif + /* longjmp/longjmp_target probe expects longjmp first argument (4@3), + second argument (-4@4), and target address (4@0), respectively. */ + LIBC_PROBE (longjmp, 3, 4@3, -4@4, 4@0) mtlr r0 lwz r21,((JB_GPRS+7)*4)(r3) lfd fp21,((JB_FPRS+7*2)*4)(r3) lwz r22,((JB_GPRS+8)*4)(r3) lfd fp22,((JB_FPRS+8*2)*4)(r3) - lwz r0,(JB_CR*4)(r3) + lwz r5,(JB_CR*4)(r3) lwz r23,((JB_GPRS+9)*4)(r3) lfd fp23,((JB_FPRS+9*2)*4)(r3) lwz r24,((JB_GPRS+10)*4)(r3) lfd fp24,((JB_FPRS+10*2)*4)(r3) lwz r25,((JB_GPRS+11)*4)(r3) lfd fp25,((JB_FPRS+11*2)*4)(r3) - mtcrf 0xFF,r0 + mtcrf 0xFF,r5 lwz r26,((JB_GPRS+12)*4)(r3) lfd fp26,((JB_FPRS+12*2)*4)(r3) lwz r27,((JB_GPRS+13)*4)(r3) @@ -168,6 +172,7 @@ L(no_vmx): lfd fp30,((JB_FPRS+16*2)*4)(r3) lwz r31,((JB_GPRS+17)*4)(r3) lfd fp31,((JB_FPRS+17*2)*4)(r3) + LIBC_PROBE (longjmp_target, 3, 4@3, -4@4, 4@0) mr r3,r4 blr -END (__longjmp) +END (__longjmp_symbol) diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp.S index 96e50de378..cbd42be5cb 100644 --- a/sysdeps/powerpc/powerpc32/fpu/__longjmp.S +++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp.S @@ -22,20 +22,21 @@ #if defined NOT_IN_libc /* Build a non-versioned object for rtld-*. */ +# define __longjmp_symbol __longjmp # include "__longjmp-common.S" #else /* !NOT_IN_libc */ /* Build a versioned object for libc. */ -default_symbol_version (__vmx__longjmp,__longjmp,GLIBC_2.3.4); -# define __longjmp __vmx__longjmp +versioned_symbol (libc, __vmx__longjmp, __longjmp, GLIBC_2_3_4); +# define __longjmp_symbol __vmx__longjmp # include "__longjmp-common.S" # if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) # define __NO_VMX__ # undef JB_SIZE -symbol_version (__novmx__longjmp,__longjmp,GLIBC_2.0); -# undef __longjmp -# define __longjmp __novmx__longjmp +compat_symbol (libc, __novmx__longjmp, __longjmp, GLIBC_2_0); +# undef __longjmp_symbol +# define __longjmp_symbol __novmx__longjmp # include "__longjmp-common.S" # endif #endif /* !NOT_IN_libc */ diff --git a/sysdeps/powerpc/powerpc32/fpu/s_copysign.S b/sysdeps/powerpc/powerpc32/fpu/s_copysign.S index 840891f1c3..1da24f492e 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_copysign.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_copysign.S @@ -29,7 +29,7 @@ ENTRY(__copysign) stwu r1,-16(r1) cfi_adjust_cfa_offset (16) stfd fp2,8(r1) - lwz r3,8(r1) + lwz r3,8+HIWORD(r1) cmpwi r3,0 addi r1,r1,16 cfi_adjust_cfa_offset (-16) diff --git a/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S b/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S index 4ec8389b5d..2ad6de273d 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S @@ -30,7 +30,7 @@ ENTRY(__copysignl) fmr fp0,fp1 fabs fp1,fp1 fcmpu cr7,fp0,fp1 - lwz r3,8(r1) + lwz r3,8+HIWORD(r1) cmpwi cr6,r3,0 addi r1,r1,16 cfi_adjust_cfa_offset (-16) diff --git a/sysdeps/powerpc/powerpc32/fpu/s_isnan.S b/sysdeps/powerpc/powerpc32/fpu/s_isnan.S index 98d10daf68..024252a5ca 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_isnan.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_isnan.S @@ -37,9 +37,11 @@ weak_alias (__isnan, isnan) /* It turns out that the 'double' version will also always work for single-precision. */ +#ifndef __isnan strong_alias (__isnan, __isnanf) hidden_def (__isnanf) weak_alias (__isnanf, isnanf) +#endif #ifdef NO_LONG_DOUBLE strong_alias (__isnan, __isnanl) diff --git a/sysdeps/powerpc/powerpc32/fpu/s_lrint.S b/sysdeps/powerpc/powerpc32/fpu/s_lrint.S index 27881f8cc7..249fda501f 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_lrint.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_lrint.S @@ -24,10 +24,10 @@ ENTRY (__lrint) stwu r1,-16(r1) fctiw fp13,fp1 stfd fp13,8(r1) - nop /* Insure the following load is in a different dispatch group */ + nop /* Ensure the following load is in a different dispatch group */ nop /* to avoid pipe stall on POWER4&5. */ nop - lwz r3,12(r1) + lwz r3,8+LOWORD(r1) addi r1,r1,16 blr END (__lrint) diff --git a/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/fpu/s_lround.S index 92dc3787d6..6309f864b7 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_lround.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_lround.S @@ -67,7 +67,7 @@ ENTRY (__lround) nop /* Ensure the following load is in a different dispatch */ nop /* group to avoid pipe stall on POWER4&5. */ nop - lwz r3,12(r1) /* Load return as integer. */ + lwz r3,8+LOWORD(r1) /* Load return as integer. */ .Lout: addi r1,r1,16 blr diff --git a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S index 2ed9ca7b40..8cff1563a0 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S @@ -19,7 +19,7 @@ #include <sysdep.h> .section .rodata.cst8,"aM",@progbits,8 - .align 2 + .align 3 .LC0: /* 2**23 */ .long 0x4b000000 .LC1: /* 0.5 */ @@ -60,7 +60,6 @@ ENTRY (__roundf ) #ifdef SHARED lfs fp10,.LC1-.LC0(r9) #else - lis r9,.LC1@ha lfs fp10,.LC1@l(r9) #endif ble- cr6,.L4 diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S index 46ea2b00f9..08efd64920 100644 --- a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S +++ b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S @@ -17,6 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <stap-probe.h> #define _ASM #ifdef __NO_VMX__ # include <novmxsetjmp.h> @@ -25,7 +26,7 @@ #endif .machine "altivec" -ENTRY (__sigsetjmp) +ENTRY (__sigsetjmp_symbol) #ifdef PTR_MANGLE mr r5,r1 @@ -35,6 +36,9 @@ ENTRY (__sigsetjmp) stw r1,(JB_GPR1*4)(3) #endif mflr r0 + /* setjmp probe expects longjmp first argument (4@3), second argument + (-4@4), and target address (4@0), respectively. */ + LIBC_PROBE (setjmp, 3, 4@3, -4@4, 4@0) stw r14,((JB_GPRS+0)*4)(3) stfd fp14,((JB_FPRS+0*2)*4)(3) #ifdef PTR_MANGLE @@ -94,14 +98,14 @@ ENTRY (__sigsetjmp) # else lwz r5,_rtld_global_ro@got(r5) # endif - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5) + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) # else lwz r5,_dl_hwcap@got(r5) - lwz r5,4(r5) + lwz r5,LOWORD(r5) # endif # else - lis r6,(_dl_hwcap+4)@ha - lwz r5,(_dl_hwcap+4)@l(r6) + lis r6,(_dl_hwcap+LOWORD)@ha + lwz r5,(_dl_hwcap+LOWORD)@l(r6) # endif andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) @@ -111,44 +115,43 @@ ENTRY (__sigsetjmp) stw r0,((JB_VRSAVE)*4)(3) addi r6,r5,16 beq+ L(aligned_save_vmx) - lvsr v0,0,r5 - vspltisb v1,-1 /* set v1 to all 1's */ - vspltisb v2,0 /* set v2 to all 0's */ - vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */ + lvsr v0,0,r5 + lvsl v1,0,r5 + addi r6,r5,-16 - /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */ - lvx v5,0,r5 - vperm v20,v20,v20,v0 - vsel v5,v5,v20,v3 - vsel v20,v20,v2,v3 - stvx v5,0,r5 +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ + addi addgpr,addgpr,32; \ + vperm tmpvr,prevvr,savevr,shiftvr; \ + stvx tmpvr,0,savegpr -#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \ - addi addgpr,addgpr,32; \ - vperm savevr,savevr,savevr,shiftvr; \ - vsel hivr,prev_savevr,savevr,maskvr; \ - stvx hivr,0,savegpr; + /* + * We have to be careful not to corrupt the data below v20 and + * above v31. To keep things simple we just rotate both ends in + * the opposite direction to our main permute so we can use + * the common macro. + */ - save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5) - save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6) - save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5) - save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6) - save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5) - save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6) - save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5) - save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6) - save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5) - save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6) + /* load and rotate data below v20 */ + lvx v2,0,r5 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v20,v2,v0,v3,r5,r6) + save_misaligned_vmx(v21,v20,v0,v3,r6,r5) + save_misaligned_vmx(v22,v21,v0,v3,r5,r6) + save_misaligned_vmx(v23,v22,v0,v3,r6,r5) + save_misaligned_vmx(v24,v23,v0,v3,r5,r6) + save_misaligned_vmx(v25,v24,v0,v3,r6,r5) + save_misaligned_vmx(v26,v25,v0,v3,r5,r6) + save_misaligned_vmx(v27,v26,v0,v3,r6,r5) + save_misaligned_vmx(v28,v27,v0,v3,r5,r6) + save_misaligned_vmx(v29,v28,v0,v3,r6,r5) + save_misaligned_vmx(v30,v29,v0,v3,r5,r6) + save_misaligned_vmx(v31,v30,v0,v3,r6,r5) + /* load and rotate data above v31 */ + lvx v2,0,r6 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v2,v31,v0,v3,r5,r6) - /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */ - addi r5,r5,32 - vperm v31,v31,v31,v0 - lvx v4,0,r5 - vsel v5,v30,v31,v3 - stvx v5,0,r6 - vsel v4,v31,v4,v3 - stvx v4,0,r5 b L(no_vmx) L(aligned_save_vmx): @@ -176,5 +179,5 @@ L(aligned_save_vmx): stvx 31,0,r6 L(no_vmx): #endif - b __sigjmp_save@local -END (__sigsetjmp) + b __sigjmp_save_symbol@local +END (__sigsetjmp_symbol) diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp.S b/sysdeps/powerpc/powerpc32/fpu/setjmp.S index 60cd350522..566aa34d0f 100644 --- a/sysdeps/powerpc/powerpc32/fpu/setjmp.S +++ b/sysdeps/powerpc/powerpc32/fpu/setjmp.S @@ -22,23 +22,25 @@ #if defined NOT_IN_libc /* Build a non-versioned object for rtld-*. */ +# define __sigsetjmp_symbol __sigsetjmp +# define __sigjmp_save_symbol __sigjmp_save # include "setjmp-common.S" #else /* !NOT_IN_libc */ /* Build a versioned object for libc. */ -default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4) -# define __sigsetjmp __vmx__sigsetjmp -# define __sigjmp_save __vmx__sigjmp_save +versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) +# define __sigsetjmp_symbol __vmx__sigsetjmp +# define __sigjmp_save_symbol __vmx__sigjmp_save # include "setjmp-common.S" # if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) # define __NO_VMX__ -# undef __sigsetjmp -# undef __sigjmp_save +# undef __sigsetjmp_symbol +# undef __sigjmp_save_symbol # undef JB_SIZE -symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0) -# define __sigsetjmp __novmx__sigsetjmp -# define __sigjmp_save __novmx__sigjmp_save +compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0) +# define __sigsetjmp_symbol __novmx__sigsetjmp +# define __sigjmp_save_symbol __novmx__sigjmp_save # include "setjmp-common.S" # endif #endif /* !NOT_IN_libc */ |