From be1e5d311342e08ae1f8013342df27b7ded2c156 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sat, 17 Aug 2013 18:34:40 +0930 Subject: PowerPC LE setjmp/longjmp http://sourceware.org/ml/libc-alpha/2013-08/msg00089.html Little-endian fixes for setjmp/longjmp. When writing these I noticed the setjmp code corrupts the non volatile VMX registers when using an unaligned buffer. Anton fixed this, and also simplified it quite a bit. The current code uses boilerplate for the case where we want to store 16 bytes to an unaligned address. For that we have to do a read/modify/write of two aligned 16 byte quantities. In our case we are storing a bunch of back to back data (consective VMX registers), and only the start and end of the region need the read/modify/write. [BZ #15723] * sysdeps/powerpc/jmpbuf-offsets.h: Comment fix. * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Correct _dl_hwcap access for little-endian. * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise. Don't destroy vmx regs when saving unaligned. * sysdeps/powerpc/powerpc64/__longjmp-common.S: Correct CR load. * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise CR save. Don't destroy vmx regs when saving unaligned. --- ChangeLog | 14 +++++ sysdeps/powerpc/jmpbuf-offsets.h | 6 +- sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S | 8 +-- sysdeps/powerpc/powerpc32/fpu/setjmp-common.S | 73 ++++++++++++------------ sysdeps/powerpc/powerpc64/__longjmp-common.S | 4 +- sysdeps/powerpc/powerpc64/setjmp-common.S | 72 +++++++++++------------ 6 files changed, 92 insertions(+), 85 deletions(-) diff --git a/ChangeLog b/ChangeLog index 89f51054e0..c106857c3f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2013-10-04 Anton Blanchard + Alistair Popple + Alan Modra + + [BZ #15723] + * sysdeps/powerpc/jmpbuf-offsets.h: Comment fix. + * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Correct + _dl_hwcap access for little-endian. + * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise. Don't + destroy vmx regs when saving unaligned. + * sysdeps/powerpc/powerpc64/__longjmp-common.S: Correct CR load. + * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise CR save. Don't + destroy vmx regs when saving unaligned. + 2013-10-04 Alan Modra * sysdeps/powerpc/powerpc32/power4/hp-timing.h (HP_TIMING_NOW): diff --git a/sysdeps/powerpc/jmpbuf-offsets.h b/sysdeps/powerpc/jmpbuf-offsets.h index 64c658a588..f2116bd703 100644 --- a/sysdeps/powerpc/jmpbuf-offsets.h +++ b/sysdeps/powerpc/jmpbuf-offsets.h @@ -21,12 +21,10 @@ #define JB_LR 2 /* The address we will return to */ #if __WORDSIZE == 64 # define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */ -# define JB_CR 21 /* Condition code registers with the VRSAVE at */ - /* offset 172 (low half of the double word. */ +# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */ # define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ # define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */ -# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */ - /* 168 (high half of the double word). */ +# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */ # define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */ #else # define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */ diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S index 9d34cd9165..d02aa57549 100644 --- a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S +++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S @@ -43,16 +43,16 @@ ENTRY (__longjmp) # endif mtlr r6 cfi_same_value (lr) - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5) + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) # else lwz r5,_dl_hwcap@got(r5) mtlr r6 cfi_same_value (lr) - lwz r5,4(r5) + lwz r5,LOWORD(r5) # endif # else - lis r5,(_dl_hwcap+4)@ha - lwz r5,(_dl_hwcap+4)@l(r5) + lis r5,(_dl_hwcap+LOWORD)@ha + lwz r5,(_dl_hwcap+LOWORD)@l(r5) # endif andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S index 46ea2b00f9..f3244060e0 100644 --- a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S +++ b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S @@ -94,14 +94,14 @@ ENTRY (__sigsetjmp) # else lwz r5,_rtld_global_ro@got(r5) # endif - lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5) + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) # else lwz r5,_dl_hwcap@got(r5) - lwz r5,4(r5) + lwz r5,LOWORD(r5) # endif # else - lis r6,(_dl_hwcap+4)@ha - lwz r5,(_dl_hwcap+4)@l(r6) + lis r6,(_dl_hwcap+LOWORD)@ha + lwz r5,(_dl_hwcap+LOWORD)@l(r6) # endif andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) @@ -111,44 +111,43 @@ ENTRY (__sigsetjmp) stw r0,((JB_VRSAVE)*4)(3) addi r6,r5,16 beq+ L(aligned_save_vmx) - lvsr v0,0,r5 - vspltisb v1,-1 /* set v1 to all 1's */ - vspltisb v2,0 /* set v2 to all 0's */ - vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */ + lvsr v0,0,r5 + lvsl v1,0,r5 + addi r6,r5,-16 - /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */ - lvx v5,0,r5 - vperm v20,v20,v20,v0 - vsel v5,v5,v20,v3 - vsel v20,v20,v2,v3 - stvx v5,0,r5 +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ + addi addgpr,addgpr,32; \ + vperm tmpvr,prevvr,savevr,shiftvr; \ + stvx tmpvr,0,savegpr -#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \ - addi addgpr,addgpr,32; \ - vperm savevr,savevr,savevr,shiftvr; \ - vsel hivr,prev_savevr,savevr,maskvr; \ - stvx hivr,0,savegpr; + /* + * We have to be careful not to corrupt the data below v20 and + * above v31. To keep things simple we just rotate both ends in + * the opposite direction to our main permute so we can use + * the common macro. + */ - save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5) - save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6) - save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5) - save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6) - save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5) - save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6) - save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5) - save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6) - save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5) - save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6) + /* load and rotate data below v20 */ + lvx v2,0,r5 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v20,v2,v0,v3,r5,r6) + save_misaligned_vmx(v21,v20,v0,v3,r6,r5) + save_misaligned_vmx(v22,v21,v0,v3,r5,r6) + save_misaligned_vmx(v23,v22,v0,v3,r6,r5) + save_misaligned_vmx(v24,v23,v0,v3,r5,r6) + save_misaligned_vmx(v25,v24,v0,v3,r6,r5) + save_misaligned_vmx(v26,v25,v0,v3,r5,r6) + save_misaligned_vmx(v27,v26,v0,v3,r6,r5) + save_misaligned_vmx(v28,v27,v0,v3,r5,r6) + save_misaligned_vmx(v29,v28,v0,v3,r6,r5) + save_misaligned_vmx(v30,v29,v0,v3,r5,r6) + save_misaligned_vmx(v31,v30,v0,v3,r6,r5) + /* load and rotate data above v31 */ + lvx v2,0,r6 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v2,v31,v0,v3,r5,r6) - /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */ - addi r5,r5,32 - vperm v31,v31,v31,v0 - lvx v4,0,r5 - vsel v5,v30,v31,v3 - stvx v5,0,r6 - vsel v4,v31,v4,v3 - stvx v4,0,r5 b L(no_vmx) L(aligned_save_vmx): diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S index 70c370439f..4f1e3c88d3 100644 --- a/sysdeps/powerpc/powerpc64/__longjmp-common.S +++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S @@ -57,7 +57,7 @@ ENTRY (__longjmp) beq L(no_vmx) la r5,((JB_VRS)*8)(3) andi. r6,r5,0xf - lwz r0,((JB_VRSAVE)*8)(3) + lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ mtspr VRSAVE,r0 beq+ L(aligned_restore_vmx) addi r6,r5,16 @@ -153,7 +153,7 @@ L(no_vmx): lfd fp21,((JB_FPRS+7)*8)(r3) ld r22,((JB_GPRS+8)*8)(r3) lfd fp22,((JB_FPRS+8)*8)(r3) - ld r0,(JB_CR*8)(r3) + lwz r0,((JB_CR*8)+4)(r3) /* 32-bit CR. */ ld r23,((JB_GPRS+9)*8)(r3) lfd fp23,((JB_FPRS+9)*8)(r3) ld r24,((JB_GPRS+10)*8)(r3) diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S index 58ec610620..1829b9ab65 100644 --- a/sysdeps/powerpc/powerpc64/setjmp-common.S +++ b/sysdeps/powerpc/powerpc64/setjmp-common.S @@ -95,7 +95,7 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)): mfcr r0 std r16,((JB_GPRS+2)*8)(3) stfd fp16,((JB_FPRS+2)*8)(3) - std r0,(JB_CR*8)(3) + stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. */ std r17,((JB_GPRS+3)*8)(3) stfd fp17,((JB_FPRS+3)*8)(3) std r18,((JB_GPRS+4)*8)(3) @@ -139,50 +139,46 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)): la r5,((JB_VRS)*8)(3) andi. r6,r5,0xf mfspr r0,VRSAVE - stw r0,((JB_VRSAVE)*8)(3) + stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ addi r6,r5,16 beq+ L(aligned_save_vmx) - lvsr v0,0,r5 - vspltisb v1,-1 /* set v1 to all 1's */ - vspltisb v2,0 /* set v2 to all 0's */ - vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes - on left = misalignment */ + lvsr v0,0,r5 + lvsl v1,0,r5 + addi r6,r5,-16 - /* Special case for v20 we need to preserve what is in save area - below v20 before obliterating it */ - lvx v5,0,r5 - vperm v20,v20,v20,v0 - vsel v5,v5,v20,v3 - vsel v20,v20,v2,v3 - stvx v5,0,r5 +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ + addi addgpr,addgpr,32; \ + vperm tmpvr,prevvr,savevr,shiftvr; \ + stvx tmpvr,0,savegpr -# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \ - addi addgpr,addgpr,32; \ - vperm savevr,savevr,savevr,shiftvr; \ - vsel hivr,prev_savevr,savevr,maskvr; \ - stvx hivr,0,savegpr; + /* + * We have to be careful not to corrupt the data below v20 and + * above v31. To keep things simple we just rotate both ends in + * the opposite direction to our main permute so we can use + * the common macro. + */ - save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5) - save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6) - save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5) - save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6) - save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5) - save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6) - save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5) - save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6) - save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5) - save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6) + /* load and rotate data below v20 */ + lvx v2,0,r5 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v20,v2,v0,v3,r5,r6) + save_misaligned_vmx(v21,v20,v0,v3,r6,r5) + save_misaligned_vmx(v22,v21,v0,v3,r5,r6) + save_misaligned_vmx(v23,v22,v0,v3,r6,r5) + save_misaligned_vmx(v24,v23,v0,v3,r5,r6) + save_misaligned_vmx(v25,v24,v0,v3,r6,r5) + save_misaligned_vmx(v26,v25,v0,v3,r5,r6) + save_misaligned_vmx(v27,v26,v0,v3,r6,r5) + save_misaligned_vmx(v28,v27,v0,v3,r5,r6) + save_misaligned_vmx(v29,v28,v0,v3,r6,r5) + save_misaligned_vmx(v30,v29,v0,v3,r5,r6) + save_misaligned_vmx(v31,v30,v0,v3,r6,r5) + /* load and rotate data above v31 */ + lvx v2,0,r6 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v2,v31,v0,v3,r5,r6) - /* Special case for r31 we need to preserve what is in save area - above v31 before obliterating it */ - addi r5,r5,32 - vperm v31,v31,v31,v0 - lvx v4,0,r5 - vsel v5,v30,v31,v3 - stvx v5,0,r6 - vsel v4,v31,v4,v3 - stvx v4,0,r5 b L(no_vmx) L(aligned_save_vmx): -- cgit v1.2.3