11 files changed, 157 insertions, 98 deletions
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/fpu/s_lround.S
index 9c534ec2be..ebaccccd9b 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_lround.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_lround.S
@@ -1,5 +1,5 @@
 /* lround function.  PowerPC32 version.
-   Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,13 +20,10 @@
 #include <sysdep.h>
 #include <math_ldbl_opt.h>
 
-	.section	.rodata.cst8,"aM",@progbits,8
+	.section	.rodata.cst4,"aM",@progbits,4
 	.align	2
-.LC0:	/* 0.0 */
-	.long 0x00000000
-.LC1:	/* 0.5 */
+.LC0:	/* 0.5 */
 	.long 0x3f000000
-
 	.section	".text"
 	
 /* long [r3] lround (float x [fp1])
@@ -37,7 +34,10 @@
    tie, choose the one that is even (least significant bit o).". 
    So we can't use the PowerPC "round to Nearest" mode. Instead we set
    "round toward Zero" mode and round by adding +-0.5 before rounding
-   to the integer value.  */
+   to the integer value.  It is necessary to detect when x is
+   (+-)0x1.fffffffffffffp-2 because adding +-0.5 in this case will
+   cause an erroneous shift, carry and round.  We simply return 0 if
+   0.5 > x > -0.5.  */
 
 ENTRY (__lround)
 	stwu    r1,-16(r1)
@@ -49,40 +49,40 @@ ENTRY (__lround)
 	bcl	20,31,1f
 1:	mflr	r9
 	addis	r9,r9,.LC0-1b@ha
-	addi	r9,r9,.LC0-1b@l
+	lfs	fp10,.LC0-1b@l(r9)
 # else
 	bl	_GLOBAL_OFFSET_TABLE_@local-4
 	mflr	r10
 	lwz	r9,.LC0@got(10)
+	lfs	fp10,0(r9)
 # endif
 	mtlr	r11
 	cfi_same_value (lr)
-	lfs	fp12,0(r9)
 #else
 	lis	r9,.LC0@ha
-	lfs	fp12,.LC0@l(r9)
-#endif
-#ifdef SHARED
-	lfs	fp10,.LC1-.LC0(r9)
-#else
-	lis	r9,.LC1@ha
-	lfs	fp10,.LC1@l(r9)
+	lfs	fp10,.LC0@l(r9)
 #endif
-	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	ble-	cr6,.L4
-	fadd	fp1,fp1,fp10	/* x+= 0.5;  */
-.L9:
-	fctiwz	fp2,fp1		/* Convert To Integer DW lround toward 0.  */
-	stfd	fp2,8(r1)
+	fabs	fp2, fp1	/* Get the absolute value of x.  */
+	fsub	fp12,fp10,fp10	/* Compute 0.0.  */
+	fcmpu	cr6, fp2, fp10	/* if |x| < 0.5  */
+	fcmpu	cr3, fp1, fp12	/* x is negative? x < 0.0  */
+	blt-	cr6,.Lretzero
+	fadd	fp3,fp2,fp10	/* |x|+=0.5 bias to prepare to round.  */
+	bge	cr3,.Lconvert	/* x is positive so don't negate x.  */
+	fnabs	fp3,fp3		/* -(|x|+=0.5)  */ 
+.Lconvert:
+	fctiwz	fp4,fp3		/* Convert to Integer word round toward 0.  */
+	stfd	fp4,8(r1)
 	nop	/* Ensure the following load is in a different dispatch  */
 	nop	/* group to avoid pipe stall on POWER4&5.  */
 	nop
-	lwz	r3,12(r1)
+	lwz	r3,12(r1)	/* Load return as integer.  */
+.Lout:
 	addi	r1,r1,16
 	blr
-.L4:
-	fsub	fp1,fp1,fp10	/* x-= 0.5;  */
-	b	.L9
+.Lretzero:			/* when 0.5 > x > -0.5  */
+	li	r3,0		/* return 0.  */
+	b	.Lout
 	END (__lround)
 
 weak_alias (__lround, lround)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S b/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S
index e3c992d771..6289e0be58 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S
@@ -1,2 +1 @@
 /* __lroundf is in s_lround.S */
-/* __lroundf is in s_lround.S */
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
index 952d2aa6a5..4b1691efd3 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
@@ -1,5 +1,5 @@
 /* llround function.  PowerPC32 on PowerPC64 version.
-   Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,15 +20,15 @@
 #include <sysdep.h>
 #include <math_ldbl_opt.h>
 
-	.section	.rodata.cst8,"aM",@progbits,8
-	.align	2
-.LC0:	/* 0.0 */
+ .section .rodata.cst12,"aM",@progbits,12
+ .align 3
+ .LC0:   /* 0x1.0000000000000p+52 == 2^52 */
+	.long 0x43300000
 	.long 0x00000000
-.LC1:	/* 0.5 */
-	.long 0x3f000000
+	.long 0x3f000000 /* Use this for 0.5  */
 
 	.section	".text"
-	
+
 /* long [r3] lround (float x [fp1])
    IEEE 1003.1 lround function.  IEEE specifies "round to the nearest 
    integer value, rounding halfway cases away from zero, regardless of
@@ -37,7 +37,15 @@
    tie, choose the one that is even (least significant bit o).". 
    So we can't use the PowerPC "round to Nearest" mode. Instead we set
    "round toward Zero" mode and round by adding +-0.5 before rounding
-   to the integer value.  */
+   to the integer value.
+
+   It is necessary to detect when x is (+-)0x1.fffffffffffffp-2
+   because adding +-0.5 in this case will cause an erroneous shift,
+   carry and round.  We simply return 0 if 0.5 > x > -0.5.  Likewise
+   if x is an odd number between +-(2^52 and 2^53-1) a shift and
+   carry will erroneously round if biased with +-0.5.  Therefore if
+   |x| >= 2^52, where x is already an integer, we don't need to bias
+   the number with +-0.5.  */
 
 ENTRY (__llround)
 	stwu    r1,-16(r1)
@@ -57,30 +65,41 @@ ENTRY (__llround)
 # endif
 	mtlr	r11
 	cfi_same_value (lr)
-	lfs	fp12,0(r9)
-	lfs	fp10,.LC1-.LC0(r9)
+	lfd	fp9,0(r9)
+	lfs	fp10,8(r9)
 #else
-	lis	r9,.LC0@ha
-	lis	r10,.LC1@ha
-	lfs	fp12,.LC0@l(r9)
-	lfs	fp10,.LC1@l(r10)
+	lis r9,.LC0@ha
+	lfd fp9,.LC0@l(r9)	/* Load 2^52 into fpr9.  */
+	lfs fp10,.LC0@l+8(r9)	/* Load 0.5 into fpr10.  */
 #endif
-	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	ble-	cr6,.L4
-	fadd	fp1,fp1,fp10	/* x+= 0.5;  */
-.L9:
-	fctidz	fp2,fp1		/* Convert To Integer DW round toward 0.  */
-	stfd	fp2,8(r1)
-	nop	/* Ensure the following load is in a different dispatch  */
-	nop	/* group to avoid pipe stall on POWER4&5.  */
+	fabs	fp2,fp1		/* Get the absolute value of x.  */
+	fsub	fp12,fp10,fp10	/* Compute 0.0 into fpr12.  */
+	fcmpu	cr6,fp2,fp10	/* if |x| < 0.5  */
+	fcmpu	cr4,fp2,fp9	/* if |x| >= 2^52  */
+	fcmpu	cr3,fp1,fp12	/* x is negative? x < 0.0  */
+	blt-	cr6,.Lretzero	/* 0.5 > x > -0.5 so just return 0.  */
+	bge-	cr4,.Lnobias	/* |x| >= 2^52 so just convert with no bias.  */
+	fadd	fp3,fp2,fp10	/* |x|+=0.5 bias to prepare to round.  */
+	bge	cr3,.Lconvert	/* x is positive so don't negate x.  */
+	fnabs	fp3,fp3		/* -(|x|+=0.5)  */
+.Lconvert:
+	fctidz	fp4,fp3		/* Convert to Integer double word round toward 0.  */
+	stfd	fp4,8(r1)
+	nop
+	nop
 	nop
-	lwz	r4,12(r1)
+	lwz	r4,12(r1)	/* Load return as integer.  */
 	lwz	r3,8(r1)
+.Lout:
 	addi	r1,r1,16
 	blr
-.L4:
-	fsub	fp1,fp1,fp10	/* x-= 0.5;  */
-	b	.L9
+.Lretzero:			/* 0.5 > x > -0.5  */
+	li	r3,0		/* return 0.  */
+	li	r4,0
+	b	.Lout
+.Lnobias:
+	fmr	fp3,fp1
+	b	.Lconvert
 	END (__llround)
 
 weak_alias (__llround, llround)
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S
index ffe6b7eb37..030d2fdff8 100644
--- a/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S
@@ -1,2 +1 @@
 /* __llroundf is in s_llround.S  */
-/* __llroundf is in s_llround.S  */
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S
index ffe6b7eb37..030d2fdff8 100644
--- a/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S
@@ -1,2 +1 @@
 /* __llroundf is in s_llround.S  */
-/* __llroundf is in s_llround.S  */
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index c837393d79..b674dbef43 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -1,6 +1,6 @@
 /* Machine-dependent ELF dynamic relocation inline functions.
    PowerPC64 version.
-   Copyright 1995-2005, 2006 Free Software Foundation, Inc.
+   Copyright 1995-2005, 2006, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -287,6 +287,8 @@ BODY_PREFIX "_dl_start_user:\n"						\
 #define GLINK_INITIAL_ENTRY_WORDS 8
 
 #define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory")
+#define PPC_DCBT(where) asm volatile ("dcbt 0,%0" : : "r"(where) : "memory")
+#define PPC_DCBF(where) asm volatile ("dcbf 0,%0" : : "r"(where) : "memory")
 #define PPC_SYNC asm volatile ("sync" : : : "memory")
 #define PPC_ISYNC asm volatile ("sync; isync" : : : "memory")
 #define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory")
@@ -408,6 +410,11 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map,
   Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr;
   Elf64_Addr offset = 0;
 
+  PPC_DCBT (&plt->fd_aux);
+  PPC_DCBT (&plt->fd_func);
+  PPC_DCBT (&rel->fd_aux);
+  PPC_DCBT (&rel->fd_func);
+
   /* If sym_map is NULL, it's a weak undefined sym;  Leave the plt zero.  */
   if (sym_map == NULL)
     return 0;
@@ -430,13 +437,12 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map,
 
   plt->fd_aux = rel->fd_aux + offset;
   plt->fd_toc = rel->fd_toc + offset;
-  PPC_DCBST (&plt->fd_aux);
-  PPC_DCBST (&plt->fd_toc);
-  PPC_SYNC;
+  PPC_DCBF (&plt->fd_toc);
+  PPC_ISYNC;
 
   plt->fd_func = rel->fd_func + offset;
   PPC_DCBST (&plt->fd_func);
-  PPC_SYNC;
+  PPC_ISYNC;
 
   return finaladdr;
 }
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/fpu/s_llround.S
index d023b8f2c0..4134847536 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_llround.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_llround.S
@@ -1,5 +1,5 @@
 /* llround function.  PowerPC64 version.
-   Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -21,13 +21,13 @@
 #include <math_ldbl_opt.h>
 
 	.section	".toc","aw"
-.LC0:	/* -0.0 */
-	.tc FD_00000000_0[TC],0x0000000000000000
+.LC0:	/* 2^52 */
+	.tc FD_43300000_0[TC],0x4330000000000000
 .LC1:	/* 0.5 */
 	.tc FD_3fe00000_0[TC],0x3fe0000000000000
 	.section	".text"
 	
-/* long long [r3] llround (float x [fp1])
+/* long long [r3] llround (double x [fp1])
    IEEE 1003.1 llround function.  IEEE specifies "round to the nearest 
    integer value, rounding halfway cases away from zero, regardless of
    the current rounding mode."  However PowerPC Architecture defines
@@ -35,26 +35,45 @@
    tie, choose the one that is even (least significant bit o).". 
    So we can't use the PowerPC "round to Nearest" mode. Instead we set
    "round toward Zero" mode and round by adding +-0.5 before rounding
-   to the integer value.  */
+   to the integer value.
+
+   It is necessary to detect when x is (+-)0x1.fffffffffffffp-2
+   because adding +-0.5 in this case will cause an erroneous shift,
+   carry and round.  We simply return 0 if 0.5 > x > -0.5.  Likewise
+   if x is an odd number between +-(2^52 and 2^53-1) a shift and
+   carry will erroneously round if biased with +-0.5.  Therefore if
+   |x| >= 2^52, where x is already an integer, we don't need to bias
+   the number with +-0.5.  */
 
 ENTRY (__llround)
 	CALL_MCOUNT 0
-	lfd	fp12,.LC0@toc(2)
-	lfd	fp10,.LC1@toc(2)
-	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	ble-	cr6,.L4
-	fadd	fp1,fp1,fp10	/* x+= 0.5;  */
-.L9:
-	fctidz	fp2,fp1		/* Convert To Integer DW llround toward 0.  */
-	stfd	fp2,-16(r1)
-	nop	/* Insure the following load is in a different dispatch group */
-	nop	/* to avoid pipe stall on POWER4&5.  */
+	lfd	fp9,.LC0@toc(2)	/* Load 2^52 into fpr9.  */
+	lfd	fp10,.LC1@toc(2)/* Load 0.5 into fpr10.  */
+	fabs	fp2,fp1		/* Get the absolute value of x.  */
+	fsub	fp12,fp10,fp10	/* Compute 0.0 into fp12.  */
+	fcmpu	cr6,fp2,fp10	/* if |x| < 0.5  */
+	fcmpu	cr4,fp2,fp9	/* if |x| >= 2^52  */
+	fcmpu	cr3,fp1,fp12	/* x is negative? x < 0.0  */
+	blt-	cr6,.Lretzero	/* 0.5 > x > -0.5 so just return 0.  */
+	bge-	cr4,.Lnobias	/* |x| >= 2^52 so just convert with no bias.  */
+	fadd	fp3,fp2,fp10	/* |x|+=0.5 bias to prepare to round.  */
+	bge	cr3,.Lconvert	/* x is positive so don't negate x.  */
+	fnabs	fp3,fp3		/* -(|x|+=0.5)  */
+.Lconvert:
+	fctidz	fp4,fp3		/* Convert to Integer double word round toward 0.  */
+	stfd	fp4,-16(r1)
+	nop
+	nop
 	nop
-	ld	r3,-16(r1)
+	ld	r3,-16(r1)	/* Load return as integer.  */
+.Lout:
 	blr
-.L4:
-	fsub	fp1,fp1,fp10	/* x-= 0.5;  */
-	b	.L9
+.Lretzero:			/* 0.5 > x > -0.5  */
+	li	r3,0		/* return 0.  */
+	b	.Lout
+.Lnobias:
+	fmr	fp3,fp1
+	b	.Lconvert
 	END (__llround)
 
 strong_alias (__llround, __lround)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S
index bbbd05492e..a211879393 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S
@@ -1,5 +1,5 @@
 /* llroundf function.  PowerPC64 version.
-   Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,8 @@
 #include <sysdep.h>
 
 	.section	".toc","aw"
-.LC0:	/* -0.0 */
-	.tc FD_00000000_0[TC],0x0000000000000000
+.LC0:	/* 2^23 */
+	.tc FD_41600000_0[TC],0x4160000000000000
 .LC1:	/* 0.5 */
 	.tc FD_3fe00000_0[TC],0x3fe0000000000000
 	.section	".text"
@@ -34,24 +34,45 @@
    tie, choose the one that is even (least significant bit o).". 
    So we can't use the PowerPC "round to Nearest" mode. Instead we set
    "round toward Zero" mode and round by adding +-0.5 before rounding
-   to the integer value.  */
+   to the integer value.
+
+   It is necessary to detect when x is (+-)0x1.fffffep-2
+   because adding +-0.5 in this case will cause an erroneous shift,
+   carry and round.  We simply return 0 if 0.5 > x > -0.5.  Likewise
+   if x is an odd number between +-(2^23 and 2^24-1) a shift and
+   carry will erroneously round if biased with +-0.5.  Therefore if
+   |x| >= 2^23, where x is already an integer, we don't need to bias
+   the number with +-0.5.  */
 
 ENTRY (__llroundf)
 	CALL_MCOUNT 0
-	lfd	fp12,.LC0@toc(2)
-	lfd	fp10,.LC1@toc(2)
-	fcmpu	cr6,fp1,fp12	/* if (x < 0.0)  */
-	fsubs	fp3,fp1,fp10	/* x-= 0.5;  */
-	ble-	cr6,.L9
-	fadds	fp3,fp1,fp10	/* x+= 0.5;  */
-.L9:
-	fctidz	fp2,fp3		/* Convert To Integer DW round toward 0.  */
-	stfd	fp2,-16(r1)
-	nop	/* Insure the following load is in a different dispatch group */
-	nop	/* to avoid pipe stall on POWER4&5.  */
+	lfd	fp9,.LC0@toc(2)	/* Load 2^23 into fpr9.  */
+	lfd	fp10,.LC1@toc(2)/* Load 0.5 into fpr10.  */
+	fabs	fp2,fp1		/* Get the absolute value of x.  */
+	fsub	fp12,fp10,fp10	/* Compute 0.0 into fp12.  */
+	fcmpu	cr6,fp2,fp10	/* if |x| < 0.5  */
+	fcmpu	cr4,fp2,fp9	/* if |x| >= 2^23  */
+	fcmpu	cr3,fp1,fp12	/* x is negative? x < 0.0  */
+	blt-	cr6,.Lretzero	/* 0.5 > x > -0.5 so just return 0.  */
+	bge-	cr4,.Lnobias	/* |x| >= 2^23 so just convert with no bias.  */
+	fadd	fp3,fp2,fp10	/* |x|+=0.5 bias to prepare to round.  */
+	bge	cr3,.Lconvert	/* x is positive so don't negate x.  */
+	fnabs	fp3,fp3		/* -(|x|+=0.5)  */
+.Lconvert:
+	fctidz	fp4,fp3		/* Convert to Integer double word round toward 0.  */
+	stfd	fp4,-16(r1)
+	nop
+	nop
 	nop
-	ld	r3,-16(r1)
+	ld	r3,-16(r1)	/* Load return as integer.  */
+.Lout:
 	blr
+.Lretzero:			/* 0.5 > x > -0.5  */
+	li	r3,0		/* return 0.  */
+	b	.Lout
+.Lnobias:
+	fmr	fp3,fp1
+	b	.Lconvert
 	END (__llroundf)
 
 strong_alias (__llroundf, __lroundf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_lrint.S b/sysdeps/powerpc/powerpc64/fpu/s_lrint.S
index fe774693b7..d3c2fff581 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_lrint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_lrint.S
@@ -1,2 +1 @@
 /* __lrint is in s_llrint.c  */
-/* __lrint is in s_llrint.c  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_lround.S b/sysdeps/powerpc/powerpc64/fpu/s_lround.S
index 883bba1c5d..4306c405c4 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_lround.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_lround.S
@@ -1,2 +1 @@
 /* __lround is in s_llround.S  */
-/* __lround is in s_llround.S  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S b/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S
index 15506f6801..6b2a4e37a6 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S
@@ -1,2 +1 @@
 /* __lroundf is in s_llroundf.S  */
-/* __lroundf is in s_llroundf.S  */