author    Jakub Jelinek <jakub@redhat.com>    2007-07-31 17:46:17 +0000
committer Jakub Jelinek <jakub@redhat.com>    2007-07-31 17:46:17 +0000
commit    8833066b122427710a9e14a888ce6cfa862332d3 (patch)
tree      29591019d695919417b3698618d6a342e97381d6 /sysdeps
parent    fedca46896bdb702cb988837a0c2c5447e72ba2b (diff)
Updated to fedora-glibc-20070731T1624cvs/fedora-glibc-2_6_90-1
Diffstat (limited to 'sysdeps')
-rw-r--r--  sysdeps/generic/_G_config.h | 20
-rw-r--r--  sysdeps/generic/initfini.c | 4
-rw-r--r--  sysdeps/gnu/_G_config.h | 20
-rw-r--r--  sysdeps/ia64/sched_cpucount.c | 21
-rw-r--r--  sysdeps/ieee754/ldbl-96/s_roundl.c | 4
-rw-r--r--  sysdeps/mach/hurd/_G_config.h | 20
-rw-r--r--  sysdeps/mach/hurd/bits/fcntl.h | 4
-rw-r--r--  sysdeps/mach/hurd/i386/tls.h | 6
-rw-r--r--  sysdeps/mach/hurd/sigaction.c | 6
-rw-r--r--  sysdeps/mach/hurd/sigsuspend.c | 5
-rw-r--r--  sysdeps/mach/hurd/tls.h | 4
-rw-r--r--  sysdeps/mach/i386/sysdep.h | 8
-rw-r--r--  sysdeps/posix/posix_fallocate64.c | 3
-rw-r--r--  sysdeps/powerpc/powerpc32/970/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/970/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/Makefile | 6
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/Makefile | 5
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/mpa.c | 549
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S | 47
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S | 39
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S | 97
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c | 66
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c | 94
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c | 62
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c | 60
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/memcmp.S | 986
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/memcopy.h | 113
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/memcpy.S | 426
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/memset.S | 229
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/strncmp.S | 176
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/wordcopy.c | 209
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S | 37
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S | 30
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S | 37
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S | 30
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S | 60
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S | 2
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S | 58
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S | 37
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S | 30
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S | 37
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S | 30
-rw-r--r--  sysdeps/powerpc/powerpc32/power5/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power5/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/Implies | 2
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/Implies | 2
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S | 47
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S | 39
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S | 60
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S | 2
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/memcpy.S | 843
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/memset.S | 542
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/wordcopy.c | 287
-rw-r--r--  sysdeps/powerpc/powerpc32/power6x/Implies | 3
-rw-r--r--  sysdeps/powerpc/powerpc32/power6x/fpu/Implies | 3
-rw-r--r--  sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S | 42
-rw-r--r--  sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S | 52
-rw-r--r--  sysdeps/powerpc/powerpc64/970/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/970/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/Makefile | 6
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/Makefile | 5
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/mpa.c | 549
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c | 66
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c | 94
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c | 62
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c | 60
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/memcmp.S | 982
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/memcopy.h | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/memcpy.S | 418
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/memset.S | 275
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/strncmp.S | 180
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/wordcopy.c | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S | 38
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S | 31
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S | 38
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S | 31
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S | 59
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S | 38
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S | 31
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S | 38
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S | 31
-rw-r--r--  sysdeps/powerpc/powerpc64/power5/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power5/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/fpu/Implies | 2
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/memcpy.S | 1170
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/memset.S | 419
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/wordcopy.c | 410
-rw-r--r--  sysdeps/powerpc/powerpc64/power6x/Implies | 2
-rw-r--r--  sysdeps/powerpc/powerpc64/power6x/fpu/Implies | 3
-rw-r--r--  sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S | 45
-rw-r--r--  sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S | 55
-rw-r--r--  sysdeps/powerpc/sched_cpucount.c | 23
-rw-r--r--  sysdeps/s390/dl-procinfo.c | 80
-rw-r--r--  sysdeps/s390/dl-procinfo.h | 99
-rw-r--r--  sysdeps/sh/bsd-_setjmp.S | 6
-rw-r--r--  sysdeps/sh/bsd-setjmp.S | 6
-rw-r--r--  sysdeps/unix/clock_gettime.c | 12
-rw-r--r--  sysdeps/unix/sysv/linux/Makefile | 2
-rw-r--r--  sysdeps/unix/sysv/linux/bits/sched.h | 78
-rw-r--r--  sysdeps/unix/sysv/linux/bits/socket.h | 15
-rw-r--r--  sysdeps/unix/sysv/linux/check_pf.c | 80
-rw-r--r--  sysdeps/unix/sysv/linux/futimes.c | 4
-rw-r--r--  sysdeps/unix/sysv/linux/i386/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/i386/sysconf.c | 4
-rw-r--r--  sysdeps/unix/sysv/linux/ia64/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/ia64/sys/ptrace.h | 24
-rw-r--r--  sysdeps/unix/sysv/linux/kernel-features.h | 12
-rw-r--r--  sysdeps/unix/sysv/linux/nscd_setup_thread.c | 5
-rw-r--r--  sysdeps/unix/sysv/linux/open64.c | 34
-rw-r--r--  sysdeps/unix/sysv/linux/open_2.c | 32
-rw-r--r--  sysdeps/unix/sysv/linux/openat.c | 18
-rw-r--r--  sysdeps/unix/sysv/linux/openat64.c | 1
-rw-r--r--  sysdeps/unix/sysv/linux/posix_fallocate.c | 60
-rw-r--r--  sysdeps/unix/sysv/linux/posix_fallocate64.c | 64
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/libc-start.c | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/970/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power4/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power5+/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power5/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power6x/fpu/Implies | 4
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/970/fpu/Implies | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power4/fpu/Implies | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power5+/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power5/fpu/Implies | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power6x/fpu/Implies | 4
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h | 24
-rw-r--r--  sysdeps/unix/sysv/linux/s390/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/s390/dl-procinfo.h | 43
-rw-r--r--  sysdeps/unix/sysv/linux/s390/sys/ptrace.h | 24
-rw-r--r--  sysdeps/unix/sysv/linux/sh/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/sh/clone.S | 10
-rw-r--r--  sysdeps/unix/sysv/linux/sparc/sys/ptrace.h | 24
-rw-r--r--  sysdeps/unix/sysv/linux/sys/ptrace.h | 25
-rw-r--r--  sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate.c | 59
-rw-r--r--  sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate64.c | 1
-rw-r--r--  sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h | 3
-rw-r--r--  sysdeps/x86_64/cacheinfo.c | 2
-rw-r--r--  sysdeps/x86_64/sched_cpucount.c | 26
147 files changed, 11575 insertions, 128 deletions
diff --git a/sysdeps/generic/_G_config.h b/sysdeps/generic/_G_config.h
index a152b070c6..4aafe65f6b 100644
--- a/sysdeps/generic/_G_config.h
+++ b/sysdeps/generic/_G_config.h
@@ -8,19 +8,15 @@
#include <bits/types.h>
#define __need_size_t
-#define __need_wchar_t
-#define __need_wint_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wchar_t
+#endif
#define __need_NULL
#include <stddef.h>
-#ifndef _WINT_T
-/* Integral type unchanged by default argument promotions that can
- hold any value corresponding to members of the extended character
- set, as well as at least one value that does not correspond to any
- member of the extended character set. */
-# define _WINT_T
-typedef unsigned int wint_t;
-#endif
#define __need_mbstate_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wint_t
+#endif
#include <wchar.h>
#define _G_size_t size_t
typedef struct
@@ -41,7 +37,8 @@ typedef struct
#define _G_wchar_t wchar_t
#define _G_wint_t wint_t
#define _G_stat64 stat
-#include <gconv.h>
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# include <gconv.h>
typedef union
{
struct __gconv_info __cd;
@@ -51,6 +48,7 @@ typedef union
struct __gconv_step_data __data;
} __combined;
} _G_iconv_t;
+#endif
typedef int _G_int16_t __attribute__ ((__mode__ (__HI__)));
typedef int _G_int32_t __attribute__ ((__mode__ (__SI__)));
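
The hunk above relies on glibc's __need_* protocol: a client defines __need_size_t (or __need_wchar_t, __need_wint_t, __need_mbstate_t) before including <stddef.h> or <wchar.h> to obtain exactly one definition instead of the whole header, which is what lets the wide-character pieces be skipped unless _LIBC or _GLIBCPP_USE_WCHAR_T is defined. A minimal sketch of the consumer side (function name is hypothetical, for illustration only):

#define __need_size_t
#include <stddef.h>		/* per the protocol: provides size_t and nothing else */

size_t
count_bytes (const char *s)
{
  size_t n = 0;
  while (s[n] != '\0')
    n++;
  return n;
}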
diff --git a/sysdeps/generic/initfini.c b/sysdeps/generic/initfini.c
index 2b8412a428..d5ef778367 100644
--- a/sysdeps/generic/initfini.c
+++ b/sysdeps/generic/initfini.c
@@ -81,7 +81,7 @@ call_gmon_start(void)
}
SECTION (".init");
-extern void _init (void);
+extern void __attribute__ ((section (".init"))) _init (void);
void
_init (void)
{
@@ -107,7 +107,7 @@ asm ("\n/*@_init_EPILOG_ENDS*/");
asm ("\n/*@_fini_PROLOG_BEGINS*/");
SECTION (".fini");
-extern void _fini (void);
+extern void __attribute__ ((section (".fini"))) _fini (void);
void
_fini (void)
{
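
The initfini.c change puts the section attribute on the prototypes so the compiler emits _init and _fini into the .init/.fini sections named by the surrounding SECTION macro. A minimal sketch of the same idiom with a hypothetical function name, mirroring the declaration form used in the patch:

/* The attribute on the declaration directs the definition below into
   the named section instead of .text.  */
extern void __attribute__ ((section (".init"))) my_startup (void);

void
my_startup (void)
{
  /* start-up work emitted into the .init section */
}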
diff --git a/sysdeps/gnu/_G_config.h b/sysdeps/gnu/_G_config.h
index 83c78f0b93..211d0224b1 100644
--- a/sysdeps/gnu/_G_config.h
+++ b/sysdeps/gnu/_G_config.h
@@ -8,19 +8,15 @@
#include <bits/types.h>
#define __need_size_t
-#define __need_wchar_t
-#define __need_wint_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wchar_t
+#endif
#define __need_NULL
#include <stddef.h>
-#ifndef _WINT_T
-/* Integral type unchanged by default argument promotions that can
- hold any value corresponding to members of the extended character
- set, as well as at least one value that does not correspond to any
- member of the extended character set. */
-# define _WINT_T
-typedef unsigned int wint_t;
-#endif
#define __need_mbstate_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wint_t
+#endif
#include <wchar.h>
#define _G_size_t size_t
typedef struct
@@ -41,7 +37,8 @@ typedef struct
#define _G_wchar_t wchar_t
#define _G_wint_t wint_t
#define _G_stat64 stat64
-#include <gconv.h>
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# include <gconv.h>
typedef union
{
struct __gconv_info __cd;
@@ -51,6 +48,7 @@ typedef union
struct __gconv_step_data __data;
} __combined;
} _G_iconv_t;
+#endif
typedef int _G_int16_t __attribute__ ((__mode__ (__HI__)));
typedef int _G_int32_t __attribute__ ((__mode__ (__SI__)));
diff --git a/sysdeps/ia64/sched_cpucount.c b/sysdeps/ia64/sched_cpucount.c
new file mode 100644
index 0000000000..ff9d8cf2a8
--- /dev/null
+++ b/sysdeps/ia64/sched_cpucount.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#define POPCNT(l) __builtin_popcountl (l)
+
+#include <posix/sched_cpucount.c>
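
The override works because the generic posix/sched_cpucount.c counts set bits one __cpu_mask word at a time through the POPCNT hook, which this file remaps to __builtin_popcountl. A rough sketch of that pattern under the glibc cpu_set_t layout (the generic file differs in detail; names here are illustrative):

#define _GNU_SOURCE
#include <sched.h>
#include <stddef.h>

/* Portable stand-in for the POPCNT hook; an arch override such as the
   ia64 file above would replace this with __builtin_popcountl.  */
static int
popcnt (unsigned long int w)
{
  int n = 0;
  while (w != 0)
    {
      w &= w - 1;		/* clear the lowest set bit */
      ++n;
    }
  return n;
}

int
cpucount_sketch (const cpu_set_t *set)
{
  int count = 0;
  for (size_t i = 0; i < sizeof (set->__bits) / sizeof (set->__bits[0]); ++i)
    count += popcnt (set->__bits[i]);
  return count;
}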
diff --git a/sysdeps/ieee754/ldbl-96/s_roundl.c b/sysdeps/ieee754/ldbl-96/s_roundl.c
index 672536d358..f1399cc209 100644
--- a/sysdeps/ieee754/ldbl-96/s_roundl.c
+++ b/sysdeps/ieee754/ldbl-96/s_roundl.c
@@ -1,5 +1,5 @@
/* Round long double to integer away from zero.
- Copyright (C) 1997 Free Software Foundation, Inc.
+ Copyright (C) 1997, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -23,7 +23,7 @@
#include "math_private.h"
-static const long double huge = 1.0e4930;
+static const long double huge = 1.0e4930L;
long double
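
The one-character fix matters because an unsuffixed 1.0e4930 is a constant of type double, and that value is far above DBL_MAX (about 1.8e308), so the initializer degenerates to infinity before the conversion to long double; the L suffix keeps the value inside the ldbl-96 range (maximum about 1.19e4932). A quick check, assuming an x86-style 80-bit long double:

#include <stdio.h>

int
main (void)
{
  long double bad = 1.0e4930;	/* parsed as a double constant: overflows to inf */
  long double good = 1.0e4930L;	/* long double constant: representable */
  printf ("%Lg vs %Lg\n", bad, good);
  return 0;
}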
diff --git a/sysdeps/mach/hurd/_G_config.h b/sysdeps/mach/hurd/_G_config.h
index b643059234..db959246ee 100644
--- a/sysdeps/mach/hurd/_G_config.h
+++ b/sysdeps/mach/hurd/_G_config.h
@@ -8,19 +8,15 @@
#include <bits/types.h>
#define __need_size_t
-#define __need_wchar_t
-#define __need_wint_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wchar_t
+#endif
#define __need_NULL
#include <stddef.h>
-#ifndef _WINT_T
-/* Integral type unchanged by default argument promotions that can
- hold any value corresponding to members of the extended character
- set, as well as at least one value that does not correspond to any
- member of the extended character set. */
-# define _WINT_T
-typedef unsigned int wint_t;
-#endif
#define __need_mbstate_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wint_t
+#endif
#include <wchar.h>
#define _G_size_t size_t
typedef struct
@@ -41,7 +37,8 @@ typedef struct
#define _G_wchar_t wchar_t
#define _G_wint_t wint_t
#define _G_stat64 stat64
-#include <gconv.h>
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# include <gconv.h>
typedef union
{
struct __gconv_info __cd;
@@ -51,6 +48,7 @@ typedef union
struct __gconv_step_data __data;
} __combined;
} _G_iconv_t;
+#endif
typedef int _G_int16_t __attribute__ ((__mode__ (__HI__)));
typedef int _G_int32_t __attribute__ ((__mode__ (__SI__)));
diff --git a/sysdeps/mach/hurd/bits/fcntl.h b/sysdeps/mach/hurd/bits/fcntl.h
index e709cc6fb1..34e6baa049 100644
--- a/sysdeps/mach/hurd/bits/fcntl.h
+++ b/sysdeps/mach/hurd/bits/fcntl.h
@@ -1,5 +1,6 @@
/* O_*, F_*, FD_* bit values for GNU.
- Copyright (C) 1993,94,96,97,98,99,2000,01,04 Free Software Foundation, Inc.
+ Copyright (C) 1993,1994,1996,1997,1998,1999,2000,2001,2004,2007
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -70,6 +71,7 @@
#define O_SYNC O_FSYNC
#ifdef __USE_GNU
# define O_NOATIME 0x0800 /* Don't set access time on read (owner). */
+# define O_CLOEXEC 0x00010000 /* Set FD_CLOEXEC. */
#endif
#ifdef __USE_MISC
# define O_SHLOCK 0x00020000 /* Open with shared file lock. */
diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
index 972cac57a1..d98b485b96 100644
--- a/sysdeps/mach/hurd/i386/tls.h
+++ b/sysdeps/mach/hurd/i386/tls.h
@@ -1,5 +1,5 @@
/* Definitions for thread-local data handling. Hurd/i386 version.
- Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -29,7 +29,7 @@
thread pointer points to is unspecified. Allocate the TCB there. */
# define TLS_TCB_AT_TP 1
-# ifndef ASSEMBLER
+# ifndef __ASSEMBLER__
/* Use i386-specific RPCs to arrange that %gs segment register prefix
addresses the TCB in each thread. */
@@ -165,7 +165,7 @@ _hurd_tls_fork (thread_t child, struct i386_thread_state *state)
return err;
}
-# endif /* !ASSEMBLER */
+# endif /* !__ASSEMBLER__ */
#endif /* HAVE_TLS_SUPPORT */
#endif /* i386/tls.h */
diff --git a/sysdeps/mach/hurd/sigaction.c b/sysdeps/mach/hurd/sigaction.c
index 3dc530955d..fe452e8283 100644
--- a/sysdeps/mach/hurd/sigaction.c
+++ b/sysdeps/mach/hurd/sigaction.c
@@ -1,4 +1,6 @@
-/* Copyright (C) 1991,92,93,94,95,96,97,2002 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 2002, 2007
+ Free Software Foundation, Inc.
+
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -68,7 +70,7 @@ __sigaction (sig, act, oact)
__spin_lock (&ss->lock);
pending = ss->pending & ~ss->blocked;
}
- else if (a.sa_handler == SIG_IGN || a.sa_handler == SIG_DFL)
+ else if (act != NULL && (a.sa_handler == SIG_IGN || a.sa_handler == SIG_DFL))
/* We are changing to an action that might be to ignore SIG signals.
If SIG is blocked and pending and the new action is to ignore it, we
must remove it from the pending set now; if the action is changed
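
The sigaction.c hunk guards a dereference that was reachable with act == NULL, which POSIX permits as a way to query the current disposition without changing it. The call pattern the fix protects, as a small self-contained example:

#include <signal.h>
#include <stdio.h>

int
main (void)
{
  struct sigaction old;
  /* act == NULL: query only; the implementation must not inspect or
     install a new action.  */
  if (sigaction (SIGINT, NULL, &old) == 0)
    printf ("SIGINT is %s\n",
            old.sa_handler == SIG_DFL ? "at default" : "handled");
  return 0;
}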
diff --git a/sysdeps/mach/hurd/sigsuspend.c b/sysdeps/mach/hurd/sigsuspend.c
index 96b3857c72..7e32472ce3 100644
--- a/sysdeps/mach/hurd/sigsuspend.c
+++ b/sysdeps/mach/hurd/sigsuspend.c
@@ -1,4 +1,6 @@
-/* Copyright (C) 1991,92,93,94,95,96,97,98,2002 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2002, 2007
+ Free Software Foundation, Inc.
+
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -80,4 +82,5 @@ __sigsuspend (set)
return -1;
}
libc_hidden_def (__sigsuspend)
+strong_alias (__sigsuspend, sigsuspend_not_cancel)
weak_alias (__sigsuspend, sigsuspend)
diff --git a/sysdeps/mach/hurd/tls.h b/sysdeps/mach/hurd/tls.h
index cce42ef374..8ad3f1a28b 100644
--- a/sysdeps/mach/hurd/tls.h
+++ b/sysdeps/mach/hurd/tls.h
@@ -1,5 +1,5 @@
/* Definitions for thread-local data handling. Hurd version.
- Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2005, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
#ifndef _TLS_H
#define _TLS_H
-#if defined HAVE_TLS_SUPPORT && !defined ASSEMBLER
+#if defined HAVE_TLS_SUPPORT && !defined __ASSEMBLER__
# include <stddef.h>
# include <stdbool.h>
diff --git a/sysdeps/mach/i386/sysdep.h b/sysdeps/mach/i386/sysdep.h
index 1f138f4759..4fc5d50f3d 100644
--- a/sysdeps/mach/i386/sysdep.h
+++ b/sysdeps/mach/i386/sysdep.h
@@ -1,4 +1,6 @@
-/* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 2007
+ Free Software Foundation, Inc.
+
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,6 +18,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+/* Defines RTLD_PRIVATE_ERRNO and USE_DL_SYSINFO. */
+#include <dl-sysdep.h>
+#include <tls.h>
+
#define LOSE asm volatile ("hlt")
#define SNARF_ARGS(entry_sp, argc, argv, envp) \
diff --git a/sysdeps/posix/posix_fallocate64.c b/sysdeps/posix/posix_fallocate64.c
index 64ca9ae83d..07e2a00bf5 100644
--- a/sysdeps/posix/posix_fallocate64.c
+++ b/sysdeps/posix/posix_fallocate64.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2000, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -93,6 +93,7 @@ __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len)
return 0;
}
+#undef __posix_fallocate64_l64
#include <shlib-compat.h>
#include <bits/wordsize.h>
diff --git a/sysdeps/powerpc/powerpc32/970/Implies b/sysdeps/powerpc/powerpc32/970/Implies
new file mode 100644
index 0000000000..4e3a983426
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/970/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/970/fpu/Implies b/sysdeps/powerpc/powerpc32/970/fpu/Implies
new file mode 100644
index 0000000000..7c16e45c29
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/970/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power4/Makefile b/sysdeps/powerpc/powerpc32/power4/Makefile
new file mode 100644
index 0000000000..60aa508ba4
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/Makefile
@@ -0,0 +1,6 @@
+# Makefile fragment for POWER4/5/5+.
+
+ifeq ($(subdir),string)
+CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops -ftree-loop-linear
+CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops -ftree-loop-linear
+endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/Makefile b/sysdeps/powerpc/powerpc32/power4/fpu/Makefile
new file mode 100644
index 0000000000..a6fa75ecbc
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/Makefile
@@ -0,0 +1,5 @@
+# Makefile fragment for POWER4/5/5+ with FPU.
+
+ifeq ($(subdir),math)
+CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops -ftree-loop-linear
+endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
new file mode 100644
index 0000000000..4a232e27bf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
@@ -0,0 +1,549 @@
+
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2006 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/************************************************************************/
+/* MODULE_NAME: mpa.c */
+/* */
+/* FUNCTIONS: */
+/* mcr */
+/* acr */
+/* cr */
+/* cpy */
+/* cpymn */
+/* norm */
+/* denorm */
+/* mp_dbl */
+/* dbl_mp */
+/* add_magnitudes */
+/* sub_magnitudes */
+/* add */
+/* sub */
+/* mul */
+/* inv */
+/* dvd */
+/* */
+/* Arithmetic functions for multiple precision numbers. */
+/* Relative errors are bounded */
+/************************************************************************/
+
+
+#include "endian.h"
+#include "mpa.h"
+#include "mpa2.h"
+#include <sys/param.h> /* For MIN() */
+/* mcr() compares the sizes of the mantissas of two multiple precision */
+/* numbers. Mantissas are compared regardless of the signs of the */
+/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */
+/* disregarded. */
+static int mcr(const mp_no *x, const mp_no *y, int p) {
+ long i;
+ long p2 = p;
+ for (i=1; i<=p2; i++) {
+ if (X[i] == Y[i]) continue;
+ else if (X[i] > Y[i]) return 1;
+ else return -1; }
+ return 0;
+}
+
+
+
+/* acr() compares the absolute values of two multiple precision numbers */
+int __acr(const mp_no *x, const mp_no *y, int p) {
+ long i;
+
+ if (X[0] == ZERO) {
+ if (Y[0] == ZERO) i= 0;
+ else i=-1;
+ }
+ else if (Y[0] == ZERO) i= 1;
+ else {
+ if (EX > EY) i= 1;
+ else if (EX < EY) i=-1;
+ else i= mcr(x,y,p);
+ }
+
+ return i;
+}
+
+
+/* cr() compares the values of two multiple precision numbers */
+int __cr(const mp_no *x, const mp_no *y, int p) {
+ int i;
+
+ if (X[0] > Y[0]) i= 1;
+ else if (X[0] < Y[0]) i=-1;
+ else if (X[0] < ZERO ) i= __acr(y,x,p);
+ else i= __acr(x,y,p);
+
+ return i;
+}
+
+
+/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
+void __cpy(const mp_no *x, mp_no *y, int p) {
+ long i;
+
+ EY = EX;
+ for (i=0; i <= p; i++) Y[i] = X[i];
+
+ return;
+}
+
+
+/* Copy a multiple precision number x of precision m into a */
+/* multiple precision number y of precision n. In case n>m, */
+/* the digits of y beyond the m'th are set to zero. In case */
+/* n<m, the digits of x beyond the n'th are ignored. */
+/* x=y is permissible. */
+
+void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
+
+ long i,k;
+ long n2 = n;
+ long m2 = m;
+
+ EY = EX; k=MIN(m2,n2);
+ for (i=0; i <= k; i++) Y[i] = X[i];
+ for ( ; i <= n2; i++) Y[i] = ZERO;
+
+ return;
+}
+
+/* Convert a multiple precision number *x into a double precision */
+/* number *y, normalized case (|x| >= 2**(-1022)). */
+static void norm(const mp_no *x, double *y, int p)
+{
+ #define R radixi.d
+ long i;
+#if 0
+ int k;
+#endif
+ double a,c,u,v,z[5];
+ if (p<5) {
+ if (p==1) c = X[1];
+ else if (p==2) c = X[1] + R* X[2];
+ else if (p==3) c = X[1] + R*(X[2] + R* X[3]);
+ else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
+ }
+ else {
+ for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
+ {a *= TWO; z[1] *= TWO; }
+
+ for (i=2; i<5; i++) {
+ z[i] = X[i]*a;
+ u = (z[i] + CUTTER)-CUTTER;
+ if (u > z[i]) u -= RADIX;
+ z[i] -= u;
+ z[i-1] += u*RADIXI;
+ }
+
+ u = (z[3] + TWO71) - TWO71;
+ if (u > z[3]) u -= TWO19;
+ v = z[3]-u;
+
+ if (v == TWO18) {
+ if (z[4] == ZERO) {
+ for (i=5; i <= p; i++) {
+ if (X[i] == ZERO) continue;
+ else {z[3] += ONE; break; }
+ }
+ }
+ else z[3] += ONE;
+ }
+
+ c = (z[1] + R *(z[2] + R * z[3]))/a;
+ }
+
+ c *= X[0];
+
+ for (i=1; i<EX; i++) c *= RADIX;
+ for (i=1; i>EX; i--) c *= RADIXI;
+
+ *y = c;
+ return;
+#undef R
+}
+
+/* Convert a multiple precision number *x into a double precision */
+/* number *y, denormalized case (|x| < 2**(-1022)). */
+static void denorm(const mp_no *x, double *y, int p)
+{
+ long i,k;
+ long p2 = p;
+ double c,u,z[5];
+#if 0
+ double a,v;
+#endif
+
+#define R radixi.d
+ if (EX<-44 || (EX==-44 && X[1]<TWO5))
+ { *y=ZERO; return; }
+
+ if (p2==1) {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
+ }
+ else if (p2==2) {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
+ }
+ else {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; k=1;}
+ z[3] = X[k];
+ }
+
+ u = (z[3] + TWO57) - TWO57;
+ if (u > z[3]) u -= TWO5;
+
+ if (u==z[3]) {
+ for (i=k+1; i <= p2; i++) {
+ if (X[i] == ZERO) continue;
+ else {z[3] += ONE; break; }
+ }
+ }
+
+ c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
+
+ *y = c*TWOM1032;
+ return;
+
+#undef R
+}
+
+/* Convert a multiple precision number *x into a double precision number *y. */
+/* The result is correctly rounded to the nearest/even. *x is left unchanged */
+
+void __mp_dbl(const mp_no *x, double *y, int p) {
+#if 0
+ int i,k;
+ double a,c,u,v,z[5];
+#endif
+
+ if (X[0] == ZERO) {*y = ZERO; return; }
+
+ if (EX> -42) norm(x,y,p);
+ else if (EX==-42 && X[1]>=TWO10) norm(x,y,p);
+ else denorm(x,y,p);
+}
+
+
+/* dbl_mp() converts a double precision number x into a multiple precision */
+/* number *y. If the precision p is too small the result is truncated. x is */
+/* left unchanged. */
+
+void __dbl_mp(double x, mp_no *y, int p) {
+
+ long i,n;
+ long p2 = p;
+ double u;
+
+ /* Sign */
+ if (x == ZERO) {Y[0] = ZERO; return; }
+ else if (x > ZERO) Y[0] = ONE;
+ else {Y[0] = MONE; x=-x; }
+
+ /* Exponent */
+ for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI;
+ for ( ; x < ONE; EY -= ONE) x *= RADIX;
+
+ /* Digits */
+ n=MIN(p2,4);
+ for (i=1; i<=n; i++) {
+ u = (x + TWO52) - TWO52;
+ if (u>x) u -= ONE;
+ Y[i] = u; x -= u; x *= RADIX; }
+ for ( ; i<=p2; i++) Y[i] = ZERO;
+ return;
+}
+
+
+/* add_magnitudes() adds the magnitudes of *x & *y assuming that */
+/* abs(*x) >= abs(*y) > 0. */
+/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
+/* No guard digit is used. The result equals the exact sum, truncated. */
+/* *x & *y are left unchanged. */
+
+static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i,j,k;
+ long p2 = p;
+
+ EZ = EX;
+
+ i=p2; j=p2+ EY - EX; k=p2+1;
+
+ if (j<1)
+ {__cpy(x,z,p); return; }
+ else Z[k] = ZERO;
+
+ for (; j>0; i--,j--) {
+ Z[k] += X[i] + Y[j];
+ if (Z[k] >= RADIX) {
+ Z[k] -= RADIX;
+ Z[--k] = ONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (; i>0; i--) {
+ Z[k] += X[i];
+ if (Z[k] >= RADIX) {
+ Z[k] -= RADIX;
+ Z[--k] = ONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ if (Z[1] == ZERO) {
+ for (i=1; i<=p2; i++) Z[i] = Z[i+1]; }
+ else EZ += ONE;
+}
+
+
+/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */
+/* abs(*x) > abs(*y) > 0. */
+/* The sign of the difference *z is undefined. x&y may overlap but not x&z */
+/* or y&z. One guard digit is used. The error is less than one ulp. */
+/* *x & *y are left unchanged. */
+
+static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i,j,k;
+ long p2 = p;
+
+ EZ = EX;
+
+ if (EX == EY) {
+ i=j=k=p2;
+ Z[k] = Z[k+1] = ZERO; }
+ else {
+ j= EX - EY;
+ if (j > p2) {__cpy(x,z,p); return; }
+ else {
+ i=p2; j=p2+1-j; k=p2;
+ if (Y[j] > ZERO) {
+ Z[k+1] = RADIX - Y[j--];
+ Z[k] = MONE; }
+ else {
+ Z[k+1] = ZERO;
+ Z[k] = ZERO; j--;}
+ }
+ }
+
+ for (; j>0; i--,j--) {
+ Z[k] += (X[i] - Y[j]);
+ if (Z[k] < ZERO) {
+ Z[k] += RADIX;
+ Z[--k] = MONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (; i>0; i--) {
+ Z[k] += X[i];
+ if (Z[k] < ZERO) {
+ Z[k] += RADIX;
+ Z[--k] = MONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (i=1; Z[i] == ZERO; i++) ;
+ EZ = EZ - i + 1;
+ for (k=1; i <= p2+1; )
+ Z[k++] = Z[i++];
+ for (; k <= p2; )
+ Z[k++] = ZERO;
+
+ return;
+}
+
+
+/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */
+/* but not x&z or y&z. One guard digit is used. The error is less than */
+/* one ulp. *x & *y are left unchanged. */
+
+void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ int n;
+
+ if (X[0] == ZERO) {__cpy(y,z,p); return; }
+ else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
+
+ if (X[0] == Y[0]) {
+ if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; }
+ }
+ else {
+ if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; }
+ else Z[0] = ZERO;
+ }
+ return;
+}
+
+
+/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
+/* overlap but not x&z or y&z. One guard digit is used. The error is */
+/* less than one ulp. *x & *y are left unchanged. */
+
+void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ int n;
+
+ if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; }
+ else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
+
+ if (X[0] != Y[0]) {
+ if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
+ }
+ else {
+ if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
+ else Z[0] = ZERO;
+ }
+ return;
+}
+
+
+/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */
+/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */
+/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */
+/* *x & *y are left unchanged. */
+
+void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i, i1, i2, j, k, k2;
+ long p2 = p;
+ double u, zk, zk2;
+
+ /* Is z=0? */
+ if (X[0]*Y[0]==ZERO)
+ { Z[0]=ZERO; return; }
+
+ /* Multiply, add and carry */
+ k2 = (p2<3) ? p2+p2 : p2+3;
+ zk = Z[k2]=ZERO;
+ for (k=k2; k>1; ) {
+ if (k > p2) {i1=k-p2; i2=p2+1; }
+ else {i1=1; i2=k; }
+#if 1
+ /* rearrange this inner loop to allow the fmadd instructions to be
+ independent and execute in parallel on processors that have
+ dual symmetrical FP pipelines. */
+ if (i1 < (i2-1))
+ {
+ /* make sure we have at least 2 iterations */
+ if (((i2 - i1) & 1L) == 1L)
+ {
+ /* Handle the odd iterations case. */
+ zk2 = x->d[i2-1]*y->d[i1];
+ }
+ else
+ zk2 = zero.d;
+ /* Do two multiply/adds per loop iteration, using independent
+ accumulators; zk and zk2. */
+ for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2)
+ {
+ zk += x->d[i]*y->d[j];
+ zk2 += x->d[i+1]*y->d[j-1];
+ }
+ zk += zk2; /* final sum. */
+ }
+ else
+ {
+ /* Special case when iterations is 1. */
+ zk += x->d[i1]*y->d[i1];
+ }
+#else
+ /* The original code. */
+ for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
+#endif
+
+ u = (zk + CUTTER)-CUTTER;
+ if (u > zk) u -= RADIX;
+ Z[k] = zk - u;
+ zk = u*RADIXI;
+ --k;
+ }
+ Z[k] = zk;
+
+ /* Is there a carry beyond the most significant digit? */
+ if (Z[1] == ZERO) {
+ for (i=1; i<=p2; i++) Z[i]=Z[i+1];
+ EZ = EX + EY - 1; }
+ else
+ EZ = EX + EY;
+
+ Z[0] = X[0] * Y[0];
+ return;
+}
+
+
+/* Invert a multiple precision number. Set *y = 1 / *x. */
+/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */
+/* 2.001*r**(1-p) for p>3. */
+/* *x=0 is not permissible. *x is left unchanged. */
+
+void __inv(const mp_no *x, mp_no *y, int p) {
+ long i;
+#if 0
+ int l;
+#endif
+ double t;
+ mp_no z,w;
+ static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
+ const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
+
+ __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p);
+ t=ONE/t; __dbl_mp(t,y,p); EY -= EX;
+
+ for (i=0; i<np1[p]; i++) {
+ __cpy(y,&w,p);
+ __mul(x,&w,y,p);
+ __sub(&mptwo,y,&z,p);
+ __mul(&w,&z,y,p);
+ }
+ return;
+}
+
+
+/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
+/* are left unchanged. x&y may overlap but not x&z or y&z. */
+/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */
+/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */
+
+void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ mp_no w;
+
+ if (X[0] == ZERO) Z[0] = ZERO;
+ else {__inv(y,&w,p); __mul(x,&w,z,p);}
+ return;
+}
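
Taken together, __dbl_mp, the arithmetic routines, and __mp_dbl form the convert/compute/convert-back pipeline that the slowexp.c and slowpow.c files below rely on. A hypothetical round-trip using only functions defined in this file (mpa.h is a libm-internal header, so this is a sketch, not standalone code):

#include "mpa.h"

/* Add two doubles through the multi-precision layer at precision p
   (hypothetical helper, for illustration only).  */
double
mp_add_doubles (double a, double b, int p)
{
  mp_no ma, mb, ms;
  double result;

  __dbl_mp (a, &ma, p);		/* double -> multi-precision */
  __dbl_mp (b, &mb, p);
  __add (&ma, &mb, &ms, p);	/* error less than one ulp */
  __mp_dbl (&ms, &result, p);	/* round back to the nearest double */
  return result;
}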
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
new file mode 100644
index 0000000000..ad3cd2769b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
@@ -0,0 +1,47 @@
+/* Round double to long long int. PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long int[r3, r4] __llrint (double x[fp1]) */
+ENTRY (__llrint)
+ CALL_MCOUNT
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ fctid fp13,fp1
+ stfd fp13,8(r1)
+ nop /* Ensure the following load is in a different dispatch group */
+ nop /* to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r3,8(r1)
+ lwz r4,12(r1)
+ addi r1,r1,16
+ blr
+ END (__llrint)
+
+weak_alias (__llrint, llrint)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__llrint, __llrintl)
+weak_alias (__llrint, llrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llrint, llrintl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
new file mode 100644
index 0000000000..21353ffb5b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
@@ -0,0 +1,39 @@
+/* Round float to long long int. PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+
+/* long long int[r3, r4] __llrintf (float x[fp1]) */
+ENTRY (__llrintf)
+ CALL_MCOUNT
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ fctid fp13,fp1
+ stfd fp13,8(r1)
+ nop /* Ensure the following load is in a different dispatch group */
+ nop /* to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r3,8(r1)
+ lwz r4,12(r1)
+ addi r1,r1,16
+ blr
+ END (__llrintf)
+
+weak_alias (__llrintf, llrintf)
+
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
new file mode 100644
index 0000000000..952d2aa6a5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
@@ -0,0 +1,97 @@
+/* llround function. PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 2
+.LC0: /* 0.0 */
+ .long 0x00000000
+.LC1: /* 0.5 */
+ .long 0x3f000000
+
+ .section ".text"
+
+/* long long int [r3, r4] __llround (double x [fp1])
+   IEEE 1003.1 llround function. IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode." However PowerPC Architecture defines
+   "round to Nearest" as "Choose the best approximation. In case of a
+   tie, choose the one that is even (least significant bit 0)."
+ So we can't use the PowerPC "round to Nearest" mode. Instead we set
+ "round toward Zero" mode and round by adding +-0.5 before rounding
+ to the integer value. */
+
+ENTRY (__llround)
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+#ifdef SHARED
+ mflr r11
+ cfi_register(lr,r11)
+# ifdef HAVE_ASM_PPC_REL16
+ bcl 20,31,1f
+1: mflr r9
+ addis r9,r9,.LC0-1b@ha
+ addi r9,r9,.LC0-1b@l
+# else
+ bl _GLOBAL_OFFSET_TABLE_@local-4
+ mflr r10
+ lwz r9,.LC0@got(10)
+# endif
+ mtlr r11
+ cfi_same_value (lr)
+ lfs fp12,0(r9)
+ lfs fp10,.LC1-.LC0(r9)
+#else
+ lis r9,.LC0@ha
+ lis r10,.LC1@ha
+ lfs fp12,.LC0@l(r9)
+ lfs fp10,.LC1@l(r10)
+#endif
+ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
+ ble- cr6,.L4
+ fadd fp1,fp1,fp10 /* x+= 0.5; */
+.L9:
+ fctidz fp2,fp1 /* Convert To Integer DW round toward 0. */
+ stfd fp2,8(r1)
+ nop /* Ensure the following load is in a different dispatch */
+ nop /* group to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r4,12(r1)
+ lwz r3,8(r1)
+ addi r1,r1,16
+ blr
+.L4:
+ fsub fp1,fp1,fp10 /* x-= 0.5; */
+ b .L9
+ END (__llround)
+
+weak_alias (__llround, llround)
+
+strong_alias (__llround, __llroundf)
+weak_alias (__llround, llroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+#endif
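
In C terms, the sequence above (fadd with +/-0.5 followed by fctidz) is equivalent to the model below; this is a sketch only, since the real entry point is the assembly:

long long
llround_model (double x)
{
  /* Bias away from zero, then truncate toward zero (what fctidz
     does), so halfway cases round away from zero.  */
  double biased = (x > 0.0) ? x + 0.5 : x - 0.5;
  return (long long) biased;
}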
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S
new file mode 100644
index 0000000000..72d6181541
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S
@@ -0,0 +1 @@
+/* __llroundf is in s_llround.S */
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c b/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c
new file mode 100644
index 0000000000..b22b0dfeab
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c
@@ -0,0 +1,66 @@
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2007 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/**************************************************************************/
+/* MODULE_NAME:slowexp.c */
+/* */
+/* FUNCTION:slowexp */
+/* */
+/* FILES NEEDED:mpa.h */
+/* mpa.c mpexp.c */
+/* */
+/*Converting from double precision to Multi-precision and calculating */
+/* e^x */
+/**************************************************************************/
+#include "math_private.h"
+
+#ifdef NO_LONG_DOUBLE
+#include "mpa.h"
+void __mpexp(mp_no *x, mp_no *y, int p);
+#endif
+
+/*Converting from double precision to Multi-precision and calculating e^x */
+double __slowexp(double x) {
+#ifdef NO_LONG_DOUBLE
+ double w,z,res,eps=3.0e-26;
+ int p;
+ mp_no mpx, mpy, mpz,mpw,mpeps,mpcor;
+
+ p=6;
+ __dbl_mp(x,&mpx,p); /* Convert a double precision number x */
+ /* into a multiple precision number mpx with prec. p. */
+ __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */
+ __dbl_mp(eps,&mpeps,p);
+ __mul(&mpeps,&mpy,&mpcor,p);
+ __add(&mpy,&mpcor,&mpw,p);
+ __sub(&mpy,&mpcor,&mpz,p);
+ __mp_dbl(&mpw, &w, p);
+ __mp_dbl(&mpz, &z, p);
+ if (w == z) return w;
+ else { /* if the calculation was not exact */
+ p = 32;
+ __dbl_mp(x,&mpx,p);
+ __mpexp(&mpx, &mpy, p);
+ __mp_dbl(&mpy, &res, p);
+ return res;
+ }
+#else
+ return (double) __ieee754_expl((long double)x);
+#endif
+}
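
The acceptance test in __slowexp generalizes: compute the value once with a cheap error bound, and accept it only when value - error and value + error round to the same double. A compact restatement of that test with hypothetical names:

static int
rounds_unambiguously (long double value, long double error, double *out)
{
  double hi = (double) (value + error);
  double lo = (double) (value - error);
  if (hi == lo)
    {
      *out = hi;	/* both bounds round identically: correctly rounded */
      return 1;
    }
  return 0;		/* ambiguous: retry at higher working precision */
}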
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c b/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c
new file mode 100644
index 0000000000..ad147a89a6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c
@@ -0,0 +1,94 @@
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2006 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/*************************************************************************/
+/* MODULE_NAME:slowpow.c */
+/* */
+/* FUNCTION:slowpow */
+/* */
+/*FILES NEEDED:mpa.h */
+/* mpa.c mpexp.c mplog.c halfulp.c */
+/* */
+/* Given two IEEE double machine numbers y, x, the routine computes the */
+/* correctly rounded (to nearest) value of x^y. The result is first */
+/* calculated by multiplication (in halfulp.c); if that is not accurate */
+/* enough, the routine converts x and y into multi-precision doubles */
+/* and recomputes. */
+/*************************************************************************/
+
+#include "mpa.h"
+#include "math_private.h"
+
+void __mpexp (mp_no * x, mp_no * y, int p);
+void __mplog (mp_no * x, mp_no * y, int p);
+double ulog (double);
+double __halfulp (double x, double y);
+
+double
+__slowpow (double x, double y, double z)
+{
+ double res, res1;
+ long double ldw, ldz, ldpp;
+ static const long double ldeps = 0x4.0p-96;
+
+ res = __halfulp (x, y); /* halfulp() returns -10 or x^y */
+ if (res >= 0)
+ return res; /* if result was really computed by halfulp */
+ /* else, if result was not really computed by halfulp */
+
+ /* Compute pow as long double, 106 bits */
+ ldz = __ieee754_logl ((long double) x);
+ ldw = (long double) y *ldz;
+ ldpp = __ieee754_expl (ldw);
+ res = (double) (ldpp + ldeps);
+ res1 = (double) (ldpp - ldeps);
+
+ if (res != res1) /* if result still not accurate enough */
+ { /* use mpa for higher precision. */
+ mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
+ static const mp_no eps = { -3, {1.0, 4.0} };
+ int p;
+
+ p = 10; /* p=precision 240 bits */
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p); /* log(x) = z */
+ __mul (&mpy, &mpz, &mpw, p); /* y * z =w */
+ __mpexp (&mpw, &mpp, p); /* e^w =pp */
+ __add (&mpp, &eps, &mpr, p); /* pp+eps =r */
+ __mp_dbl (&mpr, &res, p);
+ __sub (&mpp, &eps, &mpr1, p); /* pp -eps =r1 */
+ __mp_dbl (&mpr1, &res1, p); /* converting into double precision */
+ if (res == res1)
+ return res;
+
+ /* If we get here the result wasn't calculated exactly; continue
+ with a more precise calculation using 768 bits. */
+ p = 32;
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p); /* log(c)=z */
+ __mul (&mpy, &mpz, &mpw, p); /* y*z =w */
+ __mpexp (&mpw, &mpp, p); /* e^w=pp */
+ __mp_dbl (&mpp, &res, p); /* converting into double precision */
+ }
+ return res;
+}
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c
new file mode 100644
index 0000000000..f59c193934
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c
@@ -0,0 +1,62 @@
+/* Double-precision floating point square root wrapper.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math_ldbl_opt.h>
+#include "math.h"
+#include "math_private.h"
+#include <fenv_libc.h>
+
+#ifdef __STDC__
+double
+__sqrt (double x) /* wrapper sqrt */
+#else
+double
+__sqrt (x) /* wrapper sqrt */
+ double x;
+#endif
+{
+ double z;
+/* Power4 (ISA V2.0) and above implement sqrt in hardware. */
+ __asm __volatile (
+ " fsqrt %0,%1\n"
+ : "=f" (z)
+ : "f" (x));
+#ifdef _IEEE_LIBM
+ return z;
+#else
+ if (__builtin_expect (_LIB_VERSION == _IEEE_, 0))
+ return z;
+
+ if (__builtin_expect (x != x, 0))
+ return z;
+
+ if (__builtin_expect (x < 0.0, 0))
+ return __kernel_standard (x, x, 26); /* sqrt(negative) */
+ else
+ return z;
+#endif
+}
+
+weak_alias (__sqrt, sqrt)
+#ifdef NO_LONG_DOUBLE
+ strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0);
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c
new file mode 100644
index 0000000000..4784869f07
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c
@@ -0,0 +1,60 @@
+/* Single-precision floating point square root wrapper.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include "math.h"
+#include "math_private.h"
+#include <fenv_libc.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+#ifdef __STDC__
+float
+__sqrtf (float x) /* wrapper sqrtf */
+#else
+float
+__sqrtf (x) /* wrapper sqrtf */
+ float x;
+#endif
+{
+#ifdef _IEEE_LIBM
+ return __ieee754_sqrtf (x);
+#else
+ float z;
+/* Power4 (ISA V2.0) and above implement sqrtf in hardware. */
+ __asm __volatile (
+ " fsqrts %0,%1\n"
+ : "=f" (z)
+ : "f" (x));
+
+ if (__builtin_expect (_LIB_VERSION == _IEEE_, 0))
+ return z;
+
+ if (__builtin_expect (x != x, 0))
+ return z;
+
+ if (__builtin_expect (x < 0.0, 0))
+ /* sqrtf(negative) */
+ return (float) __kernel_standard ((double) x, (double) x, 126);
+ else
+ return z;
+#endif
+}
+
+weak_alias (__sqrtf, sqrtf)
diff --git a/sysdeps/powerpc/powerpc32/power4/memcmp.S b/sysdeps/powerpc/powerpc32/power4/memcmp.S
new file mode 100644
index 0000000000..75b328403a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/memcmp.S
@@ -0,0 +1,986 @@
+/* Optimized memcmp implementation for PowerPC32.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+ .machine power4
+EALIGN (BP_SYM(memcmp), 4, 0)
+ CALL_MCOUNT
+
+#define rTMP r0
+#define rRTN r3
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rN r5 /* max string length */
+#define rWORD1 r6 /* current word in s1 */
+#define rWORD2 r7 /* current word in s2 */
+#define rWORD3 r8 /* next word in s1 */
+#define rWORD4 r9 /* next word in s2 */
+#define rWORD5 r10 /* next word in s1 */
+#define rWORD6 r11 /* next word in s2 */
+#define rBITDIF r12 /* bits that differ in s1 & s2 words */
+#define rWORD7 r30 /* next word in s1 */
+#define rWORD8 r31 /* next word in s2 */
+
+ xor rTMP, rSTR2, rSTR1
+ cmplwi cr6, rN, 0
+ cmplwi cr1, rN, 12
+ clrlwi. rTMP, rTMP, 30
+ clrlwi rBITDIF, rSTR1, 30
+ cmplwi cr5, rBITDIF, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0,rSTR1
+ dcbt 0,rSTR2
+/* If less than 8 bytes or not aligned, use the unaligned
+ byte loop. */
+ blt cr1, L(bytealigned)
+ stwu 1,-64(1)
+ cfi_adjust_cfa_offset(64)
+ stw r31,48(1)
+ cfi_offset(31,(48-64))
+ stw r30,44(1)
+ cfi_offset(30,(44-64))
+ bne L(unaligned)
+/* At this point we know both strings have the same alignment and the
+ compare length is at least 8 bytes. rBITDIF contains the low order
+ 2 bits of rSTR1 and cr5 contains the result of the logical compare
+ of rBITDIF to 0. If rBITDIF == 0 then we are already word
+ aligned and can perform the word aligned loop.
+
+ Otherwise we know the two strings have the same alignment (but not
+ yet word aligned). So we force the string addresses to the next lower
+ word boundary and special case this first word using shift left to
+ eliminate bits preceding the first byte. Since we want to join the
+ normal (word aligned) compare loop, starting at the second word,
+ we need to adjust the length (rN) and special case the loop
+ versioning for the first word. This ensures that the loop count is
+ correct and the first word (shifted) is in the expected register pair. */
+ .align 4
+L(samealignment):
+ clrrwi rSTR1, rSTR1, 2
+ clrrwi rSTR2, rSTR2, 2
+ beq cr5, L(Waligned)
+ add rN, rN, rBITDIF
+ slwi r11, rBITDIF, 3
+ srwi rTMP, rN, 4 /* Divide by 16 */
+ andi. rBITDIF, rN, 12 /* Get the word remainder */
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ cmplwi cr1, rBITDIF, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ beq L(dPs4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
+
+/* Remainder is 4 */
+ .align 3
+L(dsP1):
+ slw rWORD5, rWORD1, r11
+ slw rWORD6, rWORD2, r11
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+/* Do something useful in this cycle since we have to branch anyway. */
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ b L(dP1e)
+/* Remainder is 8 */
+ .align 4
+L(dPs2):
+ slw rWORD5, rWORD1, r11
+ slw rWORD6, rWORD2, r11
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+/* Do something useful in this cycle since we have to branch anyway. */
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ b L(dP2e)
+/* Remainder is 12 */
+ .align 4
+L(dPs3):
+ slw rWORD3, rWORD1, r11
+ slw rWORD4, rWORD2, r11
+ cmplw cr1, rWORD3, rWORD4
+ b L(dP3e)
+/* Count is a multiple of 16, remainder is 0 */
+ .align 4
+L(dPs4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ slw rWORD1, rWORD1, r11
+ slw rWORD2, rWORD2, r11
+ cmplw cr0, rWORD1, rWORD2
+ b L(dP4e)
+
+/* At this point we know both strings are word aligned and the
+ compare length is at least 8 bytes. */
+ .align 4
+L(Waligned):
+ andi. rBITDIF, rN, 12 /* Get the word remainder */
+ srwi rTMP, rN, 4 /* Divide by 16 */
+ cmplwi cr1, rBITDIF, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ beq L(dP4)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
+
+/* Remainder is 4 */
+ .align 4
+L(dP1):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
+ (8-15 byte compare), we want to use only volatile registers. This
+ means we can avoid restoring non-volatile registers since we did not
+ change any on the early exit path. The key here is the non-early
+ exit path only cares about the condition code (cr5), not about which
+ register pair was used. */
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+L(dP1e):
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ bne cr0, L(dLcr0)
+
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+ bne cr1, L(dLcr1)
+ cmplw cr5, rWORD7, rWORD8
+ bdnz L(dLoop)
+ bne cr6, L(dLcr6)
+ lwz r30,44(1)
+ lwz r31,48(1)
+ .align 3
+L(dP1x):
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ lwz 1,0(1)
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 8 */
+ .align 4
+L(dP2):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+L(dP2e):
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
+ b L(dLoop2)
+/* Again we are on an early exit path (16-23 byte compare); we want to
+   use only volatile registers and avoid restoring non-volatile
+   registers. */
+ .align 4
+L(dP2x):
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+ cmplw cr5, rWORD3, rWORD4
+ slwi. r12, rN, 3
+ bne cr6, L(dLcr6)
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ bne cr5, L(dLcr5)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ lwz 1,0(1)
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 12 */
+ .align 4
+L(dP3):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ lwz rWORD3, 0(rSTR1)
+ lwz rWORD4, 0(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+L(dP3e):
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
+ b L(dLoop1)
+/* Again we are on an early exit path (24-31 byte compare); we want to
+   use only volatile registers and avoid restoring non-volatile
+   registers. */
+ .align 4
+L(dP3x):
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+ cmplw cr5, rWORD1, rWORD2
+ slwi. r12, rN, 3
+ bne cr1, L(dLcr1)
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr6, L(dLcr6)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ bne cr5, L(dLcr5)
+ lwz 1,0(1)
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Count is a multiple of 16, remainder is 0 */
+ .align 4
+L(dP4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+L(dP4e):
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr0, L(dLcr0)
+ bne cr1, L(dLcr1)
+ bdz- L(d24) /* Adjust CTR as we start with +4 */
+/* This is the primary loop */
+ .align 4
+L(dLoop):
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+L(dLoop1):
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+L(dLoop2):
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr0, L(dLcr0)
+L(dLoop3):
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+ bne- cr1, L(dLcr1)
+ cmplw cr0, rWORD1, rWORD2
+ bdnz+ L(dLoop)
+
+L(dL4):
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmplw cr5, rWORD7, rWORD8
+L(d44):
+ bne cr0, L(dLcr0)
+L(d34):
+ bne cr1, L(dLcr1)
+L(d24):
+ bne cr6, L(dLcr6)
+L(d14):
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5)
+L(d04):
+ lwz r30,44(1)
+ lwz r31,48(1)
+ lwz 1,0(1)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ beq L(zeroLength)
+/* At this point we have a remainder of 1 to 3 bytes to compare. Since
+ we are aligned it is safe to load the whole word, and use
+ shift right to eliminate bits beyond the compare length. */
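+
+/* For reference, the trim in C (illustrative; big-endian, so the first
+   rem bytes of a word are its high-order bits and a right shift
+   discards the excess):
+
+     unsigned int sh = 32 - 8 * rem;           (rem is 1 to 3)
+     unsigned int a = *w1 >> sh;
+     unsigned int b = *w2 >> sh;
+     return a == b ? 0 : a > b ? 1 : -1;  */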
+L(d00):
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ cmplw rWORD1,rWORD2
+ li rRTN,0
+ beqlr
+ li rRTN,1
+ bgtlr
+ li rRTN,-1
+ blr
+
+ .align 4
+L(dLcr0):
+ lwz r30,44(1)
+ lwz r31,48(1)
+ li rRTN, 1
+ lwz 1,0(1)
+ bgtlr cr0
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr1):
+ lwz r30,44(1)
+ lwz r31,48(1)
+ li rRTN, 1
+ lwz 1,0(1)
+ bgtlr cr1
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr6):
+ lwz r30,44(1)
+ lwz r31,48(1)
+ li rRTN, 1
+ lwz 1,0(1)
+ bgtlr cr6
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr5):
+ lwz r30,44(1)
+ lwz r31,48(1)
+L(dLcr5x):
+ li rRTN, 1
+ lwz 1,0(1)
+ bgtlr cr5
+ li rRTN, -1
+ blr
+
+ .align 4
+L(bytealigned):
+ cfi_adjust_cfa_offset(-64)
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+
+/* We need to prime this loop. This loop is swing modulo scheduled
+   to avoid pipe delays.  The dependent instruction latencies (load to
+   compare to conditional branch) are 2 to 3 cycles.  In this loop each
+   dispatch group ends in a branch and takes 1 cycle.  Effectively
+   the first iteration of the loop only serves to load operands, and
+   branches based on compares are delayed until the next iteration.
+
+ So we must precondition some registers and condition codes so that
+ we don't exit the loop early on the first iteration. */
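+
+/* The rough shape of such a software-pipelined loop in C (illustrative;
+   the real loop below is rotated three ways across cr0/cr1/cr6):
+
+     unsigned char a = s1[0], b = s2[0];          (prime: load only)
+     for (i = 1; i < n; i++)
+       {
+         unsigned char na = s1[i], nb = s2[i];    (loads for NEXT round)
+         if (a != b)                              (test PREVIOUS loads)
+           return a - b;
+         a = na;  b = nb;
+       }
+     return a - b;
+
+   so no load feeds a compare and branch in the same iteration.  */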
+
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ bdz- L(b11)
+ cmplw cr0, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
+ bdz- L(b12)
+ cmplw cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
+ bdz- L(b13)
+ .align 4
+L(bLoop):
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- cr0, L(bLcr0)
+
+ cmplw cr6, rWORD5, rWORD6
+ bdz- L(b3i)
+
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- cr1, L(bLcr1)
+
+ cmplw cr0, rWORD1, rWORD2
+ bdz- L(b2i)
+
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne- cr6, L(bLcr6)
+
+ cmplw cr1, rWORD3, rWORD4
+ bdnz+ L(bLoop)
+
+/* We speculatively load bytes before we have tested the previous
+   bytes.  But we must avoid overrunning the length (in the ctr) to
+   prevent these speculative loads from causing a segfault.  In that
+   case the loop exits early, before all the pending bytes have been
+   tested, so we must complete the pending compares before
+   returning.  */
+L(b1i):
+ bne- cr0, L(bLcr0)
+ bne- cr1, L(bLcr1)
+ b L(bx56)
+ .align 4
+L(b2i):
+ bne- cr6, L(bLcr6)
+ bne- cr0, L(bLcr0)
+ b L(bx34)
+ .align 4
+L(b3i):
+ bne- cr1, L(bLcr1)
+ bne- cr6, L(bLcr6)
+ b L(bx12)
+ .align 4
+L(bLcr0):
+ li rRTN, 1
+ bgtlr cr0
+ li rRTN, -1
+ blr
+L(bLcr1):
+ li rRTN, 1
+ bgtlr cr1
+ li rRTN, -1
+ blr
+L(bLcr6):
+ li rRTN, 1
+ bgtlr cr6
+ li rRTN, -1
+ blr
+
+L(b13):
+ bne- cr0, L(bx12)
+ bne- cr1, L(bx34)
+L(bx56):
+ sub rRTN, rWORD5, rWORD6
+ blr
+ nop
+L(b12):
+ bne- cr0, L(bx12)
+L(bx34):
+ sub rRTN, rWORD3, rWORD4
+ blr
+
+L(b11):
+L(bx12):
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+ .align 4
+L(zeroLengthReturn):
+
+L(zeroLength):
+ li rRTN, 0
+ blr
+
+ cfi_adjust_cfa_offset(64)
+ .align 4
+/* At this point we know the strings have different alignment and the
+ compare length is at least 8 bytes. rBITDIF contains the low order
+ 2 bits of rSTR1 and cr5 contains the result of the logical compare
+   of rBITDIF to 0.  If rBITDIF == 0 then rSTR1 is word aligned and we
+   can perform the Wunaligned loop.
+
+   Otherwise we know that rSTR1 is not yet word aligned.
+   So we can force the string addresses to the next lower word
+   boundary and special case this first word using shift left to
+   eliminate bits preceding the first byte.  Since we want to join the
+   normal (Wunaligned) compare loop, starting at the second word,
+   we need to adjust the length (rN) and special case the loop
+   versioning for the first W.  This ensures that the loop count is
+   correct and the first W (shifted) is in the expected register pair. */
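+
+/* The fixup itself is the classic shift-and-merge; in C (illustrative,
+   big-endian, sh_l = 8 * misalignment, sh_r = 32 - sh_l):
+
+     unsigned int w0 = wp[0], w1 = wp[1];         (two aligned loads)
+     unsigned int merged = (w0 << sh_l) | (w1 >> sh_r);
+
+   merged then looks like an aligned load from the unaligned address,
+   and w1 is retained as the next iteration's w0.  */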
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rB r27 /* Left rotation temp for rWORD2. */
+#define rD r26 /* Left rotation temp for rWORD4. */
+#define rF r25 /* Left rotation temp for rWORD6. */
+#define rH r24 /* Left rotation temp for rWORD8. */
+#define rA r0 /* Right rotation temp for rWORD2. */
+#define rC r12 /* Right rotation temp for rWORD4. */
+#define rE r0 /* Right rotation temp for rWORD6. */
+#define rG r12 /* Right rotation temp for rWORD8. */
+L(unaligned):
+ stw r29,40(r1)
+ cfi_offset(r29,(40-64))
+ clrlwi rSHL, rSTR2, 30
+ stw r28,36(r1)
+ cfi_offset(r28,(36-64))
+ beq cr5, L(Wunaligned)
+ stw r27,32(r1)
+ cfi_offset(r27,(32-64))
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
+ in the 1st rSTR1 W. */
+ sub r27, rSTR2, rBITDIF
+/* But do not attempt to address the W before the W that contains
+ the actual start of rSTR2. */
+ clrrwi rSTR2, rSTR2, 2
+ stw r26,28(r1)
+ cfi_offset(r26,(28-64))
+/* Compute the left/right shift counts for the unaligned rSTR2,
+ compensating for the logical (W aligned) start of rSTR1. */
+ clrlwi rSHL, r27, 30
+ clrrwi rSTR1, rSTR1, 2
+ stw r25,24(r1)
+ cfi_offset(r25,(24-64))
+ slwi rSHL, rSHL, 3
+ cmplw cr5, r27, rSTR2
+ add rN, rN, rBITDIF
+ slwi r11, rBITDIF, 3
+ stw r24,20(r1)
+ cfi_offset(r24,(20-64))
+ subfic rSHR, rSHL, 32
+ srwi rTMP, rN, 4 /* Divide by 16 */
+ andi. rBITDIF, rN, 12 /* Get the W remainder */
+/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
+ this special case those bits may be discarded anyway. Also we
+ must avoid loading a W where none of the bits are part of rSTR2 as
+ this may cross a page boundary and cause a page fault. */
+ li rWORD8, 0
+ blt cr5, L(dus0)
+ lwz rWORD8, 0(rSTR2)
+ la rSTR2, 4(rSTR2)
+ slw rWORD8, rWORD8, rSHL
+
+L(dus0):
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ cmplwi cr1, rBITDIF, 8
+ cmplwi cr7, rN, 16
+ srw rG, rWORD2, rSHR
+ clrlwi rN, rN, 30
+ beq L(duPs4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, rG, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
+
+/* Remainder is 4 */
+ .align 4
+L(dusP1):
+ slw rB, rWORD2, rSHL
+ slw rWORD7, rWORD1, r11
+ slw rWORD8, rWORD8, r11
+ bge cr7, L(duP1e)
+/* At this point we exit early with the first word compare
+   complete and a remainder of 0 to 3 bytes.  See L(du14) for details on
+ how we handle the remaining bytes. */
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ b L(dutrim)
+/* Remainder is 8 */
+ .align 4
+L(duPs2):
+ slw rH, rWORD2, rSHL
+ slw rWORD5, rWORD1, r11
+ slw rWORD6, rWORD8, r11
+ b L(duP2e)
+/* Remainder is 12 */
+ .align 4
+L(duPs3):
+ slw rF, rWORD2, rSHL
+ slw rWORD3, rWORD1, r11
+ slw rWORD4, rWORD8, r11
+ b L(duP3e)
+/* Count is a multiple of 16, remainder is 0 */
+ .align 4
+L(duPs4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, rG, rWORD8
+ slw rD, rWORD2, rSHL
+ slw rWORD1, rWORD1, r11
+ slw rWORD2, rWORD8, r11
+ b L(duP4e)
+
+/* At this point we know rSTR1 is word aligned and the
+ compare length is at least 8 bytes. */
+ .align 4
+L(Wunaligned):
+ stw r27,32(r1)
+ cfi_offset(r27,(32-64))
+ clrrwi rSTR2, rSTR2, 2
+ stw r26,28(r1)
+ cfi_offset(r26,(28-64))
+ srwi rTMP, rN, 4 /* Divide by 16 */
+ stw r25,24(r1)
+ cfi_offset(r25,(24-64))
+ andi. rBITDIF, rN, 12 /* Get the W remainder */
+ stw r24,20(r1)
+	cfi_offset(r24,(20-64))
+ slwi rSHL, rSHL, 3
+ lwz rWORD6, 0(rSTR2)
+ lwzu rWORD8, 4(rSTR2)
+ cmplwi cr1, rBITDIF, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ subfic rSHR, rSHL, 32
+ slw rH, rWORD6, rSHL
+ beq L(duP4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
+
+/* Remainder is 4 */
+ .align 4
+L(duP1):
+ srw rG, rWORD8, rSHR
+ lwz rWORD7, 0(rSTR1)
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP1x)
+L(duP1e):
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ srw rA, rWORD2, rSHR
+ slw rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ srw rC, rWORD4, rSHR
+ slw rF, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, rC, rD
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ srw rE, rWORD6, rSHR
+ slw rH, rWORD6, rSHL
+ bne cr0, L(duLcr0)
+ or rWORD6, rE, rF
+ cmplw cr6, rWORD5, rWORD6
+ b L(duLoop3)
+ .align 4
+/* At this point we exit early with the first word compare
+   complete and a remainder of 0 to 3 bytes.  See L(du14) for details on
+ how we handle the remaining bytes. */
+L(duP1x):
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+	lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ b L(dutrim)
+/* Remainder is 8 */
+ .align 4
+L(duP2):
+ srw rE, rWORD8, rSHR
+ lwz rWORD5, 0(rSTR1)
+ or rWORD6, rE, rH
+ slw rH, rWORD8, rSHL
+L(duP2e):
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ srw rG, rWORD8, rSHR
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP2x)
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw rA, rWORD2, rSHR
+ slw rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srw rC, rWORD4, rSHR
+ slw rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ cmplw cr1, rWORD3, rWORD4
+ b L(duLoop2)
+ .align 4
+L(duP2x):
+ cmplw cr5, rWORD7, rWORD8
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ b L(dutrim)
+
+/* Remainder is 12 */
+ .align 4
+L(duP3):
+ srw rC, rWORD8, rSHR
+ lwz rWORD3, 0(rSTR1)
+ slw rF, rWORD8, rSHL
+ or rWORD4, rC, rH
+L(duP3e):
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ srw rE, rWORD6, rSHR
+ slw rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw rG, rWORD8, rSHR
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP3x)
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw rA, rWORD2, rSHR
+ slw rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ cmplw cr0, rWORD1, rWORD2
+ b L(duLoop1)
+ .align 4
+L(duP3x):
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr1, L(duLcr1)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ b L(dutrim)
+
+/* Count is a multiple of 16, remainder is 0 */
+ .align 4
+L(duP4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ srw rA, rWORD8, rSHR
+ lwz rWORD1, 0(rSTR1)
+ slw rD, rWORD8, rSHL
+ or rWORD2, rA, rH
+L(duP4e):
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ srw rC, rWORD4, rSHR
+ slw rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ bne cr0, L(duLcr0)
+ srw rE, rWORD6, rSHR
+ slw rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw rG, rWORD8, rSHR
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ cmplw cr5, rWORD7, rWORD8
+ bdz- L(du24) /* Adjust CTR as we start with +4 */
+/* This is the primary loop */
+ .align 4
+L(duLoop):
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srw rA, rWORD2, rSHR
+ slw rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+L(duLoop1):
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srw rC, rWORD4, rSHR
+ slw rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+L(duLoop2):
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr0, L(duLcr0)
+ srw rE, rWORD6, rSHR
+ slw rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+L(duLoop3):
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ bne- cr1, L(duLcr1)
+ srw rG, rWORD8, rSHR
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ bdnz+ L(duLoop)
+
+L(duL4):
+ bne cr1, L(duLcr1)
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmplw cr5, rWORD7, rWORD8
+L(du44):
+ bne cr0, L(duLcr0)
+L(du34):
+ bne cr1, L(duLcr1)
+L(du24):
+ bne cr6, L(duLcr6)
+L(du14):
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+/* At this point we have a remainder of 1 to 3 bytes to compare. We use
+ shift right to eliminate bits beyond the compare length.
+
+   However it may not be safe to load rWORD2, which may be beyond the
+   string length.  So we compare the bit length of the remainder to
+   the right shift count (rSHR).  If the bit count is less than or
+   equal to rSHR, we do not need to load rWORD2 (all significant bits
+   are already in rB). */
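+
+/* The guard in C terms (illustrative; rem_bits is rN after the
+   slwi above, i.e. 8 * remaining bytes):
+
+     unsigned int hi = 0;                         (rA)
+     if (rem_bits > sh_r)
+       hi = wp2[1] >> sh_r;
+
+   when rem_bits > sh_r some needed bits live in the next rSTR2 word,
+   and that word then necessarily overlaps the string, so the load
+   cannot fault.  */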
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ .align 4
+L(dutrim):
+ lwz rWORD1, 4(rSTR1)
+ lwz r31,48(1)
+ subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */
+ or rWORD2, rA, rB
+ lwz r30,44(1)
+ lwz r29,40(r1)
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ lwz r28,36(r1)
+ lwz r27,32(r1)
+ cmplw rWORD1,rWORD2
+ li rRTN,0
+ beq L(dureturn26)
+ li rRTN,1
+ bgt L(dureturn26)
+ li rRTN,-1
+ b L(dureturn26)
+ .align 4
+L(duLcr0):
+ lwz r31,48(1)
+ lwz r30,44(1)
+ li rRTN, 1
+ bgt cr0, L(dureturn29)
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr1):
+ lwz r31,48(1)
+ lwz r30,44(1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr6):
+ lwz r31,48(1)
+ lwz r30,44(1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr5):
+ lwz r31,48(1)
+ lwz r30,44(1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 3
+L(duZeroReturn):
+ li rRTN,0
+ .align 4
+L(dureturn):
+ lwz r31,48(1)
+ lwz r30,44(1)
+L(dureturn29):
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+L(dureturn27):
+ lwz r27,32(r1)
+L(dureturn26):
+ lwz r26,28(r1)
+L(dureturn25):
+ lwz r25,24(r1)
+ lwz r24,20(r1)
+ lwz 1,0(1)
+ blr
+END (BP_SYM (memcmp))
+
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp, bcmp)
diff --git a/sysdeps/powerpc/powerpc32/power4/memcopy.h b/sysdeps/powerpc/powerpc32/power4/memcopy.h
new file mode 100644
index 0000000000..c05208da55
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/memcopy.h
@@ -0,0 +1,113 @@
+/* memcopy.h -- definitions for memory copy functions. Generic C version.
+ Copyright (C) 1991, 1992, 1993, 1997, 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* The strategy of the memory functions is:
+
+ 1. Copy bytes until the destination pointer is aligned.
+
+ 2. Copy words in unrolled loops. If the source and destination
+ are not aligned in the same way, use word memory operations,
+ but shift and merge two read words before writing.
+
+ 3. Copy the few remaining bytes.
+
+ This is fast on processors that have at least 10 registers for
+ allocation by GCC, and that can access memory at reg+const in one
+ instruction.
+
+ I made an "exhaustive" test of this memmove when I wrote it,
+ exhaustive in the sense that I tried all alignment and length
+ combinations, with and without overlap. */
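+
+/* A minimal sketch of that strategy, for reference (illustrative only;
+   the real work is done by the macros below and the _wordcopy
+   routines):
+
+     void copy_fwd (char *d, const char *s, size_t n)
+     {
+       while (n > 0 && ((unsigned long) d % OPSIZ) != 0)
+         { *d++ = *s++; n--; }                    (1: align dst)
+       (2: unrolled word loop; shift and MERGE when the source and
+           destination alignments differ)
+       while (n-- > 0)
+         *d++ = *s++;                             (3: byte tail)
+     }  */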
+
+#include <sysdeps/generic/memcopy.h>
+
+/* The macros defined in this file are:
+
+ BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy)
+
+ BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy)
+
+ WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy)
+
+ WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy)
+
+ MERGE(old_word, sh_1, new_word, sh_2)
+ [I fail to understand. I feel stupid. --roland]
+*/
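+
+/* For a big-endian machine such as this one, the generic definition of
+   MERGE amounts to
+
+     MERGE (w0, sh_1, w1, sh_2)  ==  ((w0 << sh_1) | (w1 >> sh_2))
+
+   with sh_1 = 8 * (srcp % OPSIZ) and sh_2 = 8 * OPSIZ - sh_1: it glues
+   the tail of one aligned source word to the head of the next, so the
+   result looks like an aligned load from the unaligned address.  */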
+
+
+/* Threshold value for when to enter the unrolled loops. */
+#undef OP_T_THRES
+#define OP_T_THRES 16
+
+/* Copy exactly NBYTES bytes from SRC_BP to DST_BP,
+ without any assumptions about alignment of the pointers. */
+#undef BYTE_COPY_FWD
+#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
+ do \
+ { \
+ size_t __nbytes = (nbytes); \
+ if (__nbytes & 1) \
+ { \
+ ((byte *) dst_bp)[0] = ((byte *) src_bp)[0]; \
+ src_bp += 1; \
+ dst_bp += 1; \
+ __nbytes -= 1; \
+ } \
+ while (__nbytes > 0) \
+ { \
+ byte __x = ((byte *) src_bp)[0]; \
+ byte __y = ((byte *) src_bp)[1]; \
+ src_bp += 2; \
+ __nbytes -= 2; \
+ ((byte *) dst_bp)[0] = __x; \
+ ((byte *) dst_bp)[1] = __y; \
+ dst_bp += 2; \
+ } \
+ } while (0)
+
+/* Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
+ beginning at the bytes right before the pointers and continuing towards
+ smaller addresses. Don't assume anything about alignment of the
+ pointers. */
+#undef BYTE_COPY_BWD
+#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \
+ do \
+ { \
+ size_t __nbytes = (nbytes); \
+ if (__nbytes & 1) \
+ { \
+ src_ep -= 1; \
+ dst_ep -= 1; \
+ ((byte *) dst_ep)[0] = ((byte *) src_ep)[0]; \
+ __nbytes -= 1; \
+ } \
+ while (__nbytes > 0) \
+ { \
+ byte __x, __y; \
+ src_ep -= 2; \
+ __y = ((byte *) src_ep)[1]; \
+ __x = ((byte *) src_ep)[0]; \
+ dst_ep -= 2; \
+ __nbytes -= 2; \
+ ((byte *) dst_ep)[1] = __y; \
+ ((byte *) dst_ep)[0] = __x; \
+ } \
+ } while (0)
diff --git a/sysdeps/powerpc/powerpc32/power4/memcpy.S b/sysdeps/powerpc/powerpc32/power4/memcpy.S
new file mode 100644
index 0000000000..73020c6da8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/memcpy.S
@@ -0,0 +1,426 @@
+/* Optimized memcpy implementation for PowerPC32 on PowerPC64.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+   Memcpy handles short copies (< 32 bytes) using binary move blocks
+   (no loops) of lwz/stw.  The tail (remaining 1-3 bytes) is handled
+ with the appropriate combination of byte and halfword load/stores.
+ There is minimal effort to optimize the alignment of short moves.
+
+   Longer moves (>= 32 bytes) justify the effort to get at least the
+ destination word (4-byte) aligned. Further optimization is
+ possible when both source and destination are word aligned.
+ Each case has an optimized unrolled loop. */
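+
+/* Overall dispatch as a C sketch (illustrative; the helper names are
+   hypothetical stand-ins for the labeled blocks below):
+
+     void *memcpy_sketch (char *dst, const char *src, size_t n)
+     {
+       if (n < 32)
+         return short_copy (dst, src, n);               (.L2)
+       size_t pre = (-(unsigned long) dst) & 3;
+       copy_bytes (dst, src, pre);                      (align dst)
+       if ((((unsigned long) src + pre) & 3) == 0)
+         return word_copy (dst + pre, src + pre, n - pre);
+       return shift_merge_copy (dst + pre, src + pre, n - pre);  (.L6)
+     }  */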
+
+ .machine power4
+EALIGN (BP_SYM (memcpy), 5, 0)
+ CALL_MCOUNT
+
+ stwu 1,-32(1)
+ cfi_adjust_cfa_offset(32)
+ stw 30,20(1)
+ cfi_offset(30,(20-32))
+ mr 30,3
+ cmplwi cr1,5,31
+ stw 31,24(1)
+ cfi_offset(31,(24-32))
+ neg 0,3
+ andi. 11,3,3 /* check alignment of dst. */
+ clrlwi 0,0,30 /* Number of bytes until the 1st word of dst. */
+ clrlwi 10,4,30 /* check alignment of src. */
+ cmplwi cr6,5,8
+ ble- cr1,.L2 /* If move < 32 bytes use short move code. */
+ cmplw cr6,10,11
+ mr 12,4
+ srwi 9,5,2 /* Number of full words remaining. */
+ mtcrf 0x01,0
+ mr 31,5
+ beq .L0
+
+ subf 31,0,5
+ /* Move 0-3 bytes as needed to get the destination word aligned. */
+1: bf 31,2f
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: bf 30,0f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+0:
+ clrlwi 10,12,30 /* check alignment of src again. */
+ srwi 9,31,2 /* Number of full words remaining. */
+
+ /* Copy words from source to destination, assuming the destination is
+ aligned on a word boundary.
+
+ At this point we know there are at least 25 bytes left (32-7) to copy.
+ The next step is to determine if the source is also word aligned.
+	   If not branch to the unaligned move code at .L6, which uses
+ a load, shift, store strategy.
+
+ Otherwise source and destination are word aligned, and we can use
+ the optimized word copy loop. */
+.L0:
+ clrlwi 11,31,30 /* calculate the number of tail bytes */
+ mtcrf 0x01,9
+ bne- cr6,.L6 /* If source is not word aligned. */
+
+ /* Move words where destination and source are word aligned.
+ Use an unrolled loop to copy 4 words (16-bytes) per iteration.
+	   If the copy is not an exact multiple of 16 bytes, 1-3
+ words are copied as needed to set up the main loop. After
+ the main loop exits there may be a tail of 1-3 bytes. These bytes are
+ copied a halfword/byte at a time as needed to preserve alignment. */
+
+ srwi 8,31,4 /* calculate the 16 byte loop count */
+ cmplwi cr1,9,4
+ cmplwi cr6,11,0
+ mr 11,12
+
+ bf 30,1f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 11,12,8
+ mtctr 8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 10,3,8
+ bf 31,4f
+ lwz 0,8(12)
+ stw 0,8(3)
+ blt cr1,3f
+ addi 11,12,12
+ addi 10,3,12
+ b 4f
+ .align 4
+1:
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ lwz 6,0(12)
+ addi 11,12,4
+ stw 6,0(3)
+ addi 10,3,4
+
+ .align 4
+4:
+ lwz 6,0(11)
+ lwz 7,4(11)
+ lwz 8,8(11)
+ lwz 0,12(11)
+ stw 6,0(10)
+ stw 7,4(10)
+ stw 8,8(10)
+ stw 0,12(10)
+ addi 11,11,16
+ addi 10,10,16
+ bdnz 4b
+3:
+ clrrwi 0,31,2
+ mtcrf 0x01,31
+ beq cr6,0f
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+/* At this point we have a tail of 0-3 bytes and we know that the
+ destination is word aligned. */
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+
+/* Copy up to 31 bytes.  This is divided into two cases: 0-8 bytes and
+ 9-31 bytes. Each case is handled without loops, using binary
+ (1,2,4,8) tests.
+
+ In the short (0-8 byte) case no attempt is made to force alignment
+ of either source or destination. The hardware will handle the
+   unaligned load/stores with small delays for crossing 32-, 64-, and
+ 4096-byte boundaries. Since these short moves are unlikely to be
+ unaligned or cross these boundaries, the overhead to force
+ alignment is not justified.
+
+ The longer (9-31 byte) move is more likely to cross 32- or 64-byte
+ boundaries. Since only loads are sensitive to the 32-/64-byte
+ boundaries it is more important to align the source than the
+ destination. If the source is not already word aligned, we first
+ move 1-3 bytes as needed. While the destination and stores may
+ still be unaligned, this is only an issue for page (4096 byte
+ boundary) crossing, which should be rare for these short moves.
+ The hardware handles this case automatically with a small delay. */
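+
+/* The "binary" moves test individual bits of the length instead of
+   looping; for the 0-8 byte case the idea in C is roughly
+   (illustrative; unaligned accesses are left to the hardware, as
+   described above):
+
+     if (n == 8) { copy two words and return; }
+     if (n & 4) { *(int *) d = *(const int *) s;  d += 4;  s += 4; }
+     if (n & 2) { *(short *) d = *(const short *) s;  d += 2;  s += 2; }
+     if (n & 1) *d = *s;
+
+   the mtcrf/bf pairs below are exactly these bit tests.  */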
+
+ .align 4
+.L2:
+ mtcrf 0x01,5
+ neg 8,4
+ clrrwi 11,4,2
+ andi. 0,8,3
+ ble cr6,.LE8 /* Handle moves of 0-8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmplwi cr1,5,16
+ mr 10,5
+ mr 12,4
+ cmplwi cr6,0,2
+ beq .L3 /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(11)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srwi 7,6,16
+ bgt cr6,3f
+ sth 6,0(3)
+ b 7f
+ .align 4
+3:
+ stb 7,0(3)
+ sth 6,1(3)
+ b 7f
+ .align 4
+5:
+ stb 6,0(3)
+7:
+ cmplwi cr1,10,16
+ add 3,3,0
+ mtcrf 0x01,10
+ .align 4
+.L3:
+/* At least 6 bytes left and the source is word aligned. */
+ blt cr1,8f
+16: /* Move 16 bytes. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ stw 6,0(3)
+ lwz 6,8(12)
+ stw 7,4(3)
+ lwz 7,12(12)
+ addi 12,12,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+8: /* Move 8 bytes. */
+ bf 28,4f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Move 4 bytes. */
+ bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Move 2-3 bytes. */
+ bf 30,1f
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 7,2(12)
+ stb 7,2(3)
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+1: /* Move 1 byte. */
+ bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+.LE8:
+ mr 12,4
+ bne cr6,4f
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+ .align 4
+4: bf 29,2b
+ lwz 6,0(4)
+ stw 6,0(3)
+6:
+ bf 30,5f
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,0f
+ lbz 8,6(4)
+ stb 8,6(3)
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+ .align 4
+5:
+ bf 31,0f
+ lbz 6,4(4)
+ stb 6,4(3)
+ .align 4
+0:
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+ .align 4
+.L6:
+
+ /* Copy words where the destination is aligned but the source is
+ not. Use aligned word loads from the source, shifted to realign
+ the data, to allow aligned destination stores.
+ Use an unrolled loop to copy 4 words (16-bytes) per iteration.
+ A single word is retained for storing at loop exit to avoid walking
+ off the end of a page within the loop.
+ If the copy is not an exact multiple of 16 bytes, 1-3
+ words are copied as needed to set up the main loop. After
+ the main loop exits there may be a tail of 1-3 bytes. These bytes are
+ copied a halfword/byte at a time as needed to preserve alignment. */
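+
+/* The heart of this loop in C (illustrative; sh_l and sh_r are set up
+   below, w carries the previously loaded source word):
+
+     while (count-- > 0)
+       {
+         unsigned int next = *ws++;                (aligned load)
+         *wd++ = (w << sh_l) | (next >> sh_r);     (merged store)
+         w = next;                                 (retained word)
+       }
+
+   the retained word supplies the final store after the loop, so the
+   loop never loads a source word beyond the last one it needs.  */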
+
+
+ cmplwi cr6,11,0 /* are there tail bytes left ? */
+ subf 5,10,12 /* back up src pointer to prev word alignment */
+ slwi 10,10,3 /* calculate number of bits to shift 1st word left */
+ addi 11,9,-1 /* we move one word after the loop */
+ srwi 8,11,2 /* calculate the 16 byte loop count */
+ lwz 6,0(5) /* load 1st src word into R6 */
+ mr 4,3
+ lwz 7,4(5) /* load 2nd src word into R7 */
+ mtcrf 0x01,11
+ subfic 9,10,32 /* number of bits to shift 2nd word right */
+ mtctr 8
+ bf 30,1f
+
+ /* there are at least two words to copy, so copy them */
+ slw 0,6,10 /* shift 1st src word to left align it in R0 */
+ srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+ or 0,0,8 /* or them to get word to store */
+ lwz 6,8(5) /* load the 3rd src word */
+ stw 0,0(4) /* store the 1st dst word */
+ slw 0,7,10 /* now left align 2nd src word into R0 */
+ srw 8,6,9 /* shift 3rd src word to right align it in R8 */
+ or 0,0,8 /* or them to get word to store */
+ lwz 7,12(5)
+ stw 0,4(4) /* store the 2nd dst word */
+ addi 4,4,8
+ addi 5,5,16
+ bf 31,4f
+ /* there is a third word to copy, so copy it */
+ slw 0,6,10 /* shift 3rd src word to left align it in R0 */
+ srw 8,7,9 /* shift 4th src word to right align it in R8 */
+ or 0,0,8 /* or them to get word to store */
+ stw 0,0(4) /* store 3rd dst word */
+ mr 6,7
+ lwz 7,0(5)
+ addi 5,5,4
+ addi 4,4,4
+ b 4f
+ .align 4
+1:
+ slw 0,6,10 /* shift 1st src word to left align it in R0 */
+ srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+ addi 5,5,8
+ or 0,0,8 /* or them to get word to store */
+ bf 31,4f
+ mr 6,7
+ lwz 7,0(5)
+ addi 5,5,4
+ stw 0,0(4) /* store the 1st dst word */
+ addi 4,4,4
+
+ .align 4
+4:
+ /* copy 16 bytes at a time */
+ slw 0,6,10
+ srw 8,7,9
+ or 0,0,8
+ lwz 6,0(5)
+ stw 0,0(4)
+ slw 0,7,10
+ srw 8,6,9
+ or 0,0,8
+ lwz 7,4(5)
+ stw 0,4(4)
+ slw 0,6,10
+ srw 8,7,9
+ or 0,0,8
+ lwz 6,8(5)
+ stw 0,8(4)
+ slw 0,7,10
+ srw 8,6,9
+ or 0,0,8
+ lwz 7,12(5)
+ stw 0,12(4)
+ addi 5,5,16
+ addi 4,4,16
+ bdnz+ 4b
+8:
+ /* calculate and store the final word */
+ slw 0,6,10
+ srw 8,7,9
+ or 0,0,8
+ stw 0,0(4)
+3:
+ clrrwi 0,31,2
+ mtcrf 0x01,31
+ bne cr6,.L9 /* If the tail is 0 bytes we are done! */
+
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+END (BP_SYM (memcpy))
+
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/powerpc/powerpc32/power4/memset.S b/sysdeps/powerpc/powerpc32/power4/memset.S
new file mode 100644
index 0000000000..5dd1d943cf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/memset.S
@@ -0,0 +1,229 @@
+/* Optimized memset implementation for PowerPC32.
+ Copyright (C) 1997,99, 2000,02,03, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), word (32 bits),
+ cache line (1024 bits). There is a special case for setting cache lines
+ to 0, to take advantage of the dcbz instruction. */
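+
+/* For reference, the two rlwimi below perform, in C terms,
+
+     unsigned int w = (unsigned char) c;
+     w |= w << 8;                  (replicate byte to halfword)
+     w |= w << 16;                 (replicate halfword to word)
+
+   and when c == 0 and the region covers whole 128-byte cache lines,
+   dcbz zeroes an entire line per instruction instead of 32 stw.  */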
+
+ .machine power4
+EALIGN (BP_SYM (memset), 5, 0)
+ CALL_MCOUNT
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#define rMEMP0 r3 /* Original value of 1st arg. */
+#define rCHR r4 /* Char to set in each byte. */
+#define rLEN r5 /* Length of region to set. */
+#define rMEMP r6 /* Address at which we are storing. */
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+
+#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */
+#define rCLS r8 /* Cache line size (known to be 128). */
+#define rCLM r9 /* Cache line size mask to check for cache alignment. */
+L(_memset):
+/* Take care of case for size <= 4. */
+ cmplwi cr1, rLEN, 4
+ andi. rALIGN, rMEMP0, 3
+ mr rMEMP, rMEMP0
+ ble- cr1, L(small)
+
+/* Align to word boundary. */
+ cmplwi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ beq+ L(aligned)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 4
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+/* Handle the case of size < 31. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x1C
+ subfic rALIGN, rALIGN, 0x20
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stw rCHR, -4(rMEMP2)
+ stwu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ stw rCHR, -4(rMEMP2)
+ stw rCHR, -8(rMEMP2)
+ stw rCHR, -12(rMEMP2)
+ stwu rCHR, -16(rMEMP2)
+L(a2): bf 29, L(caligned)
+ stw rCHR, -4(rMEMP2)
+
+/* Now aligned to a 32 byte boundary. */
+L(caligned):
+ cmplwi cr1, rCHR, 0
+ clrrwi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+L(nondcbz):
+ srwi rTMP, rALIGN, 5
+ mtctr rTMP
+ beq L(medium) /* We may not actually get to do a full line. */
+ clrlwi. rLEN, rLEN, 27
+ add rMEMP, rMEMP, rALIGN
+ li rNEG64, -0x40
+ bdz L(cloopdone)
+
+ .align 4
+L(c3): dcbtst rNEG64, rMEMP
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stw rCHR, -16(rMEMP)
+ stw rCHR, -20(rMEMP)
+ stw rCHR, -24(rMEMP)
+ stw rCHR, -28(rMEMP)
+ stwu rCHR, -32(rMEMP)
+ bdnz L(c3)
+L(cloopdone):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stw rCHR, -16(rMEMP)
+ cmplwi cr1, rLEN, 16
+ stw rCHR, -20(rMEMP)
+ stw rCHR, -24(rMEMP)
+ stw rCHR, -28(rMEMP)
+ stwu rCHR, -32(rMEMP)
+ beqlr
+ add rMEMP, rMEMP, rALIGN
+ b L(medium_tail2)
+
+ .align 5
+/* Clear lines of memory in 128-byte chunks. */
+L(zloopstart):
+/* If the remaining length is less than 32 bytes, don't bother getting
+ the cache line size. */
+ beq L(medium)
+ li rCLS,128 /* cache line size is 128 */
+ dcbt 0,rMEMP
+L(getCacheAligned):
+ cmplwi cr1,rLEN,32
+ andi. rTMP,rMEMP,127
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ stw rCHR,-32(rMEMP)
+ stw rCHR,-28(rMEMP)
+ stw rCHR,-24(rMEMP)
+ stw rCHR,-20(rMEMP)
+ stw rCHR,-16(rMEMP)
+ stw rCHR,-12(rMEMP)
+ stw rCHR,-8(rMEMP)
+ stw rCHR,-4(rMEMP)
+ b L(getCacheAligned)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+ .align 4
+L(cacheAligned):
+ cmplw cr1,rLEN,rCLS
+ blt cr1,L(handletail32)
+ dcbz 0,rMEMP
+ subf rLEN,rCLS,rLEN
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
+
+/* We are here because the cache line size was set and the remainder
+ (rLEN) is less than the actual cache line size.
+ So set up the preconditions for L(nondcbz) and go there. */
+L(handletail32):
+ clrrwi. rALIGN, rLEN, 5
+ b L(nondcbz)
+
+ .align 5
+L(small):
+/* Memset of 4 bytes or less. */
+ cmplwi cr5, rLEN, 1
+ cmplwi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ cmplwi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt- 29, L(medium_29t)
+L(medium_29f):
+ bge- cr1, L(medium_27t)
+ bflr- 28
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt- cr1, L(medium_27f)
+L(medium_27t):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stwu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr- 28
+L(medium_28t):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ blr
+END (BP_SYM (memset))
+libc_hidden_builtin_def (memset)
diff --git a/sysdeps/powerpc/powerpc32/power4/strncmp.S b/sysdeps/powerpc/powerpc32/power4/strncmp.S
new file mode 100644
index 0000000000..fc0835ebe0
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/strncmp.S
@@ -0,0 +1,176 @@
+/* Optimized strncmp implementation for PowerPC32.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how the end-of-string testing works. */
+
+/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+EALIGN (BP_SYM(strncmp), 4, 0)
+
+#define rTMP r0
+#define rRTN r3
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rN r5 /* max string length */
+/* Note: The Bounded pointer support in this code is broken. This code
+   was inherited from PPC32 and that support was never completed.
+ Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */
+#define rWORD1 r6 /* current word in s1 */
+#define rWORD2 r7 /* current word in s2 */
+#define rWORD3 r10
+#define rWORD4 r11
+#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */
+#define r7F7F r9 /* constant 0x7f7f7f7f */
+#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
+#define rBITDIF r11 /* bits that differ in s1 & s2 words */
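+
+/* The word-at-a-time end-of-string test used below is the classic
+   trick; in C (illustrative, w is one 32-bit word of s1):
+
+     unsigned int t = w + 0xfefefeff;       (w - 0x01010101, via rFEFE)
+     unsigned int m = ~(w | 0x7f7f7f7f);    (~w & 0x80808080, rNEG)
+     if (t & m)                             (nonzero iff w has a 0 byte)
+       ...
+
+   a zero byte turns into 0xff in t (setting its high bit), and m keeps
+   only the bytes whose high bit was clear in w, so the test has no
+   false positives.  */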
+
+ dcbt 0,rSTR1
+ or rTMP, rSTR2, rSTR1
+ lis r7F7F, 0x7f7f
+ dcbt 0,rSTR2
+ clrlwi. rTMP, rTMP, 30
+ cmplwi cr1, rN, 0
+ lis rFEFE, -0x101
+ bne L(unaligned)
+/* We are word aligned so set up for two loops.  First a word
+   loop, then fall into the byte loop if there is any residual. */
+ srwi. rTMP, rN, 2
+ clrlwi rN, rN, 30
+ addi rFEFE, rFEFE, -0x101
+ addi r7F7F, r7F7F, 0x7f7f
+ cmplwi cr1, rN, 0
+ beq L(unaligned)
+
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ b L(g1)
+
+L(g0):
+ lwzu rWORD1, 4(rSTR1)
+ bne- cr1, L(different)
+ lwzu rWORD2, 4(rSTR2)
+L(g1): add rTMP, rFEFE, rWORD1
+ nor rNEG, r7F7F, rWORD1
+ bdz L(tail)
+ and. rTMP, rTMP, rNEG
+ cmpw cr1, rWORD1, rWORD2
+ beq+ L(g0)
+
+/* OK. We've hit the end of the string. We need to be careful that
+ we don't compare two strings as different because of gunk beyond
+ the end of the strings... */
+
+L(endstring):
+ and rTMP, r7F7F, rWORD1
+ beq cr1, L(equal)
+ add rTMP, rTMP, r7F7F
+ xor. rBITDIF, rWORD1, rWORD2
+
+ andc rNEG, rNEG, rTMP
+ blt- L(highbit)
+ cntlzw rBITDIF, rBITDIF
+ cntlzw rNEG, rNEG
+ addi rNEG, rNEG, 7
+ cmpw cr1, rNEG, rBITDIF
+ sub rRTN, rWORD1, rWORD2
+ blt- cr1, L(equal)
+ srawi rRTN, rRTN, 31
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ lwzu rWORD1, -4(rSTR1)
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ srawi rRTN, rRTN, 31
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ srwi rWORD2, rWORD2, 24
+ srwi rWORD1, rWORD1, 24
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+
+/* Oh well. In this case, we just do a byte-by-byte comparison. */
+ .align 4
+L(tail):
+ and. rTMP, rTMP, rNEG
+ cmpw cr1, rWORD1, rWORD2
+ bne- L(endstring)
+ addi rSTR1, rSTR1, 4
+ bne- cr1, L(different)
+ addi rSTR2, rSTR2, 4
+ cmplwi cr1, rN, 0
+L(unaligned):
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ ble cr1, L(ux)
+L(uz):
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ .align 4
+L(u1):
+ cmpwi cr1, rWORD1, 0
+ bdz L(u4)
+ cmpw rWORD1, rWORD2
+ beq- cr1, L(u4)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- L(u4)
+ cmpwi cr1, rWORD3, 0
+ bdz L(u3)
+ cmpw rWORD3, rWORD4
+ beq- cr1, L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- L(u3)
+ cmpwi cr1, rWORD1, 0
+ bdz L(u4)
+ cmpw rWORD1, rWORD2
+ beq- cr1, L(u4)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- L(u4)
+ cmpwi cr1, rWORD3, 0
+ bdz L(u3)
+ cmpw rWORD3, rWORD4
+ beq- cr1, L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ beq+ L(u1)
+
+L(u3): sub rRTN, rWORD3, rWORD4
+ blr
+L(u4): sub rRTN, rWORD1, rWORD2
+ blr
+L(ux):
+ li rRTN, 0
+ blr
+END (BP_SYM (strncmp))
+libc_hidden_builtin_def (strncmp)
+
diff --git a/sysdeps/powerpc/powerpc32/power4/wordcopy.c b/sysdeps/powerpc/powerpc32/power4/wordcopy.c
new file mode 100644
index 0000000000..f71b41dc42
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/wordcopy.c
@@ -0,0 +1,209 @@
+/* _memcopy.c -- subroutines for memory copy functions.
+ Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = a0;
+ ((op_t *) dstp)[1] = a1;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a0 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+
+ if (len == 1)
+ return;
+
+ a0 = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+ ((op_t *) dstp)[1] = MERGE (a1, sh_1, a2, sh_2);
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = a1;
+ ((op_t *) dstp)[0] = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make srcp aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a2 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ if (len == 1)
+ return;
+
+ a2 = a1;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, sh_1, a2, sh_2);
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
diff --git a/sysdeps/powerpc/powerpc32/power5+/Implies b/sysdeps/powerpc/powerpc32/power5+/Implies
new file mode 100644
index 0000000000..4e3a983426
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/Implies b/sysdeps/powerpc/powerpc32/power5+/fpu/Implies
new file mode 100644
index 0000000000..7c16e45c29
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S
new file mode 100644
index 0000000000..99cd6cc969
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S
@@ -0,0 +1,37 @@
+/* ceil function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__ceil, 4, 0)
+ frip fp1, fp1
+ blr
+ END (__ceil)
+
+weak_alias (__ceil, ceil)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__ceil, ceill)
+strong_alias (__ceil, __ceill)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __ceil, ceill, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S
new file mode 100644
index 0000000000..0a844b6f47
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S
@@ -0,0 +1,30 @@
+/* ceilf function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__ceilf, 4, 0)
+ frip fp1, fp1 /* The rounding instructions are double. */
+	frsp	fp1, fp1	/* But we need to set overflow for float. */
+ blr
+ END (__ceilf)
+
+weak_alias (__ceilf, ceilf)
+
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S
new file mode 100644
index 0000000000..3b1d26f9fc
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S
@@ -0,0 +1,37 @@
+/* floor function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__floor, 4, 0)
+ frim fp1, fp1
+ blr
+ END (__floor)
+
+weak_alias (__floor, floor)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__floor, floorl)
+strong_alias (__floor, __floorl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __floor, floorl, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S
new file mode 100644
index 0000000000..640140c334
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S
@@ -0,0 +1,30 @@
+/* floorf function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__floorf, 4, 0)
+ frim fp1, fp1 /* The rounding instructions are double. */
+	frsp	fp1, fp1	/* But we need to set overflow for float. */
+ blr
+ END (__floorf)
+
+weak_alias (__floorf, floorf)
+
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
new file mode 100644
index 0000000000..a2171fe09a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
@@ -0,0 +1,60 @@
+/* llround function.  POWER5+, PowerPC32 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3, r4] llround (double x [fp1])
+   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Doubleword
+   with round toward zero instruction.  */
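A minimal C model of this pre-round-then-truncate sequence, outside the
patch itself (helper name hypothetical; assumes C99 round(), whose
ties-away-from-zero behavior matches frin):

    #include <math.h>

    /* Sketch: frin rounds to the nearest integral value with ties away
       from zero, like round(); fctidz then converts with truncation,
       which is exact because the value is already integral.  */
    static long long
    llround_sketch (double x)
    {
      double nearest = round (x);   /* frin equivalent.  */
      return (long long) nearest;   /* fctidz equivalent.  */
    }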
+
+ .machine "power5"
+ENTRY (__llround)
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ frin fp2,fp1
+	fctidz	fp3,fp2	/* Convert to Integer Doubleword, round toward zero.  */
+ stfd fp3,8(r1)
+ nop /* Ensure the following load is in a different dispatch */
+ nop /* group to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r4,12(r1)
+ lwz r3,8(r1)
+ addi r1,r1,16
+ blr
+ END (__llround)
+
+weak_alias (__llround, llround)
+
+strong_alias (__llround, __llroundf)
+weak_alias (__llround, llroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S
new file mode 100644
index 0000000000..ffe6b7eb37
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S
@@ -0,0 +1,2 @@
+/* __llroundf is in s_llround.S */
+/* __llroundf is in s_llround.S */
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
new file mode 100644
index 0000000000..83107f6f97
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
@@ -0,0 +1,58 @@
+/* lround function. POWER5+, PowerPC32 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long [r3] lround (double x [fp1])
+   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Word with
+   round toward zero instruction.  */
+
+ .machine "power5"
+ENTRY (__lround)
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ frin fp2,fp1
+	fctiwz	fp3,fp2	/* Convert to Integer Word, round toward zero.  */
+ stfd fp3,8(r1)
+ nop /* Ensure the following load is in a different dispatch */
+ nop /* group to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r3,12(r1)
+ addi r1,r1,16
+ blr
+ END (__lround)
+
+weak_alias (__lround, lround)
+
+strong_alias (__lround, __lroundf)
+weak_alias (__lround, lroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S
new file mode 100644
index 0000000000..379c579def
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S
@@ -0,0 +1,37 @@
+/* round function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__round, 4, 0)
+ frin fp1, fp1
+ blr
+ END (__round)
+
+weak_alias (__round, round)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__round, roundl)
+strong_alias (__round, __roundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __round, roundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S
new file mode 100644
index 0000000000..4193c69152
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S
@@ -0,0 +1,30 @@
+/* roundf function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__roundf, 4, 0)
+	frin	fp1, fp1	/* The rounding instructions are double.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__roundf)
+
+weak_alias (__roundf, roundf)
+
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S
new file mode 100644
index 0000000000..92fa3ca34d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S
@@ -0,0 +1,37 @@
+/* trunc function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__trunc, 4, 0)
+ friz fp1, fp1
+ blr
+ END (__trunc)
+
+weak_alias (__trunc, trunc)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__trunc, truncl)
+strong_alias (__trunc, __truncl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __trunc, truncl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S
new file mode 100644
index 0000000000..c575e2bbc2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S
@@ -0,0 +1,30 @@
+/* truncf function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__truncf, 4, 0)
+	friz	fp1, fp1	/* The rounding instructions are double.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__truncf)
+
+weak_alias (__truncf, truncf)
+
diff --git a/sysdeps/powerpc/powerpc32/power5/Implies b/sysdeps/powerpc/powerpc32/power5/Implies
new file mode 100644
index 0000000000..4e3a983426
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/Implies b/sysdeps/powerpc/powerpc32/power5/fpu/Implies
new file mode 100644
index 0000000000..7c16e45c29
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power6/Implies b/sysdeps/powerpc/powerpc32/power6/Implies
new file mode 100644
index 0000000000..c9a2d47f8a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/power5+
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/Implies b/sysdeps/powerpc/powerpc32/power6/fpu/Implies
new file mode 100644
index 0000000000..57b92fb13e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/power5+/fpu
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
new file mode 100644
index 0000000000..528607602d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
@@ -0,0 +1,47 @@
+/* Round double to long long int.  PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long int[r3, r4] __llrint (double x[fp1]) */
+ENTRY (__llrint)
+ CALL_MCOUNT
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ fctid fp13,fp1
+ stfd fp13,8(r1)
+/* Ensure the following load is in a different dispatch group by
+ inserting "group ending nop". */
+ ori r1,r1,0
+ lwz r3,8(r1)
+ lwz r4,12(r1)
+ addi r1,r1,16
+ blr
+ END (__llrint)
+
+weak_alias (__llrint, llrint)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__llrint, __llrintl)
+weak_alias (__llrint, llrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llrint, llrintl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
new file mode 100644
index 0000000000..2c14800da2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
@@ -0,0 +1,39 @@
+/* Round float to long long int.  PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+
+/* long long int[r3, r4] __llrintf (float x[fp1]) */
+ENTRY (__llrintf)
+ CALL_MCOUNT
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ fctid fp13,fp1
+ stfd fp13,8(r1)
+/* Ensure the following load is in a different dispatch group by
+ inserting "group ending nop". */
+ ori r1,r1,0
+ lwz r3,8(r1)
+ lwz r4,12(r1)
+ addi r1,r1,16
+ blr
+ END (__llrintf)
+
+weak_alias (__llrintf, llrintf)
+
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
new file mode 100644
index 0000000000..2b1fa1c846
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
@@ -0,0 +1,60 @@
+/* llround function.  POWER6, PowerPC32 version.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3, r4] llround (double x [fp1])
+   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Doubleword
+   with round toward zero instruction.  */
+
+ .machine "power5"
+ENTRY (__llround)
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ frin fp2,fp1
+	fctidz	fp3,fp2	/* Convert to Integer Doubleword, round toward zero.  */
+ stfd fp3,8(r1)
+/* Ensure the following load is in a different dispatch group by
+ inserting "group ending nop". */
+ ori r1,r1,0
+ lwz r4,12(r1)
+ lwz r3,8(r1)
+ addi r1,r1,16
+ blr
+ END (__llround)
+
+weak_alias (__llround, llround)
+
+strong_alias (__llround, __llroundf)
+weak_alias (__llround, llroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S
new file mode 100644
index 0000000000..ffe6b7eb37
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S
@@ -0,0 +1,2 @@
+/* __llroundf is in s_llround.S */
+/* __llroundf is in s_llround.S */
diff --git a/sysdeps/powerpc/powerpc32/power6/memcpy.S b/sysdeps/powerpc/powerpc32/power6/memcpy.S
new file mode 100644
index 0000000000..ba45fd250c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/memcpy.S
@@ -0,0 +1,843 @@
+/* Optimized memcpy implementation for PowerPC32 on POWER6.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+   Memcpy handles short copies (< 32 bytes) using binary move blocks
+   (no loops) of lwz/stw.  The tail (remaining 1-3 bytes) is handled
+   with the appropriate combination of byte and halfword load/stores.
+ There is minimal effort to optimize the alignment of short moves.
+
+ Longer moves (>= 32-bytes) justify the effort to get at least the
+ destination word (4-byte) aligned. Further optimization is
+ possible when both source and destination are word aligned.
+ Each case has an optimized unrolled loop. */
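As a rough, hypothetical C outline of that dispatch (names and
structure are illustrative only; the real code is the assembly below):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the strategy: short copies go byte-by-byte (the real
       code uses branchy lwz/stw blocks); longer copies word-align the
       destination first, then use a word loop when the source is
       aligned too, falling back to the load/shift/or path otherwise.  */
    static void *
    memcpy_sketch (void *dst, const void *src, size_t len)
    {
      unsigned char *d = dst;
      const unsigned char *s = src;

      if (len >= 32)
        {
          while ((uintptr_t) d & 3)      /* 0-3 bytes to word-align dst */
            { *d++ = *s++; len--; }
          if (((uintptr_t) s & 3) == 0)  /* both word aligned */
            for (; len >= 4; len -= 4, d += 4, s += 4)
              *(uint32_t *) d = *(const uint32_t *) s;
          /* else: unaligned source; see the L(wdu) shift/or path.  */
        }
      while (len--)                      /* tail, or whole short copy */
        *d++ = *s++;
      return dst;
    }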
+
+ .machine power6
+EALIGN (BP_SYM (memcpy), 5, 0)
+ CALL_MCOUNT
+
+ stwu 1,-32(1)
+ cfi_adjust_cfa_offset(32)
+ cmplwi cr1,5,31 /* check for short move. */
+ neg 0,3
+ cmplwi cr1,5,31
+ clrlwi 10,4,30 /* check alignment of src. */
+ andi. 11,3,3 /* check alignment of dst. */
+ clrlwi 0,0,30 /* Number of bytes until the 1st word of dst. */
+ ble- cr1,L(word_unaligned_short) /* If move < 32 bytes. */
+ cmplw cr6,10,11
+ stw 31,24(1)
+ cfi_offset(31,(24-32))
+ stw 30,20(1)
+ cfi_offset(30,(20-32))
+ mr 30,3
+ beq .L0
+ mtcrf 0x01,0
+ subf 31,0,5 /* Length after alignment. */
+ add 12,4,0 /* Compute src addr after alignment. */
+ /* Move 0-3 bytes as needed to get the destination word aligned. */
+1: bf 31,2f
+ lbz 6,0(4)
+ bf 30,3f
+ lhz 7,1(4)
+ stb 6,0(3)
+ sth 7,1(3)
+ addi 3,3,3
+ b 0f
+3:
+ stb 6,0(3)
+ addi 3,3,1
+ b 0f
+2: bf 30,0f
+ lhz 6,0(4)
+ sth 6,0(3)
+ addi 3,3,2
+0:
+ clrlwi 10,12,30 /* check alignment of src again. */
+ srwi 9,31,2 /* Number of full words remaining. */
+	bne-	cr6,L(wdu) /* If source is not word aligned.  */
+ clrlwi 11,31,30 /* calculate the number of tail bytes */
+ b L(word_aligned)
+ /* Copy words from source to destination, assuming the destination is
+ aligned on a word boundary.
+
+ At this point we know there are at least 29 bytes left (32-3) to copy.
+ The next step is to determine if the source is also word aligned.
+   If not, branch to the unaligned move code at L(wdu), which uses
+   a load, shift, store strategy.
+
+ Otherwise source and destination are word aligned, and we can use
+ the optimized word copy loop. */
+ .align 4
+.L0:
+ mr 31,5
+ mr 12,4
+	bne-	cr6,L(wdu) /* If source is not word aligned.  */
+ srwi 9,5,2 /* Number of full words remaining. */
+ clrlwi 11,5,30 /* calculate the number of tail bytes */
+
+ /* Move words where destination and source are word aligned.
+ Use an unrolled loop to copy 4 words (16-bytes) per iteration.
+     If the copy is not an exact multiple of 16 bytes, 1-3
+ words are copied as needed to set up the main loop. After
+ the main loop exits there may be a tail of 1-3 bytes. These bytes are
+ copied a halfword/byte at a time as needed to preserve alignment. */
+L(word_aligned):
+ mtcrf 0x01,9
+ srwi 8,31,4 /* calculate the 16 byte loop count */
+ cmplwi cr1,9,4
+ cmplwi cr6,11,0
+ mr 11,12
+
+ bf 30,1f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 11,12,8
+ mtctr 8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 10,3,8
+ bf 31,4f
+ lwz 0,8(12)
+ stw 0,8(3)
+ blt cr1,3f
+ addi 11,12,12
+ addi 10,3,12
+ b 4f
+ .align 4
+1:
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ lwz 6,0(12)
+ addi 11,12,4
+ stw 6,0(3)
+ addi 10,3,4
+
+ .align 4
+4:
+ lwz 6,0(11)
+ lwz 7,4(11)
+ lwz 8,8(11)
+ lwz 0,12(11)
+ stw 6,0(10)
+ stw 7,4(10)
+ stw 8,8(10)
+ stw 0,12(10)
+ addi 11,11,16
+ addi 10,10,16
+ bdnz 4b
+3:
+ clrrwi 0,31,2
+ mtcrf 0x01,31
+ beq cr6,0f
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+/* At this point we have a tail of 0-3 bytes and we know that the
+ destination is word aligned. */
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+
+/* Copy up to 31 bytes.  This is divided into two cases, 0-8 bytes and
+   9-31 bytes.  Each case is handled without loops, using binary
+   (1,2,4,8) tests.
+
+   In the short (0-8 byte) case no attempt is made to force alignment
+   of either source or destination.  The hardware will handle the
+   unaligned load/stores with small delays for crossing 32-, 128-, and
+   4096-byte boundaries.  Since these short moves are unlikely to be
+   unaligned or cross these boundaries, the overhead to force
+   alignment is not justified.
+
+   The longer (9-31 byte) move is more likely to cross 32- or 128-byte
+   boundaries.  Since only loads are sensitive to the 32-/128-byte
+   boundaries it is more important to align the source than the
+   destination.  If the source is not already word aligned, we first
+   move 1-3 bytes as needed.  Since we are only word aligned we don't
+   use doubleword load/stores to ensure that all loads are aligned.
+   While the destination and stores may still be unaligned, this
+   is only an issue for page (4096-byte boundary) crossing, which
+   should be rare for these short moves.  The hardware handles this
+   case automatically with a small (~20 cycle) delay.  */
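The "binary (1,2,4,8) tests" amount to one fixed-size move per set bit
of the length; a hypothetical C sketch of the 0-8 byte case:

    #include <stddef.h>
    #include <string.h>

    /* Sketch: one unconditional-size move per set length bit,
       mirroring the bf/bt tests on CR bits in the assembly below.  */
    static void
    copy_upto8_sketch (unsigned char *d, const unsigned char *s, size_t len)
    {
      if (len & 8) { memcpy (d, s, 8); d += 8; s += 8; }
      if (len & 4) { memcpy (d, s, 4); d += 4; s += 4; }
      if (len & 2) { memcpy (d, s, 2); d += 2; s += 2; }
      if (len & 1) { *d = *s; }
    }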
+ .align 4
+
+ cfi_same_value (31)
+ cfi_same_value (30)
+L(word_unaligned_short):
+ mtcrf 0x01,5
+ cmplwi cr6,5,8
+ neg 8,4
+ clrrwi 9,4,2
+ andi. 0,8,3
+ beq cr6,L(wus_8) /* Handle moves of 8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmpldi cr1,5,16
+ mr 12,4
+ ble cr6,L(wus_4) /* Handle moves of 0-8 bytes. */
+ mr 11,3
+ mr 10,5
+ cmplwi cr6,0,2
+ beq L(wus_tail) /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(9)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srdi 7,6,16
+ bgt cr6,3f
+ sth 6,0(3)
+ b 7f
+ .align 4
+3:
+ stb 7,0(3)
+ sth 6,1(3)
+ b 7f
+ .align 4
+5:
+ stb 6,0(3)
+7:
+ cmplwi cr1,10,16
+ add 11,3,0
+ mtcrf 0x01,10
+ .align 4
+L(wus_tail):
+/* At least 6 bytes left and the source is word aligned. This allows
+ some speculative loads up front. */
+/* We need to special case the fall-through because the biggest delays
+ are due to address computation not being ready in time for the
+ AGEN. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ blt cr1,L(wus_tail8)
+ cmplwi cr0,10,24
+L(wus_tail16): /* Move 16 bytes. */
+ stw 6,0(11)
+ stw 7,4(11)
+ lwz 6,8(12)
+ lwz 7,12(12)
+ stw 6,8(11)
+ stw 7,12(11)
+/* Move 8 bytes more. */
+ bf 28,L(wus_tail16p8)
+ cmplwi cr1,10,28
+ lwz 6,16(12)
+ lwz 7,20(12)
+ stw 6,16(11)
+ stw 7,20(11)
+/* Move 4 bytes more. */
+ bf 29,L(wus_tail16p4)
+ lwz 6,24(12)
+ stw 6,24(11)
+ addi 12,12,28
+ addi 11,11,28
+ bgt cr1,L(wus_tail2)
+ /* exactly 28 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_tail16p8):  /* less than 8 bytes left.  */
+ beq cr1,L(wus_tailX) /* exactly 16 bytes, early exit. */
+ cmplwi cr1,10,20
+ bf 29,L(wus_tail16p2)
+/* Move 4 bytes more. */
+ lwz 6,16(12)
+ stw 6,16(11)
+ addi 12,12,20
+ addi 11,11,20
+ bgt cr1,L(wus_tail2)
+ /* exactly 20 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_tail16p4):  /* less than 4 bytes left.  */
+ addi 12,12,24
+ addi 11,11,24
+ bgt cr0,L(wus_tail2)
+ /* exactly 24 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_tail16p2):  /* 16 bytes moved, less than 4 bytes left.  */
+ addi 12,12,16
+ addi 11,11,16
+ b L(wus_tail2)
+
+ .align 4
+L(wus_tail8): /* Move 8 bytes. */
+/* r6, r7 already loaded speculatively. */
+ cmplwi cr1,10,8
+ cmplwi cr0,10,12
+ bf 28,L(wus_tail4)
+ stw 6,0(11)
+ stw 7,4(11)
+/* Move 4 bytes more. */
+ bf 29,L(wus_tail8p4)
+ lwz 6,8(12)
+ stw 6,8(11)
+ addi 12,12,12
+ addi 11,11,12
+ bgt cr0,L(wus_tail2)
+ /* exactly 12 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_tail8p4):  /* less than 4 bytes left.  */
+ addi 12,12,8
+ addi 11,11,8
+ bgt cr1,L(wus_tail2)
+ /* exactly 8 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+
+ .align 4
+L(wus_tail4): /* Move 4 bytes. */
+/* r6 already loaded speculatively.  If we are here we know there are
+   more than 4 bytes left, so there is no need to test.  */
+ addi 12,12,4
+ stw 6,0(11)
+ addi 11,11,4
+L(wus_tail2): /* Move 2-3 bytes. */
+ bf 30,L(wus_tail1)
+ lhz 6,0(12)
+ sth 6,0(11)
+ bf 31,L(wus_tailX)
+ lbz 7,2(12)
+ stb 7,2(11)
+ addi 1,1,32
+ blr
+L(wus_tail1): /* Move 1 byte. */
+ bf 31,L(wus_tailX)
+ lbz 6,0(12)
+ stb 6,0(11)
+L(wus_tailX):
+ /* Return original dst pointer. */
+ addi 1,1,32
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+L(wus_8):
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ /* Return original dst pointer. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_4):
+ bf 29,L(wus_2)
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,L(wus_5)
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,L(wus_0)
+ lbz 8,6(4)
+ stb 8,6(3)
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_5):
+ bf 31,L(wus_0)
+ lbz 6,4(4)
+ stb 6,4(3)
+ /* Return original dst pointer. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_2): /* Move 2-3 bytes. */
+ bf 30,L(wus_1)
+ lhz 6,0(4)
+ sth 6,0(3)
+ bf 31,L(wus_0)
+ lbz 7,2(4)
+ stb 7,2(3)
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_1): /* Move 1 byte. */
+ bf 31,L(wus_0)
+ lbz 6,0(4)
+ stb 6,0(3)
+ .align 3
+L(wus_0):
+ /* Return original dst pointer. */
+ addi 1,1,32
+ blr
+
+ .align 4
+ cfi_offset(31,(24-32))
+ cfi_offset(30,(20-32))
+L(wdu):
+
+  /* Copy words where the destination is aligned but the source is
+     not.  For power4, power5 and power6 machines there is a penalty
+     for unaligned loads (src) that cross 32-byte, cacheline, or page
+     boundaries.  So we want to use simple (unaligned) loads where
+     possible, but avoid them where we know the load would span a
+     32-byte boundary.
+
+     At this point we know we have at least 29 (32-3) bytes to copy,
+     the src is unaligned, and we may cross at least one 32-byte
+     boundary.  Also we have the following register values:
+     r3  == adjusted dst, word aligned
+     r4  == unadjusted src
+     r5  == unadjusted len
+     r9  == adjusted Word length
+     r10 == src alignment (1-3)
+     r12 == adjusted src, not aligned
+     r31 == adjusted len
+
+     First we need to copy words up to but not crossing the next
+     32-byte boundary.  Then perform aligned loads just before and
+     just after the boundary and use shifts and or to generate the
+     next aligned word for dst.  If more than 32 bytes remain we copy
+     (unaligned src) the next 7 words and repeat the loop until fewer
+     than 32 bytes remain.
+
+     Then if more than 4 bytes remain we again use aligned loads,
+     shifts and or to generate the next dst word.  We then process the
+     remaining words using unaligned loads as needed.  Finally we
+     check if there are 1-3 bytes remaining and use halfword and/or
+     byte load/stores to complete the copy.
+*/
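The shift-and-or word generation described above can be sketched in C
(hypothetical helper; sh1 is 8 times the source misalignment, so 8, 16,
or 24 here):

    #include <stdint.h>

    /* Sketch: merge two aligned words that bracket the unaligned
       source position into one aligned destination word, as the
       slw/srw/or (or rlwimi) sequences below do.  sh1 must be 8, 16,
       or 24; big-endian byte order assumed, as on PowerPC.  */
    static uint32_t
    merge_words_sketch (uint32_t prev, uint32_t next, unsigned sh1)
    {
      return (prev << sh1) | (next >> (32 - sh1));
    }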
+ mr 4,12 /* restore unaligned adjusted src ptr */
+ clrlwi 0,12,27 /* Find dist from previous 32-byte boundary. */
+ slwi 10,10,3 /* calculate number of bits to shift 1st word left */
+ cmplwi cr5,0,16
+ subfic 8,0,32 /* Number of bytes to next 32-byte boundary. */
+
+ mtcrf 0x01,8
+ cmplwi cr1,10,16
+ subfic 9,10,32 /* number of bits to shift 2nd word right */
+/* This test is reversed because the timing to compare the bytes to the
+   32-byte boundary could not be met.  So we compare the bytes from the
+   previous 32-byte boundary and invert the test. */
+ bge cr5,L(wdu_h32_8)
+ .align 4
+ lwz 6,0(4)
+ lwz 7,4(4)
+ addi 12,4,16 /* generate alternate pointers to avoid agen */
+ addi 11,3,16 /* timing issues downstream. */
+ stw 6,0(3)
+ stw 7,4(3)
+ subi 31,31,16
+ lwz 6,8(4)
+ lwz 7,12(4)
+ addi 4,4,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+ bf 28,L(wdu_h32_4)
+ lwz 6,0(12)
+ lwz 7,4(12)
+ subi 31,31,8
+ addi 4,4,8
+ stw 6,0(11)
+ stw 7,4(11)
+ addi 3,3,8
+ bf 29,L(wdu_h32_0)
+ lwz 6,8(12)
+ addi 4,4,4
+ subi 31,31,4
+ stw 6,8(11)
+ addi 3,3,4
+ b L(wdu_h32_0)
+ .align 4
+L(wdu_h32_8):
+ bf 28,L(wdu_h32_4)
+ lwz 6,0(4)
+ lwz 7,4(4)
+ subi 31,31,8
+ bf 29,L(wdu_h32_8x)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,8(4)
+ addi 4,4,12
+ subi 31,31,4
+ stw 6,8(3)
+ addi 3,3,12
+ b L(wdu_h32_0)
+ .align 4
+L(wdu_h32_8x):
+ addi 4,4,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+ b L(wdu_h32_0)
+ .align 4
+L(wdu_h32_4):
+ bf 29,L(wdu_h32_0)
+ lwz 6,0(4)
+ subi 31,31,4
+ addi 4,4,4
+ stw 6,0(3)
+ addi 3,3,4
+ .align 4
+L(wdu_h32_0):
+/* Set up for the 32-byte boundary-crossing word move and possibly the
+   32-byte move loop. */
+ clrrwi 12,4,2
+ cmplwi cr5,31,32
+ bge cr1,L(wdu2_32)
+#if 0
+ b L(wdu1_32)
+/*
+ cmplwi cr1,10,8
+ beq cr1,L(wdu1_32)
+ cmplwi cr1,10,16
+ beq cr1,L(wdu2_32)
+ cmplwi cr1,10,24
+ beq cr1,L(wdu3_32)
+*/
+L(wdu_32):
+ lwz 6,0(12)
+ cmplwi cr6,31,4
+ srwi 8,31,5 /* calculate the 32 byte loop count */
+ slw 0,6,10
+ clrlwi 31,31,27 /* The remaining bytes, < 32. */
+ blt cr5,L(wdu_32tail)
+ mtctr 8
+ cmplwi cr6,31,4
+ .align 4
+L(wdu_loop32):
+ /* copy 32 bytes at a time */
+ lwz 8,4(12)
+ addi 12,12,32
+ lwz 7,4(4)
+ srw 8,8,9
+ or 0,0,8
+ stw 0,0(3)
+ stw 7,4(3)
+ lwz 6,8(4)
+ lwz 7,12(4)
+ stw 6,8(3)
+ stw 7,12(3)
+ lwz 6,16(4)
+ lwz 7,20(4)
+ stw 6,16(3)
+ stw 7,20(3)
+ lwz 6,24(4)
+ lwz 7,28(4)
+ lwz 8,0(12)
+ addi 4,4,32
+ stw 6,24(3)
+ stw 7,28(3)
+ addi 3,3,32
+ slw 0,8,10
+ bdnz+ L(wdu_loop32)
+
+L(wdu_32tail):
+ mtcrf 0x01,31
+ cmplwi cr5,31,16
+ blt cr6,L(wdu_4tail)
+ /* calculate and store the final word */
+ lwz 8,4(12)
+ srw 8,8,9
+ or 6,0,8
+ b L(wdu_32tailx)
+#endif
+ .align 4
+L(wdu1_32):
+ lwz 6,-1(4)
+ cmplwi cr6,31,4
+ srwi 8,31,5 /* calculate the 32 byte loop count */
+ slwi 6,6,8
+ clrlwi 31,31,27 /* The remaining bytes, < 32. */
+ blt cr5,L(wdu1_32tail)
+ mtctr 8
+ cmplwi cr6,31,4
+
+ lwz 8,3(4)
+ lwz 7,4(4)
+/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
+ rlwimi 6,8,8,(32-8),31
+ b L(wdu1_loop32x)
+ .align 4
+L(wdu1_loop32):
+ /* copy 32 bytes at a time */
+ lwz 8,3(4)
+ lwz 7,4(4)
+ stw 10,-8(3)
+ stw 11,-4(3)
+/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
+ rlwimi 6,8,8,(32-8),31
+L(wdu1_loop32x):
+ lwz 10,8(4)
+ lwz 11,12(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,16(4)
+ lwz 7,20(4)
+ stw 10,8(3)
+ stw 11,12(3)
+ lwz 10,24(4)
+ lwz 11,28(4)
+ lwz 8,32-1(4)
+ addi 4,4,32
+ stw 6,16(3)
+ stw 7,20(3)
+ addi 3,3,32
+ slwi 6,8,8
+ bdnz+ L(wdu1_loop32)
+ stw 10,-8(3)
+ stw 11,-4(3)
+
+L(wdu1_32tail):
+ mtcrf 0x01,31
+ cmplwi cr5,31,16
+ blt cr6,L(wdu_4tail)
+ /* calculate and store the final word */
+ lwz 8,3(4)
+/* Equivalent to: srwi   8,8,32-8;  or    6,6,8  */
+ rlwimi 6,8,8,(32-8),31
+ b L(wdu_32tailx)
+
+L(wdu2_32):
+ bgt cr1,L(wdu3_32)
+ lwz 6,-2(4)
+ cmplwi cr6,31,4
+ srwi 8,31,5 /* calculate the 32 byte loop count */
+ slwi 6,6,16
+ clrlwi 31,31,27 /* The remaining bytes, < 32. */
+ blt cr5,L(wdu2_32tail)
+ mtctr 8
+ cmplwi cr6,31,4
+
+ lwz 8,2(4)
+ lwz 7,4(4)
+/* Equivalent to: srwi   8,8,32-16;  or    6,6,8  */
+ rlwimi 6,8,16,(32-16),31
+ b L(wdu2_loop32x)
+ .align 4
+L(wdu2_loop32):
+ /* copy 32 bytes at a time */
+ lwz 8,2(4)
+ lwz 7,4(4)
+ stw 10,-8(3)
+ stw 11,-4(3)
+/* Equivalent to srwi   8,8,32-16;  or    6,6,8  */
+ rlwimi 6,8,16,(32-16),31
+L(wdu2_loop32x):
+ lwz 10,8(4)
+ lwz 11,12(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,16(4)
+ lwz 7,20(4)
+ stw 10,8(3)
+ stw 11,12(3)
+ lwz 10,24(4)
+ lwz 11,28(4)
+/* lwz 8,0(12) */
+ lwz 8,32-2(4)
+ addi 4,4,32
+ stw 6,16(3)
+ stw 7,20(3)
+ addi 3,3,32
+ slwi 6,8,16
+ bdnz+ L(wdu2_loop32)
+ stw 10,-8(3)
+ stw 11,-4(3)
+
+L(wdu2_32tail):
+ mtcrf 0x01,31
+ cmplwi cr5,31,16
+ blt cr6,L(wdu_4tail)
+ /* calculate and store the final word */
+ lwz 8,2(4)
+/* Equivalent to: srwi   8,8,32-16;  or    6,6,8  */
+ rlwimi 6,8,16,(32-16),31
+ b L(wdu_32tailx)
+
+L(wdu3_32):
+/* lwz 6,0(12) */
+ lwz 6,-3(4)
+ cmplwi cr6,31,4
+ srwi 8,31,5 /* calculate the 32 byte loop count */
+ slwi 6,6,24
+ clrlwi 31,31,27 /* The remaining bytes, < 32. */
+ blt cr5,L(wdu3_32tail)
+ mtctr 8
+ cmplwi cr6,31,4
+
+ lwz 8,1(4)
+ lwz 7,4(4)
+/* Equivalent to: srwi   8,8,32-24;  or    6,6,8  */
+ rlwimi 6,8,24,(32-24),31
+ b L(wdu3_loop32x)
+ .align 4
+L(wdu3_loop32):
+ /* copy 32 bytes at a time */
+ lwz 8,1(4)
+ lwz 7,4(4)
+ stw 10,-8(3)
+ stw 11,-4(3)
+/* Equivalent to srwi   8,8,32-24;  or    6,6,8  */
+ rlwimi 6,8,24,(32-24),31
+L(wdu3_loop32x):
+ lwz 10,8(4)
+ lwz 11,12(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,16(4)
+ lwz 7,20(4)
+ stw 10,8(3)
+ stw 11,12(3)
+ lwz 10,24(4)
+ lwz 11,28(4)
+ lwz 8,32-3(4)
+ addi 4,4,32
+ stw 6,16(3)
+ stw 7,20(3)
+ addi 3,3,32
+ slwi 6,8,24
+ bdnz+ L(wdu3_loop32)
+ stw 10,-8(3)
+ stw 11,-4(3)
+
+L(wdu3_32tail):
+ mtcrf 0x01,31
+ cmplwi cr5,31,16
+ blt cr6,L(wdu_4tail)
+ /* calculate and store the final word */
+ lwz 8,1(4)
+/* Equivalent to: srwi   8,8,32-24;  or    6,6,8  */
+ rlwimi 6,8,24,(32-24),31
+ b L(wdu_32tailx)
+ .align 4
+L(wdu_32tailx):
+ blt cr5,L(wdu_t32_8)
+ lwz 7,4(4)
+ addi 12,4,16 /* generate alternate pointers to avoid agen */
+ addi 11,3,16 /* timing issues downstream. */
+ stw 6,0(3)
+ stw 7,4(3)
+ subi 31,31,16
+ lwz 6,8(4)
+ lwz 7,12(4)
+ addi 4,4,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+ bf 28,L(wdu_t32_4x)
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 4,4,8
+ subi 31,31,8
+ stw 6,0(11)
+ stw 7,4(11)
+ addi 3,3,8
+ bf 29,L(wdu_t32_0)
+ lwz 6,8(12)
+ addi 4,4,4
+ subi 31,31,4
+ stw 6,8(11)
+ addi 3,3,4
+ b L(wdu_t32_0)
+ .align 4
+L(wdu_t32_4x):
+ bf 29,L(wdu_t32_0)
+ lwz 6,0(4)
+ addi 4,4,4
+ subi 31,31,4
+ stw 6,0(3)
+ addi 3,3,4
+ b L(wdu_t32_0)
+ .align 4
+L(wdu_t32_8):
+ bf 28,L(wdu_t32_4)
+ lwz 7,4(4)
+ subi 31,31,8
+ bf 29,L(wdu_t32_8x)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,8(4)
+ subi 31,31,4
+ addi 4,4,12
+ stw 6,8(3)
+ addi 3,3,12
+ b L(wdu_t32_0)
+ .align 4
+L(wdu_t32_8x):
+ addi 4,4,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+ b L(wdu_t32_0)
+ .align 4
+L(wdu_t32_4):
+ subi 31,31,4
+ stw 6,0(3)
+ addi 4,4,4
+ addi 3,3,4
+ .align 4
+L(wdu_t32_0):
+L(wdu_4tail):
+ cmplwi cr6,31,0
+ beq cr6,L(wdus_0) /* If the tail is 0 bytes we are done! */
+ bf 30,L(wdus_3)
+ lhz 7,0(4)
+ sth 7,0(3)
+ bf 31,L(wdus_0)
+ lbz 8,2(4)
+ stb 8,2(3)
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+ .align 4
+L(wdus_3):
+ bf 31,L(wus_0)
+ lbz 6,0(4)
+ stb 6,0(3)
+ .align 4
+L(wdus_0):
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+END (BP_SYM (memcpy))
+
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/powerpc/powerpc32/power6/memset.S b/sysdeps/powerpc/powerpc32/power6/memset.S
new file mode 100644
index 0000000000..10fb7b9786
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/memset.S
@@ -0,0 +1,542 @@
+/* Optimized 32-bit memset implementation for POWER6.
+ Copyright (C) 1997,99, 2000,02,03,06,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), word (32 bits),
+ cache line (1024 bits). There is a special case for setting cache lines
+ to 0, to take advantage of the dcbz instruction. */
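A hypothetical C outline of that three-size strategy (illustrative
only; dcbz itself has no portable C equivalent):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: byte stores to word-align, word stores up to a 128-byte
       line boundary, then whole cache lines (dcbz in the real code
       when the fill value is zero), then the remaining words/bytes.  */
    static void *
    memset_sketch (void *s, int c, size_t n)
    {
      unsigned char *p = s;
      uint32_t w = (unsigned char) c;
      w |= w << 8;                  /* replicate byte to halfword...  */
      w |= w << 16;                 /* ...and halfword to word.  */

      while (n > 0 && ((uintptr_t) p & 3))
        { *p++ = (unsigned char) c; n--; }
      while (n >= 4 && ((uintptr_t) p & 127))
        { *(uint32_t *) p = w; p += 4; n -= 4; }
      while (n >= 128)              /* full lines; dcbz if w == 0 */
        {
          for (int i = 0; i < 32; i++)
            ((uint32_t *) p)[i] = w;
          p += 128; n -= 128;
        }
      while (n--)                   /* tail words/bytes */
        *p++ = (unsigned char) c;
      return s;
    }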
+
+ .machine power6
+EALIGN (BP_SYM (memset), 7, 0)
+ CALL_MCOUNT
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#define rMEMP0 r3 /* Original value of 1st arg. */
+#define rCHR r4 /* Char to set in each byte. */
+#define rLEN r5 /* Length of region to set. */
+#define rMEMP r6 /* Address at which we are storing. */
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+
+#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */
+#define rMEMP3 r9 /* Alt mem pointer. */
+L(_memset):
+/* Take care of case for size <= 4. */
+ cmplwi cr1, rLEN, 4
+ andi. rALIGN, rMEMP0, 3
+ mr rMEMP, rMEMP0
+ ble- cr1, L(small)
+/* Align to word boundary. */
+ cmplwi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ beq+ L(aligned)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 4
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+ .align 4
+/* Handle the case of size < 31. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x1C
+ subfic rALIGN, rALIGN, 0x20
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stw rCHR, -4(rMEMP2)
+ stwu rCHR, -8(rMEMP2)
+ nop
+L(a1): blt cr1, L(a2)
+ stw rCHR, -4(rMEMP2)
+ stw rCHR, -8(rMEMP2)
+ stw rCHR, -12(rMEMP2)
+ stwu rCHR, -16(rMEMP2)
+L(a2): bf 29, L(caligned)
+ stw rCHR, -4(rMEMP2)
+
+ .align 3
+/* Now aligned to a 32 byte boundary. */
+L(caligned):
+ cmplwi cr1, rCHR, 0
+ clrrwi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+L(nondcbz):
+ beq L(medium) /* We may not actually get to do a full line. */
+ nop
+/* Storing a non-zero "c" value.  We are aligned at a sector (32-byte)
+   boundary but may not be at a cache line (128-byte) boundary.  */
+L(nzloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+   If rLEN is less than the distance to the next cache-line boundary use
+ cacheAligned1 code to finish the tail. */
+ cmplwi cr1,rLEN,128
+
+ andi. rTMP,rMEMP,127
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ stw rCHR,-8(rMEMP3)
+ stw rCHR,-4(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP3)
+ stw rCHR,4(rMEMP3)
+ addi rMEMP,rMEMP,32
+ stw rCHR,8(rMEMP3)
+ stw rCHR,12(rMEMP3)
+ andi. rTMP,rMEMP,127
+ stw rCHR,16(rMEMP3)
+ stw rCHR,20(rMEMP3)
+ stw rCHR,24(rMEMP3)
+ stw rCHR,28(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+/* At this point we can overrun the store queue (pipe reject) so it is
+ time to slow things down. The store queue can merge two adjacent
+ stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU.
+ So we add "group ending nops" to guarantee that we dispatch only two
+ stores every other cycle. */
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,32(rMEMP3)
+ stw rCHR,36(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmplwi cr1,rLEN,128
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,40(rMEMP3)
+ stw rCHR,44(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,48(rMEMP3)
+ stw rCHR,52(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,56(rMEMP3)
+ stw rCHR,60(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ b L(nzCacheAligned)
+
+/* Now we are aligned to the cache line and can use dcbtst. */
+ .align 5
+L(nzCacheAligned):
+ cmplwi cr1,rLEN,128
+ cmplwi cr6,rLEN,256
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(nzCacheAligned128)
+ .align 4
+L(nzCacheAligned128):
+ nop
+ addi rMEMP3,rMEMP,64
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ stw rCHR,24(rMEMP)
+ stw rCHR,28(rMEMP)
+ stw rCHR,32(rMEMP)
+ stw rCHR,36(rMEMP)
+ stw rCHR,40(rMEMP)
+ stw rCHR,44(rMEMP)
+ stw rCHR,48(rMEMP)
+ stw rCHR,52(rMEMP)
+ stw rCHR,56(rMEMP)
+ stw rCHR,60(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+/* At this point we can overrun the store queue (pipe reject) so it is
+ time to slow things down. The store queue can merge two adjacent
+ stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU.
+ So we add "group ending nops" to guarantee that we dispatch only one
+ store per cycle. */
+ stw rCHR,0(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,4(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,8(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,12(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,16(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,20(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,24(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,28(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,32(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,36(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,40(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,44(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,48(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,52(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,56(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,60(rMEMP3)
+ blt cr6,L(cacheAligned1)
+#ifndef NOT_IN_libc
+ lfd 0,-128(rMEMP)
+#endif
+ b L(nzCacheAligned256)
+ .align 5
+L(nzCacheAligned256):
+ cmplwi cr1,rLEN,256
+ addi rMEMP3,rMEMP,64
+#ifdef NOT_IN_libc
+/* When we are not in libc we should use only GPRs to avoid the FPU lock
+ interrupt. */
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ stw rCHR,24(rMEMP)
+ stw rCHR,28(rMEMP)
+ stw rCHR,32(rMEMP)
+ stw rCHR,36(rMEMP)
+ stw rCHR,40(rMEMP)
+ stw rCHR,44(rMEMP)
+ stw rCHR,48(rMEMP)
+ stw rCHR,52(rMEMP)
+ stw rCHR,56(rMEMP)
+ stw rCHR,60(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+ stw rCHR,0(rMEMP3)
+ stw rCHR,4(rMEMP3)
+ stw rCHR,8(rMEMP3)
+ stw rCHR,12(rMEMP3)
+ stw rCHR,16(rMEMP3)
+ stw rCHR,20(rMEMP3)
+ stw rCHR,24(rMEMP3)
+ stw rCHR,28(rMEMP3)
+ stw rCHR,32(rMEMP3)
+ stw rCHR,36(rMEMP3)
+ stw rCHR,40(rMEMP3)
+ stw rCHR,44(rMEMP3)
+ stw rCHR,48(rMEMP3)
+ stw rCHR,52(rMEMP3)
+ stw rCHR,56(rMEMP3)
+ stw rCHR,60(rMEMP3)
+#else
+/* We are in libc and this is a long memset so we can use FPRs and can afford
+   occasional FPU lock interrupts.  */
+ stfd 0,0(rMEMP)
+ stfd 0,8(rMEMP)
+ stfd 0,16(rMEMP)
+ stfd 0,24(rMEMP)
+ stfd 0,32(rMEMP)
+ stfd 0,40(rMEMP)
+ stfd 0,48(rMEMP)
+ stfd 0,56(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+ stfd 0,0(rMEMP3)
+ stfd 0,8(rMEMP3)
+ stfd 0,16(rMEMP3)
+ stfd 0,24(rMEMP3)
+ stfd 0,32(rMEMP3)
+ stfd 0,40(rMEMP3)
+ stfd 0,48(rMEMP3)
+ stfd 0,56(rMEMP3)
+#endif
+ bge cr1,L(nzCacheAligned256)
+ dcbtst 0,rMEMP
+ b L(cacheAligned1)
+
+ .align 4
+/* Storing a zero "c" value. We are aligned at a sector (32-byte)
+ boundary but may not be at cache line (128-byte) boundary. If the
+ remaining length spans a full cache line we can use the Data cache
+ block zero instruction. */
+L(zloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+   If rLEN is less than the distance to the next cache-line boundary use
+ cacheAligned1 code to finish the tail. */
+ cmplwi cr1,rLEN,128
+ beq L(medium)
+L(getCacheAligned):
+ andi. rTMP,rMEMP,127
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ stw rCHR,-8(rMEMP3)
+ stw rCHR,-4(rMEMP3)
+L(getCacheAligned2):
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ addi rMEMP,rMEMP,32
+ stw rCHR,0(rMEMP3)
+ stw rCHR,4(rMEMP3)
+ stw rCHR,8(rMEMP3)
+ stw rCHR,12(rMEMP3)
+ andi. rTMP,rMEMP,127
+ nop
+ stw rCHR,16(rMEMP3)
+ stw rCHR,20(rMEMP3)
+ stw rCHR,24(rMEMP3)
+ stw rCHR,28(rMEMP3)
+L(getCacheAligned3):
+ beq L(cacheAligned)
+/* At this point we can overrun the store queue (pipe reject) so it is
+ time to slow things down. The store queue can merge two adjacent
+ stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU.
+ So we add "group ending nops" to guarantee that we dispatch only two
+ stores every other cycle. */
+ addi rLEN,rLEN,-32
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,32(rMEMP3)
+ stw rCHR,36(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmplwi cr1,rLEN,128
+ ori r1,r1,0
+ stw rCHR,40(rMEMP3)
+ stw rCHR,44(rMEMP3)
+ cmplwi cr6,rLEN,256
+ li rMEMP2,128
+ ori r1,r1,0
+ stw rCHR,48(rMEMP3)
+ stw rCHR,52(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,56(rMEMP3)
+ stw rCHR,60(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAlignedx)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+ .align 4
+L(cacheAligned):
+ cmplwi cr1,rLEN,128
+ cmplwi cr6,rLEN,256
+ blt cr1,L(cacheAligned1)
+ li rMEMP2,128
+L(cacheAlignedx):
+ cmpldi cr5,rLEN,640
+ blt cr6,L(cacheAligned128)
+ bgt cr5,L(cacheAligned512)
+ cmplwi cr6,rLEN,512
+ dcbz 0,rMEMP
+ cmplwi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAligned256)
+ .align 5
+/* A simple loop for the longer (>640 byte) lengths.  This form limits
+   branch mispredictions to exactly 1, at loop exit.  */
+L(cacheAligned512):
+ cmpli cr1,rLEN,128
+ blt cr1,L(cacheAligned1)
+ dcbz 0,rMEMP
+ addi rLEN,rLEN,-128
+ addi rMEMP,rMEMP,128
+ b L(cacheAligned512)
+ .align 5
+L(cacheAligned256):
+ cmplwi cr6,rLEN,512
+ dcbz 0,rMEMP
+ cmplwi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+ bge cr6,L(cacheAligned256)
+ blt cr1,L(cacheAligned1)
+ .align 4
+L(cacheAligned128):
+ dcbz 0,rMEMP
+ addi rMEMP,rMEMP,128
+ addi rLEN,rLEN,-128
+ .align 4
+L(cacheAligned1):
+ cmplwi cr1,rLEN,32
+ blt cr1,L(handletail32)
+ addi rMEMP3,rMEMP,32
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ addi rMEMP,rMEMP,32
+ cmplwi cr1,rLEN,32
+ stw rCHR,-8(rMEMP3)
+ stw rCHR,-4(rMEMP3)
+L(cacheAligned2):
+ blt cr1,L(handletail32)
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP3)
+ stw rCHR,4(rMEMP3)
+ stw rCHR,8(rMEMP3)
+ stw rCHR,12(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmplwi cr1,rLEN,32
+ stw rCHR,16(rMEMP3)
+ stw rCHR,20(rMEMP3)
+ stw rCHR,24(rMEMP3)
+ stw rCHR,28(rMEMP3)
+ nop
+L(cacheAligned3):
+ blt cr1,L(handletail32)
+/* At this point we can overrun the store queue (pipe reject) so it is
+ time to slow things down. The store queue can merge two adjacent
+ stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU.
+ So we add "group ending nops" to guarantee that we dispatch only two
+ stores every other cycle. */
+ ori r1,r1,0
+ ori r1,r1,0
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,32(rMEMP3)
+ stw rCHR,36(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,40(rMEMP3)
+ stw rCHR,44(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,48(rMEMP3)
+ stw rCHR,52(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,56(rMEMP3)
+ stw rCHR,60(rMEMP3)
+
+/* We are here because the length or remainder (rLEN) is less than the
+ cache line/sector size and does not justify aggressive loop unrolling.
+ So set up the preconditions for L(medium) and go there. */
+ .align 3
+L(handletail32):
+ cmplwi cr1,rLEN,0
+ beqlr cr1
+ b L(medium)
+
+ .align 4
+L(small):
+/* Memset of 4 bytes or less. */
+ cmplwi cr5, rLEN, 1
+ cmplwi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ cmplwi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt 29, L(medium_29t)
+L(medium_29f):
+ bge cr1, L(medium_27t)
+ bflr 28
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt cr1, L(medium_27f)
+L(medium_27t):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stwu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr 28
+L(medium_28t):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ blr
+END (BP_SYM (memset))
+libc_hidden_builtin_def (memset)
diff --git a/sysdeps/powerpc/powerpc32/power6/wordcopy.c b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
new file mode 100644
index 0000000000..ddf28659f9
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
@@ -0,0 +1,287 @@
+/* _memcopy.c -- subroutines for memory copy functions.
+ Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+ Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = a0;
+ ((op_t *) dstp)[1] = a1;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a0 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+
+ if (len == 1)
+ return;
+
+ a0 = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ switch (align)
+ {
+ case 1:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
+ ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 2:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
+ ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 3:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
+ ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ }
+
+}
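These loops rely on the MERGE macro from memcopy.h; as a sketch, the
big-endian definition assumed here is (the actual header may differ):

    /* Assumed big-endian MERGE: shift the leading word left to drop
       the bytes already stored, and OR in the high bytes of the
       following word.  sh_1 + sh_2 == 8 * OPSIZ.  */
    #define MERGE(w0, sh_1, w1, sh_2) \
      (((w0) << (sh_1)) | ((w1) >> (sh_2)))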
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = a1;
+ ((op_t *) dstp)[0] = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make srcp aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a2 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ if (len == 1)
+ return;
+
+ a2 = a1;
+ len -= 1;
+ }
+
+ switch (align)
+ {
+ case 1:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
+ ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 2:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
+ ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 3:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
+ ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ }
+}
diff --git a/sysdeps/powerpc/powerpc32/power6x/Implies b/sysdeps/powerpc/powerpc32/power6x/Implies
new file mode 100644
index 0000000000..1ac063bbc1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6x/Implies
@@ -0,0 +1,3 @@
+powerpc/powerpc32/power6
+powerpc/powerpc32/power5+
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/power6x/fpu/Implies b/sysdeps/powerpc/powerpc32/power6x/fpu/Implies
new file mode 100644
index 0000000000..081f750093
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6x/fpu/Implies
@@ -0,0 +1,3 @@
+powerpc/powerpc32/power6/fpu
+powerpc/powerpc32/power5+/fpu
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S b/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S
new file mode 100644
index 0000000000..76d8432920
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S
@@ -0,0 +1,42 @@
+/* Round double to long int. POWER6x PowerPC32 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power6"
+/* long int[r3] __lrint (double x[fp1]) */
+ENTRY (__lrint)
+ fctiw fp13,fp1
+ mftgpr r3,fp13
+ blr
+ END (__lrint)
+
+weak_alias (__lrint, lrint)
+
+strong_alias (__lrint, __lrintf)
+weak_alias (__lrint, lrintf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__lrint, __lrintl)
+weak_alias (__lrint, lrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __lrint, lrintl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S
new file mode 100644
index 0000000000..dc0ace821a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S
@@ -0,0 +1,52 @@
+/* lround function. POWER6x, PowerPC32 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long [r3] lround (double x [fp1])
+ IEEE 1003.1 lround function. IEEE specifies "round to the nearest
+ integer value, rounding halfway cases away from zero, regardless of
+ the current rounding mode." However, the PowerPC Architecture defines
+ "Round to Nearest" as "Choose the best approximation. In case of a
+ tie, choose the one that is even (least significant bit 0).".
+ So we pre-round using the V2.02 Floating Round to Integer Nearest
+ instruction before we use the Floating Convert to Integer Word with
+ round to zero instruction. */
+
+ .machine "power6"
+ENTRY (__lround)
+ frin fp2,fp1 /* Pre-round +-0.5. */
+ fctiwz fp3,fp2 /* Convert to Integer Word, rounding toward 0. */
+ mftgpr r3,fp3 /* Transfer fpr3 to r3. */
+ blr
+ END (__lround)
+
+weak_alias (__lround, lround)
+
+strong_alias (__lround, __lroundf)
+weak_alias (__lround, lroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
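
The frin/fctiwz pairing above has a direct portable-C analogue: C99
round() performs the same round-half-away-from-zero step as frin, after
which the value is integral, so truncation cannot change it. A small
illustrative sketch (not the glibc implementation):

#include <math.h>

long
lround_sketch (double x)
{
  double pre = round (x);   /* frin: halfway cases go away from zero */
  return (long) pre;        /* fctiwz: pre is integral, truncation is exact */
}
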
diff --git a/sysdeps/powerpc/powerpc64/970/Implies b/sysdeps/powerpc/powerpc64/970/Implies
new file mode 100644
index 0000000000..ac431fa96e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/970/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4
diff --git a/sysdeps/powerpc/powerpc64/970/fpu/Implies b/sysdeps/powerpc/powerpc64/970/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/970/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power4/Makefile b/sysdeps/powerpc/powerpc64/power4/Makefile
new file mode 100644
index 0000000000..60aa508ba4
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/Makefile
@@ -0,0 +1,6 @@
+# Makefile fragment for POWER4/5/5+.
+
+ifeq ($(subdir),string)
+CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops -ftree-loop-linear
+CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops -ftree-loop-linear
+endif
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/Makefile b/sysdeps/powerpc/powerpc64/power4/fpu/Makefile
new file mode 100644
index 0000000000..89dfa5ef35
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/Makefile
@@ -0,0 +1,5 @@
+# Makefile fragment for POWER4/5/5+ platforms with FPU.
+
+ifeq ($(subdir),math)
+CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops -ftree-loop-linear
+endif
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
new file mode 100644
index 0000000000..4a232e27bf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
@@ -0,0 +1,549 @@
+
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2006 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/************************************************************************/
+/* MODULE_NAME: mpa.c */
+/* */
+/* FUNCTIONS: */
+/* mcr */
+/* acr */
+/* cr */
+/* cpy */
+/* cpymn */
+/* norm */
+/* denorm */
+/* mp_dbl */
+/* dbl_mp */
+/* add_magnitudes */
+/* sub_magnitudes */
+/* add */
+/* sub */
+/* mul */
+/* inv */
+/* dvd */
+/* */
+/* Arithmetic functions for multiple precision numbers. */
+/* Relative errors are bounded */
+/************************************************************************/
+
+
+#include "endian.h"
+#include "mpa.h"
+#include "mpa2.h"
+#include <sys/param.h> /* For MIN() */
+/* mcr() compares the sizes of the mantissas of two multiple precision */
+/* numbers. Mantissas are compared regardless of the signs of the */
+/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */
+/* disregarded. */
+static int mcr(const mp_no *x, const mp_no *y, int p) {
+ long i;
+ long p2 = p;
+ for (i=1; i<=p2; i++) {
+ if (X[i] == Y[i]) continue;
+ else if (X[i] > Y[i]) return 1;
+ else return -1; }
+ return 0;
+}
+
+
+
+/* acr() compares the absolute values of two multiple precision numbers */
+int __acr(const mp_no *x, const mp_no *y, int p) {
+ long i;
+
+ if (X[0] == ZERO) {
+ if (Y[0] == ZERO) i= 0;
+ else i=-1;
+ }
+ else if (Y[0] == ZERO) i= 1;
+ else {
+ if (EX > EY) i= 1;
+ else if (EX < EY) i=-1;
+ else i= mcr(x,y,p);
+ }
+
+ return i;
+}
+
+
+/* cr() compares the values of two multiple precision numbers */
+int __cr(const mp_no *x, const mp_no *y, int p) {
+ int i;
+
+ if (X[0] > Y[0]) i= 1;
+ else if (X[0] < Y[0]) i=-1;
+ else if (X[0] < ZERO ) i= __acr(y,x,p);
+ else i= __acr(x,y,p);
+
+ return i;
+}
+
+
+/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
+void __cpy(const mp_no *x, mp_no *y, int p) {
+ long i;
+
+ EY = EX;
+ for (i=0; i <= p; i++) Y[i] = X[i];
+
+ return;
+}
+
+
+/* Copy a multiple precision number x of precision m into a */
+/* multiple precision number y of precision n. In case n>m, */
+/* the digits of y beyond the m'th are set to zero. In case */
+/* n<m, the digits of x beyond the n'th are ignored. */
+/* x=y is permissible. */
+
+void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
+
+ long i,k;
+ long n2 = n;
+ long m2 = m;
+
+ EY = EX; k=MIN(m2,n2);
+ for (i=0; i <= k; i++) Y[i] = X[i];
+ for ( ; i <= n2; i++) Y[i] = ZERO;
+
+ return;
+}
+
+/* Convert a multiple precision number *x into a double precision */
+/* number *y, normalized case (|x| >= 2**(-1022)) */
+static void norm(const mp_no *x, double *y, int p)
+{
+ #define R radixi.d
+ long i;
+#if 0
+ int k;
+#endif
+ double a,c,u,v,z[5];
+ if (p<5) {
+ if (p==1) c = X[1];
+ else if (p==2) c = X[1] + R* X[2];
+ else if (p==3) c = X[1] + R*(X[2] + R* X[3]);
+ else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
+ }
+ else {
+ for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
+ {a *= TWO; z[1] *= TWO; }
+
+ for (i=2; i<5; i++) {
+ z[i] = X[i]*a;
+ u = (z[i] + CUTTER)-CUTTER;
+ if (u > z[i]) u -= RADIX;
+ z[i] -= u;
+ z[i-1] += u*RADIXI;
+ }
+
+ u = (z[3] + TWO71) - TWO71;
+ if (u > z[3]) u -= TWO19;
+ v = z[3]-u;
+
+ if (v == TWO18) {
+ if (z[4] == ZERO) {
+ for (i=5; i <= p; i++) {
+ if (X[i] == ZERO) continue;
+ else {z[3] += ONE; break; }
+ }
+ }
+ else z[3] += ONE;
+ }
+
+ c = (z[1] + R *(z[2] + R * z[3]))/a;
+ }
+
+ c *= X[0];
+
+ for (i=1; i<EX; i++) c *= RADIX;
+ for (i=1; i>EX; i--) c *= RADIXI;
+
+ *y = c;
+ return;
+#undef R
+}
+
+/* Convert a multiple precision number *x into a double precision */
+/* number *y, denormalized case (|x| < 2**(-1022)) */
+static void denorm(const mp_no *x, double *y, int p)
+{
+ long i,k;
+ long p2 = p;
+ double c,u,z[5];
+#if 0
+ double a,v;
+#endif
+
+#define R radixi.d
+ if (EX<-44 || (EX==-44 && X[1]<TWO5))
+ { *y=ZERO; return; }
+
+ if (p2==1) {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
+ }
+ else if (p2==2) {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
+ }
+ else {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; k=1;}
+ z[3] = X[k];
+ }
+
+ u = (z[3] + TWO57) - TWO57;
+ if (u > z[3]) u -= TWO5;
+
+ if (u==z[3]) {
+ for (i=k+1; i <= p2; i++) {
+ if (X[i] == ZERO) continue;
+ else {z[3] += ONE; break; }
+ }
+ }
+
+ c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
+
+ *y = c*TWOM1032;
+ return;
+
+#undef R
+}
+
+/* Convert a multiple precision number *x into a double precision number *y. */
+/* The result is correctly rounded to the nearest/even. *x is left unchanged */
+
+void __mp_dbl(const mp_no *x, double *y, int p) {
+#if 0
+ int i,k;
+ double a,c,u,v,z[5];
+#endif
+
+ if (X[0] == ZERO) {*y = ZERO; return; }
+
+ if (EX> -42) norm(x,y,p);
+ else if (EX==-42 && X[1]>=TWO10) norm(x,y,p);
+ else denorm(x,y,p);
+}
+
+
+/* dbl_mp() converts a double precision number x into a multiple precision */
+/* number *y. If the precision p is too small the result is truncated. x is */
+/* left unchanged. */
+
+void __dbl_mp(double x, mp_no *y, int p) {
+
+ long i,n;
+ long p2 = p;
+ double u;
+
+ /* Sign */
+ if (x == ZERO) {Y[0] = ZERO; return; }
+ else if (x > ZERO) Y[0] = ONE;
+ else {Y[0] = MONE; x=-x; }
+
+ /* Exponent */
+ for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI;
+ for ( ; x < ONE; EY -= ONE) x *= RADIX;
+
+ /* Digits */
+ n=MIN(p2,4);
+ for (i=1; i<=n; i++) {
+ u = (x + TWO52) - TWO52;
+ if (u>x) u -= ONE;
+ Y[i] = u; x -= u; x *= RADIX; }
+ for ( ; i<=p2; i++) Y[i] = ZERO;
+ return;
+}
+
+
+/* add_magnitudes() adds the magnitudes of *x & *y assuming that */
+/* abs(*x) >= abs(*y) > 0. */
+/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
+/* No guard digit is used. The result equals the exact sum, truncated. */
+/* *x & *y are left unchanged. */
+
+static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i,j,k;
+ long p2 = p;
+
+ EZ = EX;
+
+ i=p2; j=p2+ EY - EX; k=p2+1;
+
+ if (j<1)
+ {__cpy(x,z,p); return; }
+ else Z[k] = ZERO;
+
+ for (; j>0; i--,j--) {
+ Z[k] += X[i] + Y[j];
+ if (Z[k] >= RADIX) {
+ Z[k] -= RADIX;
+ Z[--k] = ONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (; i>0; i--) {
+ Z[k] += X[i];
+ if (Z[k] >= RADIX) {
+ Z[k] -= RADIX;
+ Z[--k] = ONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ if (Z[1] == ZERO) {
+ for (i=1; i<=p2; i++) Z[i] = Z[i+1]; }
+ else EZ += ONE;
+}
+
+
+/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */
+/* abs(*x) > abs(*y) > 0. */
+/* The sign of the difference *z is undefined. x&y may overlap but not x&z */
+/* or y&z. One guard digit is used. The error is less than one ulp. */
+/* *x & *y are left unchanged. */
+
+static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i,j,k;
+ long p2 = p;
+
+ EZ = EX;
+
+ if (EX == EY) {
+ i=j=k=p2;
+ Z[k] = Z[k+1] = ZERO; }
+ else {
+ j= EX - EY;
+ if (j > p2) {__cpy(x,z,p); return; }
+ else {
+ i=p2; j=p2+1-j; k=p2;
+ if (Y[j] > ZERO) {
+ Z[k+1] = RADIX - Y[j--];
+ Z[k] = MONE; }
+ else {
+ Z[k+1] = ZERO;
+ Z[k] = ZERO; j--;}
+ }
+ }
+
+ for (; j>0; i--,j--) {
+ Z[k] += (X[i] - Y[j]);
+ if (Z[k] < ZERO) {
+ Z[k] += RADIX;
+ Z[--k] = MONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (; i>0; i--) {
+ Z[k] += X[i];
+ if (Z[k] < ZERO) {
+ Z[k] += RADIX;
+ Z[--k] = MONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (i=1; Z[i] == ZERO; i++) ;
+ EZ = EZ - i + 1;
+ for (k=1; i <= p2+1; )
+ Z[k++] = Z[i++];
+ for (; k <= p2; )
+ Z[k++] = ZERO;
+
+ return;
+}
+
+
+/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */
+/* but not x&z or y&z. One guard digit is used. The error is less than */
+/* one ulp. *x & *y are left unchanged. */
+
+void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ int n;
+
+ if (X[0] == ZERO) {__cpy(y,z,p); return; }
+ else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
+
+ if (X[0] == Y[0]) {
+ if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; }
+ }
+ else {
+ if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; }
+ else Z[0] = ZERO;
+ }
+ return;
+}
+
+
+/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
+/* overlap but not x&z or y&z. One guard digit is used. The error is */
+/* less than one ulp. *x & *y are left unchanged. */
+
+void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ int n;
+
+ if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; }
+ else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
+
+ if (X[0] != Y[0]) {
+ if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
+ }
+ else {
+ if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
+ else Z[0] = ZERO;
+ }
+ return;
+}
+
+
+/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */
+/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */
+/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */
+/* *x & *y are left unchanged. */
+
+void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i, i1, i2, j, k, k2;
+ long p2 = p;
+ double u, zk, zk2;
+
+ /* Is z=0? */
+ if (X[0]*Y[0]==ZERO)
+ { Z[0]=ZERO; return; }
+
+ /* Multiply, add and carry */
+ k2 = (p2<3) ? p2+p2 : p2+3;
+ zk = Z[k2]=ZERO;
+ for (k=k2; k>1; ) {
+ if (k > p2) {i1=k-p2; i2=p2+1; }
+ else {i1=1; i2=k; }
+#if 1
+ /* Rearrange this inner loop so the fmadd instructions are
+ independent and can execute in parallel on processors that have
+ dual symmetrical FP pipelines. */
+ if (i1 < (i2-1))
+ {
+ /* make sure we have at least 2 iterations */
+ if (((i2 - i1) & 1L) == 1L)
+ {
+ /* Handle the odd iterations case. */
+ zk2 = x->d[i2-1]*y->d[i1];
+ }
+ else
+ zk2 = zero.d;
+ /* Do two multiply/adds per loop iteration, using independent
+ accumulators; zk and zk2. */
+ for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2)
+ {
+ zk += x->d[i]*y->d[j];
+ zk2 += x->d[i+1]*y->d[j-1];
+ }
+ zk += zk2; /* final sum. */
+ }
+ else
+ {
+ /* Special case when iterations is 1. */
+ zk += x->d[i1]*y->d[i1];
+ }
+#else
+ /* The original code. */
+ for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
+#endif
+
+ u = (zk + CUTTER)-CUTTER;
+ if (u > zk) u -= RADIX;
+ Z[k] = zk - u;
+ zk = u*RADIXI;
+ --k;
+ }
+ Z[k] = zk;
+
+ /* Is there a carry beyond the most significant digit? */
+ if (Z[1] == ZERO) {
+ for (i=1; i<=p2; i++) Z[i]=Z[i+1];
+ EZ = EX + EY - 1; }
+ else
+ EZ = EX + EY;
+
+ Z[0] = X[0] * Y[0];
+ return;
+}
+
+
+/* Invert a multiple precision number. Set *y = 1 / *x. */
+/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */
+/* 2.001*r**(1-p) for p>3. */
+/* *x=0 is not permissible. *x is left unchanged. */
+
+void __inv(const mp_no *x, mp_no *y, int p) {
+ long i;
+#if 0
+ int l;
+#endif
+ double t;
+ mp_no z,w;
+ static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
+ const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
+
+ __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p);
+ t=ONE/t; __dbl_mp(t,y,p); EY -= EX;
+
+ for (i=0; i<np1[p]; i++) {
+ __cpy(y,&w,p);
+ __mul(x,&w,y,p);
+ __sub(&mptwo,y,&z,p);
+ __mul(&w,&z,y,p);
+ }
+ return;
+}
+
+
+/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
+/* are left unchanged. x&y may overlap but not x&z or y&z. */
+/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */
+/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */
+
+void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ mp_no w;
+
+ if (X[0] == ZERO) Z[0] = ZERO;
+ else {__inv(y,&w,p); __mul(x,&w,z,p);}
+ return;
+}
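
Reading the routines above, an mp_no carries a sign in d[0] (one of -1,
0, +1), a radix-2**24 exponent e, and integer digits d[1..p], each stored
in a double. On that reading, the value an mp_no denotes can be
reconstructed as in this illustrative sketch (it assumes RADIX = 2**24 as
in glibc's mpa.h; the function name is hypothetical):

#include <math.h>

/* value = d[0] * (d[1]*RADIX**(e-1) + d[2]*RADIX**(e-2) + ...
                   + d[p]*RADIX**(e-p))  */
double
mp_value_sketch (const double *d, int e, int p)
{
  double v = 0.0;
  for (int i = 1; i <= p; i++)
    v += d[i] * pow (0x1p24, (double) (e - i));   /* RADIX**(e-i) */
  return d[0] * v;
}
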
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c b/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
new file mode 100644
index 0000000000..b22b0dfeab
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
@@ -0,0 +1,66 @@
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2007 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/**************************************************************************/
+/* MODULE_NAME:slowexp.c */
+/* */
+/* FUNCTION:slowexp */
+/* */
+/* FILES NEEDED:mpa.h */
+/* mpa.c mpexp.c */
+/* */
+/*Converting from double precision to Multi-precision and calculating */
+/* e^x */
+/**************************************************************************/
+#include "math_private.h"
+
+#ifdef NO_LONG_DOUBLE
+#include "mpa.h"
+void __mpexp(mp_no *x, mp_no *y, int p);
+#endif
+
+/* Converting from double precision to multi-precision and calculating e^x */
+double __slowexp(double x) {
+#ifdef NO_LONG_DOUBLE
+ double w,z,res,eps=3.0e-26;
+ int p;
+ mp_no mpx, mpy, mpz,mpw,mpeps,mpcor;
+
+ p=6;
+ __dbl_mp(x,&mpx,p); /* Convert a double precision number x */
+ /* into a multiple precision number mpx with prec. p. */
+ __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */
+ __dbl_mp(eps,&mpeps,p);
+ __mul(&mpeps,&mpy,&mpcor,p);
+ __add(&mpy,&mpcor,&mpw,p);
+ __sub(&mpy,&mpcor,&mpz,p);
+ __mp_dbl(&mpw, &w, p);
+ __mp_dbl(&mpz, &z, p);
+ if (w == z) return w;
+ else { /* if the result is not accurate enough */
+ p = 32;
+ __dbl_mp(x,&mpx,p);
+ __mpexp(&mpx, &mpy, p);
+ __mp_dbl(&mpy, &res, p);
+ return res;
+ }
+#else
+ return (double) __ieee754_expl((long double)x);
+#endif
+}
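
The accept-or-escalate structure of __slowexp is: compute e^x at a modest
precision, widen the result by a relative error bound eps, and accept it
only when both interval endpoints round to the same double. The sketch
below shows just that test, with long double standing in for the
multi-precision type (hypothetical; the real code uses __mpexp and
retries at p = 32 on failure):

#include <math.h>

double
slowexp_pattern_sketch (double x)
{
  long double y   = expl ((long double) x);   /* cheap first pass */
  long double cor = 3.0e-26L * y;             /* scaled error bound */
  double w = (double) (y + cor);
  double z = (double) (y - cor);
  if (w == z)
    return w;   /* both endpoints round the same; result is correct */
  return w;     /* here the real code recomputes at higher precision */
}
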
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c b/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
new file mode 100644
index 0000000000..ad147a89a6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
@@ -0,0 +1,94 @@
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2006 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/*************************************************************************/
+/* MODULE_NAME:slowpow.c */
+/* */
+/* FUNCTION:slowpow */
+/* */
+/*FILES NEEDED:mpa.h */
+/* mpa.c mpexp.c mplog.c halfulp.c */
+/* */
+/* Given two IEEE double machine numbers x and y, the routine computes */
+/* the correctly rounded (to nearest) value of x^y. The result is */
+/* first calculated by multiplication (in halfulp.c); if that result */
+/* is not accurate enough, the routine converts x and y into */
+/* multi-precision doubles and recomputes. */
+/*************************************************************************/
+
+#include "mpa.h"
+#include "math_private.h"
+
+void __mpexp (mp_no * x, mp_no * y, int p);
+void __mplog (mp_no * x, mp_no * y, int p);
+double ulog (double);
+double __halfulp (double x, double y);
+
+double
+__slowpow (double x, double y, double z)
+{
+ double res, res1;
+ long double ldw, ldz, ldpp;
+ static const long double ldeps = 0x4.0p-96;
+
+ res = __halfulp (x, y); /* halfulp() returns -10 or x^y */
+ if (res >= 0)
+ return res; /* if result was really computed by halfulp */
+ /* else, if result was not really computed by halfulp */
+
+ /* Compute pow as long double, 106 bits */
+ ldz = __ieee754_logl ((long double) x);
+ ldw = (long double) y *ldz;
+ ldpp = __ieee754_expl (ldw);
+ res = (double) (ldpp + ldeps);
+ res1 = (double) (ldpp - ldeps);
+
+ if (res != res1) /* if result still not accurate enough */
+ { /* use mpa for higher precision. */
+ mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
+ static const mp_no eps = { -3, {1.0, 4.0} };
+ int p;
+
+ p = 10; /* p=precision 240 bits */
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p); /* log(x) = z */
+ __mul (&mpy, &mpz, &mpw, p); /* y * z =w */
+ __mpexp (&mpw, &mpp, p); /* e^w =pp */
+ __add (&mpp, &eps, &mpr, p); /* pp+eps =r */
+ __mp_dbl (&mpr, &res, p);
+ __sub (&mpp, &eps, &mpr1, p); /* pp -eps =r1 */
+ __mp_dbl (&mpr1, &res1, p); /* converting into double precision */
+ if (res == res1)
+ return res;
+
+ /* If we get here the result wasn't calculated exactly; continue
+ with a more exact calculation using 768 bits. */
+ p = 32;
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p); /* log(x) = z */
+ __mul (&mpy, &mpz, &mpw, p); /* y*z =w */
+ __mpexp (&mpw, &mpp, p); /* e^w=pp */
+ __mp_dbl (&mpp, &res, p); /* converting into double precision */
+ }
+ return res;
+}
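
The first phase of __slowpow evaluates x^y as exp(y * log(x)) in 106-bit
long double and applies the same endpoint test, nudging the result by
ldeps before rounding to double. A hedged sketch of that phase alone (the
240- and 768-bit mpa retries are omitted, and the name is illustrative):

#include <math.h>

double
slowpow_phase1_sketch (double x, double y, int *accurate)
{
  long double ldz = logl ((long double) x);
  long double pp  = expl ((long double) y * ldz);
  double res  = (double) (pp + 0x4.0p-96L);   /* nudge up by ldeps */
  double res1 = (double) (pp - 0x4.0p-96L);   /* nudge down by ldeps */
  *accurate = (res == res1);   /* equal: all 53 result bits are settled */
  return res;
}
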
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c b/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c
new file mode 100644
index 0000000000..d2b62b2672
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c
@@ -0,0 +1,62 @@
+/* Double-precision floating point square root wrapper.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math_ldbl_opt.h>
+#include "math.h"
+#include "math_private.h"
+#include <fenv_libc.h>
+
+#ifdef __STDC__
+double
+__sqrt (double x) /* wrapper sqrt */
+#else
+double
+__sqrt (x) /* wrapper sqrt */
+ double x;
+#endif
+{
+ double z;
+/* Power4 (ISA V2.0) and above implement sqrt in hardware. */
+ __asm __volatile (
+ " fsqrt %0,%1\n"
+ : "=f" (z)
+ : "f" (x));
+#ifdef _IEEE_LIBM
+ return z;
+#else
+ if (__builtin_expect (_LIB_VERSION == _IEEE_, 0))
+ return z;
+
+ if (__builtin_expect (x != x, 0))
+ return z;
+
+ if (__builtin_expect (x < 0.0, 0))
+ return __kernel_standard (x, x, 26); /* sqrt(negative) */
+ else
+ return z;
+#endif
+}
+
+weak_alias (__sqrt, sqrt)
+#ifdef NO_LONG_DOUBLE
+ strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0);
+#endif
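
Since fsqrt already yields a QNaN for negative or NaN input, the wrapper
above only has to decide whether to report a domain error. A plain-C
sketch of that dispatch, with GCC's __builtin_sqrt standing in for the
fsqrt inline asm and errno standing in for the __kernel_standard call
(illustrative only):

#include <errno.h>

double
sqrt_wrapper_sketch (double x)
{
  double z = __builtin_sqrt (x);  /* hardware sqrt; QNaN if x < 0 or NaN */
  if (x != x)
    return z;                     /* NaN input: propagate quietly */
  if (x < 0.0)
    errno = EDOM;                 /* sqrt(negative): flag the domain error */
  return z;
}
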
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c b/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c
new file mode 100644
index 0000000000..4784869f07
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c
@@ -0,0 +1,60 @@
+/* Single-precision floating point square root wrapper.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include "math.h"
+#include "math_private.h"
+#include <fenv_libc.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+#ifdef __STDC__
+float
+__sqrtf (float x) /* wrapper sqrtf */
+#else
+float
+__sqrtf (x) /* wrapper sqrtf */
+ float x;
+#endif
+{
+#ifdef _IEEE_LIBM
+ return __ieee754_sqrtf (x);
+#else
+ float z;
+/* Power4 (ISA V2.0) and above implement sqrtf in hardware. */
+ __asm __volatile (
+ " fsqrts %0,%1\n"
+ : "=f" (z)
+ : "f" (x));
+
+ if (__builtin_expect (_LIB_VERSION == _IEEE_, 0))
+ return z;
+
+ if (__builtin_expect (x != x, 0))
+ return z;
+
+ if (__builtin_expect (x < 0.0, 0))
+ /* sqrtf(negative) */
+ return (float) __kernel_standard ((double) x, (double) x, 126);
+ else
+ return z;
+#endif
+}
+
+weak_alias (__sqrtf, sqrtf)
diff --git a/sysdeps/powerpc/powerpc64/power4/memcmp.S b/sysdeps/powerpc/powerpc64/power4/memcmp.S
new file mode 100644
index 0000000000..a5e0c758df
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/memcmp.S
@@ -0,0 +1,982 @@
+/* Optimized memcmp implementation for PowerPC64.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+ .machine power4
+EALIGN (BP_SYM(memcmp), 4, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rN r5 /* max string length */
+/* Note: The Bounded pointer support in this code is broken. This code
+ was inherited from PPC32 and that support was never completed.
+ Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */
+#define rWORD1 r6 /* current word in s1 */
+#define rWORD2 r7 /* current word in s2 */
+#define rWORD3 r8 /* next word in s1 */
+#define rWORD4 r9 /* next word in s2 */
+#define rWORD5 r10 /* next word in s1 */
+#define rWORD6 r11 /* next word in s2 */
+#define rBITDIF r12 /* bits that differ in s1 & s2 words */
+#define rWORD7 r30 /* next word in s1 */
+#define rWORD8 r31 /* next word in s2 */
+
+ xor rTMP, rSTR2, rSTR1
+ cmpldi cr6, rN, 0
+ cmpldi cr1, rN, 12
+ clrldi. rTMP, rTMP, 61
+ clrldi rBITDIF, rSTR1, 61
+ cmpldi cr5, rBITDIF, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0,rSTR1
+ dcbt 0,rSTR2
+/* If less than 8 bytes or not aligned, use the unaligned
+ byte loop. */
+ blt cr1, L(bytealigned)
+ std rWORD8,-8(r1)
+ cfi_offset(rWORD8,-8)
+ std rWORD7,-16(r1)
+ cfi_offset(rWORD7,-16)
+ bne L(unaligned)
+/* At this point we know both strings have the same alignment and the
+ compare length is at least 8 bytes. rBITDIF contains the low order
+ 3 bits of rSTR1 and cr5 contains the result of the logical compare
+ of rBITDIF to 0. If rBITDIF == 0 then we are already double word
+ aligned and can perform the DWaligned loop.
+
+ Otherwise we know the two strings have the same alignment (but not
+ yet DW). So we can force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
+ eliminate bits preceding the first byte. Since we want to join the
+ normal (DWaligned) compare loop, starting at the second double word,
+ we need to adjust the length (rN) and special case the loop
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
+ .align 4
+L(samealignment):
+ clrrdi rSTR1, rSTR1, 3
+ clrrdi rSTR2, rSTR2, 3
+ beq cr5, L(DWaligned)
+ add rN, rN, rBITDIF
+ sldi r11, rBITDIF, 3
+ srdi rTMP, rN, 5 /* Divide by 32 */
+ andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ cmpldi cr1, rBITDIF, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ beq L(dPs4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
+
+/* Remainder is 8 */
+ .align 3
+L(dsP1):
+ sld rWORD5, rWORD1, r11
+ sld rWORD6, rWORD2, r11
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+/* Do something useful in this cycle since we have to branch anyway. */
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ b L(dP1e)
+/* Remainder is 16 */
+ .align 4
+L(dPs2):
+ sld rWORD5, rWORD1, r11
+ sld rWORD6, rWORD2, r11
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+/* Do something useful in this cycle since we have to branch anyway. */
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ b L(dP2e)
+/* Remainder is 24 */
+ .align 4
+L(dPs3):
+ sld rWORD3, rWORD1, r11
+ sld rWORD4, rWORD2, r11
+ cmpld cr1, rWORD3, rWORD4
+ b L(dP3e)
+/* Count is a multiple of 32, remainder is 0 */
+ .align 4
+L(dPs4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ sld rWORD1, rWORD1, r11
+ sld rWORD2, rWORD2, r11
+ cmpld cr0, rWORD1, rWORD2
+ b L(dP4e)
+
+/* At this point we know both strings are double word aligned and the
+ compare length is at least 8 bytes. */
+ .align 4
+L(DWaligned):
+ andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ srdi rTMP, rN, 5 /* Divide by 32 */
+ cmpldi cr1, rBITDIF, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ beq L(dP4)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
+
+/* Remainder is 8 */
+ .align 4
+L(dP1):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
+ (8-15 byte compare), we want to use only volatile registers. This
+ means we can avoid restoring non-volatile registers since we did not
+ change any on the early exit path. The key here is the non-early
+ exit path only cares about the condition code (cr5), not about which
+ register pair was used. */
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+L(dP1e):
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ bne cr0, L(dLcr0)
+
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+ bne cr1, L(dLcr1)
+ cmpld cr5, rWORD7, rWORD8
+ bdnz L(dLoop)
+ bne cr6, L(dLcr6)
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ .align 3
+L(dP1x):
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 16 */
+ .align 4
+L(dP2):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+L(dP2e):
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
+ b L(dLoop2)
+/* Again we are on an early exit path (16-23 byte compare); we want to
+ use only volatile registers and avoid restoring non-volatile
+ registers. */
+ .align 4
+L(dP2x):
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+ cmpld cr5, rWORD3, rWORD4
+ sldi. r12, rN, 3
+ bne cr6, L(dLcr6)
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr5, L(dLcr5)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 24 */
+ .align 4
+L(dP3):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ ld rWORD3, 0(rSTR1)
+ ld rWORD4, 0(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+L(dP3e):
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
+ b L(dLoop1)
+/* Again we are on an early exit path (24-31 byte compare); we want to
+ use only volatile registers and avoid restoring non-volatile
+ registers. */
+ .align 4
+L(dP3x):
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+ cmpld cr5, rWORD1, rWORD2
+ sldi. r12, rN, 3
+ bne cr1, L(dLcr1)
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+ bne cr6, L(dLcr6)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne cr5, L(dLcr5)
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Count is a multiple of 32, remainder is 0 */
+ .align 4
+L(dP4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+L(dP4e):
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr0, L(dLcr0)
+ bne cr1, L(dLcr1)
+ bdz- L(d24) /* Adjust CTR as we start with +4 */
+/* This is the primary loop */
+ .align 4
+L(dLoop):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+L(dLoop1):
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+L(dLoop2):
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr0, L(dLcr0)
+L(dLoop3):
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+ bne- cr1, L(dLcr1)
+ cmpld cr0, rWORD1, rWORD2
+ bdnz+ L(dLoop)
+
+L(dL4):
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmpld cr5, rWORD7, rWORD8
+L(d44):
+ bne cr0, L(dLcr0)
+L(d34):
+ bne cr1, L(dLcr1)
+L(d24):
+ bne cr6, L(dLcr6)
+L(d14):
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5)
+L(d04):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ beq L(zeroLength)
+/* At this point we have a remainder of 1 to 7 bytes to compare. Since
+ we are aligned it is safe to load the whole double word, and use
+ shift right double to eliminate bits beyond the compare length. */
+L(d00):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ cmpld cr5, rWORD1, rWORD2
+ bne cr5, L(dLcr5x)
+ li rRTN, 0
+ blr
+ .align 4
+L(dLcr0):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgtlr cr0
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr1):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgtlr cr1
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr6):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgtlr cr6
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr5):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+L(dLcr5x):
+ li rRTN, 1
+ bgtlr cr5
+ li rRTN, -1
+ blr
+
+ .align 4
+L(bytealigned):
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ beq- cr6, L(zeroLength)
+
+/* We need to prime this loop. This loop is swing modulo scheduled
+ to avoid pipe delays. The dependent instruction latency (load to
+ compare to conditional branch) is 2 to 3 cycles. In this loop each
+ dispatch group ends in a branch and takes 1 cycle. Effectively
+ the first iteration of the loop only serves to load operands and
+ branches based on compares are delayed until the next loop.
+
+ So we must precondition some registers and condition codes so that
+ we don't exit the loop early on the first iteration. */
+
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ bdz- L(b11)
+ cmpld cr0, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
+ bdz- L(b12)
+ cmpld cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
+ bdz- L(b13)
+ .align 4
+L(bLoop):
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- cr0, L(bLcr0)
+
+ cmpld cr6, rWORD5, rWORD6
+ bdz- L(b3i)
+
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- cr1, L(bLcr1)
+
+ cmpld cr0, rWORD1, rWORD2
+ bdz- L(b2i)
+
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne- cr6, L(bLcr6)
+
+ cmpld cr1, rWORD3, rWORD4
+ bdnz+ L(bLoop)
+
+/* We speculatively load bytes before we have tested the previous
+ bytes. But we must avoid overrunning the length (in the ctr) to
+ prevent these speculative loads from causing a segfault. In this
+ case the loop will exit early (before all the pending bytes are
+ tested), so we must complete the pending operations before
+ returning. */
+L(b1i):
+ bne- cr0, L(bLcr0)
+ bne- cr1, L(bLcr1)
+ b L(bx56)
+ .align 4
+L(b2i):
+ bne- cr6, L(bLcr6)
+ bne- cr0, L(bLcr0)
+ b L(bx34)
+ .align 4
+L(b3i):
+ bne- cr1, L(bLcr1)
+ bne- cr6, L(bLcr6)
+ b L(bx12)
+ .align 4
+L(bLcr0):
+ li rRTN, 1
+ bgtlr cr0
+ li rRTN, -1
+ blr
+L(bLcr1):
+ li rRTN, 1
+ bgtlr cr1
+ li rRTN, -1
+ blr
+L(bLcr6):
+ li rRTN, 1
+ bgtlr cr6
+ li rRTN, -1
+ blr
+
+L(b13):
+ bne- cr0, L(bx12)
+ bne- cr1, L(bx34)
+L(bx56):
+ sub rRTN, rWORD5, rWORD6
+ blr
+ nop
+L(b12):
+ bne- cr0, L(bx12)
+L(bx34):
+ sub rRTN, rWORD3, rWORD4
+ blr
+L(b11):
+L(bx12):
+ sub rRTN, rWORD1, rWORD2
+ blr
+ .align 4
+L(zeroLengthReturn):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+L(zeroLength):
+ li rRTN, 0
+ blr
+
+ .align 4
+/* At this point we know the strings have different alignment and the
+ compare length is at least 8 bytes. rBITDIF contains the low order
+ 3 bits of rSTR1 and cr5 contains the result of the logical compare
+ of rBITDIF to 0. If rBITDIF == 0 then rSTR1 is double word
+ aligned and we can perform the DWunaligned loop.
+
+ Otherwise we know that rSTR1 is not already DW aligned. So we can
+ force the string addresses to the next lower DW boundary and
+ special case this first DW using shift left to eliminate bits
+ preceding the first byte. Since we want to join the normal
+ (DWaligned) compare loop, starting at the second double word, we
+ need to adjust the length (rN) and special case the loop versioning
+ for the first DW. This ensures that the loop count is correct and
+ the first DW (shifted) is in the expected register pair. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rB r27 /* Left rotation temp for rWORD2. */
+#define rD r26 /* Left rotation temp for rWORD4. */
+#define rF r25 /* Left rotation temp for rWORD6. */
+#define rH r24 /* Left rotation temp for rWORD8. */
+#define rA r0 /* Right rotation temp for rWORD2. */
+#define rC r12 /* Right rotation temp for rWORD4. */
+#define rE r0 /* Right rotation temp for rWORD6. */
+#define rG r12 /* Right rotation temp for rWORD8. */
+L(unaligned):
+ std r29,-24(r1)
+ cfi_offset(r29,-24)
+ clrldi rSHL, rSTR2, 61
+ beq- cr6, L(duzeroLength)
+ std r28,-32(r1)
+ cfi_offset(r28,-32)
+ beq cr5, L(DWunaligned)
+ std r27,-40(r1)
+ cfi_offset(r27,-40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
+ in the 1st rSTR1 DW. */
+ sub r27, rSTR2, rBITDIF
+/* But do not attempt to address the DW before the DW that contains
+ the actual start of rSTR2. */
+ clrrdi rSTR2, rSTR2, 3
+ std r26,-48(r1)
+ cfi_offset(r26,-48)
+/* Compute the left/right shift counts for the unaligned rSTR2,
+ compensating for the logical (DW aligned) start of rSTR1. */
+ clrldi rSHL, r27, 61
+ clrrdi rSTR1, rSTR1, 3
+ std r25,-56(r1)
+ cfi_offset(r25,-56)
+ sldi rSHL, rSHL, 3
+ cmpld cr5, r27, rSTR2
+ add rN, rN, rBITDIF
+ sldi r11, rBITDIF, 3
+ std r24,-64(r1)
+ cfi_offset(r24,-64)
+ subfic rSHR, rSHL, 64
+ srdi rTMP, rN, 5 /* Divide by 32 */
+ andi. rBITDIF, rN, 24 /* Get the DW remainder */
+/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
+ this special case those bits may be discarded anyway. Also we
+ must avoid loading a DW where none of the bits are part of rSTR2 as
+ this may cross a page boundary and cause a page fault. */
+ li rWORD8, 0
+ blt cr5, L(dus0)
+ ld rWORD8, 0(rSTR2)
+ la rSTR2, 8(rSTR2)
+ sld rWORD8, rWORD8, rSHL
+
+L(dus0):
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ cmpldi cr1, rBITDIF, 16
+ cmpldi cr7, rN, 32
+ srd rG, rWORD2, rSHR
+ clrldi rN, rN, 61
+ beq L(duPs4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, rG, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
+
+/* Remainder is 8 */
+ .align 4
+L(dusP1):
+ sld rB, rWORD2, rSHL
+ sld rWORD7, rWORD1, r11
+ sld rWORD8, rWORD8, r11
+ bge cr7, L(duP1e)
+/* At this point we exit early with the first double word compare
+ complete and remainder of 0 to 7 bytes. See L(du14) for details on
+ how we handle the remaining bytes. */
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ b L(dutrim)
+/* Remainder is 16 */
+ .align 4
+L(duPs2):
+ sld rH, rWORD2, rSHL
+ sld rWORD5, rWORD1, r11
+ sld rWORD6, rWORD8, r11
+ b L(duP2e)
+/* Remainder is 24 */
+ .align 4
+L(duPs3):
+ sld rF, rWORD2, rSHL
+ sld rWORD3, rWORD1, r11
+ sld rWORD4, rWORD8, r11
+ b L(duP3e)
+/* Count is a multiple of 32, remainder is 0 */
+ .align 4
+L(duPs4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, rG, rWORD8
+ sld rD, rWORD2, rSHL
+ sld rWORD1, rWORD1, r11
+ sld rWORD2, rWORD8, r11
+ b L(duP4e)
+
+/* At this point we know rSTR1 is double word aligned and the
+ compare length is at least 8 bytes. */
+ .align 4
+L(DWunaligned):
+ std r27,-40(r1)
+ cfi_offset(r27,-40)
+ clrrdi rSTR2, rSTR2, 3
+ std r26,-48(r1)
+ cfi_offset(r26,-48)
+ srdi rTMP, rN, 5 /* Divide by 32 */
+ std r25,-56(r1)
+ cfi_offset(r25,-56)
+ andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ std r24,-64(r1)
+ cfi_offset(r24,-64)
+ sldi rSHL, rSHL, 3
+ ld rWORD6, 0(rSTR2)
+ ldu rWORD8, 8(rSTR2)
+ cmpldi cr1, rBITDIF, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ subfic rSHR, rSHL, 64
+ sld rH, rWORD6, rSHL
+ beq L(duP4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
+
+/* Remainder is 8 */
+ .align 4
+L(duP1):
+ srd rG, rWORD8, rSHR
+ ld rWORD7, 0(rSTR1)
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP1x)
+L(duP1e):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ srd rA, rWORD2, rSHR
+ sld rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ srd rC, rWORD4, rSHR
+ sld rF, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, rC, rD
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ srd rE, rWORD6, rSHR
+ sld rH, rWORD6, rSHL
+ bne cr0, L(duLcr0)
+ or rWORD6, rE, rF
+ cmpld cr6, rWORD5, rWORD6
+ b L(duLoop3)
+ .align 4
+/* At this point we exit early with the first double word compare
+ complete and remainder of 0 to 7 bytes. See L(du14) for details on
+ how we handle the remaining bytes. */
+L(duP1x):
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ b L(dutrim)
+/* Remainder is 16 */
+ .align 4
+L(duP2):
+ srd rE, rWORD8, rSHR
+ ld rWORD5, 0(rSTR1)
+ or rWORD6, rE, rH
+ sld rH, rWORD8, rSHL
+L(duP2e):
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ srd rG, rWORD8, rSHR
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP2x)
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd rA, rWORD2, rSHR
+ sld rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srd rC, rWORD4, rSHR
+ sld rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ cmpld cr1, rWORD3, rWORD4
+ b L(duLoop2)
+ .align 4
+L(duP2x):
+ cmpld cr5, rWORD7, rWORD8
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ b L(dutrim)
+
+/* Remainder is 24 */
+ .align 4
+L(duP3):
+ srd rC, rWORD8, rSHR
+ ld rWORD3, 0(rSTR1)
+ sld rF, rWORD8, rSHL
+ or rWORD4, rC, rH
+L(duP3e):
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ srd rE, rWORD6, rSHR
+ sld rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd rG, rWORD8, rSHR
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP3x)
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd rA, rWORD2, rSHR
+ sld rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+ cmpld cr0, rWORD1, rWORD2
+ b L(duLoop1)
+ .align 4
+L(duP3x):
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+ bne cr1, L(duLcr1)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ b L(dutrim)
+
+/* Count is a multiple of 32, remainder is 0 */
+ .align 4
+L(duP4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ srd rA, rWORD8, rSHR
+ ld rWORD1, 0(rSTR1)
+ sld rD, rWORD8, rSHL
+ or rWORD2, rA, rH
+L(duP4e):
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ srd rC, rWORD4, rSHR
+ sld rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ bne cr0, L(duLcr0)
+ srd rE, rWORD6, rSHR
+ sld rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd rG, rWORD8, rSHR
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ cmpld cr5, rWORD7, rWORD8
+ bdz- L(du24) /* Adjust CTR as we start with +4 */
+/* This is the primary loop */
+ .align 4
+L(duLoop):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srd rA, rWORD2, rSHR
+ sld rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+L(duLoop1):
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srd rC, rWORD4, rSHR
+ sld rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+L(duLoop2):
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr0, L(duLcr0)
+ srd rE, rWORD6, rSHR
+ sld rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+L(duLoop3):
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ bne- cr1, L(duLcr1)
+ srd rG, rWORD8, rSHR
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ bdnz+ L(duLoop)
+
+L(duL4):
+ bne cr1, L(duLcr1)
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmpld cr5, rWORD7, rWORD8
+L(du44):
+ bne cr0, L(duLcr0)
+L(du34):
+ bne cr1, L(duLcr1)
+L(du24):
+ bne cr6, L(duLcr6)
+L(du14):
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+/* At this point we have a remainder of 1 to 7 bytes to compare. We use
+ shift right double to eliminate bits beyond the compare length.
+ This allows the use of double word subtract to compute the final
+ result.
+
+ However it may not be safe to load rWORD2 which may be beyond the
+ string length. So we compare the bit length of the remainder to
+ the right shift count (rSHR). If the bit count is less than or equal
+ we do not need to load rWORD2 (all significant bits are already in
+ rB). */
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ .align 4
+L(dutrim):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD8,-8(r1)
+ subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
+ or rWORD2, rA, rB
+ ld rWORD7,-16(r1)
+ ld r29,-24(r1)
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+ li rRTN, 0
+ cmpld cr0, rWORD1, rWORD2
+ ld r26,-48(r1)
+ ld r25,-56(r1)
+ beq cr0, L(dureturn24)
+ li rRTN, 1
+ ld r24,-64(r1)
+ bgtlr cr0
+ li rRTN, -1
+ blr
+ .align 4
+L(duLcr0):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgt cr0, L(dureturn29)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr1):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr6):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr5):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 3
+L(duZeroReturn):
+ li rRTN,0
+ .align 4
+L(dureturn):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+L(dureturn29):
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+L(dureturn27):
+ ld r27,-40(r1)
+L(dureturn26):
+ ld r26,-48(r1)
+L(dureturn25):
+ ld r25,-56(r1)
+L(dureturn24):
+ ld r24,-64(r1)
+ blr
+L(duzeroLength):
+ li rRTN,0
+ blr
+
+END (BP_SYM (memcmp))
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp, bcmp)
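
Stripped of the instruction scheduling, the strategy above compares
aligned doublewords as unsigned integers (valid on big-endian POWER,
where word order matches byte order) and handles the 1-7 byte tail by
shifting both words right past the bytes beyond the length. A portable
sketch of that core, assuming big-endian byte order and 8-byte-aligned
inputs (names are illustrative):

#include <stddef.h>
#include <stdint.h>

int
memcmp_dw_sketch (const uint64_t *s1, const uint64_t *s2, size_t nbytes)
{
  for (size_t i = 0; i < nbytes / 8; i++)   /* whole doublewords */
    if (s1[i] != s2[i])
      return s1[i] > s2[i] ? 1 : -1;

  size_t rem = nbytes % 8;                  /* 0-7 tail bytes */
  if (rem != 0)
    {
      /* The inputs are aligned, so loading the whole final word cannot
         cross a page boundary; shift right to discard the bytes beyond
         the compare length (they are the low-order bytes here).  */
      int sh = 8 * (8 - (int) rem);
      uint64_t w1 = s1[nbytes / 8] >> sh;
      uint64_t w2 = s2[nbytes / 8] >> sh;
      if (w1 != w2)
        return w1 > w2 ? 1 : -1;
    }
  return 0;
}
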
diff --git a/sysdeps/powerpc/powerpc64/power4/memcopy.h b/sysdeps/powerpc/powerpc64/power4/memcopy.h
new file mode 100644
index 0000000000..9a4ff79f4a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/memcopy.h
@@ -0,0 +1 @@
+#include "../../powerpc32/power4/memcopy.h"
diff --git a/sysdeps/powerpc/powerpc64/power4/memcpy.S b/sysdeps/powerpc/powerpc64/power4/memcpy.S
new file mode 100644
index 0000000000..56f313b4b8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/memcpy.S
@@ -0,0 +1,418 @@
+/* Optimized memcpy implementation for PowerPC64.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+ Memcpy handles short copies (< 32 bytes) using binary move blocks
+ (no loops) of lwz/stw. The tail (remaining 1-3 bytes) is handled
+ with the appropriate combination of byte and halfword load/stores.
+ There is minimal effort to optimize the alignment of short moves.
+ The 64-bit implementations of POWER3 and POWER4 do a reasonable job
+ of handling unaligned load/stores that do not cross 32-byte boundaries.
+
+ Longer moves (>= 32 bytes) justify the effort to get at least the
+ destination doubleword (8-byte) aligned. Further optimization is
+ possible when both source and destination are doubleword aligned.
+ Each case has an optimized unrolled loop. */
+
+ .machine power4
+EALIGN (BP_SYM (memcpy), 5, 0)
+ CALL_MCOUNT 3
+
+ cmpldi cr1,5,31
+ neg 0,3
+ std 3,-16(1)
+ std 31,-8(1)
+ cfi_offset(31,-8)
+ andi. 11,3,7 /* check alignment of dst. */
+ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */
+ clrldi 10,4,61 /* check alignment of src. */
+ cmpldi cr6,5,8
+ ble- cr1,.L2 /* If move < 32 bytes use short move code. */
+ cmpld cr6,10,11
+ mr 12,4
+ srdi 9,5,3 /* Number of full double words remaining. */
+ mtcrf 0x01,0
+ mr 31,5
+ beq .L0
+
+ subf 31,0,5
+ /* Move 0-7 bytes as needed to get the destination doubleword aligned. */
+1: bf 31,2f
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: bf 30,4f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+4: bf 29,0f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+0:
+ clrldi 10,12,61 /* check alignment of src again. */
+ srdi 9,31,3 /* Number of full double words remaining. */
+
+ /* Copy doublewords from source to destination, assuming the
+ destination is aligned on a doubleword boundary.
+
+ At this point we know there are at least 25 bytes left (32-7) to copy.
+ The next step is to determine if the source is also doubleword aligned.
+ If not, branch to the unaligned move code at .L6, which uses
+ a load, shift, store strategy.
+
+ Otherwise source and destination are doubleword aligned, and we can
+ use the optimized doubleword copy loop. */
+.L0:
+ clrldi 11,31,61
+ mtcrf 0x01,9
+ cmpldi cr1,11,0
+ bne- cr6,.L6 /* If source is not DW aligned. */
+
+ /* Move doublewords where destination and source are DW aligned.
+     Use an unrolled loop to copy 4 doublewords (32 bytes) per iteration.
+     If the copy is not an exact multiple of 32 bytes, 1-3
+     doublewords are copied as needed to set up the main loop.  After
+     the main loop exits there may be a tail of 1-7 bytes.  These bytes are
+ copied a word/halfword/byte at a time as needed to preserve alignment. */
+
+ srdi 8,31,5
+ cmpldi cr1,9,4
+ cmpldi cr6,11,0
+ mr 11,12
+
+ bf 30,1f
+ ld 6,0(12)
+ ld 7,8(12)
+ addi 11,12,16
+ mtctr 8
+ std 6,0(3)
+ std 7,8(3)
+ addi 10,3,16
+ bf 31,4f
+ ld 0,16(12)
+ std 0,16(3)
+ blt cr1,3f
+ addi 11,12,24
+ addi 10,3,24
+ b 4f
+ .align 4
+1:
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ ld 6,0(12)
+ addi 11,12,8
+ std 6,0(3)
+ addi 10,3,8
+
+ .align 4
+4:
+ ld 6,0(11)
+ ld 7,8(11)
+ ld 8,16(11)
+ ld 0,24(11)
+ addi 11,11,32
+2:
+ std 6,0(10)
+ std 7,8(10)
+ std 8,16(10)
+ std 0,24(10)
+ addi 10,10,32
+ bdnz 4b
+3:
+
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+ beq cr6,0f
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+/* At this point we have a tail of 0-7 bytes and we know that the
+   destination is doubleword aligned.  */
+4: bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+
+/* Copy up to 31 bytes.  This is divided into two cases: 0-8 bytes and
+   9-31 bytes.  Each case is handled without loops, using binary
+   (1,2,4,8) tests.
+
+   In the short (0-8 byte) case no attempt is made to force alignment
+   of either source or destination.  The hardware will handle the
+   unaligned load/stores with small delays for crossing 32-, 64-, and
+   4096-byte boundaries.  Since these short moves are unlikely to be
+   unaligned or cross these boundaries, the overhead to force
+   alignment is not justified.
+
+   The longer (9-31 byte) move is more likely to cross 32- or 64-byte
+   boundaries.  Since only loads are sensitive to the 32-/64-byte
+   boundaries it is more important to align the source than the
+   destination.  If the source is not already word aligned, we first
+   move 1-3 bytes as needed.  Since we are only word aligned we don't
+   use doubleword load/stores, ensuring that all loads are aligned.
+ While the destination and stores may still be unaligned, this
+ is only an issue for page (4096 byte boundary) crossing, which
+ should be rare for these short moves. The hardware handles this
+ case automatically with a small delay. */
+
+ .align 4
+.L2:
+ mtcrf 0x01,5
+ neg 8,4
+ clrrdi 11,4,2
+ andi. 0,8,3
+ ble cr6,.LE8 /* Handle moves of 0-8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmpldi cr1,5,16
+ mr 10,5
+ mr 12,4
+ cmpldi cr6,0,2
+ beq .L3 /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(11)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srdi 7,6,16
+ bgt cr6,3f
+ sth 6,0(3)
+ b 7f
+ .align 4
+3:
+ stb 7,0(3)
+ sth 6,1(3)
+ b 7f
+ .align 4
+5:
+ stb 6,0(3)
+7:
+ cmpldi cr1,10,16
+ add 3,3,0
+ mtcrf 0x01,10
+ .align 4
+.L3:
+/* At least 6 bytes left and the source is word aligned. */
+ blt cr1,8f
+16: /* Move 16 bytes. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ stw 6,0(3)
+ lwz 6,8(12)
+ stw 7,4(3)
+ lwz 7,12(12)
+ addi 12,12,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+8: /* Move 8 bytes. */
+ bf 28,4f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Move 4 bytes. */
+ bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Move 2-3 bytes. */
+ bf 30,1f
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 7,2(12)
+ stb 7,2(3)
+ ld 3,-16(1)
+ blr
+1: /* Move 1 byte. */
+ bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+.LE8:
+ mr 12,4
+ bne cr6,4f
+/* Would have liked to use ld/std here, but the 630 processors are
+   slow for load/store doubles that are not at least word aligned.
+   Unaligned load/store word executes with only a 1-cycle penalty.  */
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+4: bf 29,2b
+ lwz 6,0(4)
+ stw 6,0(3)
+6:
+ bf 30,5f
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,0f
+ lbz 8,6(4)
+ stb 8,6(3)
+ ld 3,-16(1)
+ blr
+ .align 4
+5:
+ bf 31,0f
+ lbz 6,4(4)
+ stb 6,4(3)
+ .align 4
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+ .align 4
+.L6:
+
+ /* Copy doublewords where the destination is aligned but the source is
+ not. Use aligned doubleword loads from the source, shifted to realign
+ the data, to allow aligned destination stores. */
+ addi 11,9,-1 /* loop DW count is one less than total */
+ subf 5,10,12
+ sldi 10,10,3
+ mr 4,3
+ srdi 8,11,2 /* calculate the 32 byte loop count */
+ ld 6,0(5)
+ mtcrf 0x01,11
+ cmpldi cr6,9,4
+ mtctr 8
+ ld 7,8(5)
+ subfic 9,10,64
+ bf 30,1f
+
+ /* there are at least two DWs to copy */
+ sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sld 0,7,10
+ srd 8,6,9
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,8f /* if total DWs = 3, then bypass loop */
+ bf 31,4f
+ /* there is a third DW to copy */
+ sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,8f /* if total DWs = 4, then bypass loop */
+ b 4f
+ .align 4
+1:
+ sld 0,6,10
+ srd 8,7,9
+ addi 5,5,16
+ or 0,0,8
+ bf 31,4f
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+4: sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sld 0,7,10
+ srd 8,6,9
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sld 0,7,10
+ srd 8,6,9
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ 4b
+ .align 4
+8:
+ /* calculate and store the final DW */
+ sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ std 0,0(4)
+3:
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+	bne cr1,.L9	/* Copy the 1-7 byte tail at .L9; if it is 0 bytes we are done.  */
+ /* Return original dst pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
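
The strategy in the header comment above (short moves inline, longer moves
dispatched on destination/source doubleword alignment) reduces to roughly
the following C.  This is a minimal editorial sketch, not the patch's code:
the function name is hypothetical, the assembly unrolls the aligned loop to
32 bytes per iteration, and the unaligned-source path uses the
aligned-load/shift/store scheme sketched after the POWER6 memcpy below.

#include <stddef.h>
#include <stdint.h>

void *
memcpy_sketch (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if (len >= 32)
    {
      /* Move 0-7 bytes so the destination is doubleword aligned.  */
      while ((uintptr_t) d & 7)
        {
          *d++ = *s++;
          len--;
        }
      /* If the source is now doubleword aligned too, take the fast
         doubleword loop (unrolled 4x in the assembly).  */
      if (((uintptr_t) s & 7) == 0)
        for (; len >= 8; len -= 8, d += 8, s += 8)
          *(uint64_t *) d = *(const uint64_t *) s;
      /* Otherwise the assembly takes .L6, the aligned-load/shift/store
         path; a byte loop stands in for it here.  */
    }

  /* Short moves and the 1-7 byte tail; the assembly uses binary
     word/halfword/byte tests instead of a loop.  */
  while (len--)
    *d++ = *s++;
  return dst;
}
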
diff --git a/sysdeps/powerpc/powerpc64/power4/memset.S b/sysdeps/powerpc/powerpc64/power4/memset.S
new file mode 100644
index 0000000000..e7a259acdd
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -0,0 +1,275 @@
+/* Optimized memset implementation for PowerPC64.
+ Copyright (C) 1997, 1999, 2000, 2002, 2003, 2007
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+   The memset is done in three sizes: byte (8 bits), doubleword (64 bits),
+   and cache line (128 bytes on POWER4).  There is a special case for
+   setting cache lines to 0, to take advantage of the dcbz instruction.  */
+
+ .machine power4
+EALIGN (BP_SYM (memset), 5, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#if __BOUNDED_POINTERS__
+# define rMEMP0 r4 /* Original value of 1st arg. */
+# define rCHR r5 /* Char to set in each byte. */
+# define rLEN r6 /* Length of region to set. */
+# define rMEMP r10 /* Address at which we are storing. */
+#else
+# define rMEMP0 r3 /* Original value of 1st arg. */
+# define rCHR r4 /* Char to set in each byte. */
+# define rLEN r5 /* Length of region to set. */
+# define rMEMP r6 /* Address at which we are storing. */
+#endif
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+
+#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */
+#define rCLS r8 /* Cache line size obtained from static. */
+#define rCLM r9 /* Cache line size mask to check for cache alignment. */
+L(_memset):
+#if __BOUNDED_POINTERS__
+ cmpldi cr1, rRTN, 0
+ CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
+ beq cr1, L(b0)
+ STORE_RETURN_VALUE (rMEMP0)
+ STORE_RETURN_BOUNDS (rTMP, rTMP2)
+L(b0):
+#endif
+/* Take care of the case for size <= 8.  */
+ cmpldi cr1, rLEN, 8
+ andi. rALIGN, rMEMP0, 7
+ mr rMEMP, rMEMP0
+ ble- cr1, L(small)
+
+/* Align to doubleword boundary. */
+ cmpldi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ beq+ L(aligned2)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 8
+ cror 28,30,31 /* Detect odd word aligned. */
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ bt 29, L(g4)
+/* Process the even word of doubleword. */
+ bf+ 31, L(g2)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(g4x)
+L(g2):
+ sth rCHR, -6(rMEMP)
+L(g4x):
+ stw rCHR, -4(rMEMP)
+ b L(aligned)
+/* Process the odd word of doubleword. */
+L(g4):
+ bf 28, L(g4x) /* If false, word aligned on odd word. */
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+/* Handle the case of size <= 31.  */
+L(aligned2):
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x18
+ subfic rALIGN, rALIGN, 0x20
+ insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stdu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ std rCHR, -8(rMEMP2)
+ stdu rCHR, -16(rMEMP2)
+L(a2):
+
+/* Now aligned to a 32 byte boundary. */
+L(caligned):
+ cmpldi cr1, rCHR, 0
+ clrrdi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+L(nondcbz):
+ srdi rTMP, rALIGN, 5
+ mtctr rTMP
+ beq L(medium) /* We may not actually get to do a full line. */
+ clrldi. rLEN, rLEN, 59
+ add rMEMP, rMEMP, rALIGN
+ li rNEG64, -0x40
+ bdz L(cloopdone)
+
+L(c3): dcbtst rNEG64, rMEMP
+ std rCHR, -8(rMEMP)
+ std rCHR, -16(rMEMP)
+ std rCHR, -24(rMEMP)
+ stdu rCHR, -32(rMEMP)
+ bdnz L(c3)
+L(cloopdone):
+ std rCHR, -8(rMEMP)
+ std rCHR, -16(rMEMP)
+ cmpldi cr1, rLEN, 16
+ std rCHR, -24(rMEMP)
+ stdu rCHR, -32(rMEMP)
+ beqlr
+ add rMEMP, rMEMP, rALIGN
+ b L(medium_tail2)
+
+ .align 5
+/* Clear lines of memory in 128-byte chunks. */
+L(zloopstart):
+/* If the remaining length is less than 32 bytes, don't bother getting
+   the cache line size.  */
+ beq L(medium)
+ li rCLS,128 /* cache line size is 128 */
+
+/* Now we know the cache line size, and it is not 32 bytes, but
+   we may not yet be aligned to the cache line.  We may have a partial
+   line to fill, so touch it first.  */
+ dcbt 0,rMEMP
+L(getCacheAligned):
+ cmpldi cr1,rLEN,32
+ andi. rTMP,rMEMP,127
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,-32(rMEMP)
+ std rCHR,-24(rMEMP)
+ std rCHR,-16(rMEMP)
+ std rCHR,-8(rMEMP)
+ b L(getCacheAligned)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+L(cacheAligned):
+ cmpld cr1,rLEN,rCLS
+ blt cr1,L(handletail32)
+ dcbz 0,rMEMP
+ subf rLEN,rCLS,rLEN
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
+
+/* We are here because the cache line size was set and was not 32 bytes
+ and the remainder (rLEN) is less than the actual cache line size.
+ So set up the preconditions for L(nondcbz) and go there. */
+L(handletail32):
+ clrrwi. rALIGN, rLEN, 5
+ b L(nondcbz)
+
+ .align 5
+L(small):
+/* Memset of 8 bytes or less. */
+ cmpldi cr6, rLEN, 4
+ cmpldi cr5, rLEN, 1
+ ble cr6,L(le4)
+ subi rLEN, rLEN, 4
+ stb rCHR,0(rMEMP)
+ stb rCHR,1(rMEMP)
+ stb rCHR,2(rMEMP)
+ stb rCHR,3(rMEMP)
+ addi rMEMP,rMEMP, 4
+ cmpldi cr5, rLEN, 1
+L(le4):
+ cmpldi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ cmpldi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt- 29, L(medium_29t)
+L(medium_29f):
+ bge- cr1, L(medium_27t)
+ bflr- 28
+ std rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt- cr1, L(medium_27f)
+L(medium_27t):
+ std rCHR, -8(rMEMP)
+ stdu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr- 28
+L(medium_28t):
+ std rCHR, -8(rMEMP)
+ blr
+END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+libc_hidden_builtin_def (memset)
+
+/* Copied from bzero.S to prevent the linker from inserting a stub
+ between bzero and memset. */
+ENTRY (BP_SYM (__bzero))
+ CALL_MCOUNT 3
+#if __BOUNDED_POINTERS__
+ mr r6,r4
+ li r5,0
+ mr r4,r3
+ /* Tell memset that we don't want a return value. */
+ li r3,0
+ b L(_memset)
+#else
+ mr r5,r4
+ li r4,0
+ b L(_memset)
+#endif
+END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+
+weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
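
The rlwimi/insrdi sequence above replicates the fill byte through a
doubleword (byte -> halfword -> word -> doubleword) before the store loops.
A minimal C sketch of that structure, with a hypothetical name; the
cache-line special case (dcbz when the fill value is zero) is only noted
in a comment:

#include <stddef.h>
#include <stdint.h>

void *
memset_sketch (void *s, int c, size_t n)
{
  unsigned char *p = s;
  uint64_t v = (unsigned char) c;

  /* Replicate the byte through a doubleword.  */
  v |= v << 8;
  v |= v << 16;
  v |= v << 32;

  /* Align to a doubleword boundary; the assembly continues on to the
     32-byte boundary, and to the cache line when it can use dcbz.  */
  while (n > 0 && ((uintptr_t) p & 7))
    {
      *p++ = (unsigned char) c;
      n--;
    }
  /* When c == 0 and a full cache line remains, the assembly zeros it
     with a single dcbz instead of a run of std instructions.  */
  for (; n >= 8; n -= 8, p += 8)
    *(uint64_t *) p = v;
  while (n > 0)
    {
      *p++ = (unsigned char) c;
      n--;
    }
  return s;
}
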
diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S
new file mode 100644
index 0000000000..7a1665d2bc
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -0,0 +1,180 @@
+/* Optimized strncmp implementation for PowerPC64.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how the end-of-string testing works. */
+
+/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+EALIGN (BP_SYM(strncmp), 4, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rN r5 /* max string length */
+/* Note: The Bounded pointer support in this code is broken. This code
+   was inherited from PPC32 and that support was never completed.
+ Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */
+#define rWORD1 r6 /* current word in s1 */
+#define rWORD2 r7 /* current word in s2 */
+#define rWORD3 r10
+#define rWORD4 r11
+#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
+#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
+#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+
+ dcbt 0,rSTR1
+ or rTMP, rSTR2, rSTR1
+ lis r7F7F, 0x7f7f
+ dcbt 0,rSTR2
+ clrldi. rTMP, rTMP, 61
+ cmpldi cr1, rN, 0
+ lis rFEFE, -0x101
+ bne L(unaligned)
+/* We are doubleword aligned so set up for two loops: first a doubleword
+   loop, then fall into the byte loop for any residual.  */
+ srdi. rTMP, rN, 3
+ clrldi rN, rN, 61
+ addi rFEFE, rFEFE, -0x101
+ addi r7F7F, r7F7F, 0x7f7f
+ cmpldi cr1, rN, 0
+ beq L(unaligned)
+
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ sldi rTMP, rFEFE, 32
+ insrdi r7F7F, r7F7F, 32, 0
+ add rFEFE, rFEFE, rTMP
+ b L(g1)
+
+L(g0):
+ ldu rWORD1, 8(rSTR1)
+ bne- cr1, L(different)
+ ldu rWORD2, 8(rSTR2)
+L(g1): add rTMP, rFEFE, rWORD1
+ nor rNEG, r7F7F, rWORD1
+ bdz L(tail)
+ and. rTMP, rTMP, rNEG
+ cmpd cr1, rWORD1, rWORD2
+ beq+ L(g0)
+
+/* OK. We've hit the end of the string. We need to be careful that
+ we don't compare two strings as different because of gunk beyond
+ the end of the strings... */
+
+L(endstring):
+ and rTMP, r7F7F, rWORD1
+ beq cr1, L(equal)
+ add rTMP, rTMP, r7F7F
+ xor. rBITDIF, rWORD1, rWORD2
+
+ andc rNEG, rNEG, rTMP
+ blt- L(highbit)
+ cntlzd rBITDIF, rBITDIF
+ cntlzd rNEG, rNEG
+ addi rNEG, rNEG, 7
+ cmpd cr1, rNEG, rBITDIF
+ sub rRTN, rWORD1, rWORD2
+ blt- cr1, L(equal)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ldu rWORD1, -8(rSTR1)
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ srdi rWORD2, rWORD2, 56
+ srdi rWORD1, rWORD1, 56
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+
+/* Oh well. In this case, we just do a byte-by-byte comparison. */
+ .align 4
+L(tail):
+ and. rTMP, rTMP, rNEG
+ cmpd cr1, rWORD1, rWORD2
+ bne- L(endstring)
+ addi rSTR1, rSTR1, 8
+ bne- cr1, L(different)
+ addi rSTR2, rSTR2, 8
+ cmpldi cr1, rN, 0
+L(unaligned):
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ ble cr1, L(ux)
+L(uz):
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ .align 4
+L(u1):
+ cmpdi cr1, rWORD1, 0
+ bdz L(u4)
+ cmpd rWORD1, rWORD2
+ beq- cr1, L(u4)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- L(u4)
+ cmpdi cr1, rWORD3, 0
+ bdz L(u3)
+ cmpd rWORD3, rWORD4
+ beq- cr1, L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- L(u3)
+ cmpdi cr1, rWORD1, 0
+ bdz L(u4)
+ cmpd rWORD1, rWORD2
+ beq- cr1, L(u4)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- L(u4)
+ cmpdi cr1, rWORD3, 0
+ bdz L(u3)
+ cmpd rWORD3, rWORD4
+ beq- cr1, L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ beq+ L(u1)
+
+L(u3): sub rRTN, rWORD3, rWORD4
+ blr
+L(u4): sub rRTN, rWORD1, rWORD2
+ blr
+L(ux):
+ li rRTN, 0
+ blr
+END (BP_SYM (strncmp))
+libc_hidden_builtin_def (strncmp)
+
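
The rFEFE/r7F7F constants above implement the standard branch-free
zero-byte test that lets the doubleword loop stop at the terminating NUL.
Since 0xfefefefefefefeff is -0x0101010101010101 modulo 2^64, adding it
subtracts one from every byte; a byte can end up with its high bit set in
the sum, while having had it clear in the original word, only by wrapping
from zero, so the masked result is nonzero exactly when the word contains
a zero byte.  In C (hypothetical helper name):

#include <stdint.h>

/* Nonzero iff some byte of X is zero; this is what the
   "and. rTMP, rTMP, rNEG" above tests.  */
static inline uint64_t
has_zero_byte (uint64_t x)
{
  uint64_t minus1 = x + 0xfefefefefefefeffULL;   /* x - 0x0101...01 */
  uint64_t nohigh = ~(x | 0x7f7f7f7f7f7f7f7fULL);
  return minus1 & nohigh;
}
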
diff --git a/sysdeps/powerpc/powerpc64/power4/wordcopy.c b/sysdeps/powerpc/powerpc64/power4/wordcopy.c
new file mode 100644
index 0000000000..f427b48e7a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/wordcopy.c
@@ -0,0 +1 @@
+#include "../../powerpc32/power4/wordcopy.c"
diff --git a/sysdeps/powerpc/powerpc64/power5+/Implies b/sysdeps/powerpc/powerpc64/power5+/Implies
new file mode 100644
index 0000000000..ac431fa96e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/Implies b/sysdeps/powerpc/powerpc64/power5+/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S
new file mode 100644
index 0000000000..1e4851db9e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S
@@ -0,0 +1,38 @@
+/* ceil function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__ceil, 4, 0)
+ CALL_MCOUNT 0
+ frip fp1, fp1
+ blr
+ END (__ceil)
+
+weak_alias (__ceil, ceil)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__ceil, ceill)
+strong_alias (__ceil, __ceill)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __ceil, ceill, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S
new file mode 100644
index 0000000000..38c51d5ff1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S
@@ -0,0 +1,31 @@
+/* ceilf function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__ceilf, 4, 0)
+ CALL_MCOUNT 0
+	frip	fp1, fp1	/* The rounding instructions operate on doubles.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__ceilf)
+
+weak_alias (__ceilf, ceilf)
+
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S
new file mode 100644
index 0000000000..86f07c8b59
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S
@@ -0,0 +1,38 @@
+/* floor function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__floor, 4, 0)
+ CALL_MCOUNT 0
+ frim fp1, fp1
+ blr
+ END (__floor)
+
+weak_alias (__floor, floor)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__floor, floorl)
+strong_alias (__floor, __floorl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __floor, floorl, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S
new file mode 100644
index 0000000000..e7bdccf513
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S
@@ -0,0 +1,31 @@
+/* floorf function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__floorf, 4, 0)
+ CALL_MCOUNT 0
+	frim	fp1, fp1	/* The rounding instructions operate on doubles.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__floorf)
+
+weak_alias (__floorf, floorf)
+
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S
new file mode 100644
index 0000000000..6a022769f4
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S
@@ -0,0 +1,59 @@
+/* llround function. POWER5+, PowerPC64 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3] llround (double x [fp1])
+   IEEE 1003.1 llround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Doubleword
+   with round toward Zero instruction.  */
+
+ .machine "power5"
+EALIGN (__llround, 4, 0)
+ CALL_MCOUNT 0
+	frin	fp2, fp1	/* Round to nearest, halfway cases away from zero.  */
+ fctidz fp3, fp2 /* Convert To Integer DW round toward 0. */
+ stfd fp3, -16(r1)
+	nop	/* Ensure the following load is in a different dispatch group */
+ nop /* to avoid pipe stall on POWER4&5. */
+ nop
+ ld r3, -16(r1)
+ blr
+ END (__llround)
+
+strong_alias (__llround, __lround)
+weak_alias (__llround, llround)
+weak_alias (__lround, lround)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
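
A close C analogue of the frin/fctidz pair above (editorial sketch with a
hypothetical name; unlike frin, the floor/ceil form can differ in the last
ulp when x + 0.5 is itself rounded):

#include <math.h>

long long
llround_sketch (double x)
{
  /* frin: round to nearest, halfway cases away from zero.  */
  double r = (x >= 0.0) ? floor (x + 0.5) : ceil (x - 0.5);
  /* fctidz: convert to a 64-bit integer, rounding toward zero
     (a no-op here since r is already integral).  */
  return (long long) r;
}
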
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S
new file mode 100644
index 0000000000..7082734bb5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S
@@ -0,0 +1,38 @@
+/* round function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__round, 4, 0)
+ CALL_MCOUNT 0
+ frin fp1, fp1
+ blr
+ END (__round)
+
+weak_alias (__round, round)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__round, roundl)
+strong_alias (__round, __roundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __round, roundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S
new file mode 100644
index 0000000000..30b83be9e7
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S
@@ -0,0 +1,31 @@
+/* roundf function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__roundf, 4, 0)
+ CALL_MCOUNT 0
+	frin	fp1, fp1	/* The rounding instructions operate on doubles.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__roundf)
+
+weak_alias (__roundf, roundf)
+
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S
new file mode 100644
index 0000000000..4e6e4d790e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S
@@ -0,0 +1,38 @@
+/* trunc function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__trunc, 4, 0)
+ CALL_MCOUNT 0
+ friz fp1, fp1
+ blr
+ END (__trunc)
+
+weak_alias (__trunc, trunc)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__trunc, truncl)
+strong_alias (__trunc, __truncl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __trunc, truncl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S
new file mode 100644
index 0000000000..61d9cb372d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S
@@ -0,0 +1,31 @@
+/* truncf function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__truncf, 4, 0)
+ CALL_MCOUNT 0
+	friz	fp1, fp1	/* The rounding instructions operate on doubles.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__truncf)
+
+weak_alias (__truncf, truncf)
+
diff --git a/sysdeps/powerpc/powerpc64/power5/Implies b/sysdeps/powerpc/powerpc64/power5/Implies
new file mode 100644
index 0000000000..ac431fa96e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4
diff --git a/sysdeps/powerpc/powerpc64/power5/fpu/Implies b/sysdeps/powerpc/powerpc64/power5/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power6/Implies b/sysdeps/powerpc/powerpc64/power6/Implies
new file mode 100644
index 0000000000..0f35f37c77
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power5+
diff --git a/sysdeps/powerpc/powerpc64/power6/fpu/Implies b/sysdeps/powerpc/powerpc64/power6/fpu/Implies
new file mode 100644
index 0000000000..885e39c003
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/fpu/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/power5+/fpu
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power6/memcpy.S b/sysdeps/powerpc/powerpc64/power6/memcpy.S
new file mode 100644
index 0000000000..d105f8302e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/memcpy.S
@@ -0,0 +1,1170 @@
+/* Optimized memcpy implementation for POWER6 (PowerPC64).
+ Copyright (C) 2003, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+   Memcpy handles short copies (< 32 bytes) using binary move blocks
+   (no loops) of lwz/stw.  The tail (remaining 1-3 bytes) is handled
+   with the appropriate combination of byte and halfword load/stores.
+   There is minimal effort to optimize the alignment of short moves.
+   The 64-bit implementations of POWER3 and POWER4 do a reasonable job
+   of handling unaligned load/stores that do not cross 32-byte boundaries.
+
+   Longer moves (>= 32 bytes) justify the effort to get at least the
+   destination doubleword (8-byte) aligned.  Further optimization is
+   possible when both source and destination are doubleword aligned.
+   Each case has an optimized unrolled loop.
+
+   For POWER6 unaligned loads will take a 20+ cycle hiccup for any
+   L1 cache miss that crosses a 32- or 128-byte boundary.  Stores
+   are more forgiving and do not take a hiccup until page or
+   segment boundaries are crossed.  So we require doubleword alignment
+   for the source but may take a risk and only require word alignment
+   for the destination.  */
+
+ .machine "power6"
+EALIGN (BP_SYM (memcpy), 7, 0)
+ CALL_MCOUNT 3
+
+ cmpldi cr1,5,31
+ neg 0,3
+ std 3,-16(1)
+ std 31,-8(1)
+	andi. 11,3,7	/* check alignment of dst.  */
+	clrldi 0,0,61	/* Number of bytes until the 1st doubleword of dst.  */
+	clrldi 10,4,61	/* check alignment of src.  */
+ cmpldi cr6,5,8
+ ble- cr1,.L2 /* If move < 32 bytes use short move code. */
+ mtcrf 0x01,0
+ cmpld cr6,10,11
+ srdi 9,5,3 /* Number of full double words remaining. */
+ beq .L0
+
+ subf 5,0,5
+  /* Move 0-7 bytes as needed to get the destination doubleword aligned.
+     Duplicate some code to maximize fall-through and minimize AGEN delays.  */
+1: bf 31,2f
+ lbz 6,0(4)
+ stb 6,0(3)
+ bf 30,5f
+ lhz 6,1(4)
+ sth 6,1(3)
+ bf 29,0f
+ lwz 6,3(4)
+ stw 6,3(3)
+ b 0f
+5:
+ bf 29,0f
+ lwz 6,1(4)
+ stw 6,1(3)
+ b 0f
+
+2: bf 30,4f
+ lhz 6,0(4)
+ sth 6,0(3)
+ bf 29,0f
+ lwz 6,2(4)
+ stw 6,2(3)
+ b 0f
+
+4: bf 29,0f
+ lwz 6,0(4)
+ stw 6,0(3)
+0:
+/* Add the number of bytes until the 1st doubleword of dst to src and dst. */
+ add 4,4,0
+ add 3,3,0
+
+	clrldi 10,4,61	/* check alignment of src again.  */
+ srdi 9,5,3 /* Number of full double words remaining. */
+
+   /* Copy doublewords from source to destination, assuming the
+      destination is aligned on a doubleword boundary.
+
+      At this point we know there are at least 25 bytes left (32-7) to copy.
+      The next step is to determine if the source is also doubleword aligned.
+      If not, branch to the unaligned move code at .L6, which uses
+      a load, shift, store strategy.
+
+      Otherwise source and destination are doubleword aligned, and we can
+      use the optimized doubleword copy loop.  */
+ .align 4
+.L0:
+ clrldi 11,5,61
+ andi. 0,5,0x78
+ srdi 12,5,7 /* Number of 128-byte blocks to move. */
+ cmpldi cr1,11,0 /* If the tail is 0 bytes */
+ bne- cr6,.L6 /* If source is not DW aligned. */
+
+ /* Move doublewords where destination and source are DW aligned.
+     Use an unrolled loop to copy 16 doublewords (128 bytes) per iteration.
+     If the copy is not an exact multiple of 128 bytes, 1-15
+     doublewords are copied as needed to set up the main loop.  After
+     the main loop exits there may be a tail of 1-7 bytes.  These bytes
+     are copied a word/halfword/byte at a time as needed to preserve
+ alignment.
+
+ For POWER6 the L1 is store-through and the L2 is store-in. The
+ L2 is clocked at half CPU clock so we can store 16 bytes every
+ other cycle. POWER6 also has a load/store bypass so we can do
+ load, load, store, store every 2 cycles.
+
+ The following code is sensitive to cache line alignment. Do not
+     make any change without first making sure they don't result in
+ splitting ld/std pairs across a cache line. */
+
+ mtcrf 0x02,5
+ mtcrf 0x01,5
+ cmpldi cr5,12,1
+ beq L(das_loop)
+
+ bf 25,4f
+ .align 3
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 11,4
+ mr 10,3
+ std 6,0(3)
+ std 7,8(3)
+ ld 6,16(4)
+ ld 7,24(4)
+ std 6,16(3)
+ std 7,24(3)
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ addi 4,4,64
+ addi 3,3,64
+ std 6,0+32(10)
+ std 7,8+32(10)
+ ld 6,16+32(11)
+ ld 7,24+32(11)
+ std 6,16+32(10)
+ std 7,24+32(10)
+4:
+ mr 10,3
+ bf 26,2f
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 11,4
+ nop
+ std 6,0(3)
+ std 7,8(3)
+ ld 6,16(4)
+ ld 7,24(4)
+ addi 4,4,32
+ std 6,16(3)
+ std 7,24(3)
+ addi 3,3,32
+6:
+ nop
+ bf 27,5f
+ ld 6,0+32(11)
+ ld 7,8+32(11)
+ addi 4,4,16
+ addi 3,3,16
+ std 6,0+32(10)
+ std 7,8+32(10)
+ bf 28,L(das_loop_s)
+ ld 0,16+32(11)
+ addi 4,4,8
+ addi 3,3,8
+ std 0,16+32(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+5:
+ nop
+ bf 28,L(das_loop_s)
+ ld 6,32(11)
+ addi 4,4,8
+ addi 3,3,8
+ std 6,32(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+2:
+ mr 11,4
+ bf 27,1f
+ ld 6,0(4)
+ ld 7,8(4)
+ addi 4,4,16
+ addi 3,3,16
+ std 6,0(10)
+ std 7,8(10)
+ bf 28,L(das_loop_s)
+ ld 0,16(11)
+ addi 4,11,24
+ addi 3,10,24
+ std 0,16(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+1:
+ nop
+ bf 28,L(das_loop_s)
+ ld 6,0(4)
+ addi 4,4,8
+ addi 3,3,8
+ std 6,0(10)
+L(das_loop_s):
+ nop
+ blt cr5,L(das_tail)
+ .align 4
+L(das_loop):
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 10,3
+ mr 11,4
+ std 6,0(3)
+ std 7,8(3)
+ addi 12,12,-1
+ nop
+ ld 8,16(4)
+ ld 0,24(4)
+ std 8,16(3)
+ std 0,24(3)
+
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ std 6,0+32(3)
+ std 7,8+32(3)
+ ld 8,16+32(4)
+ ld 0,24+32(4)
+ std 8,16+32(3)
+ std 0,24+32(3)
+
+ ld 6,0+64(11)
+ ld 7,8+64(11)
+ std 6,0+64(10)
+ std 7,8+64(10)
+ ld 8,16+64(11)
+ ld 0,24+64(11)
+ std 8,16+64(10)
+ std 0,24+64(10)
+
+ ld 6,0+96(11)
+ ld 7,8+96(11)
+ addi 4,4,128
+ addi 3,3,128
+ std 6,0+96(10)
+ std 7,8+96(10)
+ ld 8,16+96(11)
+ ld 0,24+96(11)
+ std 8,16+96(10)
+ std 0,24+96(10)
+ ble cr5,L(das_loop_e)
+
+ mtctr 12
+ .align 4
+L(das_loop2):
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 10,3
+ mr 11,4
+ std 6,0(3)
+ std 7,8(3)
+ ld 8,16(4)
+ ld 0,24(4)
+ std 8,16(3)
+ std 0,24(3)
+
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ std 6,0+32(3)
+ std 7,8+32(3)
+ ld 8,16+32(4)
+ ld 0,24+32(4)
+ std 8,16+32(3)
+ std 0,24+32(3)
+
+ ld 6,0+64(11)
+ ld 7,8+64(11)
+ std 6,0+64(10)
+ std 7,8+64(10)
+ ld 8,16+64(11)
+ ld 0,24+64(11)
+ std 8,16+64(10)
+ std 0,24+64(10)
+
+ ld 6,0+96(11)
+ ld 7,8+96(11)
+ addi 4,4,128
+ addi 3,3,128
+ std 6,0+96(10)
+ std 7,8+96(10)
+ ld 8,16+96(11)
+ ld 0,24+96(11)
+ std 8,16+96(10)
+ std 0,24+96(10)
+ bdnz L(das_loop2)
+L(das_loop_e):
+/* Check for a 1-7 byte tail; return if none.  */
+ bne cr1,L(das_tail2)
+/* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(das_tail):
+ beq cr1,0f
+
+L(das_tail2):
+/* At this point we have a tail of 0-7 bytes and we know that the
+   destination is doubleword aligned.  */
+4: bf 29,2f
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,5f
+ lhz 6,4(4)
+ sth 6,4(3)
+ bf 31,0f
+ lbz 6,6(4)
+ stb 6,6(3)
+ b 0f
+5: bf 31,0f
+ lbz 6,4(4)
+ stb 6,4(3)
+ b 0f
+
+2: bf 30,1f
+ lhz 6,0(4)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 6,2(4)
+ stb 6,2(3)
+ b 0f
+
+1: bf 31,0f
+ lbz 6,0(4)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Copy up to 31 bytes.  This is divided into two cases: 0-8 bytes and
+   9-31 bytes.  Each case is handled without loops, using binary
+   (1,2,4,8) tests.
+
+   In the short (0-8 byte) case no attempt is made to force alignment
+   of either source or destination.  The hardware will handle the
+   unaligned load/stores with small delays for crossing 32-, 128-,
+   and 4096-byte boundaries.  Since these short moves are unlikely to be
+   unaligned or cross these boundaries, the overhead to force
+   alignment is not justified.
+
+   The longer (9-31 byte) move is more likely to cross 32- or 128-byte
+   boundaries.  Since only loads are sensitive to the 32-/128-byte
+   boundaries it is more important to align the source than the
+   destination.  If the source is not already word aligned, we first
+   move 1-3 bytes as needed.  Since we are only word aligned we don't
+   use doubleword load/stores, ensuring that all loads are aligned.
+ While the destination and stores may still be unaligned, this
+ is only an issue for page (4096 byte boundary) crossing, which
+ should be rare for these short moves. The hardware handles this
+ case automatically with a small (~20 cycle) delay. */
+ .align 4
+.L2:
+ mtcrf 0x01,5
+ neg 8,4
+ clrrdi 11,4,2
+ andi. 0,8,3
+ ble cr6,.LE8 /* Handle moves of 0-8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmpldi cr1,5,16
+ mr 10,5
+ mr 12,4
+ cmpldi cr6,0,2
+ beq L(dus_tail) /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(11)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srdi 7,6,16
+ bgt cr6,3f
+ sth 6,0(3)
+ b 7f
+ .align 4
+3:
+ stb 7,0(3)
+ sth 6,1(3)
+ b 7f
+ .align 4
+5:
+ stb 6,0(3)
+7:
+ cmpldi cr1,10,16
+ add 3,3,0
+ mtcrf 0x01,10
+ .align 4
+L(dus_tail):
+/* At least 6 bytes left and the source is word aligned. This allows
+ some speculative loads up front. */
+/* We need to special case the fall-through because the biggest delays
+ are due to address computation not being ready in time for the
+ AGEN. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ blt cr1,L(dus_tail8)
+ cmpldi cr0,10,24
+L(dus_tail16): /* Move 16 bytes. */
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,8(12)
+ lwz 7,12(12)
+ stw 6,8(3)
+ stw 7,12(3)
+/* Move 8 bytes more. */
+ bf 28,L(dus_tail16p8)
+ cmpldi cr1,10,28
+ lwz 6,16(12)
+ lwz 7,20(12)
+ stw 6,16(3)
+ stw 7,20(3)
+/* Move 4 bytes more. */
+ bf 29,L(dus_tail16p4)
+ lwz 6,24(12)
+ stw 6,24(3)
+ addi 12,12,28
+ addi 3,3,28
+ bgt cr1,L(dus_tail2)
+ /* exactly 28 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p8):  /* less than 8 bytes left.  */
+ beq cr1,L(dus_tailX) /* exactly 16 bytes, early exit. */
+ cmpldi cr1,10,20
+ bf 29,L(dus_tail16p2)
+/* Move 4 bytes more. */
+ lwz 6,16(12)
+ stw 6,16(3)
+ addi 12,12,20
+ addi 3,3,20
+ bgt cr1,L(dus_tail2)
+ /* exactly 20 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p4):  /* less than 4 bytes left.  */
+ addi 12,12,24
+ addi 3,3,24
+ bgt cr0,L(dus_tail2)
+ /* exactly 24 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p2):  /* 16 bytes moved, less than 4 bytes left.  */
+ addi 12,12,16
+ addi 3,3,16
+ b L(dus_tail2)
+
+ .align 4
+L(dus_tail8): /* Move 8 bytes. */
+/* r6, r7 already loaded speculatively. */
+ cmpldi cr1,10,8
+ cmpldi cr0,10,12
+ bf 28,L(dus_tail4)
+ .align 2
+ stw 6,0(3)
+ stw 7,4(3)
+/* Move 4 bytes more. */
+ bf 29,L(dus_tail8p4)
+ lwz 6,8(12)
+ stw 6,8(3)
+ addi 12,12,12
+ addi 3,3,12
+ bgt cr0,L(dus_tail2)
+ /* exactly 12 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail8p4):  /* less than 4 bytes left.  */
+ addi 12,12,8
+ addi 3,3,8
+ bgt cr1,L(dus_tail2)
+ /* exactly 8 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+
+ .align 4
+L(dus_tail4): /* Move 4 bytes. */
+/* r6 already loaded speculatively.  If we are here we know there are
+   more than 4 bytes left, so there is no need to test.  */
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+L(dus_tail2): /* Move 2-3 bytes. */
+ bf 30,L(dus_tail1)
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,L(dus_tailX)
+ lbz 7,2(12)
+ stb 7,2(3)
+ ld 3,-16(1)
+ blr
+L(dus_tail1): /* Move 1 byte. */
+ bf 31,L(dus_tailX)
+ lbz 6,0(12)
+ stb 6,0(3)
+L(dus_tailX):
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+.LE8:
+ mr 12,4
+ bne cr6,L(dus_4)
+/* Exactly 8 bytes.  We may cross a 32-/128-byte boundary and take a ~20
+   cycle delay.  This case should be rare and any attempt to avoid this
+   would take most of 20 cycles anyway.  */
+ ld 6,0(4)
+ std 6,0(3)
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_4):
+ bf 29,L(dus_tail2)
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,L(dus_5)
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,L(dus_0)
+ lbz 8,6(4)
+ stb 8,6(3)
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_5):
+ bf 31,L(dus_0)
+ lbz 6,4(4)
+ stb 6,4(3)
+L(dus_0):
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+ .align 4
+.L6:
+ cfi_offset(31,-8)
+ mr 12,4
+ mr 31,5
+ /* Copy doublewords where the destination is aligned but the source is
+ not. Use aligned doubleword loads from the source, shifted to realign
+ the data, to allow aligned destination stores. */
+ addi 11,9,-1 /* loop DW count is one less than total */
+ subf 5,10,12 /* Move source addr to previous full double word. */
+ cmpldi cr5, 10, 2
+ cmpldi cr0, 10, 4
+ mr 4,3
+ srdi 8,11,2 /* calculate the 32 byte loop count */
+ ld 6,0(5) /* pre load 1st full doubleword. */
+ mtcrf 0x01,11
+ cmpldi cr6,9,4
+ mtctr 8
+ ld 7,8(5) /* pre load 2nd full doubleword. */
+ bge cr0, L(du4_do)
+ blt cr5, L(du1_do)
+ beq cr5, L(du2_do)
+ b L(du3_do)
+
+ .align 4
+L(du1_do):
+ bf 30,L(du1_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 8
+ srdi 8,6, 64-8
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du1_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du1_fini) /* if total DWs = 4, then bypass loop */
+ b L(du1_loop)
+ .align 4
+L(du1_1dw):
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du1_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du1_loop):
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 8
+ srdi 8,6, 64-8
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 8
+ srdi 8,6, 64-8
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du1_loop)
+ .align 4
+L(du1_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du2_do):
+ bf 30,L(du2_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 16
+ srdi 8,6, 64-16
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du2_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du2_fini) /* if total DWs = 4, then bypass loop */
+ b L(du2_loop)
+ .align 4
+L(du2_1dw):
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du2_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du2_loop):
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 16
+ srdi 8,6, 64-16
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 16
+ srdi 8,6, 64-16
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du2_loop)
+ .align 4
+L(du2_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du3_do):
+ bf 30,L(du3_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 24
+ srdi 8,6, 64-24
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du3_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du3_fini) /* if total DWs = 4, then bypass loop */
+ b L(du3_loop)
+ .align 4
+L(du3_1dw):
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du3_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du3_loop):
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 24
+ srdi 8,6, 64-24
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 24
+ srdi 8,6, 64-24
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du3_loop)
+ .align 4
+L(du3_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du4_do):
+ cmpldi cr5, 10, 6
+ beq cr0, L(du4_dox)
+ blt cr5, L(du5_do)
+ beq cr5, L(du6_do)
+ b L(du7_do)
+L(du4_dox):
+ bf 30,L(du4_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 32
+ srdi 8,6, 64-32
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du4_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du4_fini) /* if total DWs = 4, then bypass loop */
+ b L(du4_loop)
+ .align 4
+L(du4_1dw):
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du4_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du4_loop):
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 32
+ srdi 8,6, 64-32
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 32
+ srdi 8,6, 64-32
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du4_loop)
+ .align 4
+L(du4_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du5_do):
+ bf 30,L(du5_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 40
+ srdi 8,6, 64-40
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du5_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du5_fini) /* if total DWs = 4, then bypass loop */
+ b L(du5_loop)
+ .align 4
+L(du5_1dw):
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du5_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du5_loop):
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 40
+ srdi 8,6, 64-40
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 40
+ srdi 8,6, 64-40
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du5_loop)
+ .align 4
+L(du5_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du6_do):
+ bf 30,L(du6_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 48
+ srdi 8,6, 64-48
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du6_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du6_fini) /* if total DWs = 4, then bypass loop */
+ b L(du6_loop)
+ .align 4
+L(du6_1dw):
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du6_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du6_loop):
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 48
+ srdi 8,6, 64-48
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 48
+ srdi 8,6, 64-48
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du6_loop)
+ .align 4
+L(du6_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du7_do):
+ bf 30,L(du7_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 56
+ srdi 8,6, 64-56
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du7_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du7_fini) /* if total DWs = 4, then bypass loop */
+ b L(du7_loop)
+ .align 4
+L(du7_1dw):
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du7_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du7_loop):
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 56
+ srdi 8,6, 64-56
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 56
+ srdi 8,6, 64-56
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du7_loop)
+ .align 4
+L(du7_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du_done):
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+ beq cr1,0f /* If the tail is 0 bytes we are done! */
+
+ add 3,3,0
+ add 12,12,0
+/* At this point we have a tail of 0-7 bytes and we know that the
+   destination is doubleword aligned.  */
+4: bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
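
The du4..du7 blocks above are all instances of one shift-and-merge scheme for a source pointer that is 4-7 bytes past a doubleword boundary: two registers hold consecutive aligned doublewords, and each store combines the tail of one with the head of the next via sldi/srdi/or. A minimal C sketch of the idea, assuming a big-endian 64-bit target; dw_shift_copy and its arguments are hypothetical names, not part of glibc:

#include <stdint.h>
#include <stddef.h>

/* Copy n doublewords to an aligned dst from a src that is off bytes
   (0 < off < 8) past an 8-byte boundary.  Mirrors the sldi/srdi/or
   sequence in the loops above.  */
static void
dw_shift_copy (uint64_t *dst, const unsigned char *src, size_t n, int off)
{
  const uint64_t *s = (const uint64_t *) (src - off); /* round down to a DW */
  int sh_1 = 8 * off;      /* bits of the current DW already consumed */
  int sh_2 = 64 - sh_1;    /* bits to pull in from the next DW */
  uint64_t a = *s++;       /* current doubleword (register 6 above) */

  while (n-- > 0)
    {
      uint64_t b = *s++;                   /* next doubleword (register 7) */
      *dst++ = (a << sh_1) | (b >> sh_2);  /* sldi / srdi / or / std */
      a = b;
    }
}
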
diff --git a/sysdeps/powerpc/powerpc64/power6/memset.S b/sysdeps/powerpc/powerpc64/power6/memset.S
new file mode 100644
index 0000000000..ea74c117dd
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/memset.S
@@ -0,0 +1,419 @@
+/* Optimized 64-bit memset implementation for POWER6.
+ Copyright (C) 1997, 1999, 2000, 2002, 2003, 2007
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), doubleword (64 bits),
+ and cache line (128 bytes). There is a special case for setting whole
+ cache lines to 0, to take advantage of the dcbz instruction. */
+
+ .machine power6
+EALIGN (BP_SYM (memset), 7, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#if __BOUNDED_POINTERS__
+# define rMEMP0 r4 /* Original value of 1st arg. */
+# define rCHR r5 /* Char to set in each byte. */
+# define rLEN r6 /* Length of region to set. */
+# define rMEMP r10 /* Address at which we are storing. */
+#else
+# define rMEMP0 r3 /* Original value of 1st arg. */
+# define rCHR r4 /* Char to set in each byte. */
+# define rLEN r5 /* Length of region to set. */
+# define rMEMP r6 /* Address at which we are storing. */
+#endif
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+#define rMEMP3 r9 /* Alt mem pointer. */
+L(_memset):
+#if __BOUNDED_POINTERS__
+ cmpldi cr1, rRTN, 0
+ CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
+ beq cr1, L(b0)
+ STORE_RETURN_VALUE (rMEMP0)
+ STORE_RETURN_BOUNDS (rTMP, rTMP2)
+L(b0):
+#endif
+/* Take care of case for size <= 8. */
+ cmpldi cr1, rLEN, 8
+ andi. rALIGN, rMEMP0, 7
+ mr rMEMP, rMEMP0
+ ble cr1, L(small)
+
+/* Align to doubleword boundary. */
+ cmpldi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ beq+ L(aligned2)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 8
+ cror 28,30,31 /* Detect odd word aligned. */
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ bt 29, L(g4)
+/* Process the even word of doubleword. */
+ bf+ 31, L(g2)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(g4x)
+L(g2):
+ sth rCHR, -6(rMEMP)
+L(g4x):
+ stw rCHR, -4(rMEMP)
+ b L(aligned)
+/* Process the odd word of doubleword. */
+L(g4):
+ bf 28, L(g4x) /* If false, word aligned on odd word. */
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+/* Handle the case of size <= 31. */
+L(aligned2):
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x18
+ subfic rALIGN, rALIGN, 0x20
+ insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stdu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ std rCHR, -8(rMEMP2)
+ stdu rCHR, -16(rMEMP2)
+L(a2):
+
+/* Now aligned to a 32-byte boundary. */
+ .align 4
+L(caligned):
+ cmpldi cr1, rCHR, 0
+ clrrdi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+ beq L(medium) /* We may not actually get to do a full line. */
+ .align 4
+/* Storing a non-zero "c" value. We are aligned at a sector (32-byte)
+ boundary but may not be at a cache line (128-byte) boundary. */
+L(nzloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+ If rLEN is less than the distance to the next cache-line boundary, use
+ the cacheAligned1 code to finish the tail. */
+ cmpldi cr1,rLEN,128
+
+ andi. rTMP,rMEMP,127
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ std rCHR,-8(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ addi rMEMP,rMEMP,32
+ std rCHR,8(rMEMP3)
+ andi. rTMP,rMEMP,127
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,128
+ std rCHR,40(rMEMP3)
+ cmpldi cr6,rLEN,256
+ li rMEMP2,128
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ b L(nzCacheAligned128)
+
+/* Now we are aligned to the cache line and can use dcbtst. */
+ .align 4
+L(nzCacheAligned):
+ cmpldi cr1,rLEN,128
+ blt cr1,L(cacheAligned1)
+ b L(nzCacheAligned128)
+ .align 5
+L(nzCacheAligned128):
+ cmpldi cr1,rLEN,256
+ addi rMEMP3,rMEMP,64
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ std rCHR,24(rMEMP)
+ std rCHR,32(rMEMP)
+ std rCHR,40(rMEMP)
+ std rCHR,48(rMEMP)
+ std rCHR,56(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+ std rCHR,32(rMEMP3)
+ std rCHR,40(rMEMP3)
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ bge cr1,L(nzCacheAligned128)
+ dcbtst 0,rMEMP
+ b L(cacheAligned1)
+ .align 5
+/* Storing a zero "c" value. We are aligned at a sector (32-byte)
+ boundary but may not be at a cache line (128-byte) boundary. If the
+ remaining length spans a full cache line we can use the Data cache
+ block zero instruction. */
+L(zloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+ If rLEN is less than the distance to the next cache-line boundary, use
+ the cacheAligned1 code to finish the tail. */
+ cmpldi cr1,rLEN,128
+ beq L(medium)
+L(getCacheAligned):
+ andi. rTMP,rMEMP,127
+ nop
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ std rCHR,-8(rMEMP3)
+L(getCacheAligned2):
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP,127
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+L(getCacheAligned3):
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,128
+ std rCHR,40(rMEMP3)
+ cmpldi cr6,rLEN,256
+ li rMEMP2,128
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAlignedx)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+ .align 5
+L(cacheAligned):
+ cmpldi cr1,rLEN,128
+ cmpldi cr6,rLEN,256
+ blt cr1,L(cacheAligned1)
+ li rMEMP2,128
+L(cacheAlignedx):
+ cmpldi cr5,rLEN,640
+ blt cr6,L(cacheAligned128)
+ bgt cr5,L(cacheAligned512)
+ cmpldi cr6,rLEN,512
+ dcbz 0,rMEMP
+ cmpldi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAligned256)
+ .align 5
+/* A simple loop for the longer (>640 bytes) lengths. This form limits
+ the number of mispredicted branches to exactly one, at loop exit.*/
+L(cacheAligned512):
+	cmpldi	cr1,rLEN,128
+ blt cr1,L(cacheAligned1)
+ dcbz 0,rMEMP
+ addi rLEN,rLEN,-128
+ addi rMEMP,rMEMP,128
+ b L(cacheAligned512)
+ .align 5
+L(cacheAligned256):
+
+ cmpldi cr6,rLEN,512
+
+ dcbz 0,rMEMP
+ cmpldi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+
+ bge cr6,L(cacheAligned256)
+
+ blt cr1,L(cacheAligned1)
+ .align 4
+L(cacheAligned128):
+ dcbz 0,rMEMP
+ addi rMEMP,rMEMP,128
+ addi rLEN,rLEN,-128
+ nop
+L(cacheAligned1):
+ cmpldi cr1,rLEN,32
+ blt cr1,L(handletail32)
+ addi rMEMP3,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,32
+ std rCHR,-8(rMEMP3)
+L(cacheAligned2):
+ blt cr1,L(handletail32)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,32
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+ nop
+L(cacheAligned3):
+ blt cr1,L(handletail32)
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ std rCHR,40(rMEMP3)
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+
+/* We are here because the length or remainder (rLEN) is less than the
+ cache line/sector size and does not justify aggressive loop unrolling.
+ So set up the preconditions for L(medium) and go there. */
+ .align 3
+L(handletail32):
+ cmpldi cr1,rLEN,0
+ beqlr cr1
+ b L(medium)
+
+ .align 5
+L(small):
+/* Memset of 8 bytes or less. */
+ cmpldi cr6, rLEN, 4
+ cmpldi cr5, rLEN, 1
+ ble cr6,L(le4)
+ subi rLEN, rLEN, 4
+ stb rCHR,0(rMEMP)
+ stb rCHR,1(rMEMP)
+ stb rCHR,2(rMEMP)
+ stb rCHR,3(rMEMP)
+ addi rMEMP,rMEMP, 4
+ cmpldi cr5, rLEN, 1
+L(le4):
+ cmpldi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ cmpldi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt 29, L(medium_29t)
+L(medium_29f):
+ bge cr1, L(medium_27t)
+ bflr 28
+ std rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt cr1, L(medium_27f)
+L(medium_27t):
+ std rCHR, -8(rMEMP)
+ stdu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr 28
+L(medium_28t):
+ std rCHR, -8(rMEMP)
+ blr
+END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+libc_hidden_builtin_def (memset)
+
+/* Copied from bzero.S to prevent the linker from inserting a stub
+ between bzero and memset. */
+ENTRY (BP_SYM (__bzero))
+ CALL_MCOUNT 3
+#if __BOUNDED_POINTERS__
+ mr r6,r4
+ li r5,0
+ mr r4,r3
+ /* Tell memset that we don't want a return value. */
+ li r3,0
+ b L(_memset)
+#else
+ mr r5,r4
+ li r4,0
+ b L(_memset)
+#endif
+END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+
+weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
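
Taken together, the paths above amount to a three-tier dispatch: byte stores for n <= 8, doubleword stores in 32-byte sectors once the destination is aligned, and dcbz to clear whole 128-byte cache lines when the fill value is zero. A rough C rendering of that control flow; memset_sketch and its structure are illustrative only, not the real implementation, and the inline dcbz is PowerPC-specific:

#include <stdint.h>
#include <stddef.h>

void *
memset_sketch (void *s, int c, size_t n)
{
  unsigned char *p = s;

  if (n <= 8)                                  /* L(small) path */
    { while (n-- > 0) *p++ = (unsigned char) c; return s; }

  while (((uintptr_t) p & 31) != 0 && n > 0)   /* align to a 32-byte sector */
    { *p++ = (unsigned char) c; n--; }

  uint64_t v = (unsigned char) c * 0x0101010101010101ULL;
  if (v == 0)
    {
      while (((uintptr_t) p & 127) != 0 && n >= 32)  /* reach a 128B line */
        { uint64_t *q = (uint64_t *) p;
          q[0] = q[1] = q[2] = q[3] = 0; p += 32; n -= 32; }
      while (n >= 128)                         /* L(cacheAligned): dcbz */
        { __asm__ ("dcbz 0,%0" : : "r" (p) : "memory"); p += 128; n -= 128; }
    }
  while (n >= 32)                              /* 32B sectors of DW stores */
    { uint64_t *q = (uint64_t *) p;
      q[0] = q[1] = q[2] = q[3] = v; p += 32; n -= 32; }
  while (n-- > 0) *p++ = (unsigned char) c;    /* 0-31 byte tail */
  return s;
}
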
diff --git a/sysdeps/powerpc/powerpc64/power6/wordcopy.c b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
new file mode 100644
index 0000000000..faddd945b3
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
@@ -0,0 +1,410 @@
+/* _memcopy.c -- subroutines for memory copy functions.
+ Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = a0;
+ ((op_t *) dstp)[1] = a1;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a0 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+
+ if (len == 1)
+ return;
+
+ a0 = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ switch (align)
+ {
+ case 1:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
+ ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 2:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
+ ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 3:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
+ ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 4:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
+ ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 5:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
+ ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 6:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
+ ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 7:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
+ ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ }
+
+}
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = a1;
+ ((op_t *) dstp)[0] = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make srcp aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a2 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ if (len == 1)
+ return;
+
+ a2 = a1;
+ len -= 1;
+ }
+
+ switch (align)
+ {
+ case 1:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
+ ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 2:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
+ ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 3:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
+ ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 4:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
+ ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 5:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
+ ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 6:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
+ ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 7:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
+ ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ }
+}
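
Every case in the two switches above is the same loop with the shift pair specialized per source misalignment, so sh_1 and sh_2 become immediate shift counts instead of variable ones. A rough expansion of what that relies on, for big-endian 64-bit targets (the exact MERGE definition lives in memcopy.h):

/* Roughly, for big-endian targets (see memcopy.h):
     #define MERGE(w0, sh_1, w1, sh_2)  (((w0) << (sh_1)) | ((w1) >> (sh_2)))

   Worked example for the case 3 loop: align == 3 and OPSIZ == 8, so
   sh_1 = 8 * 3 = 24 and sh_2 = 64 - 24 = 40, giving the literal
   MERGE (a0, 24, a1, 64-24) stores seen above.  */
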
diff --git a/sysdeps/powerpc/powerpc64/power6x/Implies b/sysdeps/powerpc/powerpc64/power6x/Implies
new file mode 100644
index 0000000000..d993b0cd73
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6x/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/power6
+powerpc/powerpc64/power5+
diff --git a/sysdeps/powerpc/powerpc64/power6x/fpu/Implies b/sysdeps/powerpc/powerpc64/power6x/fpu/Implies
new file mode 100644
index 0000000000..84510991b2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6x/fpu/Implies
@@ -0,0 +1,3 @@
+powerpc/powerpc64/power6/fpu
+powerpc/powerpc64/power5+/fpu
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S b/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S
new file mode 100644
index 0000000000..4a4a5f8855
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S
@@ -0,0 +1,45 @@
+/* Round double to long int. POWER6x PowerPC64 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power6"
+/* long long int[r3] __llrint (double x[fp1]) */
+ENTRY (__llrint)
+ CALL_MCOUNT 0
+ fctid fp13,fp1
+ mftgpr r3,fp13
+ blr
+ END (__llrint)
+
+strong_alias (__llrint, __lrint)
+weak_alias (__llrint, llrint)
+weak_alias (__lrint, lrint)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__llrint, __llrintl)
+weak_alias (__llrint, llrintl)
+strong_alias (__lrint, __lrintl)
+weak_alias (__lrint, lrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llrint, llrintl, GLIBC_2_1)
+compat_symbol (libm, __lrint, lrintl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S
new file mode 100644
index 0000000000..149feaf965
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S
@@ -0,0 +1,55 @@
+/* llround function. POWER6x PowerPC64 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3] llround (double x [fp1])
+ IEEE 1003.1 llround function. IEEE specifies "round to the nearest
+ integer value, rounding halfway cases away from zero, regardless of
+ the current rounding mode." However, the PowerPC Architecture defines
+ "Round to Nearest" as "Choose the best approximation. In case of a
+ tie, choose the one that is even (least significant bit 0)."
+ So we pre-round using the V2.02 Floating Round to Integer Nearest
+ instruction before we use the Floating Convert to Integer Doubleword
+ with round toward Zero instruction. */
+
+ .machine "power6"
+ENTRY (__llround)
+ CALL_MCOUNT 0
+ frin fp2,fp1 /* Round to nearest +-0.5. */
+ fctidz fp3,fp2 /* Convert To Integer DW round toward 0. */
+ mftgpr r3,fp3 /* Transfer integer to R3. */
+ blr
+ END (__llround)
+
+strong_alias (__llround, __lround)
+weak_alias (__llround, llround)
+weak_alias (__lround, lround)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
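
The frin/fctidz pair implements exactly the required semantics: frin rounds to the nearest integral double with halfway cases going away from zero, and fctidz then converts the already-integral value with truncation, which at that point is exact. A portable C sketch of the same pre-round; llround_sketch is a hypothetical name, and the sketch glosses over double-rounding corner cases that the hardware pair avoids:

#include <math.h>

long long
llround_sketch (double x)
{
  /* Round half away from zero (what frin does), then truncate (fctidz). */
  double r = (x >= 0.0) ? floor (x + 0.5) : ceil (x - 0.5);
  return (long long) r;
}
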
diff --git a/sysdeps/powerpc/sched_cpucount.c b/sysdeps/powerpc/sched_cpucount.c
new file mode 100644
index 0000000000..f151eed231
--- /dev/null
+++ b/sysdeps/powerpc/sched_cpucount.c
@@ -0,0 +1,23 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef _ARCH_PWR5
+# define POPCNT(l) __builtin_popcountl (l)
+#endif
+
+#include <posix/sched_cpucount.c>
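
The generic posix/sched_cpucount.c walks the __cpu_mask words of the set and counts the set bits in each; when POPCNT is defined, as above for _ARCH_PWR5, it uses GCC's __builtin_popcountl, which the compiler can expand with the POWER5 population-count hardware. A simplified sketch of that counting loop (illustrative only; the real loop is in posix/sched_cpucount.c):

#define _GNU_SOURCE
#include <sched.h>
#include <stddef.h>

int
cpucount_sketch (size_t setsize, const cpu_set_t *setp)
{
  const unsigned long int *bits = (const unsigned long int *) setp;
  int count = 0;

  for (size_t i = 0; i < setsize / sizeof (unsigned long int); ++i)
    {
#ifdef POPCNT
      count += POPCNT (bits[i]);        /* hardware population count */
#else
      unsigned long int w = bits[i];
      while (w != 0)                    /* clear the lowest set bit */
        {
          w &= w - 1;
          ++count;
        }
#endif
    }
  return count;
}
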
diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c
new file mode 100644
index 0000000000..32c6aef951
--- /dev/null
+++ b/sysdeps/s390/dl-procinfo.c
@@ -0,0 +1,80 @@
+/* Data for s390 version of processor capability information.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* This information must be kept in sync with the _DL_HWCAP_COUNT and
+ _DL_PLATFORM_COUNT definitions in procinfo.h.
+
+ If anything is added here, check whether the size of each string
+ still fits within the given array size.
+
+ All the #ifdefs in the definitions are quite irritating but
+ necessary if we want to avoid duplicating the information. There
+ are three different modes:
+
+ - PROCINFO_DECL is defined. This means we are only interested in
+ declarations.
+
+ - PROCINFO_DECL is not defined:
+
+ + if SHARED is defined the file is included in an array
+ initializer. The .element = { ... } syntax is needed.
+
+ + if SHARED is not defined a normal array initialization is
+ needed.
+ */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !defined PROCINFO_DECL && defined SHARED
+ ._dl_s390_cap_flags
+#else
+PROCINFO_CLASS const char _dl_s390_cap_flags[7][6]
+#endif
+#ifndef PROCINFO_DECL
+= {
+ "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
+ }
+#endif
+#if !defined SHARED || defined PROCINFO_DECL
+;
+#else
+,
+#endif
+
+#if !defined PROCINFO_DECL && defined SHARED
+ ._dl_s390_platforms
+#else
+PROCINFO_CLASS const char _dl_s390_platforms[4][7]
+#endif
+#ifndef PROCINFO_DECL
+= {
+ "g5", "z900", "z990", "z9-109"
+ }
+#endif
+#if !defined SHARED || defined PROCINFO_DECL
+;
+#else
+,
+#endif
+
+#undef PROCINFO_DECL
+#undef PROCINFO_CLASS
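
The #ifdef dance above lets one file expand three different ways depending on who includes it. Roughly, the _dl_s390_cap_flags part expands as follows (illustrative expansions, not literal preprocessor output):

/* 1. PROCINFO_DECL defined -- declaration only (e.g. from ldsodefs.h):  */
PROCINFO_CLASS const char _dl_s390_cap_flags[7][6];

/* 2. PROCINFO_DECL undefined, SHARED defined -- a designated member
      initializer inside the rtld globals initializer:  */
._dl_s390_cap_flags = { "esan3", "zarch", "stfle", "msa",
                        "ldisp", "eimm", "dfp" },

/* 3. PROCINFO_DECL undefined, SHARED undefined -- a plain definition:  */
const char _dl_s390_cap_flags[7][6] =
  { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp" };
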
diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h
new file mode 100644
index 0000000000..178d7cc017
--- /dev/null
+++ b/sysdeps/s390/dl-procinfo.h
@@ -0,0 +1,99 @@
+/* s390 version of processor capability information handling macros.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _DL_PROCINFO_H
+#define _DL_PROCINFO_H 1
+#include <ldsodefs.h>
+
+#define _DL_HWCAP_COUNT 7
+
+#define _DL_PLATFORMS_COUNT 4
+
+/* The kernel provides up to 32 capability bits with elf_hwcap. */
+#define _DL_FIRST_PLATFORM 32
+/* Mask to filter out platforms. */
+#define _DL_HWCAP_PLATFORM (((1ULL << _DL_PLATFORMS_COUNT) - 1) \
+ << _DL_FIRST_PLATFORM)
+
+/* Hardware capability bit numbers are derived directly from the
+ facility indications as stored by the "store facility list" (STFL)
+ instruction. */
+
+enum
+{
+ HWCAP_S390_ESAN3 = 1 << 0,
+ HWCAP_S390_ZARCH = 1 << 1,
+ HWCAP_S390_STFLE = 1 << 2,
+ HWCAP_S390_MSA = 1 << 3,
+ HWCAP_S390_LDISP = 1 << 4,
+ HWCAP_S390_EIMM = 1 << 5,
+ HWCAP_S390_DFP = 1 << 6,
+};
+
+#define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \
+ | HWCAP_S390_EIMM | HWCAP_S390_DFP)
+
+/* We cannot provide a general printing function. */
+#define _dl_procinfo(word) -1
+
+static inline const char *
+__attribute__ ((unused))
+_dl_hwcap_string (int idx)
+{
+ return GLRO(dl_s390_cap_flags)[idx];
+}
+
+static inline const char *
+__attribute__ ((unused))
+_dl_platform_string (int idx)
+{
+ return GLRO(dl_s390_platforms)[idx - _DL_FIRST_PLATFORM];
+}
+
+static inline int
+__attribute__ ((unused, always_inline))
+_dl_string_hwcap (const char *str)
+{
+ int i;
+
+ for (i = 0; i < _DL_HWCAP_COUNT; i++)
+ {
+ if (strcmp (str, GLRO(dl_s390_cap_flags)[i]) == 0)
+ return i;
+ }
+ return -1;
+}
+
+static inline int
+__attribute__ ((unused, always_inline))
+_dl_string_platform (const char *str)
+{
+ int i;
+
+ if (str != NULL)
+ for (i = 0; i < _DL_PLATFORMS_COUNT; ++i)
+ {
+ if (strcmp (str, GLRO(dl_s390_platforms)[i]) == 0)
+ return _DL_FIRST_PLATFORM + i;
+ }
+ return -1;
+}
+
+#endif /* dl-procinfo.h */
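
For concreteness, the platform mask above works out as follows (a worked expansion of the macro, nothing more):

/* _DL_PLATFORMS_COUNT == 4 and _DL_FIRST_PLATFORM == 32, hence
   _DL_HWCAP_PLATFORM == ((1ULL << 4) - 1) << 32 == 0xf00000000ULL:
   the four platform bits sit at positions 32-35, just above the up to
   32 hwcap bits the kernel supplies in elf_hwcap.  */
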
diff --git a/sysdeps/sh/bsd-_setjmp.S b/sysdeps/sh/bsd-_setjmp.S
index 2811fcfd1b..dbfd95259b 100644
--- a/sysdeps/sh/bsd-_setjmp.S
+++ b/sysdeps/sh/bsd-_setjmp.S
@@ -1,5 +1,5 @@
/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. SH version.
- Copyright (C) 1999, 2000, 2002 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2002, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -36,14 +36,14 @@ ENTRY (_setjmp)
mov.l 3f, r1
mov.l @(r0,r1), r1
jmp @r1
- mov #0, r0
+ mov #0, r5
.align 2
3:
.long C_SYMBOL_NAME(__sigsetjmp@GOT)
#else
mov.l 1f, r1
jmp @r1
- mov #0, r0
+ mov #0, r5
.align 2
1:
.long C_SYMBOL_NAME(__sigsetjmp)
diff --git a/sysdeps/sh/bsd-setjmp.S b/sysdeps/sh/bsd-setjmp.S
index e076284428..de226e1284 100644
--- a/sysdeps/sh/bsd-setjmp.S
+++ b/sysdeps/sh/bsd-setjmp.S
@@ -1,5 +1,5 @@
/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. SH version.
- Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -36,14 +36,14 @@ ENTRY (setjmp)
mov.l 3f, r1
mov.l @(r0,r1), r1
jmp @r1
- mov #1, r0
+ mov #1, r5
.align 2
3:
.long C_SYMBOL_NAME(__sigsetjmp@GOT)
#else
mov.l 1f, r1
jmp @r1
- mov #1, r0
+ mov #1, r5
.align 2
1:
.long C_SYMBOL_NAME(__sigsetjmp)
diff --git a/sysdeps/unix/clock_gettime.c b/sysdeps/unix/clock_gettime.c
index f698f0151b..fbaaf301e4 100644
--- a/sysdeps/unix/clock_gettime.c
+++ b/sysdeps/unix/clock_gettime.c
@@ -1,5 +1,5 @@
/* clock_gettime -- Get the current time from a POSIX clockid_t. Unix version.
- Copyright (C) 1999-2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 1999-2004, 2005, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -93,7 +93,6 @@ int
clock_gettime (clockid_t clock_id, struct timespec *tp)
{
int retval = -1;
- struct timeval tv;
switch (clock_id)
{
@@ -103,9 +102,12 @@ clock_gettime (clockid_t clock_id, struct timespec *tp)
#ifndef HANDLED_REALTIME
case CLOCK_REALTIME:
- retval = gettimeofday (&tv, NULL);
- if (retval == 0)
- TIMEVAL_TO_TIMESPEC (&tv, tp);
+ {
+ struct timeval tv;
+ retval = gettimeofday (&tv, NULL);
+ if (retval == 0)
+ TIMEVAL_TO_TIMESPEC (&tv, tp);
+ }
break;
#endif
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index f59e340f8e..119e37b345 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -137,7 +137,7 @@ endif
ifeq ($(subdir),io)
sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \
- sync_file_range
+ sync_file_range open_2
endif
ifeq ($(subdir),elf)
diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h
index 31db66f8ac..5eaa2fe528 100644
--- a/sysdeps/unix/sysv/linux/bits/sched.h
+++ b/sysdeps/unix/sysv/linux/bits/sched.h
@@ -104,7 +104,7 @@ struct __sched_param
# define __CPU_SETSIZE 1024
# define __NCPUBITS (8 * sizeof (__cpu_mask))
-/* Type for array elements in 'cpu_set'. */
+/* Type for array elements in 'cpu_set_t'. */
typedef unsigned long int __cpu_mask;
/* Basic access functions. */
@@ -118,20 +118,72 @@ typedef struct
} cpu_set_t;
/* Access functions for CPU masks. */
-# define __CPU_ZERO(cpusetp) \
+# if __GNUC_PREREQ (2, 91)
+# define __CPU_ZERO_S(setsize, cpusetp) \
+ do __builtin_memset (cpusetp, '\0', setsize); while (0)
+# else
+# define __CPU_ZERO_S(setsize, cpusetp) \
do { \
- unsigned int __i; \
+ size_t __i; \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
cpu_set_t *__arr = (cpusetp); \
- for (__i = 0; __i < sizeof (cpu_set_t) / sizeof (__cpu_mask); ++__i) \
+ for (__i = 0; __i < __imax; ++__i) \
__arr->__bits[__i] = 0; \
} while (0)
-# define __CPU_SET(cpu, cpusetp) \
- ((cpusetp)->__bits[__CPUELT (cpu)] |= __CPUMASK (cpu))
-# define __CPU_CLR(cpu, cpusetp) \
- ((cpusetp)->__bits[__CPUELT (cpu)] &= ~__CPUMASK (cpu))
-# define __CPU_ISSET(cpu, cpusetp) \
- (((cpusetp)->__bits[__CPUELT (cpu)] & __CPUMASK (cpu)) != 0)
-extern int __sched_cpucount (size_t __setsize, cpu_set_t *__setp) __THROW;
-# define __CPU_COUNT(cpusetp) \
- __sched_cpucount (sizeof (cpu_set_t), cpusetp)
+# endif
+# define __CPU_SET_S(cpu, setsize, cpusetp) \
+ ({ size_t __cpu = (cpu); \
+ __cpu < 8 * (setsize) \
+ ? ((cpusetp)->__bits[__CPUELT (__cpu)] |= __CPUMASK (__cpu)) : 0; })
+# define __CPU_CLR_S(cpu, setsize, cpusetp) \
+ ({ size_t __cpu = (cpu); \
+ __cpu < 8 * (setsize) \
+ ? ((cpusetp)->__bits[__CPUELT (__cpu)] &= ~__CPUMASK (__cpu)) : 0; })
+# define __CPU_ISSET_S(cpu, setsize, cpusetp) \
+ ({ size_t __cpu = (cpu); \
+ __cpu < 8 * (setsize) \
+ ? (((cpusetp)->__bits[__CPUELT (__cpu)] & __CPUMASK (__cpu))) != 0 : 0; })
+
+# define __CPU_COUNT_S(setsize, cpusetp) \
+ __sched_cpucount (setsize, cpusetp)
+
+# if __GNUC_PREREQ (2, 91)
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+ (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0)
+# else
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+ ({ cpu_set_t *__arr1 = (cpusetp1); \
+ cpu_set_t *__arr2 = (cpusetp2); \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
+ size_t __i; \
+ for (__i = 0; __i < __imax; ++__i) \
+ if (__arr1->__bits[__i] != __arr2->__bits[__i]) \
+ break; \
+ __i == __imax; })
+# endif
+
+# define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \
+ ({ cpu_set_t *__dest = (destset); \
+ cpu_set_t *__arr1 = (srcset1); \
+ cpu_set_t *__arr2 = (srcset2); \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
+ size_t __i; \
+ for (__i = 0; __i < __imax; ++__i) \
+ __dest->__bits[__i] = __arr1->__bits[__i] op __arr2->__bits[__i]; \
+ __dest; })
+
+# define __CPU_ALLOC_SIZE(count) \
+ ((((count) + __NCPUBITS - 1) / __NCPUBITS) * 8)
+# define __CPU_ALLOC(count) __sched_cpualloc (count)
+# define __CPU_FREE(cpuset) __sched_cpufree (cpuset)
+
+__BEGIN_DECLS
+
+extern int __sched_cpucount (size_t __setsize, const cpu_set_t *__setp)
+ __THROW;
+extern cpu_set_t *__sched_cpualloc (size_t __count) __THROW __wur;
+extern void __sched_cpufree (cpu_set_t *__set) __THROW;
+
+__END_DECLS
+
#endif
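
The new *_S macros take the set size explicitly so a set can be sized for more CPUs than the 1024 baked into cpu_set_t, with __CPU_ALLOC_SIZE rounding the byte count up to whole __cpu_mask words. Through the public CPU_* wrappers that <sched.h> layers on top of these macros, the intended use looks roughly like this (error handling elided):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int
main (void)
{
  int ncpus = 2048;                    /* more than cpu_set_t's 1024 bits */
  cpu_set_t *set = CPU_ALLOC (ncpus);
  size_t size = CPU_ALLOC_SIZE (ncpus);

  CPU_ZERO_S (size, set);
  CPU_SET_S (0, size, set);
  CPU_SET_S (1536, size, set);         /* out of range for the old CPU_SET */
  printf ("%d CPUs set\n", CPU_COUNT_S (size, set));

  CPU_FREE (set);
  return 0;
}
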
diff --git a/sysdeps/unix/sysv/linux/bits/socket.h b/sysdeps/unix/sysv/linux/bits/socket.h
index 377f589bbd..89a9106b2e 100644
--- a/sysdeps/unix/sysv/linux/bits/socket.h
+++ b/sysdeps/unix/sysv/linux/bits/socket.h
@@ -63,7 +63,7 @@ enum __socket_type
/* Protocol families. */
#define PF_UNSPEC 0 /* Unspecified. */
#define PF_LOCAL 1 /* Local to host (pipes and file-domain). */
-#define PF_UNIX PF_LOCAL /* Old BSD name for PF_LOCAL. */
+#define PF_UNIX PF_LOCAL /* POSIX name for PF_LOCAL. */
#define PF_FILE PF_LOCAL /* Another non-standard name for PF_LOCAL. */
#define PF_INET 2 /* IP protocol family. */
#define PF_AX25 3 /* Amateur Radio AX.25. */
@@ -90,7 +90,9 @@ enum __socket_type
#define PF_PPPOX 24 /* PPPoX sockets. */
#define PF_WANPIPE 25 /* Wanpipe API sockets. */
#define PF_BLUETOOTH 31 /* Bluetooth sockets. */
-#define PF_MAX 32 /* For now.. */
+#define PF_IUCV 32 /* IUCV sockets. */
+#define PF_RXRPC 33 /* RxRPC sockets. */
+#define PF_MAX 34 /* For now.. */
/* Address families. */
#define AF_UNSPEC PF_UNSPEC
@@ -122,6 +124,8 @@ enum __socket_type
#define AF_PPPOX PF_PPPOX
#define AF_WANPIPE PF_WANPIPE
#define AF_BLUETOOTH PF_BLUETOOTH
+#define AF_IUCV PF_IUCV
+#define AF_RXRPC PF_RXRPC
#define AF_MAX PF_MAX
/* Socket level values. Others are defined in the appropriate headers.
@@ -206,8 +210,13 @@ enum
#define MSG_ERRQUEUE MSG_ERRQUEUE
MSG_NOSIGNAL = 0x4000, /* Do not generate SIGPIPE. */
#define MSG_NOSIGNAL MSG_NOSIGNAL
- MSG_MORE = 0x8000 /* Sender will send more. */
+ MSG_MORE = 0x8000, /* Sender will send more. */
#define MSG_MORE MSG_MORE
+
+ MSG_CMSG_CLOEXEC = 0x40000000 /* Set close_on_exit for file
+ descriptor received through
+ SCM_RIGHTS. */
+#define MSG_CMSG_CLOEXEC MSG_CMSG_CLOEXEC
};
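
MSG_CMSG_CLOEXEC matters when a file descriptor arrives via SCM_RIGHTS: passing the flag to recvmsg makes the kernel set close-on-exec on the received descriptor atomically, closing the race with a concurrent fork/exec. A short usage sketch; recv_fd_cloexec is a hypothetical helper and error handling is minimal:

#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* Receive one descriptor over an AF_UNIX socket with close-on-exec
   set atomically by the kernel.  */
static int
recv_fd_cloexec (int sock)
{
  char byte;
  char cbuf[CMSG_SPACE (sizeof (int))];
  struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
  struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
                        .msg_control = cbuf, .msg_controllen = sizeof cbuf };
  int fd = -1;

  if (recvmsg (sock, &msg, MSG_CMSG_CLOEXEC) < 0)
    return -1;

  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
  if (cmsg != NULL && cmsg->cmsg_level == SOL_SOCKET
      && cmsg->cmsg_type == SCM_RIGHTS)
    memcpy (&fd, CMSG_DATA (cmsg), sizeof fd);
  return fd;
}
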
diff --git a/sysdeps/unix/sysv/linux/check_pf.c b/sysdeps/unix/sysv/linux/check_pf.c
index 3312da3af8..fbe805bf3b 100644
--- a/sysdeps/unix/sysv/linux/check_pf.c
+++ b/sysdeps/unix/sysv/linux/check_pf.c
@@ -136,40 +136,72 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
if (nlmh->nlmsg_type == RTM_NEWADDR)
{
struct ifaddrmsg *ifam = (struct ifaddrmsg *) NLMSG_DATA (nlmh);
+ struct rtattr *rta = IFA_RTA (ifam);
+ size_t len = nlmh->nlmsg_len - NLMSG_LENGTH (sizeof (*ifam));
switch (ifam->ifa_family)
{
+ const void *local;
+ const void *address;
+
case AF_INET:
- *seen_ipv4 = true;
+ local = NULL;
+ address = NULL;
+ while (RTA_OK (rta, len))
+ {
+ switch (rta->rta_type)
+ {
+ case IFA_LOCAL:
+ local = RTA_DATA (rta);
+ break;
+
+ case IFA_ADDRESS:
+ address = RTA_DATA (rta);
+ goto out_v4;
+ }
+
+ rta = RTA_NEXT (rta, len);
+ }
+
+ if (local != NULL)
+ {
+ out_v4:
+ if (*(const in_addr_t *) (address ?: local)
+ != htonl (INADDR_LOOPBACK))
+ *seen_ipv4 = true;
+ }
break;
+
case AF_INET6:
- *seen_ipv6 = true;
+ local = NULL;
+ address = NULL;
+ while (RTA_OK (rta, len))
+ {
+ switch (rta->rta_type)
+ {
+ case IFA_LOCAL:
+ local = RTA_DATA (rta);
+ break;
+
+ case IFA_ADDRESS:
+ address = RTA_DATA (rta);
+ goto out_v6;
+ }
+
+ rta = RTA_NEXT (rta, len);
+ }
+
+ if (local != NULL)
+ {
+ out_v6:
+ if (!IN6_IS_ADDR_LOOPBACK (address ?: local))
+ *seen_ipv6 = true;
+ }
if (ifam->ifa_flags & (IFA_F_DEPRECATED
| IFA_F_TEMPORARY
| IFA_F_HOMEADDRESS))
{
- struct rtattr *rta = IFA_RTA (ifam);
- size_t len = (nlmh->nlmsg_len
- - NLMSG_LENGTH (sizeof (*ifam)));
- void *local = NULL;
- void *address = NULL;
- while (RTA_OK (rta, len))
- {
- switch (rta->rta_type)
- {
- case IFA_LOCAL:
- local = RTA_DATA (rta);
- break;
-
- case IFA_ADDRESS:
- address = RTA_DATA (rta);
- break;
- }
-
- rta = RTA_NEXT (rta, len);
- }
-
struct in6ailist *newp = alloca (sizeof (*newp));
newp->info.flags = (((ifam->ifa_flags & IFA_F_DEPRECATED)
? in6ai_deprecated : 0)
@@ -200,7 +232,7 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
close_not_cancel_no_status (fd);
- if (in6ailist != NULL)
+ if (*seen_ipv6 && in6ailist != NULL)
{
*in6ai = malloc (in6ailistlen * sizeof (**in6ai));
if (*in6ai == NULL)
diff --git a/sysdeps/unix/sysv/linux/futimes.c b/sysdeps/unix/sysv/linux/futimes.c
index 610f1deb29..272b83eb05 100644
--- a/sysdeps/unix/sysv/linux/futimes.c
+++ b/sysdeps/unix/sysv/linux/futimes.c
@@ -1,5 +1,5 @@
/* futimes -- change access and modification times of open file. Linux version.
- Copyright (C) 2002,2003,2005,2006 Free Software Foundation, Inc.
+ Copyright (C) 2002,2003,2005,2006,2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -29,7 +29,7 @@
#include <kernel-features.h>
-#ifndef __ASSUME_UTIMENSAT
+#if defined __NR_utimensat && !defined __ASSUME_UTIMENSAT
static int miss_utimensat;
#endif
diff --git a/sysdeps/unix/sysv/linux/i386/bits/fcntl.h b/sysdeps/unix/sysv/linux/i386/bits/fcntl.h
index 6de33302ee..83ca3c2861 100644
--- a/sysdeps/unix/sysv/linux/i386/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/i386/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2004, 2006
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2004, 2006, 2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -50,6 +50,7 @@
# define O_DIRECTORY 0200000 /* Must be a directory. */
# define O_NOFOLLOW 0400000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
 /* For now Linux has synchronicity options for data and read operations.
diff --git a/sysdeps/unix/sysv/linux/i386/sysconf.c b/sysdeps/unix/sysv/linux/i386/sysconf.c
index 2ffbd5227b..78877fb2a1 100644
--- a/sysdeps/unix/sysv/linux/i386/sysconf.c
+++ b/sysdeps/unix/sysv/linux/i386/sysconf.c
@@ -1,5 +1,5 @@
/* Get file-specific information about a file. Linux version.
- Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -97,11 +97,13 @@ static const struct intel_02_cache_info
{ 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 },
{ 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 },
{ 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 },
+ { 0x48, _SC_LEVEL2_CACHE_SIZE, 3145728, 12, 64 },
{ 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 },
{ 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 },
{ 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
{ 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
{ 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
+ { 0x4e, _SC_LEVEL2_CACHE_SIZE, 6291456, 24, 64 },
{ 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 },
{ 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 },
{ 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },
diff --git a/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h b/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h
index ed8c2da9e2..8fa96e4e26 100644
--- a/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux/IA64.
- Copyright (C) 1999, 2000, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2004, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -49,6 +49,7 @@
# define O_DIRECTORY 0200000 /* must be a directory */
# define O_NOFOLLOW 0400000 /* don't follow links */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
#ifdef __USE_LARGEFILE64
diff --git a/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h b/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h
index ff77627a83..c824be2da7 100644
--- a/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux/ia64 version.
- Copyright (C) 2001, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -127,6 +127,28 @@ struct pt_all_user_regs
struct ia64_fpreg fr[128];
};
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
index ed32001544..f8116d8885 100644
--- a/sysdeps/unix/sysv/linux/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/kernel-features.h
@@ -1,6 +1,6 @@
/* Set flags signalling availability of kernel features based on given
kernel version number.
- Copyright (C) 1999-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1999-2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -463,3 +463,13 @@
#if __LINUX_KERNEL_VERSION >= 0x020616
# define __ASSUME_UTIMENSAT 1
#endif
+
+/* Support for private futexes was added in 2.6.22. */
+#if __LINUX_KERNEL_VERSION >= 0x020616
+# define __ASSUME_PRIVATE_FUTEX 1
+#endif
+
+/* Support for fallocate was added in 2.6.23. */
+#if __LINUX_KERNEL_VERSION >= 0x020617
+# define __ASSUME_FALLOCATE 1
+#endif
diff --git a/sysdeps/unix/sysv/linux/nscd_setup_thread.c b/sysdeps/unix/sysv/linux/nscd_setup_thread.c
index 1589c24ea9..56e23dc831 100644
--- a/sysdeps/unix/sysv/linux/nscd_setup_thread.c
+++ b/sysdeps/unix/sysv/linux/nscd_setup_thread.c
@@ -4,8 +4,9 @@
Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2 as
- published by the Free Software Foundation.
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/sysdeps/unix/sysv/linux/open64.c b/sysdeps/unix/sysv/linux/open64.c
index 5fb5363e1e..c52ce39db1 100644
--- a/sysdeps/unix/sysv/linux/open64.c
+++ b/sysdeps/unix/sysv/linux/open64.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991,1995-1997,1999,2000,2002 Free Software Foundation, Inc.
+/* Copyright (C) 1991,1995-1997,1999,2000,2002,2007
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,7 +20,7 @@
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
-#include <bp-sym.h>
+#include <stdio.h>
#include <sysdep-cancel.h>
/* Open FILE with access OFLAG. If OFLAG includes O_CREAT,
@@ -48,6 +49,29 @@ __libc_open64 (const char *file, int oflag, ...)
return result;
}
-weak_alias (__libc_open64, BP_SYM (__open64))
-libc_hidden_weak (BP_SYM (__open64))
-weak_alias (__libc_open64, BP_SYM (open64))
+weak_alias (__libc_open64, __open64)
+libc_hidden_weak (__open64)
+weak_alias (__libc_open64, open64)
+
+
+#ifndef PTW
+int
+__open64_2 (file, oflag)
+ const char *file;
+ int oflag;
+{
+ if (oflag & O_CREAT)
+ __fortify_fail ("invalid open64 call: O_CREAT without mode");
+
+ if (SINGLE_THREAD_P)
+ return INLINE_SYSCALL (open, 2, file, oflag | O_LARGEFILE);
+
+ int oldtype = LIBC_CANCEL_ASYNC ();
+
+ int result = INLINE_SYSCALL (open, 2, file, oflag | O_LARGEFILE);
+
+ LIBC_CANCEL_RESET (oldtype);
+
+ return result;
+}
+#endif
diff --git a/sysdeps/unix/sysv/linux/open_2.c b/sysdeps/unix/sysv/linux/open_2.c
new file mode 100644
index 0000000000..0a6dc28b43
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/open_2.c
@@ -0,0 +1,32 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <fcntl.h>
+#include <stdio.h>
+
+
+int
+__open_2 (file, oflag)
+ const char *file;
+ int oflag;
+{
+ if (oflag & O_CREAT)
+ __fortify_fail ("invalid open call: O_CREAT without mode");
+
+ return __open (file, oflag);
+}
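
__fortify_fail aborts with the given message, so a fortified open call that passes O_CREAT but no mode dies loudly instead of picking up an indeterminate mode from the varargs. The headers installed for _FORTIFY_SOURCE route mode-less open calls here when O_CREAT cannot be excluded at compile time; from the caller's side that looks roughly like the following (a hypothetical illustration; fortification requires compiling with optimization):

#define _FORTIFY_SOURCE 2
#include <fcntl.h>

int
open_config (const char *path)
{
  /* Fine: O_CREAT is absent, so no mode argument is required.  */
  return open (path, O_RDONLY);

  /* By contrast, open (path, O_CREAT) with no mode would be rejected
     at compile time where provable, or routed to __open_2 and aborted
     at run time.  */
}
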
diff --git a/sysdeps/unix/sysv/linux/openat.c b/sysdeps/unix/sysv/linux/openat.c
index df53b6cf2c..45b566f2d0 100644
--- a/sysdeps/unix/sysv/linux/openat.c
+++ b/sysdeps/unix/sysv/linux/openat.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -30,6 +30,7 @@
#ifndef OPENAT
# define OPENAT openat
+# define __OPENAT_2 __openat_2
# ifndef __ASSUME_ATFCTS
/* Set errno after a failed call. If BUF is not null,
@@ -173,3 +174,18 @@ __OPENAT (fd, file, oflag)
}
libc_hidden_def (__OPENAT)
weak_alias (__OPENAT, OPENAT)
+
+
+int
+__OPENAT_2 (fd, file, oflag)
+ int fd;
+ const char *file;
+ int oflag;
+{
+ if (oflag & O_CREAT)
+#define MSG(s) MSG2 (s)
+#define MSG2(s) "invalid " #s " call: O_CREAT without mode"
+ __fortify_fail (MSG (OPENAT));
+
+ return __OPENAT (fd, file, oflag);
+}
diff --git a/sysdeps/unix/sysv/linux/openat64.c b/sysdeps/unix/sysv/linux/openat64.c
index 9e7a2b3737..013a13effa 100644
--- a/sysdeps/unix/sysv/linux/openat64.c
+++ b/sysdeps/unix/sysv/linux/openat64.c
@@ -1,4 +1,5 @@
#define OPENAT openat64
+#define __OPENAT_2 __openat64_2
#define MORE_OFLAGS O_LARGEFILE
#include "openat.c"
diff --git a/sysdeps/unix/sysv/linux/posix_fallocate.c b/sysdeps/unix/sysv/linux/posix_fallocate.c
new file mode 100644
index 0000000000..9cfade60ca
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/posix_fallocate.c
@@ -0,0 +1,60 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <fcntl.h>
+#include <kernel-features.h>
+#include <sysdep.h>
+
+#define posix_fallocate static internal_fallocate
+#include <sysdeps/posix/posix_fallocate.c>
+#undef posix_fallocate
+
+#if !defined __ASSUME_FALLOCATE && defined __NR_fallocate
+int __have_fallocate attribute_hidden;
+#endif
+
+
+/* Reserve storage for the data of the file associated with FD. */
+int
+posix_fallocate (int fd, __off_t offset, __off_t len)
+{
+#ifdef __NR_fallocate
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (__have_fallocate >= 0, 1))
+# endif
+ {
+ INTERNAL_SYSCALL_DECL (err);
+ int res = INTERNAL_SYSCALL (fallocate, err, 4, fd, 0,
+ __LONG_LONG_PAIR (offset >> 31, offset),
+ __LONG_LONG_PAIR (len >> 31, len));
+
+ if (! INTERNAL_SYSCALL_ERROR_P (res, err))
+ return 0;
+
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (INTERNAL_SYSCALL_ERRNO (res, err) == ENOSYS, 0))
+ __have_fallocate = -1;
+ else
+# endif
+ if (INTERNAL_SYSCALL_ERRNO (res, err) != EOPNOTSUPP)
+ return INTERNAL_SYSCALL_ERRNO (res, err);
+ }
+#endif
+
+ return internal_fallocate (fd, offset, len);
+}
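
The offset >> 31 in the syscall arguments is easy to misread as an off-by-one against the >> 32 used in the 64-bit sibling below. It is deliberate: here off_t is the 32-bit type, shifting a 32-bit value by 32 is undefined, and an arithmetic shift by 31 produces exactly the sign-extension word (0 or -1) that the kernel expects as the high half. A tiny illustration, assuming the usual arithmetic right shift on negative values:

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int32_t offset = -4096;                 /* a 32-bit off_t value */
  int32_t hi = offset >> 31;              /* sign word: 0 or -1 */
  printf ("hi=%d lo=%d\n", hi, offset);   /* prints hi=-1 lo=-4096 */

  offset = 4096;
  printf ("hi=%d lo=%d\n", offset >> 31, offset);  /* hi=0 lo=4096 */
  return 0;
}
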
diff --git a/sysdeps/unix/sysv/linux/posix_fallocate64.c b/sysdeps/unix/sysv/linux/posix_fallocate64.c
new file mode 100644
index 0000000000..c5b8a34494
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/posix_fallocate64.c
@@ -0,0 +1,64 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <fcntl.h>
+#include <kernel-features.h>
+#include <sysdep.h>
+
+extern int __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len);
+#define __posix_fallocate64_l64 static internal_fallocate64
+#include <sysdeps/posix/posix_fallocate64.c>
+#undef __posix_fallocate64_l64
+
+#if !defined __ASSUME_FALLOCATE && defined __NR_fallocate
+/* Defined in posix_fallocate.c. */
+extern int __have_fallocate attribute_hidden;
+#endif
+
+
+/* Reserve storage for the data of the file associated with FD. */
+int
+__posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len)
+{
+#ifdef __NR_fallocate
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (__have_fallocate >= 0, 1))
+# endif
+ {
+ INTERNAL_SYSCALL_DECL (err);
+ int res = INTERNAL_SYSCALL (fallocate, err, 6, fd, 0,
+ __LONG_LONG_PAIR ((long int) (offset >> 32),
+ (long int) offset),
+ __LONG_LONG_PAIR ((long int) (len >> 32),
+ (long int) len));
+
+ if (! INTERNAL_SYSCALL_ERROR_P (res, err))
+ return 0;
+
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (INTERNAL_SYSCALL_ERRNO (res, err) == ENOSYS, 0))
+ __have_fallocate = -1;
+ else
+# endif
+ if (INTERNAL_SYSCALL_ERRNO (res, err) != EOPNOTSUPP)
+ return INTERNAL_SYSCALL_ERRNO (res, err);
+ }
+#endif
+
+ return internal_fallocate64 (fd, offset, len);
+}
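
A side note on the __LONG_LONG_PAIR calls above: on 32-bit targets each 64-bit argument is passed to the kernel as two long words, ordered according to endianness. A standalone sketch of the split (the variable names are illustrative):

/* How a 64-bit offset is split into the two words that
   __LONG_LONG_PAIR orders for the fallocate syscall; the kernel
   reassembles ((uint64_t) hi << 32) | lo.  */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int64_t offset = 0x123456789abcdef0LL;
  uint32_t hi = (uint32_t) (offset >> 32);      /* 0x12345678 */
  uint32_t lo = (uint32_t) offset;              /* 0x9abcdef0 */
  printf ("hi=%" PRIx32 " lo=%" PRIx32 "\n", hi, lo);
  return 0;
}
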
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h b/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
index c4964e0fd8..68015dbca3 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux/PowerPC.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2003, 2004, 2006
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2003, 2004, 2006, 2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -50,6 +50,7 @@
# define O_DIRECTORY 040000 /* Must be a directory. */
# define O_NOFOLLOW 0100000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
#ifdef __USE_LARGEFILE64
diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
index 923eab99ef..a71cfa5b06 100644
--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
+++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
@@ -20,6 +20,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <ldsodefs.h>
+#include <sysdep.h>
#include <bp-start.h>
#include <bp-sym.h>
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/970/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/970/fpu/Implies
new file mode 100644
index 0000000000..52993ae71b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/970/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/970/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power4/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power4/fpu/Implies
new file mode 100644
index 0000000000..3c0690e2fe
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power4/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5+/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5+/fpu/Implies
new file mode 100644
index 0000000000..37d43bb6fc
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5+/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power5+/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5/fpu/Implies
new file mode 100644
index 0000000000..d379a2dd12
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power5/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/Implies
new file mode 100644
index 0000000000..9fb457db93
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power6/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6x/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6x/fpu/Implies
new file mode 100644
index 0000000000..ffca13c0ef
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6x/fpu/Implies
@@ -0,0 +1,4 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power6x/fpu
+powerpc/powerpc32/power6/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/970/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/970/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/970/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power4/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power4/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power4/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5+/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5+/fpu/Implies
new file mode 100644
index 0000000000..cf5913dec3
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5+/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc64/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/power5+/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/Implies
new file mode 100644
index 0000000000..9451147267
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc64/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/power6/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6x/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6x/fpu/Implies
new file mode 100644
index 0000000000..fd74b8341c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6x/fpu/Implies
@@ -0,0 +1,4 @@
+# Make sure this comes before the powerpc/powerpc64/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/power6x/fpu
+powerpc/powerpc64/power6/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h b/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
index 5d055f67fe..23e75fbcff 100644
--- a/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux version.
- Copyright (C) 2001, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -99,6 +99,28 @@ enum __ptrace_request
#define PT_SETSIGINFO PTRACE_SETSIGINFO
};
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
diff --git a/sysdeps/unix/sysv/linux/s390/bits/fcntl.h b/sysdeps/unix/sysv/linux/s390/bits/fcntl.h
index c611028f29..848568532f 100644
--- a/sysdeps/unix/sysv/linux/s390/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/s390/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux.
- Copyright (C) 2000, 2001, 2002, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2000,2001,2002,2004,2006,2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -50,6 +50,7 @@
# define O_DIRECTORY 0200000 /* Must be a directory. */
# define O_NOFOLLOW 0400000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
#ifdef __USE_LARGEFILE64
diff --git a/sysdeps/unix/sysv/linux/s390/dl-procinfo.h b/sysdeps/unix/sysv/linux/s390/dl-procinfo.h
new file mode 100644
index 0000000000..3eeb529053
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/s390/dl-procinfo.h
@@ -0,0 +1,43 @@
+/* Linux/s390 version of processor capability information handling macros.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdeps/s390/dl-procinfo.h>
+#include <ldsodefs.h>
+
+
+#undef _dl_procinfo
+static inline int
+__attribute__ ((unused))
+_dl_procinfo (int word)
+{
+ /* This table should match the information from arch/s390/kernel/setup.c
+ in the kernel sources. */
+ int i;
+
+ _dl_printf ("AT_HWCAP: ");
+
+ for (i = 0; i < _DL_HWCAP_COUNT; ++i)
+ if (word & (1UL << i))
+ _dl_printf (" %s", GLRO(dl_s390_cap_flags)[i]);
+
+ _dl_printf ("\n");
+
+ return 0;
+}
diff --git a/sysdeps/unix/sysv/linux/s390/sys/ptrace.h b/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
index 70eb4f8222..ac186387fc 100644
--- a/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux version.
- Copyright (C) 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2006, 2007 Free Software Foundation, Inc.
Contributed by Denis Joseph Barrow (djbarrow@de.ibm.com).
This file is part of the GNU C Library.
@@ -138,6 +138,28 @@ enum __ptrace_request
#define PT_SETSIGINFO PTRACE_SETSIGINFO
};
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
diff --git a/sysdeps/unix/sysv/linux/sh/bits/fcntl.h b/sysdeps/unix/sysv/linux/sh/bits/fcntl.h
index 6de33302ee..83ca3c2861 100644
--- a/sysdeps/unix/sysv/linux/sh/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/sh/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2004, 2006
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2004, 2006, 2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -50,6 +50,7 @@
# define O_DIRECTORY 0200000 /* Must be a directory. */
# define O_NOFOLLOW 0400000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
/* For now Linux has synchronisity options for data and read operations.
diff --git a/sysdeps/unix/sysv/linux/sh/clone.S b/sysdeps/unix/sysv/linux/sh/clone.S
index 7941c6b3ad..f892c475dd 100644
--- a/sysdeps/unix/sysv/linux/sh/clone.S
+++ b/sysdeps/unix/sysv/linux/sh/clone.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 1999, 2000, 2003, 2004, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -32,12 +32,12 @@
ENTRY(__clone)
/* sanity check arguments. */
tst r4, r4
- bf/s 1f
+ bt/s 0f
tst r5, r5
- bf/s 1f
- mov #-EINVAL,r0
+ bf 1f
+0:
bra .Lsyscall_error
- nop
+ mov #-EINVAL,r0
1:
/* insert the args onto the new stack */
mov.l r7, @-r5
diff --git a/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h b/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
index 17907c4a38..a7b204b33a 100644
--- a/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux/SPARC version.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2006
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2006, 2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -183,6 +183,28 @@ enum __ptrace_request
#define PT_SETSIGINFO PTRACE_SETSIGINFO
};
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
diff --git a/sysdeps/unix/sysv/linux/sys/ptrace.h b/sysdeps/unix/sysv/linux/sys/ptrace.h
index 44284cb307..08658f9764 100644
--- a/sysdeps/unix/sysv/linux/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux version.
- Copyright (C) 1996-1999,2000,2006 Free Software Foundation, Inc.
+ Copyright (C) 1996-1999,2000,2006,2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -129,6 +129,29 @@ enum __ptrace_request
#define PT_SETSIGINFO PTRACE_SETSIGINFO
};
+
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
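
The same option and event enums are added to each per-architecture copy of sys/ptrace.h above. A minimal tracer-side sketch of how they are meant to be used (watch_forks is an illustrative helper; PTRACE_SETOPTIONS and PTRACE_GETEVENTMSG are pre-existing requests):

/* Ask for a stop on fork and decode the extended event code, which
   the kernel places in bits 16..23 of the wait status.  */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

void
watch_forks (pid_t tracee)
{
  /* TRACEE is assumed to be a child already stopped under ptrace.  */
  ptrace (PTRACE_SETOPTIONS, tracee, 0,
          (void *) (long) PTRACE_O_TRACEFORK);
  ptrace (PTRACE_CONT, tracee, 0, 0);

  int status;
  if (waitpid (tracee, &status, 0) == -1)
    return;

  if (WIFSTOPPED (status)
      && ((status >> 16) & 0xff) == PTRACE_EVENT_FORK)
    {
      unsigned long new_pid;
      ptrace (PTRACE_GETEVENTMSG, tracee, 0, &new_pid);
      /* new_pid now holds the pid of the forked child.  */
    }
}
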
diff --git a/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate.c b/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate.c
new file mode 100644
index 0000000000..151174b472
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate.c
@@ -0,0 +1,59 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <fcntl.h>
+#include <kernel-features.h>
+#include <sysdep.h>
+
+#define posix_fallocate static internal_fallocate
+#include <sysdeps/posix/posix_fallocate.c>
+#undef posix_fallocate
+
+#if !defined __ASSUME_FALLOCATE && defined __NR_fallocate
+static int __have_fallocate;
+#endif
+
+
+/* Reserve storage for the data of the file associated with FD. */
+int
+posix_fallocate (int fd, __off_t offset, __off_t len)
+{
+#ifdef __NR_fallocate
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (__have_fallocate >= 0, 1))
+# endif
+ {
+ INTERNAL_SYSCALL_DECL (err);
+ int res = INTERNAL_SYSCALL (fallocate, err, 4, fd, 0, offset, len);
+
+ if (! INTERNAL_SYSCALL_ERROR_P (res, err))
+ return 0;
+
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (INTERNAL_SYSCALL_ERRNO (res, err) == ENOSYS, 0))
+ __have_fallocate = -1;
+ else
+# endif
+ if (INTERNAL_SYSCALL_ERRNO (res, err) != EOPNOTSUPP)
+ return INTERNAL_SYSCALL_ERRNO (res, err);
+ }
+#endif
+
+ return internal_fallocate (fd, offset, len);
+}
+strong_alias (posix_fallocate, posix_fallocate64)
diff --git a/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate64.c b/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate64.c
new file mode 100644
index 0000000000..f466f13e45
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate64.c
@@ -0,0 +1 @@
+/* posix_fallocate64 is in posix_fallocate.c */
diff --git a/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h b/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h
index fa1d02bc1f..a918a0725b 100644
--- a/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux/x86-64.
- Copyright (C) 2001, 2002, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2002, 2004, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -50,6 +50,7 @@
# define O_DIRECTORY 0200000 /* Must be a directory. */
# define O_NOFOLLOW 0400000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
/* For now Linux has synchronisity options for data and read operations.
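
O_CLOEXEC, added to each architecture's bits/fcntl.h in this patch, sets the close-on-exec flag at open time, closing the window in which a concurrently forking thread could exec and leak the descriptor. A caller-side sketch (open_private is an illustrative name):

/* Atomic equivalent of open followed by
   fcntl (fd, F_SETFD, FD_CLOEXEC).  */
#include <fcntl.h>

int
open_private (const char *path)
{
  return open (path, O_RDONLY | O_CLOEXEC);  /* -1 + errno on failure */
}
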
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index f8217a1757..8855b6d45f 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -55,11 +55,13 @@ static const struct intel_02_cache_info
{ 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 },
{ 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 },
{ 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 },
+ { 0x48, _SC_LEVEL2_CACHE_SIZE, 3145728, 12, 64 },
{ 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 },
{ 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 },
{ 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
{ 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
{ 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
+ { 0x4e, _SC_LEVEL2_CACHE_SIZE, 6291456, 24, 64 },
{ 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 },
{ 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 },
{ 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },
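
The 0x48 and 0x4e entries added above are CPUID leaf-2 cache descriptors (a 3 MB 12-way L2 and a 6 MB 24-way L2, respectively); cacheinfo.c feeds this table into the _SC_LEVEL*_CACHE_* sysconf parameters. A consumer-side sketch using those GNU-specific sysconf names:

/* Query the L2 geometry derived from the table above; sysconf
   reports 0 or -1 when the information is unavailable.  */
#include <stdio.h>
#include <unistd.h>

int
main (void)
{
  printf ("L2: %ld bytes, %ld-way, %ld-byte lines\n",
          sysconf (_SC_LEVEL2_CACHE_SIZE),
          sysconf (_SC_LEVEL2_CACHE_ASSOC),
          sysconf (_SC_LEVEL2_CACHE_LINESIZE));
  return 0;
}
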
diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c
new file mode 100644
index 0000000000..63d84a62e8
--- /dev/null
+++ b/sysdeps/x86_64/sched_cpucount.c
@@ -0,0 +1,26 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef __amdfam10
+# define POPCNT(l) \
+ ({ __cpu_mask r; \
+ asm ("popcntq %1, %0" : "=r" (r) : "0" (l)); \
+ r; })
+#endif
+
+#include <posix/sched_cpucount.c>
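
The POPCNT override above takes effect only when glibc itself is compiled for AMD family 10h (__amdfam10), which introduced the hardware popcnt instruction; otherwise the generic bit-counting loop in posix/sched_cpucount.c is used. Either way the result is __sched_cpucount, normally reached through the CPU_COUNT macro; a usage sketch:

/* Count the CPUs in this process's affinity mask via CPU_COUNT,
   which calls the __sched_cpucount built from this file.  */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int
main (void)
{
  cpu_set_t set;
  if (sched_getaffinity (0, sizeof (set), &set) != 0)
    return 1;
  printf ("runnable on %d CPUs\n", CPU_COUNT (&set));
  return 0;
}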