author    Jakub Jelinek <jakub@redhat.com>    2007-07-31 17:46:17 +0000
committer Jakub Jelinek <jakub@redhat.com>    2007-07-31 17:46:17 +0000
commit    8833066b122427710a9e14a888ce6cfa862332d3 (patch)
tree      29591019d695919417b3698618d6a342e97381d6 /sysdeps
parent    fedca46896bdb702cb988837a0c2c5447e72ba2b (diff)
Updated to fedora-glibc-20070731T1624cvs/fedora-glibc-2_6_90-1
Diffstat (limited to 'sysdeps')
-rw-r--r--  sysdeps/generic/_G_config.h | 20
-rw-r--r--  sysdeps/generic/initfini.c | 4
-rw-r--r--  sysdeps/gnu/_G_config.h | 20
-rw-r--r--  sysdeps/ia64/sched_cpucount.c | 21
-rw-r--r--  sysdeps/ieee754/ldbl-96/s_roundl.c | 4
-rw-r--r--  sysdeps/mach/hurd/_G_config.h | 20
-rw-r--r--  sysdeps/mach/hurd/bits/fcntl.h | 4
-rw-r--r--  sysdeps/mach/hurd/i386/tls.h | 6
-rw-r--r--  sysdeps/mach/hurd/sigaction.c | 6
-rw-r--r--  sysdeps/mach/hurd/sigsuspend.c | 5
-rw-r--r--  sysdeps/mach/hurd/tls.h | 4
-rw-r--r--  sysdeps/mach/i386/sysdep.h | 8
-rw-r--r--  sysdeps/posix/posix_fallocate64.c | 3
-rw-r--r--  sysdeps/powerpc/powerpc32/970/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/970/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/Makefile | 6
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/Makefile | 5
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/mpa.c | 549
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S | 47
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S | 39
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S | 97
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c | 66
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c | 94
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c | 62
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c | 60
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/memcmp.S | 986
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/memcopy.h | 113
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/memcpy.S | 426
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/memset.S | 229
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/strncmp.S | 176
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/wordcopy.c | 209
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S | 37
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S | 30
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S | 37
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S | 30
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S | 60
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S | 2
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S | 58
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S | 37
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S | 30
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S | 37
-rw-r--r--  sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S | 30
-rw-r--r--  sysdeps/powerpc/powerpc32/power5/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power5/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/Implies | 2
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/Implies | 2
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S | 47
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S | 39
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S | 60
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S | 2
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/memcpy.S | 843
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/memset.S | 542
-rw-r--r--  sysdeps/powerpc/powerpc32/power6/wordcopy.c | 287
-rw-r--r--  sysdeps/powerpc/powerpc32/power6x/Implies | 3
-rw-r--r--  sysdeps/powerpc/powerpc32/power6x/fpu/Implies | 3
-rw-r--r--  sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S | 42
-rw-r--r--  sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S | 52
-rw-r--r--  sysdeps/powerpc/powerpc64/970/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/970/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/Makefile | 6
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/Makefile | 5
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/mpa.c | 549
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c | 66
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c | 94
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c | 62
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c | 60
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/memcmp.S | 982
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/memcopy.h | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/memcpy.S | 418
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/memset.S | 275
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/strncmp.S | 180
-rw-r--r--  sysdeps/powerpc/powerpc64/power4/wordcopy.c | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S | 38
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S | 31
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S | 38
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S | 31
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S | 59
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S | 38
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S | 31
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S | 38
-rw-r--r--  sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S | 31
-rw-r--r--  sysdeps/powerpc/powerpc64/power5/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power5/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/fpu/Implies | 2
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/memcpy.S | 1170
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/memset.S | 419
-rw-r--r--  sysdeps/powerpc/powerpc64/power6/wordcopy.c | 410
-rw-r--r--  sysdeps/powerpc/powerpc64/power6x/Implies | 2
-rw-r--r--  sysdeps/powerpc/powerpc64/power6x/fpu/Implies | 3
-rw-r--r--  sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S | 45
-rw-r--r--  sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S | 55
-rw-r--r--  sysdeps/powerpc/sched_cpucount.c | 23
-rw-r--r--  sysdeps/s390/dl-procinfo.c | 80
-rw-r--r--  sysdeps/s390/dl-procinfo.h | 99
-rw-r--r--  sysdeps/sh/bsd-_setjmp.S | 6
-rw-r--r--  sysdeps/sh/bsd-setjmp.S | 6
-rw-r--r--  sysdeps/unix/clock_gettime.c | 12
-rw-r--r--  sysdeps/unix/sysv/linux/Makefile | 2
-rw-r--r--  sysdeps/unix/sysv/linux/bits/sched.h | 78
-rw-r--r--  sysdeps/unix/sysv/linux/bits/socket.h | 15
-rw-r--r--  sysdeps/unix/sysv/linux/check_pf.c | 80
-rw-r--r--  sysdeps/unix/sysv/linux/futimes.c | 4
-rw-r--r--  sysdeps/unix/sysv/linux/i386/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/i386/sysconf.c | 4
-rw-r--r--  sysdeps/unix/sysv/linux/ia64/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/ia64/sys/ptrace.h | 24
-rw-r--r--  sysdeps/unix/sysv/linux/kernel-features.h | 12
-rw-r--r--  sysdeps/unix/sysv/linux/nscd_setup_thread.c | 5
-rw-r--r--  sysdeps/unix/sysv/linux/open64.c | 34
-rw-r--r--  sysdeps/unix/sysv/linux/open_2.c | 32
-rw-r--r--  sysdeps/unix/sysv/linux/openat.c | 18
-rw-r--r--  sysdeps/unix/sysv/linux/openat64.c | 1
-rw-r--r--  sysdeps/unix/sysv/linux/posix_fallocate.c | 60
-rw-r--r--  sysdeps/unix/sysv/linux/posix_fallocate64.c | 64
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/libc-start.c | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/970/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power4/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power5+/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power5/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power6x/fpu/Implies | 4
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/970/fpu/Implies | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power4/fpu/Implies | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power5+/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power5/fpu/Implies | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/Implies | 3
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power6x/fpu/Implies | 4
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h | 24
-rw-r--r--  sysdeps/unix/sysv/linux/s390/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/s390/dl-procinfo.h | 43
-rw-r--r--  sysdeps/unix/sysv/linux/s390/sys/ptrace.h | 24
-rw-r--r--  sysdeps/unix/sysv/linux/sh/bits/fcntl.h | 3
-rw-r--r--  sysdeps/unix/sysv/linux/sh/clone.S | 10
-rw-r--r--  sysdeps/unix/sysv/linux/sparc/sys/ptrace.h | 24
-rw-r--r--  sysdeps/unix/sysv/linux/sys/ptrace.h | 25
-rw-r--r--  sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate.c | 59
-rw-r--r--  sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate64.c | 1
-rw-r--r--  sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h | 3
-rw-r--r--  sysdeps/x86_64/cacheinfo.c | 2
-rw-r--r--  sysdeps/x86_64/sched_cpucount.c | 26
147 files changed, 11575 insertions, 128 deletions
diff --git a/sysdeps/generic/_G_config.h b/sysdeps/generic/_G_config.h
index a152b070c6..4aafe65f6b 100644
--- a/sysdeps/generic/_G_config.h
+++ b/sysdeps/generic/_G_config.h
@@ -8,19 +8,15 @@
#include <bits/types.h>
#define __need_size_t
-#define __need_wchar_t
-#define __need_wint_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wchar_t
+#endif
#define __need_NULL
#include <stddef.h>
-#ifndef _WINT_T
-/* Integral type unchanged by default argument promotions that can
- hold any value corresponding to members of the extended character
- set, as well as at least one value that does not correspond to any
- member of the extended character set. */
-# define _WINT_T
-typedef unsigned int wint_t;
-#endif
#define __need_mbstate_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wint_t
+#endif
#include <wchar.h>
#define _G_size_t size_t
typedef struct
@@ -41,7 +37,8 @@ typedef struct
#define _G_wchar_t wchar_t
#define _G_wint_t wint_t
#define _G_stat64 stat
-#include <gconv.h>
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# include <gconv.h>
typedef union
{
struct __gconv_info __cd;
@@ -51,6 +48,7 @@ typedef union
struct __gconv_step_data __data;
} __combined;
} _G_iconv_t;
+#endif
typedef int _G_int16_t __attribute__ ((__mode__ (__HI__)));
typedef int _G_int32_t __attribute__ ((__mode__ (__SI__)));
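
The hunk above relies on glibc's __need_* protocol: a client defines __need_size_t (or __need_wchar_t, __need_wint_t, __need_mbstate_t) before including <stddef.h> or <wchar.h> to obtain exactly one definition instead of the whole header, which is what lets the wide-character pieces be skipped unless _LIBC or _GLIBCPP_USE_WCHAR_T is defined. A minimal sketch of the consumer side (function name is hypothetical, for illustration only):

#define __need_size_t
#include <stddef.h>		/* per the protocol: provides size_t and nothing else */

size_t
count_bytes (const char *s)
{
  size_t n = 0;
  while (s[n] != '\0')
    n++;
  return n;
}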
diff --git a/sysdeps/generic/initfini.c b/sysdeps/generic/initfini.c
index 2b8412a428..d5ef778367 100644
--- a/sysdeps/generic/initfini.c
+++ b/sysdeps/generic/initfini.c
@@ -81,7 +81,7 @@ call_gmon_start(void)
}
SECTION (".init");
-extern void _init (void);
+extern void __attribute__ ((section (".init"))) _init (void);
void
_init (void)
{
@@ -107,7 +107,7 @@ asm ("\n/*@_init_EPILOG_ENDS*/");
asm ("\n/*@_fini_PROLOG_BEGINS*/");
SECTION (".fini");
-extern void _fini (void);
+extern void __attribute__ ((section (".fini"))) _fini (void);
void
_fini (void)
{
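
The initfini.c change puts the section attribute on the prototypes so the compiler emits _init and _fini into the .init/.fini sections named by the surrounding SECTION macro. A minimal sketch of the same idiom with a hypothetical function name, mirroring the declaration form used in the patch:

/* The attribute on the declaration directs the definition below into
   the named section instead of .text.  */
extern void __attribute__ ((section (".init"))) my_startup (void);

void
my_startup (void)
{
  /* start-up work emitted into the .init section */
}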
diff --git a/sysdeps/gnu/_G_config.h b/sysdeps/gnu/_G_config.h
index 83c78f0b93..211d0224b1 100644
--- a/sysdeps/gnu/_G_config.h
+++ b/sysdeps/gnu/_G_config.h
@@ -8,19 +8,15 @@
#include <bits/types.h>
#define __need_size_t
-#define __need_wchar_t
-#define __need_wint_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wchar_t
+#endif
#define __need_NULL
#include <stddef.h>
-#ifndef _WINT_T
-/* Integral type unchanged by default argument promotions that can
- hold any value corresponding to members of the extended character
- set, as well as at least one value that does not correspond to any
- member of the extended character set. */
-# define _WINT_T
-typedef unsigned int wint_t;
-#endif
#define __need_mbstate_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wint_t
+#endif
#include <wchar.h>
#define _G_size_t size_t
typedef struct
@@ -41,7 +37,8 @@ typedef struct
#define _G_wchar_t wchar_t
#define _G_wint_t wint_t
#define _G_stat64 stat64
-#include <gconv.h>
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# include <gconv.h>
typedef union
{
struct __gconv_info __cd;
@@ -51,6 +48,7 @@ typedef union
struct __gconv_step_data __data;
} __combined;
} _G_iconv_t;
+#endif
typedef int _G_int16_t __attribute__ ((__mode__ (__HI__)));
typedef int _G_int32_t __attribute__ ((__mode__ (__SI__)));
diff --git a/sysdeps/ia64/sched_cpucount.c b/sysdeps/ia64/sched_cpucount.c
new file mode 100644
index 0000000000..ff9d8cf2a8
--- /dev/null
+++ b/sysdeps/ia64/sched_cpucount.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#define POPCNT(l) __builtin_popcountl (l)
+
+#include <posix/sched_cpucount.c>
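
The override works because the generic posix/sched_cpucount.c counts set bits one __cpu_mask word at a time through the POPCNT hook, which this file remaps to __builtin_popcountl. A rough sketch of that pattern under the glibc cpu_set_t layout (the generic file differs in detail; names here are illustrative):

#define _GNU_SOURCE
#include <sched.h>
#include <stddef.h>

/* Portable stand-in for the POPCNT hook; an arch override such as the
   ia64 file above would replace this with __builtin_popcountl.  */
static int
popcnt (unsigned long int w)
{
  int n = 0;
  while (w != 0)
    {
      w &= w - 1;		/* clear the lowest set bit */
      ++n;
    }
  return n;
}

int
cpucount_sketch (const cpu_set_t *set)
{
  int count = 0;
  for (size_t i = 0; i < sizeof (set->__bits) / sizeof (set->__bits[0]); ++i)
    count += popcnt (set->__bits[i]);
  return count;
}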
diff --git a/sysdeps/ieee754/ldbl-96/s_roundl.c b/sysdeps/ieee754/ldbl-96/s_roundl.c
index 672536d358..f1399cc209 100644
--- a/sysdeps/ieee754/ldbl-96/s_roundl.c
+++ b/sysdeps/ieee754/ldbl-96/s_roundl.c
@@ -1,5 +1,5 @@
/* Round long double to integer away from zero.
- Copyright (C) 1997 Free Software Foundation, Inc.
+ Copyright (C) 1997, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -23,7 +23,7 @@
#include "math_private.h"
-static const long double huge = 1.0e4930;
+static const long double huge = 1.0e4930L;
long double
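
The one-character fix matters because an unsuffixed 1.0e4930 is a constant of type double, and that value is far above DBL_MAX (about 1.8e308), so the initializer degenerates to infinity before the conversion to long double; the L suffix keeps the value inside the ldbl-96 range (maximum about 1.19e4932). A quick check, assuming an x86-style 80-bit long double:

#include <stdio.h>

int
main (void)
{
  long double bad = 1.0e4930;	/* parsed as a double constant: overflows to inf */
  long double good = 1.0e4930L;	/* long double constant: representable */
  printf ("%Lg vs %Lg\n", bad, good);
  return 0;
}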
diff --git a/sysdeps/mach/hurd/_G_config.h b/sysdeps/mach/hurd/_G_config.h
index b643059234..db959246ee 100644
--- a/sysdeps/mach/hurd/_G_config.h
+++ b/sysdeps/mach/hurd/_G_config.h
@@ -8,19 +8,15 @@
#include <bits/types.h>
#define __need_size_t
-#define __need_wchar_t
-#define __need_wint_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wchar_t
+#endif
#define __need_NULL
#include <stddef.h>
-#ifndef _WINT_T
-/* Integral type unchanged by default argument promotions that can
- hold any value corresponding to members of the extended character
- set, as well as at least one value that does not correspond to any
- member of the extended character set. */
-# define _WINT_T
-typedef unsigned int wint_t;
-#endif
#define __need_mbstate_t
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# define __need_wint_t
+#endif
#include <wchar.h>
#define _G_size_t size_t
typedef struct
@@ -41,7 +37,8 @@ typedef struct
#define _G_wchar_t wchar_t
#define _G_wint_t wint_t
#define _G_stat64 stat64
-#include <gconv.h>
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+# include <gconv.h>
typedef union
{
struct __gconv_info __cd;
@@ -51,6 +48,7 @@ typedef union
struct __gconv_step_data __data;
} __combined;
} _G_iconv_t;
+#endif
typedef int _G_int16_t __attribute__ ((__mode__ (__HI__)));
typedef int _G_int32_t __attribute__ ((__mode__ (__SI__)));
diff --git a/sysdeps/mach/hurd/bits/fcntl.h b/sysdeps/mach/hurd/bits/fcntl.h
index e709cc6fb1..34e6baa049 100644
--- a/sysdeps/mach/hurd/bits/fcntl.h
+++ b/sysdeps/mach/hurd/bits/fcntl.h
@@ -1,5 +1,6 @@
/* O_*, F_*, FD_* bit values for GNU.
- Copyright (C) 1993,94,96,97,98,99,2000,01,04 Free Software Foundation, Inc.
+ Copyright (C) 1993,1994,1996,1997,1998,1999,2000,2001,2004,2007
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -70,6 +71,7 @@
#define O_SYNC O_FSYNC
#ifdef __USE_GNU
# define O_NOATIME 0x0800 /* Don't set access time on read (owner). */
+# define O_CLOEXEC 0x00010000 /* Set FD_CLOEXEC. */
#endif
#ifdef __USE_MISC
# define O_SHLOCK 0x00020000 /* Open with shared file lock. */
diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
index 972cac57a1..d98b485b96 100644
--- a/sysdeps/mach/hurd/i386/tls.h
+++ b/sysdeps/mach/hurd/i386/tls.h
@@ -1,5 +1,5 @@
/* Definitions for thread-local data handling. Hurd/i386 version.
- Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -29,7 +29,7 @@
thread pointer points to is unspecified. Allocate the TCB there. */
# define TLS_TCB_AT_TP 1
-# ifndef ASSEMBLER
+# ifndef __ASSEMBLER__
/* Use i386-specific RPCs to arrange that %gs segment register prefix
addresses the TCB in each thread. */
@@ -165,7 +165,7 @@ _hurd_tls_fork (thread_t child, struct i386_thread_state *state)
return err;
}
-# endif /* !ASSEMBLER */
+# endif /* !__ASSEMBLER__ */
#endif /* HAVE_TLS_SUPPORT */
#endif /* i386/tls.h */
diff --git a/sysdeps/mach/hurd/sigaction.c b/sysdeps/mach/hurd/sigaction.c
index 3dc530955d..fe452e8283 100644
--- a/sysdeps/mach/hurd/sigaction.c
+++ b/sysdeps/mach/hurd/sigaction.c
@@ -1,4 +1,6 @@
-/* Copyright (C) 1991,92,93,94,95,96,97,2002 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 2002, 2007
+ Free Software Foundation, Inc.
+
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -68,7 +70,7 @@ __sigaction (sig, act, oact)
__spin_lock (&ss->lock);
pending = ss->pending & ~ss->blocked;
}
- else if (a.sa_handler == SIG_IGN || a.sa_handler == SIG_DFL)
+ else if (act != NULL && (a.sa_handler == SIG_IGN || a.sa_handler == SIG_DFL))
/* We are changing to an action that might be to ignore SIG signals.
If SIG is blocked and pending and the new action is to ignore it, we
must remove it from the pending set now; if the action is changed
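
The sigaction.c hunk guards a dereference that was reachable with act == NULL, which POSIX permits as a way to query the current disposition without changing it. The call pattern the fix protects, as a small self-contained example:

#include <signal.h>
#include <stdio.h>

int
main (void)
{
  struct sigaction old;
  /* act == NULL: query only; the implementation must not inspect or
     install a new action.  */
  if (sigaction (SIGINT, NULL, &old) == 0)
    printf ("SIGINT is %s\n",
            old.sa_handler == SIG_DFL ? "at default" : "handled");
  return 0;
}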
diff --git a/sysdeps/mach/hurd/sigsuspend.c b/sysdeps/mach/hurd/sigsuspend.c
index 96b3857c72..7e32472ce3 100644
--- a/sysdeps/mach/hurd/sigsuspend.c
+++ b/sysdeps/mach/hurd/sigsuspend.c
@@ -1,4 +1,6 @@
-/* Copyright (C) 1991,92,93,94,95,96,97,98,2002 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2002, 2007
+ Free Software Foundation, Inc.
+
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -80,4 +82,5 @@ __sigsuspend (set)
return -1;
}
libc_hidden_def (__sigsuspend)
+strong_alias (__sigsuspend, sigsuspend_not_cancel)
weak_alias (__sigsuspend, sigsuspend)
diff --git a/sysdeps/mach/hurd/tls.h b/sysdeps/mach/hurd/tls.h
index cce42ef374..8ad3f1a28b 100644
--- a/sysdeps/mach/hurd/tls.h
+++ b/sysdeps/mach/hurd/tls.h
@@ -1,5 +1,5 @@
/* Definitions for thread-local data handling. Hurd version.
- Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2005, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
#ifndef _TLS_H
#define _TLS_H
-#if defined HAVE_TLS_SUPPORT && !defined ASSEMBLER
+#if defined HAVE_TLS_SUPPORT && !defined __ASSEMBLER__
# include <stddef.h>
# include <stdbool.h>
diff --git a/sysdeps/mach/i386/sysdep.h b/sysdeps/mach/i386/sysdep.h
index 1f138f4759..4fc5d50f3d 100644
--- a/sysdeps/mach/i386/sysdep.h
+++ b/sysdeps/mach/i386/sysdep.h
@@ -1,4 +1,6 @@
-/* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 2007
+ Free Software Foundation, Inc.
+
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,6 +18,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+/* Defines RTLD_PRIVATE_ERRNO and USE_DL_SYSINFO. */
+#include <dl-sysdep.h>
+#include <tls.h>
+
#define LOSE asm volatile ("hlt")
#define SNARF_ARGS(entry_sp, argc, argv, envp) \
diff --git a/sysdeps/posix/posix_fallocate64.c b/sysdeps/posix/posix_fallocate64.c
index 64ca9ae83d..07e2a00bf5 100644
--- a/sysdeps/posix/posix_fallocate64.c
+++ b/sysdeps/posix/posix_fallocate64.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2000, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -93,6 +93,7 @@ __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len)
return 0;
}
+#undef __posix_fallocate64_l64
#include <shlib-compat.h>
#include <bits/wordsize.h>
diff --git a/sysdeps/powerpc/powerpc32/970/Implies b/sysdeps/powerpc/powerpc32/970/Implies
new file mode 100644
index 0000000000..4e3a983426
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/970/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/970/fpu/Implies b/sysdeps/powerpc/powerpc32/970/fpu/Implies
new file mode 100644
index 0000000000..7c16e45c29
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/970/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power4/Makefile b/sysdeps/powerpc/powerpc32/power4/Makefile
new file mode 100644
index 0000000000..60aa508ba4
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/Makefile
@@ -0,0 +1,6 @@
+# Makefile fragment for POWER4/5/5+.
+
+ifeq ($(subdir),string)
+CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops -ftree-loop-linear
+CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops -ftree-loop-linear
+endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/Makefile b/sysdeps/powerpc/powerpc32/power4/fpu/Makefile
new file mode 100644
index 0000000000..a6fa75ecbc
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/Makefile
@@ -0,0 +1,5 @@
+# Makefile fragment for POWER4/5/5+ with FPU.
+
+ifeq ($(subdir),math)
+CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops -ftree-loop-linear
+endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
new file mode 100644
index 0000000000..4a232e27bf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
@@ -0,0 +1,549 @@
+
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2006 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/************************************************************************/
+/* MODULE_NAME: mpa.c */
+/* */
+/* FUNCTIONS: */
+/* mcr */
+/* acr */
+/* cr */
+/* cpy */
+/* cpymn */
+/* norm */
+/* denorm */
+/* mp_dbl */
+/* dbl_mp */
+/* add_magnitudes */
+/* sub_magnitudes */
+/* add */
+/* sub */
+/* mul */
+/* inv */
+/* dvd */
+/* */
+/* Arithmetic functions for multiple precision numbers. */
+/* Relative errors are bounded */
+/************************************************************************/
+
+
+#include "endian.h"
+#include "mpa.h"
+#include "mpa2.h"
+#include <sys/param.h> /* For MIN() */
+/* mcr() compares the sizes of the mantissas of two multiple precision */
+/* numbers. Mantissas are compared regardless of the signs of the */
+/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */
+/* disregarded. */
+static int mcr(const mp_no *x, const mp_no *y, int p) {
+ long i;
+ long p2 = p;
+ for (i=1; i<=p2; i++) {
+ if (X[i] == Y[i]) continue;
+ else if (X[i] > Y[i]) return 1;
+ else return -1; }
+ return 0;
+}
+
+
+
+/* acr() compares the absolute values of two multiple precision numbers */
+int __acr(const mp_no *x, const mp_no *y, int p) {
+ long i;
+
+ if (X[0] == ZERO) {
+ if (Y[0] == ZERO) i= 0;
+ else i=-1;
+ }
+ else if (Y[0] == ZERO) i= 1;
+ else {
+ if (EX > EY) i= 1;
+ else if (EX < EY) i=-1;
+ else i= mcr(x,y,p);
+ }
+
+ return i;
+}
+
+
+/* cr() compares the values of two multiple precision numbers */
+int __cr(const mp_no *x, const mp_no *y, int p) {
+ int i;
+
+ if (X[0] > Y[0]) i= 1;
+ else if (X[0] < Y[0]) i=-1;
+ else if (X[0] < ZERO ) i= __acr(y,x,p);
+ else i= __acr(x,y,p);
+
+ return i;
+}
+
+
+/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
+void __cpy(const mp_no *x, mp_no *y, int p) {
+ long i;
+
+ EY = EX;
+ for (i=0; i <= p; i++) Y[i] = X[i];
+
+ return;
+}
+
+
+/* Copy a multiple precision number x of precision m into a */
+/* multiple precision number y of precision n. In case n>m, */
+/* the digits of y beyond the m'th are set to zero. In case */
+/* n<m, the digits of x beyond the n'th are ignored. */
+/* x=y is permissible. */
+
+void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
+
+ long i,k;
+ long n2 = n;
+ long m2 = m;
+
+ EY = EX; k=MIN(m2,n2);
+ for (i=0; i <= k; i++) Y[i] = X[i];
+ for ( ; i <= n2; i++) Y[i] = ZERO;
+
+ return;
+}
+
+/* Convert a multiple precision number *x into a double precision */
+/* number *y, normalized case (|x| >= 2**(-1022)). */
+static void norm(const mp_no *x, double *y, int p)
+{
+ #define R radixi.d
+ long i;
+#if 0
+ int k;
+#endif
+ double a,c,u,v,z[5];
+ if (p<5) {
+ if (p==1) c = X[1];
+ else if (p==2) c = X[1] + R* X[2];
+ else if (p==3) c = X[1] + R*(X[2] + R* X[3]);
+ else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
+ }
+ else {
+ for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
+ {a *= TWO; z[1] *= TWO; }
+
+ for (i=2; i<5; i++) {
+ z[i] = X[i]*a;
+ u = (z[i] + CUTTER)-CUTTER;
+ if (u > z[i]) u -= RADIX;
+ z[i] -= u;
+ z[i-1] += u*RADIXI;
+ }
+
+ u = (z[3] + TWO71) - TWO71;
+ if (u > z[3]) u -= TWO19;
+ v = z[3]-u;
+
+ if (v == TWO18) {
+ if (z[4] == ZERO) {
+ for (i=5; i <= p; i++) {
+ if (X[i] == ZERO) continue;
+ else {z[3] += ONE; break; }
+ }
+ }
+ else z[3] += ONE;
+ }
+
+ c = (z[1] + R *(z[2] + R * z[3]))/a;
+ }
+
+ c *= X[0];
+
+ for (i=1; i<EX; i++) c *= RADIX;
+ for (i=1; i>EX; i--) c *= RADIXI;
+
+ *y = c;
+ return;
+#undef R
+}
+
+/* Convert a multiple precision number *x into a double precision */
+/* number *y, denormalized case (|x| < 2**(-1022)). */
+static void denorm(const mp_no *x, double *y, int p)
+{
+ long i,k;
+ long p2 = p;
+ double c,u,z[5];
+#if 0
+ double a,v;
+#endif
+
+#define R radixi.d
+ if (EX<-44 || (EX==-44 && X[1]<TWO5))
+ { *y=ZERO; return; }
+
+ if (p2==1) {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
+ }
+ else if (p2==2) {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
+ }
+ else {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; k=1;}
+ z[3] = X[k];
+ }
+
+ u = (z[3] + TWO57) - TWO57;
+ if (u > z[3]) u -= TWO5;
+
+ if (u==z[3]) {
+ for (i=k+1; i <= p2; i++) {
+ if (X[i] == ZERO) continue;
+ else {z[3] += ONE; break; }
+ }
+ }
+
+ c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
+
+ *y = c*TWOM1032;
+ return;
+
+#undef R
+}
+
+/* Convert a multiple precision number *x into a double precision number *y. */
+/* The result is correctly rounded to the nearest/even. *x is left unchanged */
+
+void __mp_dbl(const mp_no *x, double *y, int p) {
+#if 0
+ int i,k;
+ double a,c,u,v,z[5];
+#endif
+
+ if (X[0] == ZERO) {*y = ZERO; return; }
+
+ if (EX> -42) norm(x,y,p);
+ else if (EX==-42 && X[1]>=TWO10) norm(x,y,p);
+ else denorm(x,y,p);
+}
+
+
+/* dbl_mp() converts a double precision number x into a multiple precision */
+/* number *y. If the precision p is too small the result is truncated. x is */
+/* left unchanged. */
+
+void __dbl_mp(double x, mp_no *y, int p) {
+
+ long i,n;
+ long p2 = p;
+ double u;
+
+ /* Sign */
+ if (x == ZERO) {Y[0] = ZERO; return; }
+ else if (x > ZERO) Y[0] = ONE;
+ else {Y[0] = MONE; x=-x; }
+
+ /* Exponent */
+ for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI;
+ for ( ; x < ONE; EY -= ONE) x *= RADIX;
+
+ /* Digits */
+ n=MIN(p2,4);
+ for (i=1; i<=n; i++) {
+ u = (x + TWO52) - TWO52;
+ if (u>x) u -= ONE;
+ Y[i] = u; x -= u; x *= RADIX; }
+ for ( ; i<=p2; i++) Y[i] = ZERO;
+ return;
+}
+
+
+/* add_magnitudes() adds the magnitudes of *x & *y assuming that */
+/* abs(*x) >= abs(*y) > 0. */
+/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
+/* No guard digit is used. The result equals the exact sum, truncated. */
+/* *x & *y are left unchanged. */
+
+static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i,j,k;
+ long p2 = p;
+
+ EZ = EX;
+
+ i=p2; j=p2+ EY - EX; k=p2+1;
+
+ if (j<1)
+ {__cpy(x,z,p); return; }
+ else Z[k] = ZERO;
+
+ for (; j>0; i--,j--) {
+ Z[k] += X[i] + Y[j];
+ if (Z[k] >= RADIX) {
+ Z[k] -= RADIX;
+ Z[--k] = ONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (; i>0; i--) {
+ Z[k] += X[i];
+ if (Z[k] >= RADIX) {
+ Z[k] -= RADIX;
+ Z[--k] = ONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ if (Z[1] == ZERO) {
+ for (i=1; i<=p2; i++) Z[i] = Z[i+1]; }
+ else EZ += ONE;
+}
+
+
+/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */
+/* abs(*x) > abs(*y) > 0. */
+/* The sign of the difference *z is undefined. x&y may overlap but not x&z */
+/* or y&z. One guard digit is used. The error is less than one ulp. */
+/* *x & *y are left unchanged. */
+
+static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i,j,k;
+ long p2 = p;
+
+ EZ = EX;
+
+ if (EX == EY) {
+ i=j=k=p2;
+ Z[k] = Z[k+1] = ZERO; }
+ else {
+ j= EX - EY;
+ if (j > p2) {__cpy(x,z,p); return; }
+ else {
+ i=p2; j=p2+1-j; k=p2;
+ if (Y[j] > ZERO) {
+ Z[k+1] = RADIX - Y[j--];
+ Z[k] = MONE; }
+ else {
+ Z[k+1] = ZERO;
+ Z[k] = ZERO; j--;}
+ }
+ }
+
+ for (; j>0; i--,j--) {
+ Z[k] += (X[i] - Y[j]);
+ if (Z[k] < ZERO) {
+ Z[k] += RADIX;
+ Z[--k] = MONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (; i>0; i--) {
+ Z[k] += X[i];
+ if (Z[k] < ZERO) {
+ Z[k] += RADIX;
+ Z[--k] = MONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (i=1; Z[i] == ZERO; i++) ;
+ EZ = EZ - i + 1;
+ for (k=1; i <= p2+1; )
+ Z[k++] = Z[i++];
+ for (; k <= p2; )
+ Z[k++] = ZERO;
+
+ return;
+}
+
+
+/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */
+/* but not x&z or y&z. One guard digit is used. The error is less than */
+/* one ulp. *x & *y are left unchanged. */
+
+void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ int n;
+
+ if (X[0] == ZERO) {__cpy(y,z,p); return; }
+ else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
+
+ if (X[0] == Y[0]) {
+ if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; }
+ }
+ else {
+ if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; }
+ else Z[0] = ZERO;
+ }
+ return;
+}
+
+
+/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
+/* overlap but not x&z or y&z. One guard digit is used. The error is */
+/* less than one ulp. *x & *y are left unchanged. */
+
+void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ int n;
+
+ if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; }
+ else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
+
+ if (X[0] != Y[0]) {
+ if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
+ }
+ else {
+ if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
+ else Z[0] = ZERO;
+ }
+ return;
+}
+
+
+/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */
+/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */
+/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */
+/* *x & *y are left unchanged. */
+
+void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i, i1, i2, j, k, k2;
+ long p2 = p;
+ double u, zk, zk2;
+
+ /* Is z=0? */
+ if (X[0]*Y[0]==ZERO)
+ { Z[0]=ZERO; return; }
+
+ /* Multiply, add and carry */
+ k2 = (p2<3) ? p2+p2 : p2+3;
+ zk = Z[k2]=ZERO;
+ for (k=k2; k>1; ) {
+ if (k > p2) {i1=k-p2; i2=p2+1; }
+ else {i1=1; i2=k; }
+#if 1
+ /* rearrange this inner loop to allow the fmadd instructions to be
+ independent and execute in parallel on processors that have
+ dual symmetrical FP pipelines. */
+ if (i1 < (i2-1))
+ {
+ /* make sure we have at least 2 iterations */
+ if (((i2 - i1) & 1L) == 1L)
+ {
+ /* Handle the odd iterations case. */
+ zk2 = x->d[i2-1]*y->d[i1];
+ }
+ else
+ zk2 = zero.d;
+ /* Do two multiply/adds per loop iteration, using independent
+ accumulators; zk and zk2. */
+ for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2)
+ {
+ zk += x->d[i]*y->d[j];
+ zk2 += x->d[i+1]*y->d[j-1];
+ }
+ zk += zk2; /* final sum. */
+ }
+ else
+ {
+ /* Special case when iterations is 1. */
+ zk += x->d[i1]*y->d[i1];
+ }
+#else
+ /* The original code. */
+ for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
+#endif
+
+ u = (zk + CUTTER)-CUTTER;
+ if (u > zk) u -= RADIX;
+ Z[k] = zk - u;
+ zk = u*RADIXI;
+ --k;
+ }
+ Z[k] = zk;
+
+ /* Is there a carry beyond the most significant digit? */
+ if (Z[1] == ZERO) {
+ for (i=1; i<=p2; i++) Z[i]=Z[i+1];
+ EZ = EX + EY - 1; }
+ else
+ EZ = EX + EY;
+
+ Z[0] = X[0] * Y[0];
+ return;
+}
+
+
+/* Invert a multiple precision number. Set *y = 1 / *x. */
+/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */
+/* 2.001*r**(1-p) for p>3. */
+/* *x=0 is not permissible. *x is left unchanged. */
+
+void __inv(const mp_no *x, mp_no *y, int p) {
+ long i;
+#if 0
+ int l;
+#endif
+ double t;
+ mp_no z,w;
+ static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
+ const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
+
+ __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p);
+ t=ONE/t; __dbl_mp(t,y,p); EY -= EX;
+
+ for (i=0; i<np1[p]; i++) {
+ __cpy(y,&w,p);
+ __mul(x,&w,y,p);
+ __sub(&mptwo,y,&z,p);
+ __mul(&w,&z,y,p);
+ }
+ return;
+}
+
+
+/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
+/* are left unchanged. x&y may overlap but not x&z or y&z. */
+/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */
+/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */
+
+void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ mp_no w;
+
+ if (X[0] == ZERO) Z[0] = ZERO;
+ else {__inv(y,&w,p); __mul(x,&w,z,p);}
+ return;
+}
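
Taken together, __dbl_mp, the arithmetic routines, and __mp_dbl form the convert/compute/convert-back pipeline that the slowexp.c and slowpow.c files below rely on. A hypothetical round-trip using only functions defined in this file (mpa.h is a libm-internal header, so this is a sketch, not standalone code):

#include "mpa.h"

/* Add two doubles through the multi-precision layer at precision p
   (hypothetical helper, for illustration only).  */
double
mp_add_doubles (double a, double b, int p)
{
  mp_no ma, mb, ms;
  double result;

  __dbl_mp (a, &ma, p);		/* double -> multi-precision */
  __dbl_mp (b, &mb, p);
  __add (&ma, &mb, &ms, p);	/* error less than one ulp */
  __mp_dbl (&ms, &result, p);	/* round back to the nearest double */
  return result;
}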
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
new file mode 100644
index 0000000000..ad3cd2769b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
@@ -0,0 +1,47 @@
+/* Round double to long long int. PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long int[r3, r4] __llrint (double x[fp1]) */
+ENTRY (__llrint)
+ CALL_MCOUNT
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ fctid fp13,fp1
+ stfd fp13,8(r1)
+ nop /* Ensure the following load is in a different dispatch group */
+ nop /* to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r3,8(r1)
+ lwz r4,12(r1)
+ addi r1,r1,16
+ blr
+ END (__llrint)
+
+weak_alias (__llrint, llrint)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__llrint, __llrintl)
+weak_alias (__llrint, llrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llrint, llrintl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
new file mode 100644
index 0000000000..21353ffb5b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
@@ -0,0 +1,39 @@
+/* Round float to long long int. PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+
+/* long long int[r3, r4] __llrintf (float x[fp1]) */
+ENTRY (__llrintf)
+ CALL_MCOUNT
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ fctid fp13,fp1
+ stfd fp13,8(r1)
+ nop /* Ensure the following load is in a different dispatch group */
+ nop /* to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r3,8(r1)
+ lwz r4,12(r1)
+ addi r1,r1,16
+ blr
+ END (__llrintf)
+
+weak_alias (__llrintf, llrintf)
+
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
new file mode 100644
index 0000000000..952d2aa6a5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
@@ -0,0 +1,97 @@
+/* llround function. PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 2
+.LC0: /* 0.0 */
+ .long 0x00000000
+.LC1: /* 0.5 */
+ .long 0x3f000000
+
+ .section ".text"
+
+/* long long int [r3, r4] __llround (double x [fp1])
+   IEEE 1003.1 llround function. IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode." However PowerPC Architecture defines
+   "round to Nearest" as "Choose the best approximation. In case of a
+   tie, choose the one that is even (least significant bit 0)."
+ So we can't use the PowerPC "round to Nearest" mode. Instead we set
+ "round toward Zero" mode and round by adding +-0.5 before rounding
+ to the integer value. */
+
+ENTRY (__llround)
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+#ifdef SHARED
+ mflr r11
+ cfi_register(lr,r11)
+# ifdef HAVE_ASM_PPC_REL16
+ bcl 20,31,1f
+1: mflr r9
+ addis r9,r9,.LC0-1b@ha
+ addi r9,r9,.LC0-1b@l
+# else
+ bl _GLOBAL_OFFSET_TABLE_@local-4
+ mflr r10
+ lwz r9,.LC0@got(10)
+# endif
+ mtlr r11
+ cfi_same_value (lr)
+ lfs fp12,0(r9)
+ lfs fp10,.LC1-.LC0(r9)
+#else
+ lis r9,.LC0@ha
+ lis r10,.LC1@ha
+ lfs fp12,.LC0@l(r9)
+ lfs fp10,.LC1@l(r10)
+#endif
+ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
+ ble- cr6,.L4
+ fadd fp1,fp1,fp10 /* x+= 0.5; */
+.L9:
+ fctidz fp2,fp1 /* Convert To Integer DW round toward 0. */
+ stfd fp2,8(r1)
+ nop /* Ensure the following load is in a different dispatch */
+ nop /* group to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r4,12(r1)
+ lwz r3,8(r1)
+ addi r1,r1,16
+ blr
+.L4:
+ fsub fp1,fp1,fp10 /* x-= 0.5; */
+ b .L9
+ END (__llround)
+
+weak_alias (__llround, llround)
+
+strong_alias (__llround, __llroundf)
+weak_alias (__llround, llroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+#endif
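
In C terms, the sequence above (fadd with +/-0.5 followed by fctidz) is equivalent to the model below; this is a sketch only, since the real entry point is the assembly:

long long
llround_model (double x)
{
  /* Bias away from zero, then truncate toward zero (what fctidz
     does), so halfway cases round away from zero.  */
  double biased = (x > 0.0) ? x + 0.5 : x - 0.5;
  return (long long) biased;
}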
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S
new file mode 100644
index 0000000000..72d6181541
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S
@@ -0,0 +1 @@
+/* __llroundf is in s_llround.S */
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c b/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c
new file mode 100644
index 0000000000..b22b0dfeab
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c
@@ -0,0 +1,66 @@
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2007 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/**************************************************************************/
+/* MODULE_NAME:slowexp.c */
+/* */
+/* FUNCTION:slowexp */
+/* */
+/* FILES NEEDED:mpa.h */
+/* mpa.c mpexp.c */
+/* */
+/*Converting from double precision to Multi-precision and calculating */
+/* e^x */
+/**************************************************************************/
+#include "math_private.h"
+
+#ifdef NO_LONG_DOUBLE
+#include "mpa.h"
+void __mpexp(mp_no *x, mp_no *y, int p);
+#endif
+
+/*Converting from double precision to Multi-precision and calculating e^x */
+double __slowexp(double x) {
+#ifdef NO_LONG_DOUBLE
+ double w,z,res,eps=3.0e-26;
+ int p;
+ mp_no mpx, mpy, mpz,mpw,mpeps,mpcor;
+
+ p=6;
+ __dbl_mp(x,&mpx,p); /* Convert a double precision number x */
+ /* into a multiple precision number mpx with prec. p. */
+ __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */
+ __dbl_mp(eps,&mpeps,p);
+ __mul(&mpeps,&mpy,&mpcor,p);
+ __add(&mpy,&mpcor,&mpw,p);
+ __sub(&mpy,&mpcor,&mpz,p);
+ __mp_dbl(&mpw, &w, p);
+ __mp_dbl(&mpz, &z, p);
+ if (w == z) return w;
+ else { /* if the calculation was not exact */
+ p = 32;
+ __dbl_mp(x,&mpx,p);
+ __mpexp(&mpx, &mpy, p);
+ __mp_dbl(&mpy, &res, p);
+ return res;
+ }
+#else
+ return (double) __ieee754_expl((long double)x);
+#endif
+}
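
The acceptance test in __slowexp generalizes: compute the value once with a cheap error bound, and accept it only when value - error and value + error round to the same double. A compact restatement of that test with hypothetical names:

static int
rounds_unambiguously (long double value, long double error, double *out)
{
  double hi = (double) (value + error);
  double lo = (double) (value - error);
  if (hi == lo)
    {
      *out = hi;	/* both bounds round identically: correctly rounded */
      return 1;
    }
  return 0;		/* ambiguous: retry at higher working precision */
}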
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c b/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c
new file mode 100644
index 0000000000..ad147a89a6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c
@@ -0,0 +1,94 @@
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2006 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/*************************************************************************/
+/* MODULE_NAME:slowpow.c */
+/* */
+/* FUNCTION:slowpow */
+/* */
+/*FILES NEEDED:mpa.h */
+/* mpa.c mpexp.c mplog.c halfulp.c */
+/* */
+/* Given two IEEE double machine numbers y, x, the routine computes the */
+/* correctly rounded (to nearest) value of x^y. The result is first */
+/* calculated by multiplication (in halfulp.c); if that is not accurate */
+/* enough, the routine converts x and y into multi-precision doubles */
+/* and recomputes. */
+/*************************************************************************/
+
+#include "mpa.h"
+#include "math_private.h"
+
+void __mpexp (mp_no * x, mp_no * y, int p);
+void __mplog (mp_no * x, mp_no * y, int p);
+double ulog (double);
+double __halfulp (double x, double y);
+
+double
+__slowpow (double x, double y, double z)
+{
+ double res, res1;
+ long double ldw, ldz, ldpp;
+ static const long double ldeps = 0x4.0p-96;
+
+ res = __halfulp (x, y); /* halfulp() returns -10 or x^y */
+ if (res >= 0)
+ return res; /* if result was really computed by halfulp */
+ /* else, if result was not really computed by halfulp */
+
+ /* Compute pow as long double, 106 bits */
+ ldz = __ieee754_logl ((long double) x);
+ ldw = (long double) y *ldz;
+ ldpp = __ieee754_expl (ldw);
+ res = (double) (ldpp + ldeps);
+ res1 = (double) (ldpp - ldeps);
+
+ if (res != res1) /* if result still not accurate enough */
+ { /* use mpa for higher precision. */
+ mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
+ static const mp_no eps = { -3, {1.0, 4.0} };
+ int p;
+
+ p = 10; /* p=precision 240 bits */
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p); /* log(x) = z */
+ __mul (&mpy, &mpz, &mpw, p); /* y * z =w */
+ __mpexp (&mpw, &mpp, p); /* e^w =pp */
+ __add (&mpp, &eps, &mpr, p); /* pp+eps =r */
+ __mp_dbl (&mpr, &res, p);
+ __sub (&mpp, &eps, &mpr1, p); /* pp -eps =r1 */
+ __mp_dbl (&mpr1, &res1, p); /* converting into double precision */
+ if (res == res1)
+ return res;
+
+ /* If we get here the result wasn't calculated exactly; continue
+ with a more precise calculation using 768 bits. */
+ p = 32;
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p); /* log(c)=z */
+ __mul (&mpy, &mpz, &mpw, p); /* y*z =w */
+ __mpexp (&mpw, &mpp, p); /* e^w=pp */
+ __mp_dbl (&mpp, &res, p); /* converting into double precision */
+ }
+ return res;
+}
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c
new file mode 100644
index 0000000000..f59c193934
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.c
@@ -0,0 +1,62 @@
+/* Double-precision floating point square root wrapper.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math_ldbl_opt.h>
+#include "math.h"
+#include "math_private.h"
+#include <fenv_libc.h>
+
+#ifdef __STDC__
+double
+__sqrt (double x) /* wrapper sqrt */
+#else
+double
+__sqrt (x) /* wrapper sqrt */
+ double x;
+#endif
+{
+ double z;
+/* Power4 (ISA V2.0) and above implement sqrt in hardware. */
+ __asm __volatile (
+ " fsqrt %0,%1\n"
+ : "=f" (z)
+ : "f" (x));
+#ifdef _IEEE_LIBM
+ return z;
+#else
+ if (__builtin_expect (_LIB_VERSION == _IEEE_, 0))
+ return z;
+
+ if (__builtin_expect (x != x, 0))
+ return z;
+
+ if (__builtin_expect (x < 0.0, 0))
+ return __kernel_standard (x, x, 26); /* sqrt(negative) */
+ else
+ return z;
+#endif
+}
+
+weak_alias (__sqrt, sqrt)
+#ifdef NO_LONG_DOUBLE
+ strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0);
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c
new file mode 100644
index 0000000000..4784869f07
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.c
@@ -0,0 +1,60 @@
+/* Single-precision floating point square root wrapper.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include "math.h"
+#include "math_private.h"
+#include <fenv_libc.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+#ifdef __STDC__
+float
+__sqrtf (float x) /* wrapper sqrtf */
+#else
+float
+__sqrtf (x) /* wrapper sqrtf */
+ float x;
+#endif
+{
+#ifdef _IEEE_LIBM
+ return __ieee754_sqrtf (x);
+#else
+ float z;
+/* Power4 (ISA V2.0) and above implement sqrtf in hardware. */
+ __asm __volatile (
+ " fsqrts %0,%1\n"
+ : "=f" (z)
+ : "f" (x));
+
+ if (__builtin_expect (_LIB_VERSION == _IEEE_, 0))
+ return z;
+
+ if (__builtin_expect (x != x, 0))
+ return z;
+
+ if (__builtin_expect (x < 0.0, 0))
+ /* sqrtf(negative) */
+ return (float) __kernel_standard ((double) x, (double) x, 126);
+ else
+ return z;
+#endif
+}
+
+weak_alias (__sqrtf, sqrtf)
diff --git a/sysdeps/powerpc/powerpc32/power4/memcmp.S b/sysdeps/powerpc/powerpc32/power4/memcmp.S
new file mode 100644
index 0000000000..75b328403a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/memcmp.S
@@ -0,0 +1,986 @@
+/* Optimized memcmp implementation for PowerPC32.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+ .machine power4
+EALIGN (BP_SYM(memcmp), 4, 0)
+ CALL_MCOUNT
+
+#define rTMP r0
+#define rRTN r3
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rN r5 /* max string length */
+#define rWORD1 r6 /* current word in s1 */
+#define rWORD2 r7 /* current word in s2 */
+#define rWORD3 r8 /* next word in s1 */
+#define rWORD4 r9 /* next word in s2 */
+#define rWORD5 r10 /* next word in s1 */
+#define rWORD6 r11 /* next word in s2 */
+#define rBITDIF r12 /* bits that differ in s1 & s2 words */
+#define rWORD7 r30 /* next word in s1 */
+#define rWORD8 r31 /* next word in s2 */
+
+ xor rTMP, rSTR2, rSTR1
+ cmplwi cr6, rN, 0
+ cmplwi cr1, rN, 12
+ clrlwi. rTMP, rTMP, 30
+ clrlwi rBITDIF, rSTR1, 30
+ cmplwi cr5, rBITDIF, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0,rSTR1
+ dcbt 0,rSTR2
+/* If less than 8 bytes or not aligned, use the unaligned
+ byte loop. */
+ blt cr1, L(bytealigned)
+ stwu 1,-64(1)
+ cfi_adjust_cfa_offset(64)
+ stw r31,48(1)
+ cfi_offset(31,(48-64))
+ stw r30,44(1)
+ cfi_offset(30,(44-64))
+ bne L(unaligned)
+/* At this point we know both strings have the same alignment and the
+ compare length is at least 8 bytes. rBITDIF contains the low order
+ 2 bits of rSTR1 and cr5 contains the result of the logical compare
+ of rBITDIF to 0. If rBITDIF == 0 then we are already word
+ aligned and can perform the word aligned loop.
+
+ Otherwise we know the two strings have the same alignment (but not
+ yet word aligned). So we force the string addresses to the next lower
+ word boundary and special case this first word using shift left to
+ eliminate bits preceding the first byte. Since we want to join the
+ normal (word aligned) compare loop, starting at the second word,
+ we need to adjust the length (rN) and special case the loop
+ versioning for the first word. This ensures that the loop count is
+ correct and the first word (shifted) is in the expected register pair. */
+ .align 4
+L(samealignment):
+ clrrwi rSTR1, rSTR1, 2
+ clrrwi rSTR2, rSTR2, 2
+ beq cr5, L(Waligned)
+ add rN, rN, rBITDIF
+ slwi r11, rBITDIF, 3
+ srwi rTMP, rN, 4 /* Divide by 16 */
+ andi. rBITDIF, rN, 12 /* Get the word remainder */
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ cmplwi cr1, rBITDIF, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ beq L(dPs4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
+
+/* Remainder is 4 */
+ .align 3
+L(dsP1):
+ slw rWORD5, rWORD1, r11
+ slw rWORD6, rWORD2, r11
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+/* Do something useful in this cycle since we have to branch anyway. */
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ b L(dP1e)
+/* Remainder is 8 */
+ .align 4
+L(dPs2):
+ slw rWORD5, rWORD1, r11
+ slw rWORD6, rWORD2, r11
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+/* Do something useful in this cycle since we have to branch anyway. */
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ b L(dP2e)
+/* Remainder is 12 */
+ .align 4
+L(dPs3):
+ slw rWORD3, rWORD1, r11
+ slw rWORD4, rWORD2, r11
+ cmplw cr1, rWORD3, rWORD4
+ b L(dP3e)
+/* Count is a multiple of 16, remainder is 0 */
+ .align 4
+L(dPs4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ slw rWORD1, rWORD1, r11
+ slw rWORD2, rWORD2, r11
+ cmplw cr0, rWORD1, rWORD2
+ b L(dP4e)
+
+/* At this point we know both strings are word aligned and the
+ compare length is at least 8 bytes. */
+ .align 4
+L(Waligned):
+ andi. rBITDIF, rN, 12 /* Get the word remainder */
+ srwi rTMP, rN, 4 /* Divide by 16 */
+ cmplwi cr1, rBITDIF, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ beq L(dP4)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
+
+/* Remainder is 4 */
+ .align 4
+L(dP1):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
+ (8-15 byte compare), we want to use only volatile registers. This
+ means we can avoid restoring non-volatile registers since we did not
+ change any on the early exit path. The key here is the non-early
+ exit path only cares about the condition code (cr5), not about which
+ register pair was used. */
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+L(dP1e):
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ bne cr0, L(dLcr0)
+
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+ bne cr1, L(dLcr1)
+ cmplw cr5, rWORD7, rWORD8
+ bdnz L(dLoop)
+ bne cr6, L(dLcr6)
+ lwz r30,44(1)
+ lwz r31,48(1)
+ .align 3
+L(dP1x):
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ lwz 1,0(1)
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 8 */
+ .align 4
+L(dP2):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+L(dP2e):
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
+ b L(dLoop2)
+/* Again we are on an early exit path (16-23 byte compare); we want to
+   use only volatile registers and avoid restoring non-volatile
+   registers. */
+ .align 4
+L(dP2x):
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+ cmplw cr5, rWORD3, rWORD4
+ slwi. r12, rN, 3
+ bne cr6, L(dLcr6)
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ bne cr5, L(dLcr5)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ lwz 1,0(1)
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 12 */
+ .align 4
+L(dP3):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ lwz rWORD3, 0(rSTR1)
+ lwz rWORD4, 0(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+L(dP3e):
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
+ b L(dLoop1)
+/* Again we are on an early exit path (24-31 byte compare); we want to
+   use only volatile registers and avoid restoring non-volatile
+   registers. */
+ .align 4
+L(dP3x):
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+ cmplw cr5, rWORD1, rWORD2
+ slwi. r12, rN, 3
+ bne cr1, L(dLcr1)
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr6, L(dLcr6)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ bne cr5, L(dLcr5)
+ lwz 1,0(1)
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Count is a multiple of 16, remainder is 0 */
+ .align 4
+L(dP4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+L(dP4e):
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr0, L(dLcr0)
+ bne cr1, L(dLcr1)
+ bdz- L(d24) /* Adjust CTR as we start with +4 */
+/* This is the primary loop */
+ .align 4
+L(dLoop):
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+L(dLoop1):
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+L(dLoop2):
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr0, L(dLcr0)
+L(dLoop3):
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+ bne- cr1, L(dLcr1)
+ cmplw cr0, rWORD1, rWORD2
+ bdnz+ L(dLoop)
+
+L(dL4):
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmplw cr5, rWORD7, rWORD8
+L(d44):
+ bne cr0, L(dLcr0)
+L(d34):
+ bne cr1, L(dLcr1)
+L(d24):
+ bne cr6, L(dLcr6)
+L(d14):
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5)
+L(d04):
+ lwz r30,44(1)
+ lwz r31,48(1)
+ lwz 1,0(1)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ beq L(zeroLength)
+/* At this point we have a remainder of 1 to 3 bytes to compare. Since
+ we are aligned it is safe to load the whole word, and use
+ shift right to eliminate bits beyond the compare length. */
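+
+/* For reference, the trim in C (illustrative; big-endian, so the first
+   rem bytes of a word are its high-order bits and a right shift
+   discards the excess):
+
+     unsigned int sh = 32 - 8 * rem;           (rem is 1 to 3)
+     unsigned int a = *w1 >> sh;
+     unsigned int b = *w2 >> sh;
+     return a == b ? 0 : a > b ? 1 : -1;  */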
+L(d00):
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ cmplw rWORD1,rWORD2
+ li rRTN,0
+ beqlr
+ li rRTN,1
+ bgtlr
+ li rRTN,-1
+ blr
+
+ .align 4
+L(dLcr0):
+ lwz r30,44(1)
+ lwz r31,48(1)
+ li rRTN, 1
+ lwz 1,0(1)
+ bgtlr cr0
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr1):
+ lwz r30,44(1)
+ lwz r31,48(1)
+ li rRTN, 1
+ lwz 1,0(1)
+ bgtlr cr1
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr6):
+ lwz r30,44(1)
+ lwz r31,48(1)
+ li rRTN, 1
+ lwz 1,0(1)
+ bgtlr cr6
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr5):
+ lwz r30,44(1)
+ lwz r31,48(1)
+L(dLcr5x):
+ li rRTN, 1
+ lwz 1,0(1)
+ bgtlr cr5
+ li rRTN, -1
+ blr
+
+ .align 4
+L(bytealigned):
+ cfi_adjust_cfa_offset(-64)
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+
+/* We need to prime this loop. This loop is swing modulo scheduled
+   to avoid pipe delays.  The dependent instruction latencies (load to
+   compare to conditional branch) are 2 to 3 cycles.  In this loop each
+   dispatch group ends in a branch and takes 1 cycle.  Effectively
+   the first iteration of the loop only serves to load operands, and
+   branches based on compares are delayed until the next iteration.
+
+ So we must precondition some registers and condition codes so that
+ we don't exit the loop early on the first iteration. */
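+
+/* The rough shape of such a software-pipelined loop in C (illustrative;
+   the real loop below is rotated three ways across cr0/cr1/cr6):
+
+     unsigned char a = s1[0], b = s2[0];          (prime: load only)
+     for (i = 1; i < n; i++)
+       {
+         unsigned char na = s1[i], nb = s2[i];    (loads for NEXT round)
+         if (a != b)                              (test PREVIOUS loads)
+           return a - b;
+         a = na;  b = nb;
+       }
+     return a - b;
+
+   so no load feeds a compare and branch in the same iteration.  */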
+
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ bdz- L(b11)
+ cmplw cr0, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
+ bdz- L(b12)
+ cmplw cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
+ bdz- L(b13)
+ .align 4
+L(bLoop):
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- cr0, L(bLcr0)
+
+ cmplw cr6, rWORD5, rWORD6
+ bdz- L(b3i)
+
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- cr1, L(bLcr1)
+
+ cmplw cr0, rWORD1, rWORD2
+ bdz- L(b2i)
+
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne- cr6, L(bLcr6)
+
+ cmplw cr1, rWORD3, rWORD4
+ bdnz+ L(bLoop)
+
+/* We speculatively load bytes before we have tested the previous
+   bytes.  But we must avoid overrunning the length (in the ctr) to
+   prevent these speculative loads from causing a segfault.  In that
+   case the loop exits early, before all the pending bytes have been
+   tested, so we must complete the pending compares before
+   returning.  */
+L(b1i):
+ bne- cr0, L(bLcr0)
+ bne- cr1, L(bLcr1)
+ b L(bx56)
+ .align 4
+L(b2i):
+ bne- cr6, L(bLcr6)
+ bne- cr0, L(bLcr0)
+ b L(bx34)
+ .align 4
+L(b3i):
+ bne- cr1, L(bLcr1)
+ bne- cr6, L(bLcr6)
+ b L(bx12)
+ .align 4
+L(bLcr0):
+ li rRTN, 1
+ bgtlr cr0
+ li rRTN, -1
+ blr
+L(bLcr1):
+ li rRTN, 1
+ bgtlr cr1
+ li rRTN, -1
+ blr
+L(bLcr6):
+ li rRTN, 1
+ bgtlr cr6
+ li rRTN, -1
+ blr
+
+L(b13):
+ bne- cr0, L(bx12)
+ bne- cr1, L(bx34)
+L(bx56):
+ sub rRTN, rWORD5, rWORD6
+ blr
+ nop
+L(b12):
+ bne- cr0, L(bx12)
+L(bx34):
+ sub rRTN, rWORD3, rWORD4
+ blr
+
+L(b11):
+L(bx12):
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+ .align 4
+L(zeroLengthReturn):
+
+L(zeroLength):
+ li rRTN, 0
+ blr
+
+ cfi_adjust_cfa_offset(64)
+ .align 4
+/* At this point we know the strings have different alignment and the
+ compare length is at least 8 bytes. rBITDIF contains the low order
+ 2 bits of rSTR1 and cr5 contains the result of the logical compare
+   of rBITDIF to 0.  If rBITDIF == 0 then rSTR1 is word aligned and we
+   can perform the Wunaligned loop.
+
+   Otherwise we know that rSTR1 is not yet word aligned.
+   So we can force the string addresses to the next lower word
+   boundary and special case this first word using shift left to
+   eliminate bits preceding the first byte.  Since we want to join the
+   normal (Wunaligned) compare loop, starting at the second word,
+   we need to adjust the length (rN) and special case the loop
+   versioning for the first W.  This ensures that the loop count is
+   correct and the first W (shifted) is in the expected register pair. */
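+
+/* The fixup itself is the classic shift-and-merge; in C (illustrative,
+   big-endian, sh_l = 8 * misalignment, sh_r = 32 - sh_l):
+
+     unsigned int w0 = wp[0], w1 = wp[1];         (two aligned loads)
+     unsigned int merged = (w0 << sh_l) | (w1 >> sh_r);
+
+   merged then looks like an aligned load from the unaligned address,
+   and w1 is retained as the next iteration's w0.  */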
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rB r27 /* Left rotation temp for rWORD2. */
+#define rD r26 /* Left rotation temp for rWORD4. */
+#define rF r25 /* Left rotation temp for rWORD6. */
+#define rH r24 /* Left rotation temp for rWORD8. */
+#define rA r0 /* Right rotation temp for rWORD2. */
+#define rC r12 /* Right rotation temp for rWORD4. */
+#define rE r0 /* Right rotation temp for rWORD6. */
+#define rG r12 /* Right rotation temp for rWORD8. */
+L(unaligned):
+ stw r29,40(r1)
+ cfi_offset(r29,(40-64))
+ clrlwi rSHL, rSTR2, 30
+ stw r28,36(r1)
+ cfi_offset(r28,(36-64))
+ beq cr5, L(Wunaligned)
+ stw r27,32(r1)
+ cfi_offset(r27,(32-64))
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
+ in the 1st rSTR1 W. */
+ sub r27, rSTR2, rBITDIF
+/* But do not attempt to address the W before the W that contains
+ the actual start of rSTR2. */
+ clrrwi rSTR2, rSTR2, 2
+ stw r26,28(r1)
+ cfi_offset(r26,(28-64))
+/* Compute the left/right shift counts for the unaligned rSTR2,
+ compensating for the logical (W aligned) start of rSTR1. */
+ clrlwi rSHL, r27, 30
+ clrrwi rSTR1, rSTR1, 2
+ stw r25,24(r1)
+ cfi_offset(r25,(24-64))
+ slwi rSHL, rSHL, 3
+ cmplw cr5, r27, rSTR2
+ add rN, rN, rBITDIF
+ slwi r11, rBITDIF, 3
+ stw r24,20(r1)
+ cfi_offset(r24,(20-64))
+ subfic rSHR, rSHL, 32
+ srwi rTMP, rN, 4 /* Divide by 16 */
+ andi. rBITDIF, rN, 12 /* Get the W remainder */
+/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
+ this special case those bits may be discarded anyway. Also we
+ must avoid loading a W where none of the bits are part of rSTR2 as
+ this may cross a page boundary and cause a page fault. */
+ li rWORD8, 0
+ blt cr5, L(dus0)
+ lwz rWORD8, 0(rSTR2)
+ la rSTR2, 4(rSTR2)
+ slw rWORD8, rWORD8, rSHL
+
+L(dus0):
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ cmplwi cr1, rBITDIF, 8
+ cmplwi cr7, rN, 16
+ srw rG, rWORD2, rSHR
+ clrlwi rN, rN, 30
+ beq L(duPs4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, rG, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
+
+/* Remainder is 4 */
+ .align 4
+L(dusP1):
+ slw rB, rWORD2, rSHL
+ slw rWORD7, rWORD1, r11
+ slw rWORD8, rWORD8, r11
+ bge cr7, L(duP1e)
+/* At this point we exit early with the first word compare
+   complete and a remainder of 0 to 3 bytes.  See L(du14) for details on
+ how we handle the remaining bytes. */
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ b L(dutrim)
+/* Remainder is 8 */
+ .align 4
+L(duPs2):
+ slw rH, rWORD2, rSHL
+ slw rWORD5, rWORD1, r11
+ slw rWORD6, rWORD8, r11
+ b L(duP2e)
+/* Remainder is 12 */
+ .align 4
+L(duPs3):
+ slw rF, rWORD2, rSHL
+ slw rWORD3, rWORD1, r11
+ slw rWORD4, rWORD8, r11
+ b L(duP3e)
+/* Count is a multiple of 16, remainder is 0 */
+ .align 4
+L(duPs4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, rG, rWORD8
+ slw rD, rWORD2, rSHL
+ slw rWORD1, rWORD1, r11
+ slw rWORD2, rWORD8, r11
+ b L(duP4e)
+
+/* At this point we know rSTR1 is word aligned and the
+ compare length is at least 8 bytes. */
+ .align 4
+L(Wunaligned):
+ stw r27,32(r1)
+ cfi_offset(r27,(32-64))
+ clrrwi rSTR2, rSTR2, 2
+ stw r26,28(r1)
+ cfi_offset(r26,(28-64))
+ srwi rTMP, rN, 4 /* Divide by 16 */
+ stw r25,24(r1)
+ cfi_offset(r25,(24-64))
+ andi. rBITDIF, rN, 12 /* Get the W remainder */
+ stw r24,20(r1)
+	cfi_offset(r24,(20-64))
+ slwi rSHL, rSHL, 3
+ lwz rWORD6, 0(rSTR2)
+ lwzu rWORD8, 4(rSTR2)
+ cmplwi cr1, rBITDIF, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ subfic rSHR, rSHL, 32
+ slw rH, rWORD6, rSHL
+ beq L(duP4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
+
+/* Remainder is 4 */
+ .align 4
+L(duP1):
+ srw rG, rWORD8, rSHR
+ lwz rWORD7, 0(rSTR1)
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP1x)
+L(duP1e):
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ srw rA, rWORD2, rSHR
+ slw rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ srw rC, rWORD4, rSHR
+ slw rF, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, rC, rD
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ srw rE, rWORD6, rSHR
+ slw rH, rWORD6, rSHL
+ bne cr0, L(duLcr0)
+ or rWORD6, rE, rF
+ cmplw cr6, rWORD5, rWORD6
+ b L(duLoop3)
+ .align 4
+/* At this point we exit early with the first word compare
+   complete and a remainder of 0 to 3 bytes.  See L(du14) for details on
+ how we handle the remaining bytes. */
+L(duP1x):
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+	lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ b L(dutrim)
+/* Remainder is 8 */
+ .align 4
+L(duP2):
+ srw rE, rWORD8, rSHR
+ lwz rWORD5, 0(rSTR1)
+ or rWORD6, rE, rH
+ slw rH, rWORD8, rSHL
+L(duP2e):
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ srw rG, rWORD8, rSHR
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP2x)
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw rA, rWORD2, rSHR
+ slw rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srw rC, rWORD4, rSHR
+ slw rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ cmplw cr1, rWORD3, rWORD4
+ b L(duLoop2)
+ .align 4
+L(duP2x):
+ cmplw cr5, rWORD7, rWORD8
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ b L(dutrim)
+
+/* Remainder is 12 */
+ .align 4
+L(duP3):
+ srw rC, rWORD8, rSHR
+ lwz rWORD3, 0(rSTR1)
+ slw rF, rWORD8, rSHL
+ or rWORD4, rC, rH
+L(duP3e):
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ srw rE, rWORD6, rSHR
+ slw rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw rG, rWORD8, rSHR
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP3x)
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw rA, rWORD2, rSHR
+ slw rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ cmplw cr0, rWORD1, rWORD2
+ b L(duLoop1)
+ .align 4
+L(duP3x):
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr1, L(duLcr1)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ b L(dutrim)
+
+/* Count is a multiple of 16, remainder is 0 */
+ .align 4
+L(duP4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ srw rA, rWORD8, rSHR
+ lwz rWORD1, 0(rSTR1)
+ slw rD, rWORD8, rSHL
+ or rWORD2, rA, rH
+L(duP4e):
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ srw rC, rWORD4, rSHR
+ slw rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ bne cr0, L(duLcr0)
+ srw rE, rWORD6, rSHR
+ slw rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw rG, rWORD8, rSHR
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ cmplw cr5, rWORD7, rWORD8
+ bdz- L(du24) /* Adjust CTR as we start with +4 */
+/* This is the primary loop */
+ .align 4
+L(duLoop):
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srw rA, rWORD2, rSHR
+ slw rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+L(duLoop1):
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srw rC, rWORD4, rSHR
+ slw rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+L(duLoop2):
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+ cmplw cr5, rWORD7, rWORD8
+ bne cr0, L(duLcr0)
+ srw rE, rWORD6, rSHR
+ slw rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+L(duLoop3):
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+ cmplw cr0, rWORD1, rWORD2
+ bne- cr1, L(duLcr1)
+ srw rG, rWORD8, rSHR
+ slw rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ bdnz+ L(duLoop)
+
+L(duL4):
+ bne cr1, L(duLcr1)
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmplw cr5, rWORD7, rWORD8
+L(du44):
+ bne cr0, L(duLcr0)
+L(du34):
+ bne cr1, L(duLcr1)
+L(du24):
+ bne cr6, L(duLcr6)
+L(du14):
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+/* At this point we have a remainder of 1 to 3 bytes to compare. We use
+ shift right to eliminate bits beyond the compare length.
+
+   However it may not be safe to load rWORD2, which may be beyond the
+   string length.  So we compare the bit length of the remainder to
+   the right shift count (rSHR).  If the bit count is less than or
+   equal to rSHR, we do not need to load rWORD2 (all significant bits
+   are already in rB). */
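+
+/* The guard in C terms (illustrative; rem_bits is rN after the
+   slwi above, i.e. 8 * remaining bytes):
+
+     unsigned int hi = 0;                         (rA)
+     if (rem_bits > sh_r)
+       hi = wp2[1] >> sh_r;
+
+   when rem_bits > sh_r some needed bits live in the next rSTR2 word,
+   and that word then necessarily overlaps the string, so the load
+   cannot fault.  */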
+ cmplw cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ lwz rWORD2, 4(rSTR2)
+ srw rA, rWORD2, rSHR
+ .align 4
+L(dutrim):
+ lwz rWORD1, 4(rSTR1)
+ lwz r31,48(1)
+ subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */
+ or rWORD2, rA, rB
+ lwz r30,44(1)
+ lwz r29,40(r1)
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ lwz r28,36(r1)
+ lwz r27,32(r1)
+ cmplw rWORD1,rWORD2
+ li rRTN,0
+ beq L(dureturn26)
+ li rRTN,1
+ bgt L(dureturn26)
+ li rRTN,-1
+ b L(dureturn26)
+ .align 4
+L(duLcr0):
+ lwz r31,48(1)
+ lwz r30,44(1)
+ li rRTN, 1
+ bgt cr0, L(dureturn29)
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr1):
+ lwz r31,48(1)
+ lwz r30,44(1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr6):
+ lwz r31,48(1)
+ lwz r30,44(1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr5):
+ lwz r31,48(1)
+ lwz r30,44(1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 3
+L(duZeroReturn):
+ li rRTN,0
+ .align 4
+L(dureturn):
+ lwz r31,48(1)
+ lwz r30,44(1)
+L(dureturn29):
+ lwz r29,40(r1)
+ lwz r28,36(r1)
+L(dureturn27):
+ lwz r27,32(r1)
+L(dureturn26):
+ lwz r26,28(r1)
+L(dureturn25):
+ lwz r25,24(r1)
+ lwz r24,20(r1)
+ lwz 1,0(1)
+ blr
+END (BP_SYM (memcmp))
+
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp, bcmp)
diff --git a/sysdeps/powerpc/powerpc32/power4/memcopy.h b/sysdeps/powerpc/powerpc32/power4/memcopy.h
new file mode 100644
index 0000000000..c05208da55
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/memcopy.h
@@ -0,0 +1,113 @@
+/* memcopy.h -- definitions for memory copy functions. Generic C version.
+ Copyright (C) 1991, 1992, 1993, 1997, 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* The strategy of the memory functions is:
+
+ 1. Copy bytes until the destination pointer is aligned.
+
+ 2. Copy words in unrolled loops. If the source and destination
+ are not aligned in the same way, use word memory operations,
+ but shift and merge two read words before writing.
+
+ 3. Copy the few remaining bytes.
+
+ This is fast on processors that have at least 10 registers for
+ allocation by GCC, and that can access memory at reg+const in one
+ instruction.
+
+ I made an "exhaustive" test of this memmove when I wrote it,
+ exhaustive in the sense that I tried all alignment and length
+ combinations, with and without overlap. */
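+
+/* A minimal sketch of that strategy, for reference (illustrative only;
+   the real work is done by the macros below and the _wordcopy
+   routines):
+
+     void copy_fwd (char *d, const char *s, size_t n)
+     {
+       while (n > 0 && ((unsigned long) d % OPSIZ) != 0)
+         { *d++ = *s++; n--; }                    (1: align dst)
+       (2: unrolled word loop; shift and MERGE when the source and
+           destination alignments differ)
+       while (n-- > 0)
+         *d++ = *s++;                             (3: byte tail)
+     }  */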
+
+#include <sysdeps/generic/memcopy.h>
+
+/* The macros defined in this file are:
+
+ BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy)
+
+ BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy)
+
+ WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy)
+
+ WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy)
+
+ MERGE(old_word, sh_1, new_word, sh_2)
+ [I fail to understand. I feel stupid. --roland]
+*/
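+
+/* For a big-endian machine such as this one, the generic definition of
+   MERGE amounts to
+
+     MERGE (w0, sh_1, w1, sh_2)  ==  ((w0 << sh_1) | (w1 >> sh_2))
+
+   with sh_1 = 8 * (srcp % OPSIZ) and sh_2 = 8 * OPSIZ - sh_1: it glues
+   the tail of one aligned source word to the head of the next, so the
+   result looks like an aligned load from the unaligned address.  */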
+
+
+/* Threshold value for when to enter the unrolled loops. */
+#undef OP_T_THRES
+#define OP_T_THRES 16
+
+/* Copy exactly NBYTES bytes from SRC_BP to DST_BP,
+ without any assumptions about alignment of the pointers. */
+#undef BYTE_COPY_FWD
+#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
+ do \
+ { \
+ size_t __nbytes = (nbytes); \
+ if (__nbytes & 1) \
+ { \
+ ((byte *) dst_bp)[0] = ((byte *) src_bp)[0]; \
+ src_bp += 1; \
+ dst_bp += 1; \
+ __nbytes -= 1; \
+ } \
+ while (__nbytes > 0) \
+ { \
+ byte __x = ((byte *) src_bp)[0]; \
+ byte __y = ((byte *) src_bp)[1]; \
+ src_bp += 2; \
+ __nbytes -= 2; \
+ ((byte *) dst_bp)[0] = __x; \
+ ((byte *) dst_bp)[1] = __y; \
+ dst_bp += 2; \
+ } \
+ } while (0)
+
+/* Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
+ beginning at the bytes right before the pointers and continuing towards
+ smaller addresses. Don't assume anything about alignment of the
+ pointers. */
+#undef BYTE_COPY_BWD
+#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \
+ do \
+ { \
+ size_t __nbytes = (nbytes); \
+ if (__nbytes & 1) \
+ { \
+ src_ep -= 1; \
+ dst_ep -= 1; \
+ ((byte *) dst_ep)[0] = ((byte *) src_ep)[0]; \
+ __nbytes -= 1; \
+ } \
+ while (__nbytes > 0) \
+ { \
+ byte __x, __y; \
+ src_ep -= 2; \
+ __y = ((byte *) src_ep)[1]; \
+ __x = ((byte *) src_ep)[0]; \
+ dst_ep -= 2; \
+ __nbytes -= 2; \
+ ((byte *) dst_ep)[1] = __y; \
+ ((byte *) dst_ep)[0] = __x; \
+ } \
+ } while (0)
diff --git a/sysdeps/powerpc/powerpc32/power4/memcpy.S b/sysdeps/powerpc/powerpc32/power4/memcpy.S
new file mode 100644
index 0000000000..73020c6da8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/memcpy.S
@@ -0,0 +1,426 @@
+/* Optimized memcpy implementation for PowerPC32 on PowerPC64.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+   Memcpy handles short copies (< 32 bytes) using binary move blocks
+   (no loops) of lwz/stw.  The tail (remaining 1-3 bytes) is handled
+ with the appropriate combination of byte and halfword load/stores.
+ There is minimal effort to optimize the alignment of short moves.
+
+   Longer moves (>= 32 bytes) justify the effort to get at least the
+ destination word (4-byte) aligned. Further optimization is
+ possible when both source and destination are word aligned.
+ Each case has an optimized unrolled loop. */
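+
+/* Overall dispatch as a C sketch (illustrative; the helper names are
+   hypothetical stand-ins for the labeled blocks below):
+
+     void *memcpy_sketch (char *dst, const char *src, size_t n)
+     {
+       if (n < 32)
+         return short_copy (dst, src, n);               (.L2)
+       size_t pre = (-(unsigned long) dst) & 3;
+       copy_bytes (dst, src, pre);                      (align dst)
+       if ((((unsigned long) src + pre) & 3) == 0)
+         return word_copy (dst + pre, src + pre, n - pre);
+       return shift_merge_copy (dst + pre, src + pre, n - pre);  (.L6)
+     }  */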
+
+ .machine power4
+EALIGN (BP_SYM (memcpy), 5, 0)
+ CALL_MCOUNT
+
+ stwu 1,-32(1)
+ cfi_adjust_cfa_offset(32)
+ stw 30,20(1)
+ cfi_offset(30,(20-32))
+ mr 30,3
+ cmplwi cr1,5,31
+ stw 31,24(1)
+ cfi_offset(31,(24-32))
+ neg 0,3
+ andi. 11,3,3 /* check alignment of dst. */
+ clrlwi 0,0,30 /* Number of bytes until the 1st word of dst. */
+ clrlwi 10,4,30 /* check alignment of src. */
+ cmplwi cr6,5,8
+ ble- cr1,.L2 /* If move < 32 bytes use short move code. */
+ cmplw cr6,10,11
+ mr 12,4
+ srwi 9,5,2 /* Number of full words remaining. */
+ mtcrf 0x01,0
+ mr 31,5
+ beq .L0
+
+ subf 31,0,5
+ /* Move 0-3 bytes as needed to get the destination word aligned. */
+1: bf 31,2f
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: bf 30,0f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+0:
+ clrlwi 10,12,30 /* check alignment of src again. */
+ srwi 9,31,2 /* Number of full words remaining. */
+
+ /* Copy words from source to destination, assuming the destination is
+ aligned on a word boundary.
+
+ At this point we know there are at least 25 bytes left (32-7) to copy.
+ The next step is to determine if the source is also word aligned.
+	   If not branch to the unaligned move code at .L6, which uses
+ a load, shift, store strategy.
+
+ Otherwise source and destination are word aligned, and we can use
+ the optimized word copy loop. */
+.L0:
+ clrlwi 11,31,30 /* calculate the number of tail bytes */
+ mtcrf 0x01,9
+ bne- cr6,.L6 /* If source is not word aligned. */
+
+ /* Move words where destination and source are word aligned.
+ Use an unrolled loop to copy 4 words (16-bytes) per iteration.
+	   If the copy is not an exact multiple of 16 bytes, 1-3
+ words are copied as needed to set up the main loop. After
+ the main loop exits there may be a tail of 1-3 bytes. These bytes are
+ copied a halfword/byte at a time as needed to preserve alignment. */
+
+ srwi 8,31,4 /* calculate the 16 byte loop count */
+ cmplwi cr1,9,4
+ cmplwi cr6,11,0
+ mr 11,12
+
+ bf 30,1f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 11,12,8
+ mtctr 8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 10,3,8
+ bf 31,4f
+ lwz 0,8(12)
+ stw 0,8(3)
+ blt cr1,3f
+ addi 11,12,12
+ addi 10,3,12
+ b 4f
+ .align 4
+1:
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ lwz 6,0(12)
+ addi 11,12,4
+ stw 6,0(3)
+ addi 10,3,4
+
+ .align 4
+4:
+ lwz 6,0(11)
+ lwz 7,4(11)
+ lwz 8,8(11)
+ lwz 0,12(11)
+ stw 6,0(10)
+ stw 7,4(10)
+ stw 8,8(10)
+ stw 0,12(10)
+ addi 11,11,16
+ addi 10,10,16
+ bdnz 4b
+3:
+ clrrwi 0,31,2
+ mtcrf 0x01,31
+ beq cr6,0f
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+/* At this point we have a tail of 0-3 bytes and we know that the
+ destination is word aligned. */
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+
+/* Copy up to 31 bytes.  This is divided into two cases: 0-8 bytes and
+ 9-31 bytes. Each case is handled without loops, using binary
+ (1,2,4,8) tests.
+
+ In the short (0-8 byte) case no attempt is made to force alignment
+ of either source or destination. The hardware will handle the
+   unaligned load/stores with small delays for crossing 32-, 64-, and
+ 4096-byte boundaries. Since these short moves are unlikely to be
+ unaligned or cross these boundaries, the overhead to force
+ alignment is not justified.
+
+ The longer (9-31 byte) move is more likely to cross 32- or 64-byte
+ boundaries. Since only loads are sensitive to the 32-/64-byte
+ boundaries it is more important to align the source than the
+ destination. If the source is not already word aligned, we first
+ move 1-3 bytes as needed. While the destination and stores may
+ still be unaligned, this is only an issue for page (4096 byte
+ boundary) crossing, which should be rare for these short moves.
+ The hardware handles this case automatically with a small delay. */
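+
+/* The "binary" moves test individual bits of the length instead of
+   looping; for the 0-8 byte case the idea in C is roughly
+   (illustrative; unaligned accesses are left to the hardware, as
+   described above):
+
+     if (n == 8) { copy two words and return; }
+     if (n & 4) { *(int *) d = *(const int *) s;  d += 4;  s += 4; }
+     if (n & 2) { *(short *) d = *(const short *) s;  d += 2;  s += 2; }
+     if (n & 1) *d = *s;
+
+   the mtcrf/bf pairs below are exactly these bit tests.  */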
+
+ .align 4
+.L2:
+ mtcrf 0x01,5
+ neg 8,4
+ clrrwi 11,4,2
+ andi. 0,8,3
+ ble cr6,.LE8 /* Handle moves of 0-8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmplwi cr1,5,16
+ mr 10,5
+ mr 12,4
+ cmplwi cr6,0,2
+ beq .L3 /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(11)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srwi 7,6,16
+ bgt cr6,3f
+ sth 6,0(3)
+ b 7f
+ .align 4
+3:
+ stb 7,0(3)
+ sth 6,1(3)
+ b 7f
+ .align 4
+5:
+ stb 6,0(3)
+7:
+ cmplwi cr1,10,16
+ add 3,3,0
+ mtcrf 0x01,10
+ .align 4
+.L3:
+/* At least 6 bytes left and the source is word aligned. */
+ blt cr1,8f
+16: /* Move 16 bytes. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ stw 6,0(3)
+ lwz 6,8(12)
+ stw 7,4(3)
+ lwz 7,12(12)
+ addi 12,12,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+8: /* Move 8 bytes. */
+ bf 28,4f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Move 4 bytes. */
+ bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Move 2-3 bytes. */
+ bf 30,1f
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 7,2(12)
+ stb 7,2(3)
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+1: /* Move 1 byte. */
+ bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+.LE8:
+ mr 12,4
+ bne cr6,4f
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+ .align 4
+4: bf 29,2b
+ lwz 6,0(4)
+ stw 6,0(3)
+6:
+ bf 30,5f
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,0f
+ lbz 8,6(4)
+ stb 8,6(3)
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+ .align 4
+5:
+ bf 31,0f
+ lbz 6,4(4)
+ stb 6,4(3)
+ .align 4
+0:
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+ .align 4
+.L6:
+
+ /* Copy words where the destination is aligned but the source is
+ not. Use aligned word loads from the source, shifted to realign
+ the data, to allow aligned destination stores.
+ Use an unrolled loop to copy 4 words (16-bytes) per iteration.
+ A single word is retained for storing at loop exit to avoid walking
+ off the end of a page within the loop.
+ If the copy is not an exact multiple of 16 bytes, 1-3
+ words are copied as needed to set up the main loop. After
+ the main loop exits there may be a tail of 1-3 bytes. These bytes are
+ copied a halfword/byte at a time as needed to preserve alignment. */
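+
+/* The heart of this loop in C (illustrative; sh_l and sh_r are set up
+   below, w carries the previously loaded source word):
+
+     while (count-- > 0)
+       {
+         unsigned int next = *ws++;                (aligned load)
+         *wd++ = (w << sh_l) | (next >> sh_r);     (merged store)
+         w = next;                                 (retained word)
+       }
+
+   the retained word supplies the final store after the loop, so the
+   loop never loads a source word beyond the last one it needs.  */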
+
+
+ cmplwi cr6,11,0 /* are there tail bytes left ? */
+ subf 5,10,12 /* back up src pointer to prev word alignment */
+ slwi 10,10,3 /* calculate number of bits to shift 1st word left */
+ addi 11,9,-1 /* we move one word after the loop */
+ srwi 8,11,2 /* calculate the 16 byte loop count */
+ lwz 6,0(5) /* load 1st src word into R6 */
+ mr 4,3
+ lwz 7,4(5) /* load 2nd src word into R7 */
+ mtcrf 0x01,11
+ subfic 9,10,32 /* number of bits to shift 2nd word right */
+ mtctr 8
+ bf 30,1f
+
+ /* there are at least two words to copy, so copy them */
+ slw 0,6,10 /* shift 1st src word to left align it in R0 */
+ srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+ or 0,0,8 /* or them to get word to store */
+ lwz 6,8(5) /* load the 3rd src word */
+ stw 0,0(4) /* store the 1st dst word */
+ slw 0,7,10 /* now left align 2nd src word into R0 */
+ srw 8,6,9 /* shift 3rd src word to right align it in R8 */
+ or 0,0,8 /* or them to get word to store */
+ lwz 7,12(5)
+ stw 0,4(4) /* store the 2nd dst word */
+ addi 4,4,8
+ addi 5,5,16
+ bf 31,4f
+ /* there is a third word to copy, so copy it */
+ slw 0,6,10 /* shift 3rd src word to left align it in R0 */
+ srw 8,7,9 /* shift 4th src word to right align it in R8 */
+ or 0,0,8 /* or them to get word to store */
+ stw 0,0(4) /* store 3rd dst word */
+ mr 6,7
+ lwz 7,0(5)
+ addi 5,5,4
+ addi 4,4,4
+ b 4f
+ .align 4
+1:
+ slw 0,6,10 /* shift 1st src word to left align it in R0 */
+ srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+ addi 5,5,8
+ or 0,0,8 /* or them to get word to store */
+ bf 31,4f
+ mr 6,7
+ lwz 7,0(5)
+ addi 5,5,4
+ stw 0,0(4) /* store the 1st dst word */
+ addi 4,4,4
+
+ .align 4
+4:
+ /* copy 16 bytes at a time */
+ slw 0,6,10
+ srw 8,7,9
+ or 0,0,8
+ lwz 6,0(5)
+ stw 0,0(4)
+ slw 0,7,10
+ srw 8,6,9
+ or 0,0,8
+ lwz 7,4(5)
+ stw 0,4(4)
+ slw 0,6,10
+ srw 8,7,9
+ or 0,0,8
+ lwz 6,8(5)
+ stw 0,8(4)
+ slw 0,7,10
+ srw 8,6,9
+ or 0,0,8
+ lwz 7,12(5)
+ stw 0,12(4)
+ addi 5,5,16
+ addi 4,4,16
+ bdnz+ 4b
+8:
+ /* calculate and store the final word */
+ slw 0,6,10
+ srw 8,7,9
+ or 0,0,8
+ stw 0,0(4)
+3:
+ clrrwi 0,31,2
+ mtcrf 0x01,31
+ bne cr6,.L9 /* If the tail is 0 bytes we are done! */
+
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+END (BP_SYM (memcpy))
+
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/powerpc/powerpc32/power4/memset.S b/sysdeps/powerpc/powerpc32/power4/memset.S
new file mode 100644
index 0000000000..5dd1d943cf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/memset.S
@@ -0,0 +1,229 @@
+/* Optimized memset implementation for PowerPC32.
+ Copyright (C) 1997,99, 2000,02,03, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), word (32 bits),
+ cache line (1024 bits). There is a special case for setting cache lines
+ to 0, to take advantage of the dcbz instruction. */
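+
+/* For reference, the two rlwimi below perform, in C terms,
+
+     unsigned int w = (unsigned char) c;
+     w |= w << 8;                  (replicate byte to halfword)
+     w |= w << 16;                 (replicate halfword to word)
+
+   and when c == 0 and the region covers whole 128-byte cache lines,
+   dcbz zeroes an entire line per instruction instead of 32 stw.  */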
+
+ .machine power4
+EALIGN (BP_SYM (memset), 5, 0)
+ CALL_MCOUNT
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#define rMEMP0 r3 /* Original value of 1st arg. */
+#define rCHR r4 /* Char to set in each byte. */
+#define rLEN r5 /* Length of region to set. */
+#define rMEMP r6 /* Address at which we are storing. */
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+
+#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */
+#define rCLS r8 /* Cache line size (known to be 128). */
+#define rCLM r9 /* Cache line size mask to check for cache alignment. */
+L(_memset):
+/* Take care of case for size <= 4. */
+ cmplwi cr1, rLEN, 4
+ andi. rALIGN, rMEMP0, 3
+ mr rMEMP, rMEMP0
+ ble- cr1, L(small)
+
+/* Align to word boundary. */
+ cmplwi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ beq+ L(aligned)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 4
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+/* Handle the case of size < 31. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x1C
+ subfic rALIGN, rALIGN, 0x20
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stw rCHR, -4(rMEMP2)
+ stwu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ stw rCHR, -4(rMEMP2)
+ stw rCHR, -8(rMEMP2)
+ stw rCHR, -12(rMEMP2)
+ stwu rCHR, -16(rMEMP2)
+L(a2): bf 29, L(caligned)
+ stw rCHR, -4(rMEMP2)
+
+/* Now aligned to a 32 byte boundary. */
+L(caligned):
+ cmplwi cr1, rCHR, 0
+ clrrwi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+L(nondcbz):
+ srwi rTMP, rALIGN, 5
+ mtctr rTMP
+ beq L(medium) /* We may not actually get to do a full line. */
+ clrlwi. rLEN, rLEN, 27
+ add rMEMP, rMEMP, rALIGN
+ li rNEG64, -0x40
+ bdz L(cloopdone)
+
+ .align 4
+L(c3): dcbtst rNEG64, rMEMP
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stw rCHR, -16(rMEMP)
+ stw rCHR, -20(rMEMP)
+ stw rCHR, -24(rMEMP)
+ stw rCHR, -28(rMEMP)
+ stwu rCHR, -32(rMEMP)
+ bdnz L(c3)
+L(cloopdone):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stw rCHR, -16(rMEMP)
+ cmplwi cr1, rLEN, 16
+ stw rCHR, -20(rMEMP)
+ stw rCHR, -24(rMEMP)
+ stw rCHR, -28(rMEMP)
+ stwu rCHR, -32(rMEMP)
+ beqlr
+ add rMEMP, rMEMP, rALIGN
+ b L(medium_tail2)
+
+ .align 5
+/* Clear lines of memory in 128-byte chunks. */
+L(zloopstart):
+/* If the remaining length is less than 32 bytes, don't bother getting
+ the cache line size. */
+ beq L(medium)
+ li rCLS,128 /* cache line size is 128 */
+ dcbt 0,rMEMP
+L(getCacheAligned):
+ cmplwi cr1,rLEN,32
+ andi. rTMP,rMEMP,127
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ stw rCHR,-32(rMEMP)
+ stw rCHR,-28(rMEMP)
+ stw rCHR,-24(rMEMP)
+ stw rCHR,-20(rMEMP)
+ stw rCHR,-16(rMEMP)
+ stw rCHR,-12(rMEMP)
+ stw rCHR,-8(rMEMP)
+ stw rCHR,-4(rMEMP)
+ b L(getCacheAligned)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+ .align 4
+L(cacheAligned):
+ cmplw cr1,rLEN,rCLS
+ blt cr1,L(handletail32)
+ dcbz 0,rMEMP
+ subf rLEN,rCLS,rLEN
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
+
+/* We are here because the cache line size was set and the remainder
+ (rLEN) is less than the actual cache line size.
+ So set up the preconditions for L(nondcbz) and go there. */
+L(handletail32):
+ clrrwi. rALIGN, rLEN, 5
+ b L(nondcbz)
+
+ .align 5
+L(small):
+/* Memset of 4 bytes or less. */
+ cmplwi cr5, rLEN, 1
+ cmplwi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ cmplwi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt- 29, L(medium_29t)
+L(medium_29f):
+ bge- cr1, L(medium_27t)
+ bflr- 28
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt- cr1, L(medium_27f)
+L(medium_27t):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stwu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr- 28
+L(medium_28t):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ blr
+END (BP_SYM (memset))
+libc_hidden_builtin_def (memset)
diff --git a/sysdeps/powerpc/powerpc32/power4/strncmp.S b/sysdeps/powerpc/powerpc32/power4/strncmp.S
new file mode 100644
index 0000000000..fc0835ebe0
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/strncmp.S
@@ -0,0 +1,176 @@
+/* Optimized strncmp implementation for PowerPC32.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how the end-of-string testing works. */
+
+/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+EALIGN (BP_SYM(strncmp), 4, 0)
+
+#define rTMP r0
+#define rRTN r3
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rN r5 /* max string length */
+/* Note: The Bounded pointer support in this code is broken. This code
+   was inherited from PPC32 and that support was never completed.
+ Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */
+#define rWORD1 r6 /* current word in s1 */
+#define rWORD2 r7 /* current word in s2 */
+#define rWORD3 r10
+#define rWORD4 r11
+#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */
+#define r7F7F r9 /* constant 0x7f7f7f7f */
+#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
+#define rBITDIF r11 /* bits that differ in s1 & s2 words */
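+
+/* The word-at-a-time end-of-string test used below is the classic
+   trick; in C (illustrative, w is one 32-bit word of s1):
+
+     unsigned int t = w + 0xfefefeff;       (w - 0x01010101, via rFEFE)
+     unsigned int m = ~(w | 0x7f7f7f7f);    (~w & 0x80808080, rNEG)
+     if (t & m)                             (nonzero iff w has a 0 byte)
+       ...
+
+   a zero byte turns into 0xff in t (setting its high bit), and m keeps
+   only the bytes whose high bit was clear in w, so the test has no
+   false positives.  */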
+
+ dcbt 0,rSTR1
+ or rTMP, rSTR2, rSTR1
+ lis r7F7F, 0x7f7f
+ dcbt 0,rSTR2
+ clrlwi. rTMP, rTMP, 30
+ cmplwi cr1, rN, 0
+ lis rFEFE, -0x101
+ bne L(unaligned)
+/* We are word aligned so set up for two loops.  First a word
+   loop, then fall into the byte loop if there is any residual. */
+ srwi. rTMP, rN, 2
+ clrlwi rN, rN, 30
+ addi rFEFE, rFEFE, -0x101
+ addi r7F7F, r7F7F, 0x7f7f
+ cmplwi cr1, rN, 0
+ beq L(unaligned)
+
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ b L(g1)
+
+L(g0):
+ lwzu rWORD1, 4(rSTR1)
+ bne- cr1, L(different)
+ lwzu rWORD2, 4(rSTR2)
+L(g1): add rTMP, rFEFE, rWORD1
+ nor rNEG, r7F7F, rWORD1
+ bdz L(tail)
+ and. rTMP, rTMP, rNEG
+ cmpw cr1, rWORD1, rWORD2
+ beq+ L(g0)
+
+/* OK. We've hit the end of the string. We need to be careful that
+ we don't compare two strings as different because of gunk beyond
+ the end of the strings... */
+
+L(endstring):
+ and rTMP, r7F7F, rWORD1
+ beq cr1, L(equal)
+ add rTMP, rTMP, r7F7F
+ xor. rBITDIF, rWORD1, rWORD2
+
+ andc rNEG, rNEG, rTMP
+ blt- L(highbit)
+ cntlzw rBITDIF, rBITDIF
+ cntlzw rNEG, rNEG
+ addi rNEG, rNEG, 7
+ cmpw cr1, rNEG, rBITDIF
+ sub rRTN, rWORD1, rWORD2
+ blt- cr1, L(equal)
+ srawi rRTN, rRTN, 31
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ lwzu rWORD1, -4(rSTR1)
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ srawi rRTN, rRTN, 31
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ srwi rWORD2, rWORD2, 24
+ srwi rWORD1, rWORD1, 24
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+
+/* Oh well. In this case, we just do a byte-by-byte comparison. */
+ .align 4
+L(tail):
+ and. rTMP, rTMP, rNEG
+ cmpw cr1, rWORD1, rWORD2
+ bne- L(endstring)
+ addi rSTR1, rSTR1, 4
+ bne- cr1, L(different)
+ addi rSTR2, rSTR2, 4
+ cmplwi cr1, rN, 0
+L(unaligned):
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ ble cr1, L(ux)
+L(uz):
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ .align 4
+L(u1):
+ cmpwi cr1, rWORD1, 0
+ bdz L(u4)
+ cmpw rWORD1, rWORD2
+ beq- cr1, L(u4)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- L(u4)
+ cmpwi cr1, rWORD3, 0
+ bdz L(u3)
+ cmpw rWORD3, rWORD4
+ beq- cr1, L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- L(u3)
+ cmpwi cr1, rWORD1, 0
+ bdz L(u4)
+ cmpw rWORD1, rWORD2
+ beq- cr1, L(u4)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- L(u4)
+ cmpwi cr1, rWORD3, 0
+ bdz L(u3)
+ cmpw rWORD3, rWORD4
+ beq- cr1, L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ beq+ L(u1)
+
+L(u3): sub rRTN, rWORD3, rWORD4
+ blr
+L(u4): sub rRTN, rWORD1, rWORD2
+ blr
+L(ux):
+ li rRTN, 0
+ blr
+END (BP_SYM (strncmp))
+libc_hidden_builtin_def (strncmp)
+
diff --git a/sysdeps/powerpc/powerpc32/power4/wordcopy.c b/sysdeps/powerpc/powerpc32/power4/wordcopy.c
new file mode 100644
index 0000000000..f71b41dc42
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/wordcopy.c
@@ -0,0 +1,209 @@
+/* _memcopy.c -- subroutines for memory copy functions.
+ Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = a0;
+ ((op_t *) dstp)[1] = a1;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a0 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+
+ if (len == 1)
+ return;
+
+ a0 = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+ ((op_t *) dstp)[1] = MERGE (a1, sh_1, a2, sh_2);
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = a1;
+ ((op_t *) dstp)[0] = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make srcp aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a2 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ if (len == 1)
+ return;
+
+ a2 = a1;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, sh_1, a2, sh_2);
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
diff --git a/sysdeps/powerpc/powerpc32/power5+/Implies b/sysdeps/powerpc/powerpc32/power5+/Implies
new file mode 100644
index 0000000000..4e3a983426
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/Implies b/sysdeps/powerpc/powerpc32/power5+/fpu/Implies
new file mode 100644
index 0000000000..7c16e45c29
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S
new file mode 100644
index 0000000000..99cd6cc969
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S
@@ -0,0 +1,37 @@
+/* ceil function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__ceil, 4, 0)
+ frip fp1, fp1
+ blr
+ END (__ceil)
+
+weak_alias (__ceil, ceil)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__ceil, ceill)
+strong_alias (__ceil, __ceill)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __ceil, ceill, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S
new file mode 100644
index 0000000000..0a844b6f47
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S
@@ -0,0 +1,30 @@
+/* ceilf function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__ceilf, 4, 0)
+ frip fp1, fp1 /* The rounding instructions are double. */
+	frsp	fp1, fp1	/* But we need to set overflow for float. */
+ blr
+ END (__ceilf)
+
+weak_alias (__ceilf, ceilf)
+
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S
new file mode 100644
index 0000000000..3b1d26f9fc
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S
@@ -0,0 +1,37 @@
+/* floor function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__floor, 4, 0)
+ frim fp1, fp1
+ blr
+ END (__floor)
+
+weak_alias (__floor, floor)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__floor, floorl)
+strong_alias (__floor, __floorl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __floor, floorl, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S
new file mode 100644
index 0000000000..640140c334
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S
@@ -0,0 +1,30 @@
+/* floorf function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__floorf, 4, 0)
+ frim fp1, fp1 /* The rounding instructions are double. */
+	frsp	fp1, fp1	/* But we need to set overflow for float. */
+ blr
+ END (__floorf)
+
+weak_alias (__floorf, floorf)
+
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
new file mode 100644
index 0000000000..a2171fe09a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
@@ -0,0 +1,60 @@
+/* llround function.  POWER5+, PowerPC32 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3, r4] llround (double x [fp1])
+   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Doubleword
+   with round toward zero instruction.  */
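A minimal C model of this pre-round-then-truncate sequence, outside the
patch itself (helper name hypothetical; assumes C99 round(), whose
ties-away-from-zero behavior matches frin):

    #include <math.h>

    /* Sketch: frin rounds to the nearest integral value with ties away
       from zero, like round(); fctidz then converts with truncation,
       which is exact because the value is already integral.  */
    static long long
    llround_sketch (double x)
    {
      double nearest = round (x);   /* frin equivalent.  */
      return (long long) nearest;   /* fctidz equivalent.  */
    }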
+
+ .machine "power5"
+ENTRY (__llround)
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ frin fp2,fp1
+	fctidz	fp3,fp2	/* Convert to Integer Doubleword, round toward zero.  */
+ stfd fp3,8(r1)
+ nop /* Ensure the following load is in a different dispatch */
+ nop /* group to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r4,12(r1)
+ lwz r3,8(r1)
+ addi r1,r1,16
+ blr
+ END (__llround)
+
+weak_alias (__llround, llround)
+
+strong_alias (__llround, __llroundf)
+weak_alias (__llround, llroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S
new file mode 100644
index 0000000000..ffe6b7eb37
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S
@@ -0,0 +1,2 @@
+/* __llroundf is in s_llround.S */
+/* __llroundf is in s_llround.S */
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
new file mode 100644
index 0000000000..83107f6f97
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
@@ -0,0 +1,58 @@
+/* lround function. POWER5+, PowerPC32 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long [r3] lround (double x [fp1])
+   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Word with
+   round toward zero instruction.  */
+
+ .machine "power5"
+ENTRY (__lround)
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ frin fp2,fp1
+	fctiwz	fp3,fp2	/* Convert to Integer Word, round toward zero.  */
+ stfd fp3,8(r1)
+ nop /* Ensure the following load is in a different dispatch */
+ nop /* group to avoid pipe stall on POWER4&5. */
+ nop
+ lwz r3,12(r1)
+ addi r1,r1,16
+ blr
+ END (__lround)
+
+weak_alias (__lround, lround)
+
+strong_alias (__lround, __lroundf)
+weak_alias (__lround, lroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S
new file mode 100644
index 0000000000..379c579def
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S
@@ -0,0 +1,37 @@
+/* round function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__round, 4, 0)
+ frin fp1, fp1
+ blr
+ END (__round)
+
+weak_alias (__round, round)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__round, roundl)
+strong_alias (__round, __roundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __round, roundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S
new file mode 100644
index 0000000000..4193c69152
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S
@@ -0,0 +1,30 @@
+/* roundf function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__roundf, 4, 0)
+	frin	fp1, fp1	/* The rounding instructions are double.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__roundf)
+
+weak_alias (__roundf, roundf)
+
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S
new file mode 100644
index 0000000000..92fa3ca34d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S
@@ -0,0 +1,37 @@
+/* trunc function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__trunc, 4, 0)
+ friz fp1, fp1
+ blr
+ END (__trunc)
+
+weak_alias (__trunc, trunc)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__trunc, truncl)
+strong_alias (__trunc, __truncl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __trunc, truncl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S b/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S
new file mode 100644
index 0000000000..c575e2bbc2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S
@@ -0,0 +1,30 @@
+/* truncf function. PowerPC32/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__truncf, 4, 0)
+	friz	fp1, fp1	/* The rounding instructions are double.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__truncf)
+
+weak_alias (__truncf, truncf)
+
diff --git a/sysdeps/powerpc/powerpc32/power5/Implies b/sysdeps/powerpc/powerpc32/power5/Implies
new file mode 100644
index 0000000000..4e3a983426
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/Implies b/sysdeps/powerpc/powerpc32/power5/fpu/Implies
new file mode 100644
index 0000000000..7c16e45c29
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power5/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power6/Implies b/sysdeps/powerpc/powerpc32/power6/Implies
new file mode 100644
index 0000000000..c9a2d47f8a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/power5+
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/Implies b/sysdeps/powerpc/powerpc32/power6/fpu/Implies
new file mode 100644
index 0000000000..57b92fb13e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/power5+/fpu
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
new file mode 100644
index 0000000000..528607602d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
@@ -0,0 +1,47 @@
+/* Round double to long long int.  PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long int[r3, r4] __llrint (double x[fp1]) */
+ENTRY (__llrint)
+ CALL_MCOUNT
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ fctid fp13,fp1
+ stfd fp13,8(r1)
+/* Ensure the following load is in a different dispatch group by
+ inserting "group ending nop". */
+ ori r1,r1,0
+ lwz r3,8(r1)
+ lwz r4,12(r1)
+ addi r1,r1,16
+ blr
+ END (__llrint)
+
+weak_alias (__llrint, llrint)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__llrint, __llrintl)
+weak_alias (__llrint, llrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llrint, llrintl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
new file mode 100644
index 0000000000..2c14800da2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
@@ -0,0 +1,39 @@
+/* Round float to long long int.  PowerPC32 on PowerPC64 version.
+ Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+
+/* long long int[r3, r4] __llrintf (float x[fp1]) */
+ENTRY (__llrintf)
+ CALL_MCOUNT
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ fctid fp13,fp1
+ stfd fp13,8(r1)
+/* Ensure the following load is in a different dispatch group by
+ inserting "group ending nop". */
+ ori r1,r1,0
+ lwz r3,8(r1)
+ lwz r4,12(r1)
+ addi r1,r1,16
+ blr
+ END (__llrintf)
+
+weak_alias (__llrintf, llrintf)
+
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
new file mode 100644
index 0000000000..2b1fa1c846
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
@@ -0,0 +1,60 @@
+/* llround function.  POWER6, PowerPC32 version.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3, r4] llround (double x [fp1])
+   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Doubleword
+   with round toward zero instruction.  */
+
+ .machine "power5"
+ENTRY (__llround)
+ stwu r1,-16(r1)
+ cfi_adjust_cfa_offset (16)
+ frin fp2,fp1
+	fctidz	fp3,fp2	/* Convert to Integer Doubleword, round toward zero.  */
+ stfd fp3,8(r1)
+/* Ensure the following load is in a different dispatch group by
+ inserting "group ending nop". */
+ ori r1,r1,0
+ lwz r4,12(r1)
+ lwz r3,8(r1)
+ addi r1,r1,16
+ blr
+ END (__llround)
+
+weak_alias (__llround, llround)
+
+strong_alias (__llround, __llroundf)
+weak_alias (__llround, llroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S
new file mode 100644
index 0000000000..ffe6b7eb37
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S
@@ -0,0 +1,2 @@
+/* __llroundf is in s_llround.S */
+/* __llroundf is in s_llround.S */
diff --git a/sysdeps/powerpc/powerpc32/power6/memcpy.S b/sysdeps/powerpc/powerpc32/power6/memcpy.S
new file mode 100644
index 0000000000..ba45fd250c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/memcpy.S
@@ -0,0 +1,843 @@
+/* Optimized memcpy implementation for PowerPC32 on POWER6.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+   Memcpy handles short copies (< 32 bytes) using binary move blocks
+   (no loops) of lwz/stw.  The tail (remaining 1-3 bytes) is handled
+   with the appropriate combination of byte and halfword load/stores.
+ There is minimal effort to optimize the alignment of short moves.
+
+ Longer moves (>= 32-bytes) justify the effort to get at least the
+ destination word (4-byte) aligned. Further optimization is
+ possible when both source and destination are word aligned.
+ Each case has an optimized unrolled loop. */
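As a rough, hypothetical C outline of that dispatch (names and
structure are illustrative only; the real code is the assembly below):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the strategy: short copies go byte-by-byte (the real
       code uses branchy lwz/stw blocks); longer copies word-align the
       destination first, then use a word loop when the source is
       aligned too, falling back to the load/shift/or path otherwise.  */
    static void *
    memcpy_sketch (void *dst, const void *src, size_t len)
    {
      unsigned char *d = dst;
      const unsigned char *s = src;

      if (len >= 32)
        {
          while ((uintptr_t) d & 3)      /* 0-3 bytes to word-align dst */
            { *d++ = *s++; len--; }
          if (((uintptr_t) s & 3) == 0)  /* both word aligned */
            for (; len >= 4; len -= 4, d += 4, s += 4)
              *(uint32_t *) d = *(const uint32_t *) s;
          /* else: unaligned source; see the L(wdu) shift/or path.  */
        }
      while (len--)                      /* tail, or whole short copy */
        *d++ = *s++;
      return dst;
    }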
+
+ .machine power6
+EALIGN (BP_SYM (memcpy), 5, 0)
+ CALL_MCOUNT
+
+ stwu 1,-32(1)
+ cfi_adjust_cfa_offset(32)
+ cmplwi cr1,5,31 /* check for short move. */
+ neg 0,3
+ cmplwi cr1,5,31
+ clrlwi 10,4,30 /* check alignment of src. */
+ andi. 11,3,3 /* check alignment of dst. */
+ clrlwi 0,0,30 /* Number of bytes until the 1st word of dst. */
+ ble- cr1,L(word_unaligned_short) /* If move < 32 bytes. */
+ cmplw cr6,10,11
+ stw 31,24(1)
+ cfi_offset(31,(24-32))
+ stw 30,20(1)
+ cfi_offset(30,(20-32))
+ mr 30,3
+ beq .L0
+ mtcrf 0x01,0
+ subf 31,0,5 /* Length after alignment. */
+ add 12,4,0 /* Compute src addr after alignment. */
+ /* Move 0-3 bytes as needed to get the destination word aligned. */
+1: bf 31,2f
+ lbz 6,0(4)
+ bf 30,3f
+ lhz 7,1(4)
+ stb 6,0(3)
+ sth 7,1(3)
+ addi 3,3,3
+ b 0f
+3:
+ stb 6,0(3)
+ addi 3,3,1
+ b 0f
+2: bf 30,0f
+ lhz 6,0(4)
+ sth 6,0(3)
+ addi 3,3,2
+0:
+ clrlwi 10,12,30 /* check alignment of src again. */
+ srwi 9,31,2 /* Number of full words remaining. */
+	bne-	cr6,L(wdu) /* If source is not word aligned.  */
+ clrlwi 11,31,30 /* calculate the number of tail bytes */
+ b L(word_aligned)
+ /* Copy words from source to destination, assuming the destination is
+ aligned on a word boundary.
+
+ At this point we know there are at least 29 bytes left (32-3) to copy.
+ The next step is to determine if the source is also word aligned.
+   If not, branch to the unaligned move code at L(wdu), which uses
+   a load, shift, store strategy.
+
+ Otherwise source and destination are word aligned, and we can use
+ the optimized word copy loop. */
+ .align 4
+.L0:
+ mr 31,5
+ mr 12,4
+	bne-	cr6,L(wdu) /* If source is not word aligned.  */
+ srwi 9,5,2 /* Number of full words remaining. */
+ clrlwi 11,5,30 /* calculate the number of tail bytes */
+
+ /* Move words where destination and source are word aligned.
+ Use an unrolled loop to copy 4 words (16-bytes) per iteration.
+     If the copy is not an exact multiple of 16 bytes, 1-3
+ words are copied as needed to set up the main loop. After
+ the main loop exits there may be a tail of 1-3 bytes. These bytes are
+ copied a halfword/byte at a time as needed to preserve alignment. */
+L(word_aligned):
+ mtcrf 0x01,9
+ srwi 8,31,4 /* calculate the 16 byte loop count */
+ cmplwi cr1,9,4
+ cmplwi cr6,11,0
+ mr 11,12
+
+ bf 30,1f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 11,12,8
+ mtctr 8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 10,3,8
+ bf 31,4f
+ lwz 0,8(12)
+ stw 0,8(3)
+ blt cr1,3f
+ addi 11,12,12
+ addi 10,3,12
+ b 4f
+ .align 4
+1:
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ lwz 6,0(12)
+ addi 11,12,4
+ stw 6,0(3)
+ addi 10,3,4
+
+ .align 4
+4:
+ lwz 6,0(11)
+ lwz 7,4(11)
+ lwz 8,8(11)
+ lwz 0,12(11)
+ stw 6,0(10)
+ stw 7,4(10)
+ stw 8,8(10)
+ stw 0,12(10)
+ addi 11,11,16
+ addi 10,10,16
+ bdnz 4b
+3:
+ clrrwi 0,31,2
+ mtcrf 0x01,31
+ beq cr6,0f
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+/* At this point we have a tail of 0-3 bytes and we know that the
+ destination is word aligned. */
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+
+/* Copy up to 31 bytes.  This is divided into two cases, 0-8 bytes and
+   9-31 bytes.  Each case is handled without loops, using binary
+   (1,2,4,8) tests.
+
+   In the short (0-8 byte) case no attempt is made to force alignment
+   of either source or destination.  The hardware will handle the
+   unaligned load/stores with small delays for crossing 32-, 128-, and
+   4096-byte boundaries.  Since these short moves are unlikely to be
+   unaligned or cross these boundaries, the overhead to force
+   alignment is not justified.
+
+   The longer (9-31 byte) move is more likely to cross 32- or 128-byte
+   boundaries.  Since only loads are sensitive to the 32-/128-byte
+   boundaries it is more important to align the source than the
+   destination.  If the source is not already word aligned, we first
+   move 1-3 bytes as needed.  Since we are only word aligned we don't
+   use doubleword load/stores to ensure that all loads are aligned.
+   While the destination and stores may still be unaligned, this
+   is only an issue for page (4096-byte boundary) crossing, which
+   should be rare for these short moves.  The hardware handles this
+   case automatically with a small (~20 cycle) delay.  */
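The "binary (1,2,4,8) tests" amount to one fixed-size move per set bit
of the length; a hypothetical C sketch of the 0-8 byte case:

    #include <stddef.h>
    #include <string.h>

    /* Sketch: one unconditional-size move per set length bit,
       mirroring the bf/bt tests on CR bits in the assembly below.  */
    static void
    copy_upto8_sketch (unsigned char *d, const unsigned char *s, size_t len)
    {
      if (len & 8) { memcpy (d, s, 8); d += 8; s += 8; }
      if (len & 4) { memcpy (d, s, 4); d += 4; s += 4; }
      if (len & 2) { memcpy (d, s, 2); d += 2; s += 2; }
      if (len & 1) { *d = *s; }
    }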
+ .align 4
+
+ cfi_same_value (31)
+ cfi_same_value (30)
+L(word_unaligned_short):
+ mtcrf 0x01,5
+ cmplwi cr6,5,8
+ neg 8,4
+ clrrwi 9,4,2
+ andi. 0,8,3
+ beq cr6,L(wus_8) /* Handle moves of 8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmpldi cr1,5,16
+ mr 12,4
+ ble cr6,L(wus_4) /* Handle moves of 0-8 bytes. */
+ mr 11,3
+ mr 10,5
+ cmplwi cr6,0,2
+ beq L(wus_tail) /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(9)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srdi 7,6,16
+ bgt cr6,3f
+ sth 6,0(3)
+ b 7f
+ .align 4
+3:
+ stb 7,0(3)
+ sth 6,1(3)
+ b 7f
+ .align 4
+5:
+ stb 6,0(3)
+7:
+ cmplwi cr1,10,16
+ add 11,3,0
+ mtcrf 0x01,10
+ .align 4
+L(wus_tail):
+/* At least 6 bytes left and the source is word aligned. This allows
+ some speculative loads up front. */
+/* We need to special case the fall-through because the biggest delays
+ are due to address computation not being ready in time for the
+ AGEN. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ blt cr1,L(wus_tail8)
+ cmplwi cr0,10,24
+L(wus_tail16): /* Move 16 bytes. */
+ stw 6,0(11)
+ stw 7,4(11)
+ lwz 6,8(12)
+ lwz 7,12(12)
+ stw 6,8(11)
+ stw 7,12(11)
+/* Move 8 bytes more. */
+ bf 28,L(wus_tail16p8)
+ cmplwi cr1,10,28
+ lwz 6,16(12)
+ lwz 7,20(12)
+ stw 6,16(11)
+ stw 7,20(11)
+/* Move 4 bytes more. */
+ bf 29,L(wus_tail16p4)
+ lwz 6,24(12)
+ stw 6,24(11)
+ addi 12,12,28
+ addi 11,11,28
+ bgt cr1,L(wus_tail2)
+ /* exactly 28 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_tail16p8):  /* less than 8 bytes left.  */
+ beq cr1,L(wus_tailX) /* exactly 16 bytes, early exit. */
+ cmplwi cr1,10,20
+ bf 29,L(wus_tail16p2)
+/* Move 4 bytes more. */
+ lwz 6,16(12)
+ stw 6,16(11)
+ addi 12,12,20
+ addi 11,11,20
+ bgt cr1,L(wus_tail2)
+ /* exactly 20 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_tail16p4):  /* less than 4 bytes left.  */
+ addi 12,12,24
+ addi 11,11,24
+ bgt cr0,L(wus_tail2)
+ /* exactly 24 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_tail16p2):  /* 16 bytes moved, less than 4 bytes left.  */
+ addi 12,12,16
+ addi 11,11,16
+ b L(wus_tail2)
+
+ .align 4
+L(wus_tail8): /* Move 8 bytes. */
+/* r6, r7 already loaded speculatively. */
+ cmplwi cr1,10,8
+ cmplwi cr0,10,12
+ bf 28,L(wus_tail4)
+ stw 6,0(11)
+ stw 7,4(11)
+/* Move 4 bytes more. */
+ bf 29,L(wus_tail8p4)
+ lwz 6,8(12)
+ stw 6,8(11)
+ addi 12,12,12
+ addi 11,11,12
+ bgt cr0,L(wus_tail2)
+ /* exactly 12 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_tail8p4):  /* less than 4 bytes left.  */
+ addi 12,12,8
+ addi 11,11,8
+ bgt cr1,L(wus_tail2)
+ /* exactly 8 bytes. Return original dst pointer and exit. */
+ addi 1,1,32
+ blr
+
+ .align 4
+L(wus_tail4): /* Move 4 bytes. */
+/* r6 already loaded speculatively.  If we are here we know there are
+   more than 4 bytes left, so there is no need to test.  */
+ addi 12,12,4
+ stw 6,0(11)
+ addi 11,11,4
+L(wus_tail2): /* Move 2-3 bytes. */
+ bf 30,L(wus_tail1)
+ lhz 6,0(12)
+ sth 6,0(11)
+ bf 31,L(wus_tailX)
+ lbz 7,2(12)
+ stb 7,2(11)
+ addi 1,1,32
+ blr
+L(wus_tail1): /* Move 1 byte. */
+ bf 31,L(wus_tailX)
+ lbz 6,0(12)
+ stb 6,0(11)
+L(wus_tailX):
+ /* Return original dst pointer. */
+ addi 1,1,32
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+L(wus_8):
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ /* Return original dst pointer. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_4):
+ bf 29,L(wus_2)
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,L(wus_5)
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,L(wus_0)
+ lbz 8,6(4)
+ stb 8,6(3)
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_5):
+ bf 31,L(wus_0)
+ lbz 6,4(4)
+ stb 6,4(3)
+ /* Return original dst pointer. */
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_2): /* Move 2-3 bytes. */
+ bf 30,L(wus_1)
+ lhz 6,0(4)
+ sth 6,0(3)
+ bf 31,L(wus_0)
+ lbz 7,2(4)
+ stb 7,2(3)
+ addi 1,1,32
+ blr
+ .align 4
+L(wus_1): /* Move 1 byte. */
+ bf 31,L(wus_0)
+ lbz 6,0(4)
+ stb 6,0(3)
+ .align 3
+L(wus_0):
+ /* Return original dst pointer. */
+ addi 1,1,32
+ blr
+
+ .align 4
+ cfi_offset(31,(24-32))
+ cfi_offset(30,(20-32))
+L(wdu):
+
+  /* Copy words where the destination is aligned but the source is
+     not.  For power4, power5 and power6 machines there is a penalty
+     for unaligned loads (src) that cross 32-byte, cacheline, or page
+     boundaries.  So we want to use simple (unaligned) loads where
+     possible, but avoid them where we know the load would span a
+     32-byte boundary.
+
+     At this point we know we have at least 29 (32-3) bytes to copy,
+     the src is unaligned, and we may cross at least one 32-byte
+     boundary.  Also we have the following register values:
+     r3  == adjusted dst, word aligned
+     r4  == unadjusted src
+     r5  == unadjusted len
+     r9  == adjusted Word length
+     r10 == src alignment (1-3)
+     r12 == adjusted src, not aligned
+     r31 == adjusted len
+
+     First we need to copy words up to but not crossing the next
+     32-byte boundary.  Then perform aligned loads just before and
+     just after the boundary and use shifts and or to generate the
+     next aligned word for dst.  If more than 32 bytes remain we copy
+     (unaligned src) the next 7 words and repeat the loop until fewer
+     than 32 bytes remain.
+
+     Then if more than 4 bytes remain we again use aligned loads,
+     shifts and or to generate the next dst word.  We then process the
+     remaining words using unaligned loads as needed.  Finally we
+     check if there are 1-3 bytes remaining and use halfword and/or
+     byte load/stores to complete the copy.
+*/
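The shift-and-or word generation described above can be sketched in C
(hypothetical helper; sh1 is 8 times the source misalignment, so 8, 16,
or 24 here):

    #include <stdint.h>

    /* Sketch: merge two aligned words that bracket the unaligned
       source position into one aligned destination word, as the
       slw/srw/or (or rlwimi) sequences below do.  sh1 must be 8, 16,
       or 24; big-endian byte order assumed, as on PowerPC.  */
    static uint32_t
    merge_words_sketch (uint32_t prev, uint32_t next, unsigned sh1)
    {
      return (prev << sh1) | (next >> (32 - sh1));
    }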
+ mr 4,12 /* restore unaligned adjusted src ptr */
+ clrlwi 0,12,27 /* Find dist from previous 32-byte boundary. */
+ slwi 10,10,3 /* calculate number of bits to shift 1st word left */
+ cmplwi cr5,0,16
+ subfic 8,0,32 /* Number of bytes to next 32-byte boundary. */
+
+ mtcrf 0x01,8
+ cmplwi cr1,10,16
+ subfic 9,10,32 /* number of bits to shift 2nd word right */
+/* This test is reversed because the timing to compare the bytes to the
+   32-byte boundary could not be met.  So we compare the bytes from the
+   previous 32-byte boundary and invert the test. */
+ bge cr5,L(wdu_h32_8)
+ .align 4
+ lwz 6,0(4)
+ lwz 7,4(4)
+ addi 12,4,16 /* generate alternate pointers to avoid agen */
+ addi 11,3,16 /* timing issues downstream. */
+ stw 6,0(3)
+ stw 7,4(3)
+ subi 31,31,16
+ lwz 6,8(4)
+ lwz 7,12(4)
+ addi 4,4,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+ bf 28,L(wdu_h32_4)
+ lwz 6,0(12)
+ lwz 7,4(12)
+ subi 31,31,8
+ addi 4,4,8
+ stw 6,0(11)
+ stw 7,4(11)
+ addi 3,3,8
+ bf 29,L(wdu_h32_0)
+ lwz 6,8(12)
+ addi 4,4,4
+ subi 31,31,4
+ stw 6,8(11)
+ addi 3,3,4
+ b L(wdu_h32_0)
+ .align 4
+L(wdu_h32_8):
+ bf 28,L(wdu_h32_4)
+ lwz 6,0(4)
+ lwz 7,4(4)
+ subi 31,31,8
+ bf 29,L(wdu_h32_8x)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,8(4)
+ addi 4,4,12
+ subi 31,31,4
+ stw 6,8(3)
+ addi 3,3,12
+ b L(wdu_h32_0)
+ .align 4
+L(wdu_h32_8x):
+ addi 4,4,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+ b L(wdu_h32_0)
+ .align 4
+L(wdu_h32_4):
+ bf 29,L(wdu_h32_0)
+ lwz 6,0(4)
+ subi 31,31,4
+ addi 4,4,4
+ stw 6,0(3)
+ addi 3,3,4
+ .align 4
+L(wdu_h32_0):
+/* Set up for the 32-byte boundary-crossing word move and possibly the
+   32-byte move loop. */
+ clrrwi 12,4,2
+ cmplwi cr5,31,32
+ bge cr1,L(wdu2_32)
+#if 0
+ b L(wdu1_32)
+/*
+ cmplwi cr1,10,8
+ beq cr1,L(wdu1_32)
+ cmplwi cr1,10,16
+ beq cr1,L(wdu2_32)
+ cmplwi cr1,10,24
+ beq cr1,L(wdu3_32)
+*/
+L(wdu_32):
+ lwz 6,0(12)
+ cmplwi cr6,31,4
+ srwi 8,31,5 /* calculate the 32 byte loop count */
+ slw 0,6,10
+ clrlwi 31,31,27 /* The remaining bytes, < 32. */
+ blt cr5,L(wdu_32tail)
+ mtctr 8
+ cmplwi cr6,31,4
+ .align 4
+L(wdu_loop32):
+ /* copy 32 bytes at a time */
+ lwz 8,4(12)
+ addi 12,12,32
+ lwz 7,4(4)
+ srw 8,8,9
+ or 0,0,8
+ stw 0,0(3)
+ stw 7,4(3)
+ lwz 6,8(4)
+ lwz 7,12(4)
+ stw 6,8(3)
+ stw 7,12(3)
+ lwz 6,16(4)
+ lwz 7,20(4)
+ stw 6,16(3)
+ stw 7,20(3)
+ lwz 6,24(4)
+ lwz 7,28(4)
+ lwz 8,0(12)
+ addi 4,4,32
+ stw 6,24(3)
+ stw 7,28(3)
+ addi 3,3,32
+ slw 0,8,10
+ bdnz+ L(wdu_loop32)
+
+L(wdu_32tail):
+ mtcrf 0x01,31
+ cmplwi cr5,31,16
+ blt cr6,L(wdu_4tail)
+ /* calculate and store the final word */
+ lwz 8,4(12)
+ srw 8,8,9
+ or 6,0,8
+ b L(wdu_32tailx)
+#endif
+ .align 4
+L(wdu1_32):
+ lwz 6,-1(4)
+ cmplwi cr6,31,4
+ srwi 8,31,5 /* calculate the 32 byte loop count */
+ slwi 6,6,8
+ clrlwi 31,31,27 /* The remaining bytes, < 32. */
+ blt cr5,L(wdu1_32tail)
+ mtctr 8
+ cmplwi cr6,31,4
+
+ lwz 8,3(4)
+ lwz 7,4(4)
+/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
+ rlwimi 6,8,8,(32-8),31
+ b L(wdu1_loop32x)
+ .align 4
+L(wdu1_loop32):
+ /* copy 32 bytes at a time */
+ lwz 8,3(4)
+ lwz 7,4(4)
+ stw 10,-8(3)
+ stw 11,-4(3)
+/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
+ rlwimi 6,8,8,(32-8),31
+L(wdu1_loop32x):
+ lwz 10,8(4)
+ lwz 11,12(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,16(4)
+ lwz 7,20(4)
+ stw 10,8(3)
+ stw 11,12(3)
+ lwz 10,24(4)
+ lwz 11,28(4)
+ lwz 8,32-1(4)
+ addi 4,4,32
+ stw 6,16(3)
+ stw 7,20(3)
+ addi 3,3,32
+ slwi 6,8,8
+ bdnz+ L(wdu1_loop32)
+ stw 10,-8(3)
+ stw 11,-4(3)
+
+L(wdu1_32tail):
+ mtcrf 0x01,31
+ cmplwi cr5,31,16
+ blt cr6,L(wdu_4tail)
+ /* calculate and store the final word */
+ lwz 8,3(4)
+/* Equivalent to: srwi   8,8,32-8;  or    6,6,8  */
+ rlwimi 6,8,8,(32-8),31
+ b L(wdu_32tailx)
+
+L(wdu2_32):
+ bgt cr1,L(wdu3_32)
+ lwz 6,-2(4)
+ cmplwi cr6,31,4
+ srwi 8,31,5 /* calculate the 32 byte loop count */
+ slwi 6,6,16
+ clrlwi 31,31,27 /* The remaining bytes, < 32. */
+ blt cr5,L(wdu2_32tail)
+ mtctr 8
+ cmplwi cr6,31,4
+
+ lwz 8,2(4)
+ lwz 7,4(4)
+/* Equivalent to: srwi   8,8,32-16;  or    6,6,8  */
+ rlwimi 6,8,16,(32-16),31
+ b L(wdu2_loop32x)
+ .align 4
+L(wdu2_loop32):
+ /* copy 32 bytes at a time */
+ lwz 8,2(4)
+ lwz 7,4(4)
+ stw 10,-8(3)
+ stw 11,-4(3)
+/* Equivalent to srwi   8,8,32-16;  or    6,6,8  */
+ rlwimi 6,8,16,(32-16),31
+L(wdu2_loop32x):
+ lwz 10,8(4)
+ lwz 11,12(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,16(4)
+ lwz 7,20(4)
+ stw 10,8(3)
+ stw 11,12(3)
+ lwz 10,24(4)
+ lwz 11,28(4)
+/* lwz 8,0(12) */
+ lwz 8,32-2(4)
+ addi 4,4,32
+ stw 6,16(3)
+ stw 7,20(3)
+ addi 3,3,32
+ slwi 6,8,16
+ bdnz+ L(wdu2_loop32)
+ stw 10,-8(3)
+ stw 11,-4(3)
+
+L(wdu2_32tail):
+ mtcrf 0x01,31
+ cmplwi cr5,31,16
+ blt cr6,L(wdu_4tail)
+ /* calculate and store the final word */
+ lwz 8,2(4)
+/* Equivalent to: srwi   8,8,32-16;  or    6,6,8  */
+ rlwimi 6,8,16,(32-16),31
+ b L(wdu_32tailx)
+
+L(wdu3_32):
+/* lwz 6,0(12) */
+ lwz 6,-3(4)
+ cmplwi cr6,31,4
+ srwi 8,31,5 /* calculate the 32 byte loop count */
+ slwi 6,6,24
+ clrlwi 31,31,27 /* The remaining bytes, < 32. */
+ blt cr5,L(wdu3_32tail)
+ mtctr 8
+ cmplwi cr6,31,4
+
+ lwz 8,1(4)
+ lwz 7,4(4)
+/* Equivalent to: srwi   8,8,32-24;  or    6,6,8  */
+ rlwimi 6,8,24,(32-24),31
+ b L(wdu3_loop32x)
+ .align 4
+L(wdu3_loop32):
+ /* copy 32 bytes at a time */
+ lwz 8,1(4)
+ lwz 7,4(4)
+ stw 10,-8(3)
+ stw 11,-4(3)
+/* Equivalent to srwi   8,8,32-24;  or    6,6,8  */
+ rlwimi 6,8,24,(32-24),31
+L(wdu3_loop32x):
+ lwz 10,8(4)
+ lwz 11,12(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,16(4)
+ lwz 7,20(4)
+ stw 10,8(3)
+ stw 11,12(3)
+ lwz 10,24(4)
+ lwz 11,28(4)
+ lwz 8,32-3(4)
+ addi 4,4,32
+ stw 6,16(3)
+ stw 7,20(3)
+ addi 3,3,32
+ slwi 6,8,24
+ bdnz+ L(wdu3_loop32)
+ stw 10,-8(3)
+ stw 11,-4(3)
+
+L(wdu3_32tail):
+ mtcrf 0x01,31
+ cmplwi cr5,31,16
+ blt cr6,L(wdu_4tail)
+ /* calculate and store the final word */
+ lwz 8,1(4)
+/* Equivalent to: srwi   8,8,32-24;  or    6,6,8  */
+ rlwimi 6,8,24,(32-24),31
+ b L(wdu_32tailx)
+ .align 4
+L(wdu_32tailx):
+ blt cr5,L(wdu_t32_8)
+ lwz 7,4(4)
+ addi 12,4,16 /* generate alternate pointers to avoid agen */
+ addi 11,3,16 /* timing issues downstream. */
+ stw 6,0(3)
+ stw 7,4(3)
+ subi 31,31,16
+ lwz 6,8(4)
+ lwz 7,12(4)
+ addi 4,4,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+ bf 28,L(wdu_t32_4x)
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 4,4,8
+ subi 31,31,8
+ stw 6,0(11)
+ stw 7,4(11)
+ addi 3,3,8
+ bf 29,L(wdu_t32_0)
+ lwz 6,8(12)
+ addi 4,4,4
+ subi 31,31,4
+ stw 6,8(11)
+ addi 3,3,4
+ b L(wdu_t32_0)
+ .align 4
+L(wdu_t32_4x):
+ bf 29,L(wdu_t32_0)
+ lwz 6,0(4)
+ addi 4,4,4
+ subi 31,31,4
+ stw 6,0(3)
+ addi 3,3,4
+ b L(wdu_t32_0)
+ .align 4
+L(wdu_t32_8):
+ bf 28,L(wdu_t32_4)
+ lwz 7,4(4)
+ subi 31,31,8
+ bf 29,L(wdu_t32_8x)
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,8(4)
+ subi 31,31,4
+ addi 4,4,12
+ stw 6,8(3)
+ addi 3,3,12
+ b L(wdu_t32_0)
+ .align 4
+L(wdu_t32_8x):
+ addi 4,4,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+ b L(wdu_t32_0)
+ .align 4
+L(wdu_t32_4):
+ subi 31,31,4
+ stw 6,0(3)
+ addi 4,4,4
+ addi 3,3,4
+ .align 4
+L(wdu_t32_0):
+L(wdu_4tail):
+ cmplwi cr6,31,0
+ beq cr6,L(wdus_0) /* If the tail is 0 bytes we are done! */
+ bf 30,L(wdus_3)
+ lhz 7,0(4)
+ sth 7,0(3)
+ bf 31,L(wdus_0)
+ lbz 8,2(4)
+ stb 8,2(3)
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+ .align 4
+L(wdus_3):
+ bf 31,L(wus_0)
+ lbz 6,0(4)
+ stb 6,0(3)
+ .align 4
+L(wdus_0):
+ /* Return original dst pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+END (BP_SYM (memcpy))
+
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/powerpc/powerpc32/power6/memset.S b/sysdeps/powerpc/powerpc32/power6/memset.S
new file mode 100644
index 0000000000..10fb7b9786
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/memset.S
@@ -0,0 +1,542 @@
+/* Optimized 32-bit memset implementation for POWER6.
+ Copyright (C) 1997,99, 2000,02,03,06,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), word (32 bits),
+ cache line (1024 bits). There is a special case for setting cache lines
+ to 0, to take advantage of the dcbz instruction. */
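A hypothetical C outline of that three-size strategy (illustrative
only; dcbz itself has no portable C equivalent):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: byte stores to word-align, word stores up to a 128-byte
       line boundary, then whole cache lines (dcbz in the real code
       when the fill value is zero), then the remaining words/bytes.  */
    static void *
    memset_sketch (void *s, int c, size_t n)
    {
      unsigned char *p = s;
      uint32_t w = (unsigned char) c;
      w |= w << 8;                  /* replicate byte to halfword...  */
      w |= w << 16;                 /* ...and halfword to word.  */

      while (n > 0 && ((uintptr_t) p & 3))
        { *p++ = (unsigned char) c; n--; }
      while (n >= 4 && ((uintptr_t) p & 127))
        { *(uint32_t *) p = w; p += 4; n -= 4; }
      while (n >= 128)              /* full lines; dcbz if w == 0 */
        {
          for (int i = 0; i < 32; i++)
            ((uint32_t *) p)[i] = w;
          p += 128; n -= 128;
        }
      while (n--)                   /* tail words/bytes */
        *p++ = (unsigned char) c;
      return s;
    }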
+
+ .machine power6
+EALIGN (BP_SYM (memset), 7, 0)
+ CALL_MCOUNT
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#define rMEMP0 r3 /* Original value of 1st arg. */
+#define rCHR r4 /* Char to set in each byte. */
+#define rLEN r5 /* Length of region to set. */
+#define rMEMP r6 /* Address at which we are storing. */
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+
+#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */
+#define rMEMP3 r9 /* Alt mem pointer. */
+L(_memset):
+/* Take care of case for size <= 4. */
+ cmplwi cr1, rLEN, 4
+ andi. rALIGN, rMEMP0, 3
+ mr rMEMP, rMEMP0
+ ble- cr1, L(small)
+/* Align to word boundary. */
+ cmplwi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ beq+ L(aligned)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 4
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+ .align 4
+/* Handle the case of size < 31. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x1C
+ subfic rALIGN, rALIGN, 0x20
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stw rCHR, -4(rMEMP2)
+ stwu rCHR, -8(rMEMP2)
+ nop
+L(a1): blt cr1, L(a2)
+ stw rCHR, -4(rMEMP2)
+ stw rCHR, -8(rMEMP2)
+ stw rCHR, -12(rMEMP2)
+ stwu rCHR, -16(rMEMP2)
+L(a2): bf 29, L(caligned)
+ stw rCHR, -4(rMEMP2)
+
+ .align 3
+/* Now aligned to a 32 byte boundary. */
+L(caligned):
+ cmplwi cr1, rCHR, 0
+ clrrwi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+L(nondcbz):
+ beq L(medium) /* We may not actually get to do a full line. */
+ nop
+/* Storing a non-zero "c" value.  We are aligned at a sector (32-byte)
+   boundary but may not be at a cache line (128-byte) boundary.  */
+L(nzloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+   If rLEN is less than the distance to the next cache-line boundary use
+ cacheAligned1 code to finish the tail. */
+ cmplwi cr1,rLEN,128
+
+ andi. rTMP,rMEMP,127
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ stw rCHR,-8(rMEMP3)
+ stw rCHR,-4(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP3)
+ stw rCHR,4(rMEMP3)
+ addi rMEMP,rMEMP,32
+ stw rCHR,8(rMEMP3)
+ stw rCHR,12(rMEMP3)
+ andi. rTMP,rMEMP,127
+ stw rCHR,16(rMEMP3)
+ stw rCHR,20(rMEMP3)
+ stw rCHR,24(rMEMP3)
+ stw rCHR,28(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+/* At this point we can overrun the store queue (pipe reject) so it is
+ time to slow things down. The store queue can merge two adjacent
+ stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU.
+ So we add "group ending nops" to guarantee that we dispatch only two
+ stores every other cycle. */
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,32(rMEMP3)
+ stw rCHR,36(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmplwi cr1,rLEN,128
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,40(rMEMP3)
+ stw rCHR,44(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,48(rMEMP3)
+ stw rCHR,52(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,56(rMEMP3)
+ stw rCHR,60(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ b L(nzCacheAligned)
+
+/* Now we are aligned to the cache line and can use dcbtst. */
+ .align 5
+L(nzCacheAligned):
+ cmplwi cr1,rLEN,128
+ cmplwi cr6,rLEN,256
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(nzCacheAligned128)
+ .align 4
+L(nzCacheAligned128):
+ nop
+ addi rMEMP3,rMEMP,64
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ stw rCHR,24(rMEMP)
+ stw rCHR,28(rMEMP)
+ stw rCHR,32(rMEMP)
+ stw rCHR,36(rMEMP)
+ stw rCHR,40(rMEMP)
+ stw rCHR,44(rMEMP)
+ stw rCHR,48(rMEMP)
+ stw rCHR,52(rMEMP)
+ stw rCHR,56(rMEMP)
+ stw rCHR,60(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+/* At this point we can overrun the store queue (pipe reject) so it is
+ time to slow things down. The store queue can merge two adjacent
+ stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU.
+ So we add "group ending nops" to guarantee that we dispatch only one
+ store per cycle. */
+ stw rCHR,0(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,4(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,8(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,12(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,16(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,20(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,24(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,28(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,32(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,36(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,40(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,44(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,48(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,52(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,56(rMEMP3)
+ ori r1,r1,0
+ stw rCHR,60(rMEMP3)
+ blt cr6,L(cacheAligned1)
+#ifndef NOT_IN_libc
+ lfd 0,-128(rMEMP)
+#endif
+ b L(nzCacheAligned256)
+ .align 5
+L(nzCacheAligned256):
+ cmplwi cr1,rLEN,256
+ addi rMEMP3,rMEMP,64
+#ifdef NOT_IN_libc
+/* When we are not in libc we should use only GPRs to avoid the FPU lock
+ interrupt. */
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ stw rCHR,24(rMEMP)
+ stw rCHR,28(rMEMP)
+ stw rCHR,32(rMEMP)
+ stw rCHR,36(rMEMP)
+ stw rCHR,40(rMEMP)
+ stw rCHR,44(rMEMP)
+ stw rCHR,48(rMEMP)
+ stw rCHR,52(rMEMP)
+ stw rCHR,56(rMEMP)
+ stw rCHR,60(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+ stw rCHR,0(rMEMP3)
+ stw rCHR,4(rMEMP3)
+ stw rCHR,8(rMEMP3)
+ stw rCHR,12(rMEMP3)
+ stw rCHR,16(rMEMP3)
+ stw rCHR,20(rMEMP3)
+ stw rCHR,24(rMEMP3)
+ stw rCHR,28(rMEMP3)
+ stw rCHR,32(rMEMP3)
+ stw rCHR,36(rMEMP3)
+ stw rCHR,40(rMEMP3)
+ stw rCHR,44(rMEMP3)
+ stw rCHR,48(rMEMP3)
+ stw rCHR,52(rMEMP3)
+ stw rCHR,56(rMEMP3)
+ stw rCHR,60(rMEMP3)
+#else
+/* We are in libc and this is a long memset so we can use FPRs and can afford
+   occasional FPU lock interrupts.  */
+ stfd 0,0(rMEMP)
+ stfd 0,8(rMEMP)
+ stfd 0,16(rMEMP)
+ stfd 0,24(rMEMP)
+ stfd 0,32(rMEMP)
+ stfd 0,40(rMEMP)
+ stfd 0,48(rMEMP)
+ stfd 0,56(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+ stfd 0,0(rMEMP3)
+ stfd 0,8(rMEMP3)
+ stfd 0,16(rMEMP3)
+ stfd 0,24(rMEMP3)
+ stfd 0,32(rMEMP3)
+ stfd 0,40(rMEMP3)
+ stfd 0,48(rMEMP3)
+ stfd 0,56(rMEMP3)
+#endif
+ bge cr1,L(nzCacheAligned256)
+ dcbtst 0,rMEMP
+ b L(cacheAligned1)
+
+ .align 4
+/* Storing a zero "c" value. We are aligned at a sector (32-byte)
+ boundary but may not be at cache line (128-byte) boundary. If the
+ remaining length spans a full cache line we can use the Data cache
+ block zero instruction. */
+L(zloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+   If rLEN is less than the distance to the next cache-line boundary use
+ cacheAligned1 code to finish the tail. */
+ cmplwi cr1,rLEN,128
+ beq L(medium)
+L(getCacheAligned):
+ andi. rTMP,rMEMP,127
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ stw rCHR,-8(rMEMP3)
+ stw rCHR,-4(rMEMP3)
+L(getCacheAligned2):
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ addi rMEMP,rMEMP,32
+ stw rCHR,0(rMEMP3)
+ stw rCHR,4(rMEMP3)
+ stw rCHR,8(rMEMP3)
+ stw rCHR,12(rMEMP3)
+ andi. rTMP,rMEMP,127
+ nop
+ stw rCHR,16(rMEMP3)
+ stw rCHR,20(rMEMP3)
+ stw rCHR,24(rMEMP3)
+ stw rCHR,28(rMEMP3)
+L(getCacheAligned3):
+ beq L(cacheAligned)
+/* At this point we can overrun the store queue (pipe reject) so it is
+ time to slow things down. The store queue can merge two adjacent
+ stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU.
+ So we add "group ending nops" to guarantee that we dispatch only two
+ stores every other cycle. */
+ addi rLEN,rLEN,-32
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,32(rMEMP3)
+ stw rCHR,36(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmplwi cr1,rLEN,128
+ ori r1,r1,0
+ stw rCHR,40(rMEMP3)
+ stw rCHR,44(rMEMP3)
+ cmplwi cr6,rLEN,256
+ li rMEMP2,128
+ ori r1,r1,0
+ stw rCHR,48(rMEMP3)
+ stw rCHR,52(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,56(rMEMP3)
+ stw rCHR,60(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAlignedx)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+ .align 4
+L(cacheAligned):
+ cmplwi cr1,rLEN,128
+ cmplwi cr6,rLEN,256
+ blt cr1,L(cacheAligned1)
+ li rMEMP2,128
+L(cacheAlignedx):
+ cmpldi cr5,rLEN,640
+ blt cr6,L(cacheAligned128)
+ bgt cr5,L(cacheAligned512)
+ cmplwi cr6,rLEN,512
+ dcbz 0,rMEMP
+ cmplwi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAligned256)
+ .align 5
+/* A simple loop for the longer (>640 byte) lengths.  This form limits
+   branch mispredictions to exactly 1, at loop exit.  */
+L(cacheAligned512):
+ cmpli cr1,rLEN,128
+ blt cr1,L(cacheAligned1)
+ dcbz 0,rMEMP
+ addi rLEN,rLEN,-128
+ addi rMEMP,rMEMP,128
+ b L(cacheAligned512)
+ .align 5
+L(cacheAligned256):
+ cmplwi cr6,rLEN,512
+ dcbz 0,rMEMP
+ cmplwi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+ bge cr6,L(cacheAligned256)
+ blt cr1,L(cacheAligned1)
+ .align 4
+L(cacheAligned128):
+ dcbz 0,rMEMP
+ addi rMEMP,rMEMP,128
+ addi rLEN,rLEN,-128
+ .align 4
+L(cacheAligned1):
+ cmplwi cr1,rLEN,32
+ blt cr1,L(handletail32)
+ addi rMEMP3,rMEMP,32
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP)
+ stw rCHR,4(rMEMP)
+ stw rCHR,8(rMEMP)
+ stw rCHR,12(rMEMP)
+ stw rCHR,16(rMEMP)
+ stw rCHR,20(rMEMP)
+ addi rMEMP,rMEMP,32
+ cmplwi cr1,rLEN,32
+ stw rCHR,-8(rMEMP3)
+ stw rCHR,-4(rMEMP3)
+L(cacheAligned2):
+ blt cr1,L(handletail32)
+ addi rLEN,rLEN,-32
+ stw rCHR,0(rMEMP3)
+ stw rCHR,4(rMEMP3)
+ stw rCHR,8(rMEMP3)
+ stw rCHR,12(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmplwi cr1,rLEN,32
+ stw rCHR,16(rMEMP3)
+ stw rCHR,20(rMEMP3)
+ stw rCHR,24(rMEMP3)
+ stw rCHR,28(rMEMP3)
+ nop
+L(cacheAligned3):
+ blt cr1,L(handletail32)
+/* At this point we can overrun the store queue (pipe reject) so it is
+ time to slow things down. The store queue can merge two adjacent
+ stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU.
+ So we add "group ending nops" to guarantee that we dispatch only two
+ stores every other cycle. */
+ ori r1,r1,0
+ ori r1,r1,0
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,32(rMEMP3)
+ stw rCHR,36(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,40(rMEMP3)
+ stw rCHR,44(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,48(rMEMP3)
+ stw rCHR,52(rMEMP3)
+ ori r1,r1,0
+ ori r1,r1,0
+ stw rCHR,56(rMEMP3)
+ stw rCHR,60(rMEMP3)
+
+/* We are here because the length or remainder (rLEN) is less than the
+ cache line/sector size and does not justify aggressive loop unrolling.
+ So set up the preconditions for L(medium) and go there. */
+ .align 3
+L(handletail32):
+ cmplwi cr1,rLEN,0
+ beqlr cr1
+ b L(medium)
+
+ .align 4
+L(small):
+/* Memset of 4 bytes or less. */
+ cmplwi cr5, rLEN, 1
+ cmplwi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ cmplwi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt 29, L(medium_29t)
+L(medium_29f):
+ bge cr1, L(medium_27t)
+ bflr 28
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt cr1, L(medium_27f)
+L(medium_27t):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stwu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr 28
+L(medium_28t):
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ blr
+END (BP_SYM (memset))
+libc_hidden_builtin_def (memset)
diff --git a/sysdeps/powerpc/powerpc32/power6/wordcopy.c b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
new file mode 100644
index 0000000000..ddf28659f9
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
@@ -0,0 +1,287 @@
+/* _memcopy.c -- subroutines for memory copy functions.
+ Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+ Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = a0;
+ ((op_t *) dstp)[1] = a1;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a0 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+
+ if (len == 1)
+ return;
+
+ a0 = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ switch (align)
+ {
+ case 1:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
+ ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 2:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
+ ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 3:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
+ ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ }
+
+}
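These loops rely on the MERGE macro from memcopy.h; as a sketch, the
big-endian definition assumed here is (the actual header may differ):

    /* Assumed big-endian MERGE: shift the leading word left to drop
       the bytes already stored, and OR in the high bytes of the
       following word.  sh_1 + sh_2 == 8 * OPSIZ.  */
    #define MERGE(w0, sh_1, w1, sh_2) \
      (((w0) << (sh_1)) | ((w1) >> (sh_2)))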
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = a1;
+ ((op_t *) dstp)[0] = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make srcp aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a2 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ if (len == 1)
+ return;
+
+ a2 = a1;
+ len -= 1;
+ }
+
+ switch (align)
+ {
+ case 1:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
+ ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 2:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
+ ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 3:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
+ ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ }
+}
diff --git a/sysdeps/powerpc/powerpc32/power6x/Implies b/sysdeps/powerpc/powerpc32/power6x/Implies
new file mode 100644
index 0000000000..1ac063bbc1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6x/Implies
@@ -0,0 +1,3 @@
+powerpc/powerpc32/power6
+powerpc/powerpc32/power5+
+powerpc/powerpc32/power4
diff --git a/sysdeps/powerpc/powerpc32/power6x/fpu/Implies b/sysdeps/powerpc/powerpc32/power6x/fpu/Implies
new file mode 100644
index 0000000000..081f750093
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6x/fpu/Implies
@@ -0,0 +1,3 @@
+powerpc/powerpc32/power6/fpu
+powerpc/powerpc32/power5+/fpu
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S b/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S
new file mode 100644
index 0000000000..76d8432920
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S
@@ -0,0 +1,42 @@
+/* Round double to long int. POWER6x PowerPC32 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power6"
+/* long int[r3] __lrint (double x[fp1]) */
+ENTRY (__lrint)
+ fctiw fp13,fp1
+ mftgpr r3,fp13
+ blr
+ END (__lrint)
+
+weak_alias (__lrint, lrint)
+
+strong_alias (__lrint, __lrintf)
+weak_alias (__lrint, lrintf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__lrint, __lrintl)
+weak_alias (__lrint, lrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __lrint, lrintl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S
new file mode 100644
index 0000000000..dc0ace821a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S
@@ -0,0 +1,52 @@
+/* lround function. POWER6x, PowerPC32 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long [r3] lround (double x [fp1])
+ IEEE 1003.1 lround function. IEEE specifies "round to the nearest
+ integer value, rounding halfway cases away from zero, regardless of
+ the current rounding mode." However, the PowerPC Architecture defines
+ "Round to Nearest" as "Choose the best approximation. In case of a
+ tie, choose the one that is even (least significant bit 0).".
+ So we pre-round using the V2.02 Floating Round to Integer Nearest
+ instruction before we use the Floating Convert to Integer Word with
+ round to zero instruction. */
+
+ .machine "power6"
+ENTRY (__lround)
+ frin fp2,fp1 /* Pre-round +-0.5. */
+ fctiwz fp3,fp2 /* Convert to Integer Word, rounding toward 0. */
+ mftgpr r3,fp3 /* Transfer fpr3 to r3. */
+ blr
+ END (__lround)
+
+weak_alias (__lround, lround)
+
+strong_alias (__lround, __lroundf)
+weak_alias (__lround, lroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
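
The frin/fctiwz pairing above has a direct portable-C analogue: C99
round() performs the same round-half-away-from-zero step as frin, after
which the value is integral, so truncation cannot change it. A small
illustrative sketch (not the glibc implementation):

#include <math.h>

long
lround_sketch (double x)
{
  double pre = round (x);   /* frin: halfway cases go away from zero */
  return (long) pre;        /* fctiwz: pre is integral, truncation is exact */
}
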
diff --git a/sysdeps/powerpc/powerpc64/970/Implies b/sysdeps/powerpc/powerpc64/970/Implies
new file mode 100644
index 0000000000..ac431fa96e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/970/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4
diff --git a/sysdeps/powerpc/powerpc64/970/fpu/Implies b/sysdeps/powerpc/powerpc64/970/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/970/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power4/Makefile b/sysdeps/powerpc/powerpc64/power4/Makefile
new file mode 100644
index 0000000000..60aa508ba4
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/Makefile
@@ -0,0 +1,6 @@
+# Makefile fragment for POWER4/5/5+.
+
+ifeq ($(subdir),string)
+CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops -ftree-loop-linear
+CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops -ftree-loop-linear
+endif
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/Makefile b/sysdeps/powerpc/powerpc64/power4/fpu/Makefile
new file mode 100644
index 0000000000..89dfa5ef35
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/Makefile
@@ -0,0 +1,5 @@
+# Makefile fragment for POWER4/5/5+ platforms with FPU.
+
+ifeq ($(subdir),math)
+CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops -ftree-loop-linear
+endif
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
new file mode 100644
index 0000000000..4a232e27bf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
@@ -0,0 +1,549 @@
+
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2006 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/************************************************************************/
+/* MODULE_NAME: mpa.c */
+/* */
+/* FUNCTIONS: */
+/* mcr */
+/* acr */
+/* cr */
+/* cpy */
+/* cpymn */
+/* norm */
+/* denorm */
+/* mp_dbl */
+/* dbl_mp */
+/* add_magnitudes */
+/* sub_magnitudes */
+/* add */
+/* sub */
+/* mul */
+/* inv */
+/* dvd */
+/* */
+/* Arithmetic functions for multiple precision numbers. */
+/* Relative errors are bounded */
+/************************************************************************/
+
+
+#include "endian.h"
+#include "mpa.h"
+#include "mpa2.h"
+#include <sys/param.h> /* For MIN() */
+/* mcr() compares the sizes of the mantissas of two multiple precision */
+/* numbers. Mantissas are compared regardless of the signs of the */
+/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */
+/* disregarded. */
+static int mcr(const mp_no *x, const mp_no *y, int p) {
+ long i;
+ long p2 = p;
+ for (i=1; i<=p2; i++) {
+ if (X[i] == Y[i]) continue;
+ else if (X[i] > Y[i]) return 1;
+ else return -1; }
+ return 0;
+}
+
+
+
+/* acr() compares the absolute values of two multiple precision numbers */
+int __acr(const mp_no *x, const mp_no *y, int p) {
+ long i;
+
+ if (X[0] == ZERO) {
+ if (Y[0] == ZERO) i= 0;
+ else i=-1;
+ }
+ else if (Y[0] == ZERO) i= 1;
+ else {
+ if (EX > EY) i= 1;
+ else if (EX < EY) i=-1;
+ else i= mcr(x,y,p);
+ }
+
+ return i;
+}
+
+
+/* cr() compares the values of two multiple precision numbers */
+int __cr(const mp_no *x, const mp_no *y, int p) {
+ int i;
+
+ if (X[0] > Y[0]) i= 1;
+ else if (X[0] < Y[0]) i=-1;
+ else if (X[0] < ZERO ) i= __acr(y,x,p);
+ else i= __acr(x,y,p);
+
+ return i;
+}
+
+
+/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
+void __cpy(const mp_no *x, mp_no *y, int p) {
+ long i;
+
+ EY = EX;
+ for (i=0; i <= p; i++) Y[i] = X[i];
+
+ return;
+}
+
+
+/* Copy a multiple precision number x of precision m into a */
+/* multiple precision number y of precision n. In case n>m, */
+/* the digits of y beyond the m'th are set to zero. In case */
+/* n<m, the digits of x beyond the n'th are ignored. */
+/* x=y is permissible. */
+
+void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
+
+ long i,k;
+ long n2 = n;
+ long m2 = m;
+
+ EY = EX; k=MIN(m2,n2);
+ for (i=0; i <= k; i++) Y[i] = X[i];
+ for ( ; i <= n2; i++) Y[i] = ZERO;
+
+ return;
+}
+
+/* Convert a multiple precision number *x into a double precision */
+/* number *y, normalized case (|x| >= 2**(-1022)) */
+static void norm(const mp_no *x, double *y, int p)
+{
+ #define R radixi.d
+ long i;
+#if 0
+ int k;
+#endif
+ double a,c,u,v,z[5];
+ if (p<5) {
+ if (p==1) c = X[1];
+ else if (p==2) c = X[1] + R* X[2];
+ else if (p==3) c = X[1] + R*(X[2] + R* X[3]);
+ else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
+ }
+ else {
+ for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
+ {a *= TWO; z[1] *= TWO; }
+
+ for (i=2; i<5; i++) {
+ z[i] = X[i]*a;
+ u = (z[i] + CUTTER)-CUTTER;
+ if (u > z[i]) u -= RADIX;
+ z[i] -= u;
+ z[i-1] += u*RADIXI;
+ }
+
+ u = (z[3] + TWO71) - TWO71;
+ if (u > z[3]) u -= TWO19;
+ v = z[3]-u;
+
+ if (v == TWO18) {
+ if (z[4] == ZERO) {
+ for (i=5; i <= p; i++) {
+ if (X[i] == ZERO) continue;
+ else {z[3] += ONE; break; }
+ }
+ }
+ else z[3] += ONE;
+ }
+
+ c = (z[1] + R *(z[2] + R * z[3]))/a;
+ }
+
+ c *= X[0];
+
+ for (i=1; i<EX; i++) c *= RADIX;
+ for (i=1; i>EX; i--) c *= RADIXI;
+
+ *y = c;
+ return;
+#undef R
+}
+
+/* Convert a multiple precision number *x into a double precision */
+/* number *y, denormalized case (|x| < 2**(-1022)) */
+static void denorm(const mp_no *x, double *y, int p)
+{
+ long i,k;
+ long p2 = p;
+ double c,u,z[5];
+#if 0
+ double a,v;
+#endif
+
+#define R radixi.d
+ if (EX<-44 || (EX==-44 && X[1]<TWO5))
+ { *y=ZERO; return; }
+
+ if (p2==1) {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
+ }
+ else if (p2==2) {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
+ }
+ else {
+ if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;}
+ else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;}
+ else {z[1]= TWO10; z[2]=ZERO; k=1;}
+ z[3] = X[k];
+ }
+
+ u = (z[3] + TWO57) - TWO57;
+ if (u > z[3]) u -= TWO5;
+
+ if (u==z[3]) {
+ for (i=k+1; i <= p2; i++) {
+ if (X[i] == ZERO) continue;
+ else {z[3] += ONE; break; }
+ }
+ }
+
+ c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
+
+ *y = c*TWOM1032;
+ return;
+
+#undef R
+}
+
+/* Convert a multiple precision number *x into a double precision number *y. */
+/* The result is correctly rounded to the nearest/even. *x is left unchanged */
+
+void __mp_dbl(const mp_no *x, double *y, int p) {
+#if 0
+ int i,k;
+ double a,c,u,v,z[5];
+#endif
+
+ if (X[0] == ZERO) {*y = ZERO; return; }
+
+ if (EX> -42) norm(x,y,p);
+ else if (EX==-42 && X[1]>=TWO10) norm(x,y,p);
+ else denorm(x,y,p);
+}
+
+
+/* dbl_mp() converts a double precision number x into a multiple precision */
+/* number *y. If the precision p is too small the result is truncated. x is */
+/* left unchanged. */
+
+void __dbl_mp(double x, mp_no *y, int p) {
+
+ long i,n;
+ long p2 = p;
+ double u;
+
+ /* Sign */
+ if (x == ZERO) {Y[0] = ZERO; return; }
+ else if (x > ZERO) Y[0] = ONE;
+ else {Y[0] = MONE; x=-x; }
+
+ /* Exponent */
+ for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI;
+ for ( ; x < ONE; EY -= ONE) x *= RADIX;
+
+ /* Digits */
+ n=MIN(p2,4);
+ for (i=1; i<=n; i++) {
+ u = (x + TWO52) - TWO52;
+ if (u>x) u -= ONE;
+ Y[i] = u; x -= u; x *= RADIX; }
+ for ( ; i<=p2; i++) Y[i] = ZERO;
+ return;
+}
+
+
+/* add_magnitudes() adds the magnitudes of *x & *y assuming that */
+/* abs(*x) >= abs(*y) > 0. */
+/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
+/* No guard digit is used. The result equals the exact sum, truncated. */
+/* *x & *y are left unchanged. */
+
+static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i,j,k;
+ long p2 = p;
+
+ EZ = EX;
+
+ i=p2; j=p2+ EY - EX; k=p2+1;
+
+ if (j<1)
+ {__cpy(x,z,p); return; }
+ else Z[k] = ZERO;
+
+ for (; j>0; i--,j--) {
+ Z[k] += X[i] + Y[j];
+ if (Z[k] >= RADIX) {
+ Z[k] -= RADIX;
+ Z[--k] = ONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (; i>0; i--) {
+ Z[k] += X[i];
+ if (Z[k] >= RADIX) {
+ Z[k] -= RADIX;
+ Z[--k] = ONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ if (Z[1] == ZERO) {
+ for (i=1; i<=p2; i++) Z[i] = Z[i+1]; }
+ else EZ += ONE;
+}
+
+
+/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */
+/* abs(*x) > abs(*y) > 0. */
+/* The sign of the difference *z is undefined. x&y may overlap but not x&z */
+/* or y&z. One guard digit is used. The error is less than one ulp. */
+/* *x & *y are left unchanged. */
+
+static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i,j,k;
+ long p2 = p;
+
+ EZ = EX;
+
+ if (EX == EY) {
+ i=j=k=p2;
+ Z[k] = Z[k+1] = ZERO; }
+ else {
+ j= EX - EY;
+ if (j > p2) {__cpy(x,z,p); return; }
+ else {
+ i=p2; j=p2+1-j; k=p2;
+ if (Y[j] > ZERO) {
+ Z[k+1] = RADIX - Y[j--];
+ Z[k] = MONE; }
+ else {
+ Z[k+1] = ZERO;
+ Z[k] = ZERO; j--;}
+ }
+ }
+
+ for (; j>0; i--,j--) {
+ Z[k] += (X[i] - Y[j]);
+ if (Z[k] < ZERO) {
+ Z[k] += RADIX;
+ Z[--k] = MONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (; i>0; i--) {
+ Z[k] += X[i];
+ if (Z[k] < ZERO) {
+ Z[k] += RADIX;
+ Z[--k] = MONE; }
+ else
+ Z[--k] = ZERO;
+ }
+
+ for (i=1; Z[i] == ZERO; i++) ;
+ EZ = EZ - i + 1;
+ for (k=1; i <= p2+1; )
+ Z[k++] = Z[i++];
+ for (; k <= p2; )
+ Z[k++] = ZERO;
+
+ return;
+}
+
+
+/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */
+/* but not x&z or y&z. One guard digit is used. The error is less than */
+/* one ulp. *x & *y are left unchanged. */
+
+void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ int n;
+
+ if (X[0] == ZERO) {__cpy(y,z,p); return; }
+ else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
+
+ if (X[0] == Y[0]) {
+ if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; }
+ }
+ else {
+ if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; }
+ else Z[0] = ZERO;
+ }
+ return;
+}
+
+
+/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
+/* overlap but not x&z or y&z. One guard digit is used. The error is */
+/* less than one ulp. *x & *y are left unchanged. */
+
+void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ int n;
+
+ if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; }
+ else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
+
+ if (X[0] != Y[0]) {
+ if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
+ }
+ else {
+ if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
+ else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
+ else Z[0] = ZERO;
+ }
+ return;
+}
+
+
+/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */
+/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */
+/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */
+/* *x & *y are left unchanged. */
+
+void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ long i, i1, i2, j, k, k2;
+ long p2 = p;
+ double u, zk, zk2;
+
+ /* Is z=0? */
+ if (X[0]*Y[0]==ZERO)
+ { Z[0]=ZERO; return; }
+
+ /* Multiply, add and carry */
+ k2 = (p2<3) ? p2+p2 : p2+3;
+ zk = Z[k2]=ZERO;
+ for (k=k2; k>1; ) {
+ if (k > p2) {i1=k-p2; i2=p2+1; }
+ else {i1=1; i2=k; }
+#if 1
+ /* Rearrange this inner loop so the fmadd instructions are
+ independent and can execute in parallel on processors that have
+ dual symmetrical FP pipelines. */
+ if (i1 < (i2-1))
+ {
+ /* make sure we have at least 2 iterations */
+ if (((i2 - i1) & 1L) == 1L)
+ {
+ /* Handle the odd iterations case. */
+ zk2 = x->d[i2-1]*y->d[i1];
+ }
+ else
+ zk2 = zero.d;
+ /* Do two multiply/adds per loop iteration, using independent
+ accumulators; zk and zk2. */
+ for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2)
+ {
+ zk += x->d[i]*y->d[j];
+ zk2 += x->d[i+1]*y->d[j-1];
+ }
+ zk += zk2; /* final sum. */
+ }
+ else
+ {
+ /* Special case when iterations is 1. */
+ zk += x->d[i1]*y->d[i1];
+ }
+#else
+ /* The original code. */
+ for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
+#endif
+
+ u = (zk + CUTTER)-CUTTER;
+ if (u > zk) u -= RADIX;
+ Z[k] = zk - u;
+ zk = u*RADIXI;
+ --k;
+ }
+ Z[k] = zk;
+
+ /* Is there a carry beyond the most significant digit? */
+ if (Z[1] == ZERO) {
+ for (i=1; i<=p2; i++) Z[i]=Z[i+1];
+ EZ = EX + EY - 1; }
+ else
+ EZ = EX + EY;
+
+ Z[0] = X[0] * Y[0];
+ return;
+}
+
+
+/* Invert a multiple precision number. Set *y = 1 / *x. */
+/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */
+/* 2.001*r**(1-p) for p>3. */
+/* *x=0 is not permissible. *x is left unchanged. */
+
+void __inv(const mp_no *x, mp_no *y, int p) {
+ long i;
+#if 0
+ int l;
+#endif
+ double t;
+ mp_no z,w;
+ static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
+ const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+ 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
+
+ __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p);
+ t=ONE/t; __dbl_mp(t,y,p); EY -= EX;
+
+ for (i=0; i<np1[p]; i++) {
+ __cpy(y,&w,p);
+ __mul(x,&w,y,p);
+ __sub(&mptwo,y,&z,p);
+ __mul(&w,&z,y,p);
+ }
+ return;
+}
+
+
+/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
+/* are left unchanged. x&y may overlap but not x&z or y&z. */
+/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */
+/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */
+
+void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
+
+ mp_no w;
+
+ if (X[0] == ZERO) Z[0] = ZERO;
+ else {__inv(y,&w,p); __mul(x,&w,z,p);}
+ return;
+}
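
Reading the routines above, an mp_no carries a sign in d[0] (one of -1,
0, +1), a radix-2**24 exponent e, and integer digits d[1..p], each stored
in a double. On that reading, the value an mp_no denotes can be
reconstructed as in this illustrative sketch (it assumes RADIX = 2**24 as
in glibc's mpa.h; the function name is hypothetical):

#include <math.h>

/* value = d[0] * (d[1]*RADIX**(e-1) + d[2]*RADIX**(e-2) + ...
                   + d[p]*RADIX**(e-p))  */
double
mp_value_sketch (const double *d, int e, int p)
{
  double v = 0.0;
  for (int i = 1; i <= p; i++)
    v += d[i] * pow (0x1p24, (double) (e - i));   /* RADIX**(e-i) */
  return d[0] * v;
}
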
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c b/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
new file mode 100644
index 0000000000..b22b0dfeab
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
@@ -0,0 +1,66 @@
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2007 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/**************************************************************************/
+/* MODULE_NAME:slowexp.c */
+/* */
+/* FUNCTION:slowexp */
+/* */
+/* FILES NEEDED:mpa.h */
+/* mpa.c mpexp.c */
+/* */
+/*Converting from double precision to Multi-precision and calculating */
+/* e^x */
+/**************************************************************************/
+#include "math_private.h"
+
+#ifdef NO_LONG_DOUBLE
+#include "mpa.h"
+void __mpexp(mp_no *x, mp_no *y, int p);
+#endif
+
+/* Converting from double precision to multi-precision and calculating e^x */
+double __slowexp(double x) {
+#ifdef NO_LONG_DOUBLE
+ double w,z,res,eps=3.0e-26;
+ int p;
+ mp_no mpx, mpy, mpz,mpw,mpeps,mpcor;
+
+ p=6;
+ __dbl_mp(x,&mpx,p); /* Convert a double precision number x */
+ /* into a multiple precision number mpx with prec. p. */
+ __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */
+ __dbl_mp(eps,&mpeps,p);
+ __mul(&mpeps,&mpy,&mpcor,p);
+ __add(&mpy,&mpcor,&mpw,p);
+ __sub(&mpy,&mpcor,&mpz,p);
+ __mp_dbl(&mpw, &w, p);
+ __mp_dbl(&mpz, &z, p);
+ if (w == z) return w;
+ else { /* if the result is not accurate enough */
+ p = 32;
+ __dbl_mp(x,&mpx,p);
+ __mpexp(&mpx, &mpy, p);
+ __mp_dbl(&mpy, &res, p);
+ return res;
+ }
+#else
+ return (double) __ieee754_expl((long double)x);
+#endif
+}
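
The accept-or-escalate structure of __slowexp is: compute e^x at a modest
precision, widen the result by a relative error bound eps, and accept it
only when both interval endpoints round to the same double. The sketch
below shows just that test, with long double standing in for the
multi-precision type (hypothetical; the real code uses __mpexp and
retries at p = 32 on failure):

#include <math.h>

double
slowexp_pattern_sketch (double x)
{
  long double y   = expl ((long double) x);   /* cheap first pass */
  long double cor = 3.0e-26L * y;             /* scaled error bound */
  double w = (double) (y + cor);
  double z = (double) (y - cor);
  if (w == z)
    return w;   /* both endpoints round the same; result is correct */
  return w;     /* here the real code recomputes at higher precision */
}
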
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c b/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
new file mode 100644
index 0000000000..ad147a89a6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
@@ -0,0 +1,94 @@
+/*
+ * IBM Accurate Mathematical Library
+ * written by International Business Machines Corp.
+ * Copyright (C) 2001, 2006 Free Software Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+/*************************************************************************/
+/* MODULE_NAME:slowpow.c */
+/* */
+/* FUNCTION:slowpow */
+/* */
+/*FILES NEEDED:mpa.h */
+/* mpa.c mpexp.c mplog.c halfulp.c */
+/* */
+/* Given two IEEE double machine numbers x and y, the routine computes */
+/* the correctly rounded (to nearest) value of x^y. The result is */
+/* first calculated by multiplication (in halfulp.c); if that result */
+/* is not accurate enough, the routine converts x and y into */
+/* multi-precision doubles and recomputes. */
+/*************************************************************************/
+
+#include "mpa.h"
+#include "math_private.h"
+
+void __mpexp (mp_no * x, mp_no * y, int p);
+void __mplog (mp_no * x, mp_no * y, int p);
+double ulog (double);
+double __halfulp (double x, double y);
+
+double
+__slowpow (double x, double y, double z)
+{
+ double res, res1;
+ long double ldw, ldz, ldpp;
+ static const long double ldeps = 0x4.0p-96;
+
+ res = __halfulp (x, y); /* halfulp() returns -10 or x^y */
+ if (res >= 0)
+ return res; /* if result was really computed by halfulp */
+ /* else, if result was not really computed by halfulp */
+
+ /* Compute pow as long double, 106 bits */
+ ldz = __ieee754_logl ((long double) x);
+ ldw = (long double) y *ldz;
+ ldpp = __ieee754_expl (ldw);
+ res = (double) (ldpp + ldeps);
+ res1 = (double) (ldpp - ldeps);
+
+ if (res != res1) /* if result still not accurate enough */
+ { /* use mpa for higher precision. */
+ mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
+ static const mp_no eps = { -3, {1.0, 4.0} };
+ int p;
+
+ p = 10; /* p=precision 240 bits */
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p); /* log(x) = z */
+ __mul (&mpy, &mpz, &mpw, p); /* y * z =w */
+ __mpexp (&mpw, &mpp, p); /* e^w =pp */
+ __add (&mpp, &eps, &mpr, p); /* pp+eps =r */
+ __mp_dbl (&mpr, &res, p);
+ __sub (&mpp, &eps, &mpr1, p); /* pp -eps =r1 */
+ __mp_dbl (&mpr1, &res1, p); /* converting into double precision */
+ if (res == res1)
+ return res;
+
+ /* If we get here the result wasn't calculated exactly; continue
+ with a more exact calculation using 768 bits. */
+ p = 32;
+ __dbl_mp (x, &mpx, p);
+ __dbl_mp (y, &mpy, p);
+ __dbl_mp (z, &mpz, p);
+ __mplog (&mpx, &mpz, p); /* log(x) = z */
+ __mul (&mpy, &mpz, &mpw, p); /* y*z =w */
+ __mpexp (&mpw, &mpp, p); /* e^w=pp */
+ __mp_dbl (&mpp, &res, p); /* converting into double precision */
+ }
+ return res;
+}
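
The first phase of __slowpow evaluates x^y as exp(y * log(x)) in 106-bit
long double and applies the same endpoint test, nudging the result by
ldeps before rounding to double. A hedged sketch of that phase alone (the
240- and 768-bit mpa retries are omitted, and the name is illustrative):

#include <math.h>

double
slowpow_phase1_sketch (double x, double y, int *accurate)
{
  long double ldz = logl ((long double) x);
  long double pp  = expl ((long double) y * ldz);
  double res  = (double) (pp + 0x4.0p-96L);   /* nudge up by ldeps */
  double res1 = (double) (pp - 0x4.0p-96L);   /* nudge down by ldeps */
  *accurate = (res == res1);   /* equal: all 53 result bits are settled */
  return res;
}
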
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c b/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c
new file mode 100644
index 0000000000..d2b62b2672
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c
@@ -0,0 +1,62 @@
+/* Double-precision floating point square root wrapper.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math_ldbl_opt.h>
+#include "math.h"
+#include "math_private.h"
+#include <fenv_libc.h>
+
+#ifdef __STDC__
+double
+__sqrt (double x) /* wrapper sqrt */
+#else
+double
+__sqrt (x) /* wrapper sqrt */
+ double x;
+#endif
+{
+ double z;
+/* Power4 (ISA V2.0) and above implement sqrt in hardware. */
+ __asm __volatile (
+ " fsqrt %0,%1\n"
+ : "=f" (z)
+ : "f" (x));
+#ifdef _IEEE_LIBM
+ return z;
+#else
+ if (__builtin_expect (_LIB_VERSION == _IEEE_, 0))
+ return z;
+
+ if (__builtin_expect (x != x, 0))
+ return z;
+
+ if (__builtin_expect (x < 0.0, 0))
+ return __kernel_standard (x, x, 26); /* sqrt(negative) */
+ else
+ return z;
+#endif
+}
+
+weak_alias (__sqrt, sqrt)
+#ifdef NO_LONG_DOUBLE
+ strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0);
+#endif
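
Since fsqrt already yields a QNaN for negative or NaN input, the wrapper
above only has to decide whether to report a domain error. A plain-C
sketch of that dispatch, with GCC's __builtin_sqrt standing in for the
fsqrt inline asm and errno standing in for the __kernel_standard call
(illustrative only):

#include <errno.h>

double
sqrt_wrapper_sketch (double x)
{
  double z = __builtin_sqrt (x);  /* hardware sqrt; QNaN if x < 0 or NaN */
  if (x != x)
    return z;                     /* NaN input: propagate quietly */
  if (x < 0.0)
    errno = EDOM;                 /* sqrt(negative): flag the domain error */
  return z;
}
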
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c b/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c
new file mode 100644
index 0000000000..4784869f07
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c
@@ -0,0 +1,60 @@
+/* Single-precision floating point square root wrapper.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include "math.h"
+#include "math_private.h"
+#include <fenv_libc.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+#ifdef __STDC__
+float
+__sqrtf (float x) /* wrapper sqrtf */
+#else
+float
+__sqrtf (x) /* wrapper sqrtf */
+ float x;
+#endif
+{
+#ifdef _IEEE_LIBM
+ return __ieee754_sqrtf (x);
+#else
+ float z;
+/* Power4 (ISA V2.0) and above implement sqrtf in hardware. */
+ __asm __volatile (
+ " fsqrts %0,%1\n"
+ : "=f" (z)
+ : "f" (x));
+
+ if (__builtin_expect (_LIB_VERSION == _IEEE_, 0))
+ return z;
+
+ if (__builtin_expect (x != x, 0))
+ return z;
+
+ if (__builtin_expect (x < 0.0, 0))
+ /* sqrtf(negative) */
+ return (float) __kernel_standard ((double) x, (double) x, 126);
+ else
+ return z;
+#endif
+}
+
+weak_alias (__sqrtf, sqrtf)
diff --git a/sysdeps/powerpc/powerpc64/power4/memcmp.S b/sysdeps/powerpc/powerpc64/power4/memcmp.S
new file mode 100644
index 0000000000..a5e0c758df
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/memcmp.S
@@ -0,0 +1,982 @@
+/* Optimized memcmp implementation for PowerPC64.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+ .machine power4
+EALIGN (BP_SYM(memcmp), 4, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rN r5 /* max string length */
+/* Note: The Bounded pointer support in this code is broken. This code
+ was inherited from PPC32 and that support was never completed.
+ Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */
+#define rWORD1 r6 /* current word in s1 */
+#define rWORD2 r7 /* current word in s2 */
+#define rWORD3 r8 /* next word in s1 */
+#define rWORD4 r9 /* next word in s2 */
+#define rWORD5 r10 /* next word in s1 */
+#define rWORD6 r11 /* next word in s2 */
+#define rBITDIF r12 /* bits that differ in s1 & s2 words */
+#define rWORD7 r30 /* next word in s1 */
+#define rWORD8 r31 /* next word in s2 */
+
+ xor rTMP, rSTR2, rSTR1
+ cmpldi cr6, rN, 0
+ cmpldi cr1, rN, 12
+ clrldi. rTMP, rTMP, 61
+ clrldi rBITDIF, rSTR1, 61
+ cmpldi cr5, rBITDIF, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0,rSTR1
+ dcbt 0,rSTR2
+/* If less than 8 bytes or not aligned, use the unaligned
+ byte loop. */
+ blt cr1, L(bytealigned)
+ std rWORD8,-8(r1)
+ cfi_offset(rWORD8,-8)
+ std rWORD7,-16(r1)
+ cfi_offset(rWORD7,-16)
+ bne L(unaligned)
+/* At this point we know both strings have the same alignment and the
+ compare length is at least 8 bytes. rBITDIF contains the low order
+ 3 bits of rSTR1 and cr5 contains the result of the logical compare
+ of rBITDIF to 0. If rBITDIF == 0 then we are already double word
+ aligned and can perform the DWaligned loop.
+
+ Otherwise we know the two strings have the same alignment (but not
+ yet DW). So we can force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
+ eliminate bits preceding the first byte. Since we want to join the
+ normal (DWaligned) compare loop, starting at the second double word,
+ we need to adjust the length (rN) and special case the loop
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
+ .align 4
+L(samealignment):
+ clrrdi rSTR1, rSTR1, 3
+ clrrdi rSTR2, rSTR2, 3
+ beq cr5, L(DWaligned)
+ add rN, rN, rBITDIF
+ sldi r11, rBITDIF, 3
+ srdi rTMP, rN, 5 /* Divide by 32 */
+ andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ cmpldi cr1, rBITDIF, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ beq L(dPs4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
+
+/* Remainder is 8 */
+ .align 3
+L(dsP1):
+ sld rWORD5, rWORD1, r11
+ sld rWORD6, rWORD2, r11
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+/* Do something useful in this cycle since we have to branch anyway. */
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ b L(dP1e)
+/* Remainder is 16 */
+ .align 4
+L(dPs2):
+ sld rWORD5, rWORD1, r11
+ sld rWORD6, rWORD2, r11
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+/* Do something useful in this cycle since we have to branch anyway. */
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ b L(dP2e)
+/* Remainder is 24 */
+ .align 4
+L(dPs3):
+ sld rWORD3, rWORD1, r11
+ sld rWORD4, rWORD2, r11
+ cmpld cr1, rWORD3, rWORD4
+ b L(dP3e)
+/* Count is a multiple of 32, remainder is 0 */
+ .align 4
+L(dPs4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ sld rWORD1, rWORD1, r11
+ sld rWORD2, rWORD2, r11
+ cmpld cr0, rWORD1, rWORD2
+ b L(dP4e)
+
+/* At this point we know both strings are double word aligned and the
+ compare length is at least 8 bytes. */
+ .align 4
+L(DWaligned):
+ andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ srdi rTMP, rN, 5 /* Divide by 32 */
+ cmpldi cr1, rBITDIF, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ beq L(dP4)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
+
+/* Remainder is 8 */
+ .align 4
+L(dP1):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
+ (8-15 byte compare), we want to use only volatile registers. This
+ means we can avoid restoring non-volatile registers since we did not
+ change any on the early exit path. The key here is the non-early
+ exit path only cares about the condition code (cr5), not about which
+ register pair was used. */
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+L(dP1e):
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ bne cr0, L(dLcr0)
+
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+ bne cr1, L(dLcr1)
+ cmpld cr5, rWORD7, rWORD8
+ bdnz L(dLoop)
+ bne cr6, L(dLcr6)
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ .align 3
+L(dP1x):
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 16 */
+ .align 4
+L(dP2):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+L(dP2e):
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
+ b L(dLoop2)
+/* Again we are on an early exit path (16-23 byte compare); we want to
+ use only volatile registers and avoid restoring non-volatile
+ registers. */
+ .align 4
+L(dP2x):
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+ cmpld cr5, rWORD3, rWORD4
+ sldi. r12, rN, 3
+ bne cr6, L(dLcr6)
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr5, L(dLcr5)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Remainder is 24 */
+ .align 4
+L(dP3):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ ld rWORD3, 0(rSTR1)
+ ld rWORD4, 0(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+L(dP3e):
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
+ b L(dLoop1)
+/* Again we are on an early exit path (24-31 byte compare); we want to
+ use only volatile registers and avoid restoring non-volatile
+ registers. */
+ .align 4
+L(dP3x):
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+ cmpld cr5, rWORD1, rWORD2
+ sldi. r12, rN, 3
+ bne cr1, L(dLcr1)
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+ bne cr6, L(dLcr6)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne cr5, L(dLcr5)
+ bne L(d00)
+ li rRTN, 0
+ blr
+
+/* Count is a multiple of 32, remainder is 0 */
+ .align 4
+L(dP4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+L(dP4e):
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr0, L(dLcr0)
+ bne cr1, L(dLcr1)
+ bdz- L(d24) /* Adjust CTR as we start with +4 */
+/* This is the primary loop */
+ .align 4
+L(dLoop):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+L(dLoop1):
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+L(dLoop2):
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr0, L(dLcr0)
+L(dLoop3):
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+ bne- cr1, L(dLcr1)
+ cmpld cr0, rWORD1, rWORD2
+ bdnz+ L(dLoop)
+
+L(dL4):
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmpld cr5, rWORD7, rWORD8
+L(d44):
+ bne cr0, L(dLcr0)
+L(d34):
+ bne cr1, L(dLcr1)
+L(d24):
+ bne cr6, L(dLcr6)
+L(d14):
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5)
+L(d04):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ beq L(zeroLength)
+/* At this point we have a remainder of 1 to 7 bytes to compare. Since
+ we are aligned it is safe to load the whole double word, and use
+ shift right double to eliminate bits beyond the compare length. */
+L(d00):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ cmpld cr5, rWORD1, rWORD2
+ bne cr5, L(dLcr5x)
+ li rRTN, 0
+ blr
+ .align 4
+L(dLcr0):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgtlr cr0
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr1):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgtlr cr1
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr6):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgtlr cr6
+ li rRTN, -1
+ blr
+ .align 4
+L(dLcr5):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+L(dLcr5x):
+ li rRTN, 1
+ bgtlr cr5
+ li rRTN, -1
+ blr
+
+ .align 4
+L(bytealigned):
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ beq- cr6, L(zeroLength)
+
+/* We need to prime this loop. This loop is swing modulo scheduled
+ to avoid pipe delays. The dependent instruction latency (load to
+ compare to conditional branch) is 2 to 3 cycles. In this loop each
+ dispatch group ends in a branch and takes 1 cycle. Effectively
+ the first iteration of the loop only serves to load operands and
+ branches based on compares are delayed until the next loop.
+
+ So we must precondition some registers and condition codes so that
+ we don't exit the loop early on the first iteration. */
+
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ bdz- L(b11)
+ cmpld cr0, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
+ bdz- L(b12)
+ cmpld cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
+ bdz- L(b13)
+ .align 4
+L(bLoop):
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- cr0, L(bLcr0)
+
+ cmpld cr6, rWORD5, rWORD6
+ bdz- L(b3i)
+
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- cr1, L(bLcr1)
+
+ cmpld cr0, rWORD1, rWORD2
+ bdz- L(b2i)
+
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne- cr6, L(bLcr6)
+
+ cmpld cr1, rWORD3, rWORD4
+ bdnz+ L(bLoop)
+
+/* We speculatively load bytes before we have tested the previous
+ bytes. But we must avoid overrunning the length (in the ctr) to
+ prevent these speculative loads from causing a segfault. In this
+ case the loop will exit early (before all the pending bytes are
+ tested), so we must complete the pending operations before
+ returning. */
+L(b1i):
+ bne- cr0, L(bLcr0)
+ bne- cr1, L(bLcr1)
+ b L(bx56)
+ .align 4
+L(b2i):
+ bne- cr6, L(bLcr6)
+ bne- cr0, L(bLcr0)
+ b L(bx34)
+ .align 4
+L(b3i):
+ bne- cr1, L(bLcr1)
+ bne- cr6, L(bLcr6)
+ b L(bx12)
+ .align 4
+L(bLcr0):
+ li rRTN, 1
+ bgtlr cr0
+ li rRTN, -1
+ blr
+L(bLcr1):
+ li rRTN, 1
+ bgtlr cr1
+ li rRTN, -1
+ blr
+L(bLcr6):
+ li rRTN, 1
+ bgtlr cr6
+ li rRTN, -1
+ blr
+
+L(b13):
+ bne- cr0, L(bx12)
+ bne- cr1, L(bx34)
+L(bx56):
+ sub rRTN, rWORD5, rWORD6
+ blr
+ nop
+L(b12):
+ bne- cr0, L(bx12)
+L(bx34):
+ sub rRTN, rWORD3, rWORD4
+ blr
+L(b11):
+L(bx12):
+ sub rRTN, rWORD1, rWORD2
+ blr
+ .align 4
+L(zeroLengthReturn):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+L(zeroLength):
+ li rRTN, 0
+ blr
+
+ .align 4
+/* At this point we know the strings have different alignment and the
+ compare length is at least 8 bytes. rBITDIF contains the low order
+ 3 bits of rSTR1 and cr5 contains the result of the logical compare
+ of rBITDIF to 0. If rBITDIF == 0 then rSTR1 is double word
+ aligned and we can perform the DWunaligned loop.
+
+ Otherwise we know that rSTR1 is not already DW aligned. So we can
+ force the string addresses to the next lower DW boundary and
+ special case this first DW using shift left to eliminate bits
+ preceding the first byte. Since we want to join the normal
+ (DWaligned) compare loop, starting at the second double word, we
+ need to adjust the length (rN) and special case the loop versioning
+ for the first DW. This ensures that the loop count is correct and
+ the first DW (shifted) is in the expected register pair. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rB r27 /* Left rotation temp for rWORD2. */
+#define rD r26 /* Left rotation temp for rWORD4. */
+#define rF r25 /* Left rotation temp for rWORD6. */
+#define rH r24 /* Left rotation temp for rWORD8. */
+#define rA r0 /* Right rotation temp for rWORD2. */
+#define rC r12 /* Right rotation temp for rWORD4. */
+#define rE r0 /* Right rotation temp for rWORD6. */
+#define rG r12 /* Right rotation temp for rWORD8. */
+L(unaligned):
+ std r29,-24(r1)
+ cfi_offset(r29,-24)
+ clrldi rSHL, rSTR2, 61
+ beq- cr6, L(duzeroLength)
+ std r28,-32(r1)
+ cfi_offset(r28,-32)
+ beq cr5, L(DWunaligned)
+ std r27,-40(r1)
+ cfi_offset(r27,-40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
+ in the 1st rSTR1 DW. */
+ sub r27, rSTR2, rBITDIF
+/* But do not attempt to address the DW before the DW that contains
+ the actual start of rSTR2. */
+ clrrdi rSTR2, rSTR2, 3
+ std r26,-48(r1)
+ cfi_offset(r26,-48)
+/* Compute the left/right shift counts for the unaligned rSTR2,
+ compensating for the logical (DW aligned) start of rSTR1. */
+ clrldi rSHL, r27, 61
+ clrrdi rSTR1, rSTR1, 3
+ std r25,-56(r1)
+ cfi_offset(r25,-56)
+ sldi rSHL, rSHL, 3
+ cmpld cr5, r27, rSTR2
+ add rN, rN, rBITDIF
+ sldi r11, rBITDIF, 3
+ std r24,-64(r1)
+ cfi_offset(r24,-64)
+ subfic rSHR, rSHL, 64
+ srdi rTMP, rN, 5 /* Divide by 32 */
+ andi. rBITDIF, rN, 24 /* Get the DW remainder */
+/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
+ this special case those bits may be discarded anyway. Also we
+ must avoid loading a DW where none of the bits are part of rSTR2 as
+ this may cross a page boundary and cause a page fault. */
+ li rWORD8, 0
+ blt cr5, L(dus0)
+ ld rWORD8, 0(rSTR2)
+ la rSTR2, 8(rSTR2)
+ sld rWORD8, rWORD8, rSHL
+
+L(dus0):
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ cmpldi cr1, rBITDIF, 16
+ cmpldi cr7, rN, 32
+ srd rG, rWORD2, rSHR
+ clrldi rN, rN, 61
+ beq L(duPs4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, rG, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
+
+/* Remainder is 8 */
+ .align 4
+L(dusP1):
+ sld rB, rWORD2, rSHL
+ sld rWORD7, rWORD1, r11
+ sld rWORD8, rWORD8, r11
+ bge cr7, L(duP1e)
+/* At this point we exit early with the first double word compare
+ complete and remainder of 0 to 7 bytes. See L(du14) for details on
+ how we handle the remaining bytes. */
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ b L(dutrim)
+/* Remainder is 16 */
+ .align 4
+L(duPs2):
+ sld rH, rWORD2, rSHL
+ sld rWORD5, rWORD1, r11
+ sld rWORD6, rWORD8, r11
+ b L(duP2e)
+/* Remainder is 24 */
+ .align 4
+L(duPs3):
+ sld rF, rWORD2, rSHL
+ sld rWORD3, rWORD1, r11
+ sld rWORD4, rWORD8, r11
+ b L(duP3e)
+/* Count is a multiple of 32, remainder is 0 */
+ .align 4
+L(duPs4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, rG, rWORD8
+ sld rD, rWORD2, rSHL
+ sld rWORD1, rWORD1, r11
+ sld rWORD2, rWORD8, r11
+ b L(duP4e)
+
+/* At this point we know rSTR1 is double word aligned and the
+ compare length is at least 8 bytes. */
+ .align 4
+L(DWunaligned):
+ std r27,-40(r1)
+ cfi_offset(r27,-40)
+ clrrdi rSTR2, rSTR2, 3
+ std r26,-48(r1)
+ cfi_offset(r26,-48)
+ srdi rTMP, rN, 5 /* Divide by 32 */
+ std r25,-56(r1)
+ cfi_offset(r25,-56)
+ andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ std r24,-64(r1)
+ cfi_offset(r24,-64)
+ sldi rSHL, rSHL, 3
+ ld rWORD6, 0(rSTR2)
+ ldu rWORD8, 8(rSTR2)
+ cmpldi cr1, rBITDIF, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ subfic rSHR, rSHL, 64
+ sld rH, rWORD6, rSHL
+ beq L(duP4)
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
+
+/* Remainder is 8 */
+ .align 4
+L(duP1):
+ srd rG, rWORD8, rSHR
+ ld rWORD7, 0(rSTR1)
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP1x)
+L(duP1e):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ srd rA, rWORD2, rSHR
+ sld rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ srd rC, rWORD4, rSHR
+ sld rF, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, rC, rD
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ srd rE, rWORD6, rSHR
+ sld rH, rWORD6, rSHL
+ bne cr0, L(duLcr0)
+ or rWORD6, rE, rF
+ cmpld cr6, rWORD5, rWORD6
+ b L(duLoop3)
+ .align 4
+/* At this point we exit early with the first double word compare
+ complete and remainder of 0 to 7 bytes. See L(du14) for details on
+ how we handle the remaining bytes. */
+L(duP1x):
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ b L(dutrim)
+/* Remainder is 16 */
+ .align 4
+L(duP2):
+ srd rE, rWORD8, rSHR
+ ld rWORD5, 0(rSTR1)
+ or rWORD6, rE, rH
+ sld rH, rWORD8, rSHL
+L(duP2e):
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ srd rG, rWORD8, rSHR
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP2x)
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd rA, rWORD2, rSHR
+ sld rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srd rC, rWORD4, rSHR
+ sld rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ cmpld cr1, rWORD3, rWORD4
+ b L(duLoop2)
+ .align 4
+L(duP2x):
+ cmpld cr5, rWORD7, rWORD8
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ b L(dutrim)
+
+/* Remainder is 24 */
+ .align 4
+L(duP3):
+ srd rC, rWORD8, rSHR
+ ld rWORD3, 0(rSTR1)
+ sld rF, rWORD8, rSHL
+ or rWORD4, rC, rH
+L(duP3e):
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ srd rE, rWORD6, rSHR
+ sld rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd rG, rWORD8, rSHR
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ blt cr7, L(duP3x)
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd rA, rWORD2, rSHR
+ sld rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+ cmpld cr0, rWORD1, rWORD2
+ b L(duLoop1)
+ .align 4
+L(duP3x):
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+ bne cr1, L(duLcr1)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ b L(dutrim)
+
+/* Count is a multiple of 32, remainder is 0 */
+ .align 4
+L(duP4):
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ srd rA, rWORD8, rSHR
+ ld rWORD1, 0(rSTR1)
+ sld rD, rWORD8, rSHL
+ or rWORD2, rA, rH
+L(duP4e):
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ srd rC, rWORD4, rSHR
+ sld rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ bne cr0, L(duLcr0)
+ srd rE, rWORD6, rSHR
+ sld rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd rG, rWORD8, rSHR
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ cmpld cr5, rWORD7, rWORD8
+ bdz- L(du24) /* Adjust CTR as we start with +4 */
+/* This is the primary loop */
+ .align 4
+L(duLoop):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srd rA, rWORD2, rSHR
+ sld rD, rWORD2, rSHL
+ or rWORD2, rA, rB
+L(duLoop1):
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srd rC, rWORD4, rSHR
+ sld rF, rWORD4, rSHL
+ or rWORD4, rC, rD
+L(duLoop2):
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+ cmpld cr5, rWORD7, rWORD8
+ bne cr0, L(duLcr0)
+ srd rE, rWORD6, rSHR
+ sld rH, rWORD6, rSHL
+ or rWORD6, rE, rF
+L(duLoop3):
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+ cmpld cr0, rWORD1, rWORD2
+ bne- cr1, L(duLcr1)
+ srd rG, rWORD8, rSHR
+ sld rB, rWORD8, rSHL
+ or rWORD8, rG, rH
+ bdnz+ L(duLoop)
+
+L(duL4):
+ bne cr1, L(duLcr1)
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmpld cr5, rWORD7, rWORD8
+L(du44):
+ bne cr0, L(duLcr0)
+L(du34):
+ bne cr1, L(duLcr1)
+L(du24):
+ bne cr6, L(duLcr6)
+L(du14):
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+/* At this point we have a remainder of 1 to 7 bytes to compare. We use
+ shift right double to eliminate bits beyond the compare length.
+ This allows the use of double word subtract to compute the final
+ result.
+
+ However it may not be safe to load rWORD2 which may be beyond the
+ string length. So we compare the bit length of the remainder to
+ the right shift count (rSHR). If the bit count is less than or equal
+ we do not need to load rWORD2 (all significant bits are already in
+ rB). */
+ cmpld cr7, rN, rSHR
+ beq L(duZeroReturn)
+ li rA, 0
+ ble cr7, L(dutrim)
+ ld rWORD2, 8(rSTR2)
+ srd rA, rWORD2, rSHR
+ .align 4
+L(dutrim):
+ ld rWORD1, 8(rSTR1)
+ ld rWORD8,-8(r1)
+ subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
+ or rWORD2, rA, rB
+ ld rWORD7,-16(r1)
+ ld r29,-24(r1)
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+ li rRTN, 0
+ cmpld cr0, rWORD1, rWORD2
+ ld r26,-48(r1)
+ ld r25,-56(r1)
+ beq cr0, L(dureturn24)
+ li rRTN, 1
+ ld r24,-64(r1)
+ bgtlr cr0
+ li rRTN, -1
+ blr
+ .align 4
+L(duLcr0):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgt cr0, L(dureturn29)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr1):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr6):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 4
+L(duLcr5):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ li rRTN, -1
+ b L(dureturn27)
+ .align 3
+L(duZeroReturn):
+ li rRTN,0
+ .align 4
+L(dureturn):
+ ld rWORD8,-8(r1)
+ ld rWORD7,-16(r1)
+L(dureturn29):
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+L(dureturn27):
+ ld r27,-40(r1)
+L(dureturn26):
+ ld r26,-48(r1)
+L(dureturn25):
+ ld r25,-56(r1)
+L(dureturn24):
+ ld r24,-64(r1)
+ blr
+L(duzeroLength):
+ li rRTN,0
+ blr
+
+END (BP_SYM (memcmp))
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp, bcmp)
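
Stripped of the instruction scheduling, the strategy above compares
aligned doublewords as unsigned integers (valid on big-endian POWER,
where word order matches byte order) and handles the 1-7 byte tail by
shifting both words right past the bytes beyond the length. A portable
sketch of that core, assuming big-endian byte order and 8-byte-aligned
inputs (names are illustrative):

#include <stddef.h>
#include <stdint.h>

int
memcmp_dw_sketch (const uint64_t *s1, const uint64_t *s2, size_t nbytes)
{
  for (size_t i = 0; i < nbytes / 8; i++)   /* whole doublewords */
    if (s1[i] != s2[i])
      return s1[i] > s2[i] ? 1 : -1;

  size_t rem = nbytes % 8;                  /* 0-7 tail bytes */
  if (rem != 0)
    {
      /* The inputs are aligned, so loading the whole final word cannot
         cross a page boundary; shift right to discard the bytes beyond
         the compare length (they are the low-order bytes here).  */
      int sh = 8 * (8 - (int) rem);
      uint64_t w1 = s1[nbytes / 8] >> sh;
      uint64_t w2 = s2[nbytes / 8] >> sh;
      if (w1 != w2)
        return w1 > w2 ? 1 : -1;
    }
  return 0;
}
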
diff --git a/sysdeps/powerpc/powerpc64/power4/memcopy.h b/sysdeps/powerpc/powerpc64/power4/memcopy.h
new file mode 100644
index 0000000000..9a4ff79f4a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/memcopy.h
@@ -0,0 +1 @@
+#include "../../powerpc32/power4/memcopy.h"
diff --git a/sysdeps/powerpc/powerpc64/power4/memcpy.S b/sysdeps/powerpc/powerpc64/power4/memcpy.S
new file mode 100644
index 0000000000..56f313b4b8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/memcpy.S
@@ -0,0 +1,418 @@
+/* Optimized memcpy implementation for PowerPC64.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+ Memcpy handles short copies (< 32 bytes) using binary move blocks
+ (no loops) of lwz/stw. The tail (remaining 1-3 bytes) is handled
+ with the appropriate combination of byte and halfword load/stores.
+ There is minimal effort to optimize the alignment of short moves.
+ The 64-bit implementations of POWER3 and POWER4 do a reasonable job
+ of handling unaligned load/stores that do not cross 32-byte boundaries.
+
+ Longer moves (>= 32 bytes) justify the effort to get at least the
+ destination doubleword (8-byte) aligned. Further optimization is
+ possible when both source and destination are doubleword aligned.
+ Each case has an optimized unrolled loop. */
+
+ .machine power4
+EALIGN (BP_SYM (memcpy), 5, 0)
+ CALL_MCOUNT 3
+
+ cmpldi cr1,5,31
+ neg 0,3
+ std 3,-16(1)
+ std 31,-8(1)
+ cfi_offset(31,-8)
+ andi. 11,3,7 /* check alignment of dst. */
+ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */
+ clrldi 10,4,61 /* check alignment of src. */
+ cmpldi cr6,5,8
+ ble- cr1,.L2 /* If move < 32 bytes use short move code. */
+ cmpld cr6,10,11
+ mr 12,4
+ srdi 9,5,3 /* Number of full double words remaining. */
+ mtcrf 0x01,0
+ mr 31,5
+ beq .L0
+
+ subf 31,0,5
+ /* Move 0-7 bytes as needed to get the destination doubleword aligned. */
+1: bf 31,2f
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: bf 30,4f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+4: bf 29,0f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+0:
+ clrldi 10,12,61 /* check alignment of src again. */
+ srdi 9,31,3 /* Number of full double words remaining. */
+
+ /* Copy doublewords from source to destination, assuming the
+ destination is aligned on a doubleword boundary.
+
+ At this point we know there are at least 25 bytes left (32-7) to copy.
+ The next step is to determine if the source is also doubleword aligned.
+ If not, branch to the unaligned move code at .L6, which uses
+ a load, shift, store strategy.
+
+ Otherwise source and destination are doubleword aligned, and we can
+ use the optimized doubleword copy loop. */
+.L0:
+ clrldi 11,31,61
+ mtcrf 0x01,9
+ cmpldi cr1,11,0
+ bne- cr6,.L6 /* If source is not DW aligned. */
+
+ /* Move doublewords where destination and source are DW aligned.
+     Use an unrolled loop to copy 4 doublewords (32 bytes) per iteration.
+     If the copy is not an exact multiple of 32 bytes, 1-3
+     doublewords are copied as needed to set up the main loop.  After
+     the main loop exits there may be a tail of 1-7 bytes.  These bytes are
+ copied a word/halfword/byte at a time as needed to preserve alignment. */
+
+ srdi 8,31,5
+ cmpldi cr1,9,4
+ cmpldi cr6,11,0
+ mr 11,12
+
+ bf 30,1f
+ ld 6,0(12)
+ ld 7,8(12)
+ addi 11,12,16
+ mtctr 8
+ std 6,0(3)
+ std 7,8(3)
+ addi 10,3,16
+ bf 31,4f
+ ld 0,16(12)
+ std 0,16(3)
+ blt cr1,3f
+ addi 11,12,24
+ addi 10,3,24
+ b 4f
+ .align 4
+1:
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ ld 6,0(12)
+ addi 11,12,8
+ std 6,0(3)
+ addi 10,3,8
+
+ .align 4
+4:
+ ld 6,0(11)
+ ld 7,8(11)
+ ld 8,16(11)
+ ld 0,24(11)
+ addi 11,11,32
+2:
+ std 6,0(10)
+ std 7,8(10)
+ std 8,16(10)
+ std 0,24(10)
+ addi 10,10,32
+ bdnz 4b
+3:
+
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+ beq cr6,0f
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+/* At this point we have a tail of 0-7 bytes and we know that the
+   destination is doubleword aligned.  */
+4: bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+
+/* Copy up to 31 bytes.  This is divided into two cases: 0-8 bytes and
+   9-31 bytes.  Each case is handled without loops, using binary
+   (1,2,4,8) tests.
+
+   In the short (0-8 byte) case no attempt is made to force alignment
+   of either source or destination.  The hardware will handle the
+   unaligned load/stores with small delays for crossing 32-, 64-, and
+   4096-byte boundaries.  Since these short moves are unlikely to be
+   unaligned or cross these boundaries, the overhead to force
+   alignment is not justified.
+
+   The longer (9-31 byte) move is more likely to cross 32- or 64-byte
+   boundaries.  Since only loads are sensitive to the 32-/64-byte
+   boundaries it is more important to align the source than the
+   destination.  If the source is not already word aligned, we first
+   move 1-3 bytes as needed.  Since we are only word aligned we don't
+   use doubleword load/stores, ensuring that all loads are aligned.
+ While the destination and stores may still be unaligned, this
+ is only an issue for page (4096 byte boundary) crossing, which
+ should be rare for these short moves. The hardware handles this
+ case automatically with a small delay. */
+
+ .align 4
+.L2:
+ mtcrf 0x01,5
+ neg 8,4
+ clrrdi 11,4,2
+ andi. 0,8,3
+ ble cr6,.LE8 /* Handle moves of 0-8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmpldi cr1,5,16
+ mr 10,5
+ mr 12,4
+ cmpldi cr6,0,2
+ beq .L3 /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(11)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srdi 7,6,16
+ bgt cr6,3f
+ sth 6,0(3)
+ b 7f
+ .align 4
+3:
+ stb 7,0(3)
+ sth 6,1(3)
+ b 7f
+ .align 4
+5:
+ stb 6,0(3)
+7:
+ cmpldi cr1,10,16
+ add 3,3,0
+ mtcrf 0x01,10
+ .align 4
+.L3:
+/* At least 6 bytes left and the source is word aligned. */
+ blt cr1,8f
+16: /* Move 16 bytes. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ stw 6,0(3)
+ lwz 6,8(12)
+ stw 7,4(3)
+ lwz 7,12(12)
+ addi 12,12,16
+ stw 6,8(3)
+ stw 7,12(3)
+ addi 3,3,16
+8: /* Move 8 bytes. */
+ bf 28,4f
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Move 4 bytes. */
+ bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Move 2-3 bytes. */
+ bf 30,1f
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 7,2(12)
+ stb 7,2(3)
+ ld 3,-16(1)
+ blr
+1: /* Move 1 byte. */
+ bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+.LE8:
+ mr 12,4
+ bne cr6,4f
+/* Would have liked to use ld/std here, but the 630 processors are
+   slow for load/store doubles that are not at least word aligned.
+   Unaligned load/store word executes with only a 1-cycle penalty.  */
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+4: bf 29,2b
+ lwz 6,0(4)
+ stw 6,0(3)
+6:
+ bf 30,5f
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,0f
+ lbz 8,6(4)
+ stb 8,6(3)
+ ld 3,-16(1)
+ blr
+ .align 4
+5:
+ bf 31,0f
+ lbz 6,4(4)
+ stb 6,4(3)
+ .align 4
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+ .align 4
+.L6:
+
+ /* Copy doublewords where the destination is aligned but the source is
+ not. Use aligned doubleword loads from the source, shifted to realign
+ the data, to allow aligned destination stores. */
+ addi 11,9,-1 /* loop DW count is one less than total */
+ subf 5,10,12
+ sldi 10,10,3
+ mr 4,3
+ srdi 8,11,2 /* calculate the 32 byte loop count */
+ ld 6,0(5)
+ mtcrf 0x01,11
+ cmpldi cr6,9,4
+ mtctr 8
+ ld 7,8(5)
+ subfic 9,10,64
+ bf 30,1f
+
+ /* there are at least two DWs to copy */
+ sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sld 0,7,10
+ srd 8,6,9
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,8f /* if total DWs = 3, then bypass loop */
+ bf 31,4f
+ /* there is a third DW to copy */
+ sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,8f /* if total DWs = 4, then bypass loop */
+ b 4f
+ .align 4
+1:
+ sld 0,6,10
+ srd 8,7,9
+ addi 5,5,16
+ or 0,0,8
+ bf 31,4f
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+4: sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sld 0,7,10
+ srd 8,6,9
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sld 0,7,10
+ srd 8,6,9
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ 4b
+ .align 4
+8:
+ /* calculate and store the final DW */
+ sld 0,6,10
+ srd 8,7,9
+ or 0,0,8
+ std 0,0(4)
+3:
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+	bne cr1,.L9	/* Copy the 1-7 byte tail at .L9; if it is 0 bytes we are done.  */
+ /* Return original dst pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
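
The strategy in the header comment above (short moves inline, longer moves
dispatched on destination/source doubleword alignment) reduces to roughly
the following C.  This is a minimal editorial sketch, not the patch's code:
the function name is hypothetical, the assembly unrolls the aligned loop to
32 bytes per iteration, and the unaligned-source path uses the
aligned-load/shift/store scheme sketched after the POWER6 memcpy below.

#include <stddef.h>
#include <stdint.h>

void *
memcpy_sketch (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if (len >= 32)
    {
      /* Move 0-7 bytes so the destination is doubleword aligned.  */
      while ((uintptr_t) d & 7)
        {
          *d++ = *s++;
          len--;
        }
      /* If the source is now doubleword aligned too, take the fast
         doubleword loop (unrolled 4x in the assembly).  */
      if (((uintptr_t) s & 7) == 0)
        for (; len >= 8; len -= 8, d += 8, s += 8)
          *(uint64_t *) d = *(const uint64_t *) s;
      /* Otherwise the assembly takes .L6, the aligned-load/shift/store
         path; a byte loop stands in for it here.  */
    }

  /* Short moves and the 1-7 byte tail; the assembly uses binary
     word/halfword/byte tests instead of a loop.  */
  while (len--)
    *d++ = *s++;
  return dst;
}
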
diff --git a/sysdeps/powerpc/powerpc64/power4/memset.S b/sysdeps/powerpc/powerpc64/power4/memset.S
new file mode 100644
index 0000000000..e7a259acdd
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -0,0 +1,275 @@
+/* Optimized memset implementation for PowerPC64.
+ Copyright (C) 1997, 1999, 2000, 2002, 2003, 2007
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+   The memset is done in three sizes: byte (8 bits), doubleword (64 bits),
+   and cache line (128 bytes on POWER4).  There is a special case for
+   setting cache lines to 0, to take advantage of the dcbz instruction.  */
+
+ .machine power4
+EALIGN (BP_SYM (memset), 5, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#if __BOUNDED_POINTERS__
+# define rMEMP0 r4 /* Original value of 1st arg. */
+# define rCHR r5 /* Char to set in each byte. */
+# define rLEN r6 /* Length of region to set. */
+# define rMEMP r10 /* Address at which we are storing. */
+#else
+# define rMEMP0 r3 /* Original value of 1st arg. */
+# define rCHR r4 /* Char to set in each byte. */
+# define rLEN r5 /* Length of region to set. */
+# define rMEMP r6 /* Address at which we are storing. */
+#endif
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+
+#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */
+#define rCLS r8 /* Cache line size obtained from static. */
+#define rCLM r9 /* Cache line size mask to check for cache alignment. */
+L(_memset):
+#if __BOUNDED_POINTERS__
+ cmpldi cr1, rRTN, 0
+ CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
+ beq cr1, L(b0)
+ STORE_RETURN_VALUE (rMEMP0)
+ STORE_RETURN_BOUNDS (rTMP, rTMP2)
+L(b0):
+#endif
+/* Take care of the case for size <= 8.  */
+ cmpldi cr1, rLEN, 8
+ andi. rALIGN, rMEMP0, 7
+ mr rMEMP, rMEMP0
+ ble- cr1, L(small)
+
+/* Align to doubleword boundary. */
+ cmpldi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ beq+ L(aligned2)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 8
+ cror 28,30,31 /* Detect odd word aligned. */
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ bt 29, L(g4)
+/* Process the even word of doubleword. */
+ bf+ 31, L(g2)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(g4x)
+L(g2):
+ sth rCHR, -6(rMEMP)
+L(g4x):
+ stw rCHR, -4(rMEMP)
+ b L(aligned)
+/* Process the odd word of doubleword. */
+L(g4):
+ bf 28, L(g4x) /* If false, word aligned on odd word. */
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+/* Handle the case of size <= 31.  */
+L(aligned2):
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x18
+ subfic rALIGN, rALIGN, 0x20
+ insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stdu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ std rCHR, -8(rMEMP2)
+ stdu rCHR, -16(rMEMP2)
+L(a2):
+
+/* Now aligned to a 32 byte boundary. */
+L(caligned):
+ cmpldi cr1, rCHR, 0
+ clrrdi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+L(nondcbz):
+ srdi rTMP, rALIGN, 5
+ mtctr rTMP
+ beq L(medium) /* We may not actually get to do a full line. */
+ clrldi. rLEN, rLEN, 59
+ add rMEMP, rMEMP, rALIGN
+ li rNEG64, -0x40
+ bdz L(cloopdone)
+
+L(c3): dcbtst rNEG64, rMEMP
+ std rCHR, -8(rMEMP)
+ std rCHR, -16(rMEMP)
+ std rCHR, -24(rMEMP)
+ stdu rCHR, -32(rMEMP)
+ bdnz L(c3)
+L(cloopdone):
+ std rCHR, -8(rMEMP)
+ std rCHR, -16(rMEMP)
+ cmpldi cr1, rLEN, 16
+ std rCHR, -24(rMEMP)
+ stdu rCHR, -32(rMEMP)
+ beqlr
+ add rMEMP, rMEMP, rALIGN
+ b L(medium_tail2)
+
+ .align 5
+/* Clear lines of memory in 128-byte chunks. */
+L(zloopstart):
+/* If the remaining length is less than 32 bytes, don't bother getting
+   the cache line size.  */
+ beq L(medium)
+ li rCLS,128 /* cache line size is 128 */
+
+/* Now we know the cache line size, and it is not 32 bytes, but
+   we may not yet be aligned to the cache line.  We may have a partial
+   line to fill, so touch it first.  */
+ dcbt 0,rMEMP
+L(getCacheAligned):
+ cmpldi cr1,rLEN,32
+ andi. rTMP,rMEMP,127
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,-32(rMEMP)
+ std rCHR,-24(rMEMP)
+ std rCHR,-16(rMEMP)
+ std rCHR,-8(rMEMP)
+ b L(getCacheAligned)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+L(cacheAligned):
+ cmpld cr1,rLEN,rCLS
+ blt cr1,L(handletail32)
+ dcbz 0,rMEMP
+ subf rLEN,rCLS,rLEN
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
+
+/* We are here because the cache line size was set and was not 32 bytes
+ and the remainder (rLEN) is less than the actual cache line size.
+ So set up the preconditions for L(nondcbz) and go there. */
+L(handletail32):
+ clrrwi. rALIGN, rLEN, 5
+ b L(nondcbz)
+
+ .align 5
+L(small):
+/* Memset of 8 bytes or less. */
+ cmpldi cr6, rLEN, 4
+ cmpldi cr5, rLEN, 1
+ ble cr6,L(le4)
+ subi rLEN, rLEN, 4
+ stb rCHR,0(rMEMP)
+ stb rCHR,1(rMEMP)
+ stb rCHR,2(rMEMP)
+ stb rCHR,3(rMEMP)
+ addi rMEMP,rMEMP, 4
+ cmpldi cr5, rLEN, 1
+L(le4):
+ cmpldi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ cmpldi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt- 29, L(medium_29t)
+L(medium_29f):
+ bge- cr1, L(medium_27t)
+ bflr- 28
+ std rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt- cr1, L(medium_27f)
+L(medium_27t):
+ std rCHR, -8(rMEMP)
+ stdu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr- 28
+L(medium_28t):
+ std rCHR, -8(rMEMP)
+ blr
+END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+libc_hidden_builtin_def (memset)
+
+/* Copied from bzero.S to prevent the linker from inserting a stub
+ between bzero and memset. */
+ENTRY (BP_SYM (__bzero))
+ CALL_MCOUNT 3
+#if __BOUNDED_POINTERS__
+ mr r6,r4
+ li r5,0
+ mr r4,r3
+ /* Tell memset that we don't want a return value. */
+ li r3,0
+ b L(_memset)
+#else
+ mr r5,r4
+ li r4,0
+ b L(_memset)
+#endif
+END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+
+weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
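
The rlwimi/insrdi sequence above replicates the fill byte through a
doubleword (byte -> halfword -> word -> doubleword) before the store loops.
A minimal C sketch of that structure, with a hypothetical name; the
cache-line special case (dcbz when the fill value is zero) is only noted
in a comment:

#include <stddef.h>
#include <stdint.h>

void *
memset_sketch (void *s, int c, size_t n)
{
  unsigned char *p = s;
  uint64_t v = (unsigned char) c;

  /* Replicate the byte through a doubleword.  */
  v |= v << 8;
  v |= v << 16;
  v |= v << 32;

  /* Align to a doubleword boundary; the assembly continues on to the
     32-byte boundary, and to the cache line when it can use dcbz.  */
  while (n > 0 && ((uintptr_t) p & 7))
    {
      *p++ = (unsigned char) c;
      n--;
    }
  /* When c == 0 and a full cache line remains, the assembly zeros it
     with a single dcbz instead of a run of std instructions.  */
  for (; n >= 8; n -= 8, p += 8)
    *(uint64_t *) p = v;
  while (n > 0)
    {
      *p++ = (unsigned char) c;
      n--;
    }
  return s;
}
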
diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S
new file mode 100644
index 0000000000..7a1665d2bc
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -0,0 +1,180 @@
+/* Optimized strncmp implementation for PowerPC64.
+ Copyright (C) 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* See strlen.s for comments on how the end-of-string testing works. */
+
+/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+
+EALIGN (BP_SYM(strncmp), 4, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rN r5 /* max string length */
+/* Note: The Bounded pointer support in this code is broken. This code
+   was inherited from PPC32 and that support was never completed.
+ Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */
+#define rWORD1 r6 /* current word in s1 */
+#define rWORD2 r7 /* current word in s2 */
+#define rWORD3 r10
+#define rWORD4 r11
+#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
+#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
+#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+
+ dcbt 0,rSTR1
+ or rTMP, rSTR2, rSTR1
+ lis r7F7F, 0x7f7f
+ dcbt 0,rSTR2
+ clrldi. rTMP, rTMP, 61
+ cmpldi cr1, rN, 0
+ lis rFEFE, -0x101
+ bne L(unaligned)
+/* We are doubleword aligned so set up for two loops: first a doubleword
+   loop, then fall into the byte loop for any residual.  */
+ srdi. rTMP, rN, 3
+ clrldi rN, rN, 61
+ addi rFEFE, rFEFE, -0x101
+ addi r7F7F, r7F7F, 0x7f7f
+ cmpldi cr1, rN, 0
+ beq L(unaligned)
+
+ mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+ sldi rTMP, rFEFE, 32
+ insrdi r7F7F, r7F7F, 32, 0
+ add rFEFE, rFEFE, rTMP
+ b L(g1)
+
+L(g0):
+ ldu rWORD1, 8(rSTR1)
+ bne- cr1, L(different)
+ ldu rWORD2, 8(rSTR2)
+L(g1): add rTMP, rFEFE, rWORD1
+ nor rNEG, r7F7F, rWORD1
+ bdz L(tail)
+ and. rTMP, rTMP, rNEG
+ cmpd cr1, rWORD1, rWORD2
+ beq+ L(g0)
+
+/* OK. We've hit the end of the string. We need to be careful that
+ we don't compare two strings as different because of gunk beyond
+ the end of the strings... */
+
+L(endstring):
+ and rTMP, r7F7F, rWORD1
+ beq cr1, L(equal)
+ add rTMP, rTMP, r7F7F
+ xor. rBITDIF, rWORD1, rWORD2
+
+ andc rNEG, rNEG, rTMP
+ blt- L(highbit)
+ cntlzd rBITDIF, rBITDIF
+ cntlzd rNEG, rNEG
+ addi rNEG, rNEG, 7
+ cmpd cr1, rNEG, rBITDIF
+ sub rRTN, rWORD1, rWORD2
+ blt- cr1, L(equal)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ldu rWORD1, -8(rSTR1)
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ srdi rWORD2, rWORD2, 56
+ srdi rWORD1, rWORD1, 56
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+
+/* Oh well. In this case, we just do a byte-by-byte comparison. */
+ .align 4
+L(tail):
+ and. rTMP, rTMP, rNEG
+ cmpd cr1, rWORD1, rWORD2
+ bne- L(endstring)
+ addi rSTR1, rSTR1, 8
+ bne- cr1, L(different)
+ addi rSTR2, rSTR2, 8
+ cmpldi cr1, rN, 0
+L(unaligned):
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ ble cr1, L(ux)
+L(uz):
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ .align 4
+L(u1):
+ cmpdi cr1, rWORD1, 0
+ bdz L(u4)
+ cmpd rWORD1, rWORD2
+ beq- cr1, L(u4)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- L(u4)
+ cmpdi cr1, rWORD3, 0
+ bdz L(u3)
+ cmpd rWORD3, rWORD4
+ beq- cr1, L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne- L(u3)
+ cmpdi cr1, rWORD1, 0
+ bdz L(u4)
+ cmpd rWORD1, rWORD2
+ beq- cr1, L(u4)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne- L(u4)
+ cmpdi cr1, rWORD3, 0
+ bdz L(u3)
+ cmpd rWORD3, rWORD4
+ beq- cr1, L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ beq+ L(u1)
+
+L(u3): sub rRTN, rWORD3, rWORD4
+ blr
+L(u4): sub rRTN, rWORD1, rWORD2
+ blr
+L(ux):
+ li rRTN, 0
+ blr
+END (BP_SYM (strncmp))
+libc_hidden_builtin_def (strncmp)
+
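
The rFEFE/r7F7F constants above implement the standard branch-free
zero-byte test that lets the doubleword loop stop at the terminating NUL.
Since 0xfefefefefefefeff is -0x0101010101010101 modulo 2^64, adding it
subtracts one from every byte; a byte can end up with its high bit set in
the sum, while having had it clear in the original word, only by wrapping
from zero, so the masked result is nonzero exactly when the word contains
a zero byte.  In C (hypothetical helper name):

#include <stdint.h>

/* Nonzero iff some byte of X is zero; this is what the
   "and. rTMP, rTMP, rNEG" above tests.  */
static inline uint64_t
has_zero_byte (uint64_t x)
{
  uint64_t minus1 = x + 0xfefefefefefefeffULL;   /* x - 0x0101...01 */
  uint64_t nohigh = ~(x | 0x7f7f7f7f7f7f7f7fULL);
  return minus1 & nohigh;
}
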
diff --git a/sysdeps/powerpc/powerpc64/power4/wordcopy.c b/sysdeps/powerpc/powerpc64/power4/wordcopy.c
new file mode 100644
index 0000000000..f427b48e7a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/wordcopy.c
@@ -0,0 +1 @@
+#include "../../powerpc32/power4/wordcopy.c"
diff --git a/sysdeps/powerpc/powerpc64/power5+/Implies b/sysdeps/powerpc/powerpc64/power5+/Implies
new file mode 100644
index 0000000000..ac431fa96e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/Implies b/sysdeps/powerpc/powerpc64/power5+/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S
new file mode 100644
index 0000000000..1e4851db9e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S
@@ -0,0 +1,38 @@
+/* ceil function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__ceil, 4, 0)
+ CALL_MCOUNT 0
+ frip fp1, fp1
+ blr
+ END (__ceil)
+
+weak_alias (__ceil, ceil)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__ceil, ceill)
+strong_alias (__ceil, __ceill)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __ceil, ceill, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S
new file mode 100644
index 0000000000..38c51d5ff1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S
@@ -0,0 +1,31 @@
+/* ceilf function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__ceilf, 4, 0)
+ CALL_MCOUNT 0
+	frip	fp1, fp1	/* The rounding instructions operate on doubles.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__ceilf)
+
+weak_alias (__ceilf, ceilf)
+
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S
new file mode 100644
index 0000000000..86f07c8b59
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S
@@ -0,0 +1,38 @@
+/* floor function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__floor, 4, 0)
+ CALL_MCOUNT 0
+ frim fp1, fp1
+ blr
+ END (__floor)
+
+weak_alias (__floor, floor)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__floor, floorl)
+strong_alias (__floor, __floorl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __floor, floorl, GLIBC_2_0)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S
new file mode 100644
index 0000000000..e7bdccf513
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S
@@ -0,0 +1,31 @@
+/* floorf function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__floorf, 4, 0)
+ CALL_MCOUNT 0
+	frim	fp1, fp1	/* The rounding instructions operate on doubles.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__floorf)
+
+weak_alias (__floorf, floorf)
+
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S
new file mode 100644
index 0000000000..6a022769f4
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S
@@ -0,0 +1,59 @@
+/* llround function. POWER5+, PowerPC64 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3] llround (double x [fp1])
+   IEEE 1003.1 llround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However the PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation.  In case of a
+   tie, choose the one that is even (least significant bit 0)."
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Doubleword
+   with round toward Zero instruction.  */
+
+ .machine "power5"
+EALIGN (__llround, 4, 0)
+ CALL_MCOUNT 0
+	frin	fp2, fp1	/* Round to nearest, halfway cases away from zero.  */
+ fctidz fp3, fp2 /* Convert To Integer DW round toward 0. */
+ stfd fp3, -16(r1)
+	nop	/* Ensure the following load is in a different dispatch group */
+ nop /* to avoid pipe stall on POWER4&5. */
+ nop
+ ld r3, -16(r1)
+ blr
+ END (__llround)
+
+strong_alias (__llround, __lround)
+weak_alias (__llround, llround)
+weak_alias (__lround, lround)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
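
A close C analogue of the frin/fctidz pair above (editorial sketch with a
hypothetical name; unlike frin, the floor/ceil form can differ in the last
ulp when x + 0.5 is itself rounded):

#include <math.h>

long long
llround_sketch (double x)
{
  /* frin: round to nearest, halfway cases away from zero.  */
  double r = (x >= 0.0) ? floor (x + 0.5) : ceil (x - 0.5);
  /* fctidz: convert to a 64-bit integer, rounding toward zero
     (a no-op here since r is already integral).  */
  return (long long) r;
}
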
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S
new file mode 100644
index 0000000000..7082734bb5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S
@@ -0,0 +1,38 @@
+/* round function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__round, 4, 0)
+ CALL_MCOUNT 0
+ frin fp1, fp1
+ blr
+ END (__round)
+
+weak_alias (__round, round)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__round, roundl)
+strong_alias (__round, __roundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __round, roundl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S
new file mode 100644
index 0000000000..30b83be9e7
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S
@@ -0,0 +1,31 @@
+/* roundf function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__roundf, 4, 0)
+ CALL_MCOUNT 0
+	frin	fp1, fp1	/* The rounding instructions operate on doubles.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__roundf)
+
+weak_alias (__roundf, roundf)
+
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S
new file mode 100644
index 0000000000..4e6e4d790e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S
@@ -0,0 +1,38 @@
+/* trunc function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power5"
+EALIGN (__trunc, 4, 0)
+ CALL_MCOUNT 0
+ friz fp1, fp1
+ blr
+ END (__trunc)
+
+weak_alias (__trunc, trunc)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__trunc, truncl)
+strong_alias (__trunc, __truncl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __trunc, truncl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S b/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S
new file mode 100644
index 0000000000..61d9cb372d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S
@@ -0,0 +1,31 @@
+/* truncf function. PowerPC64/power5+ version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .machine "power5"
+EALIGN (__truncf, 4, 0)
+ CALL_MCOUNT 0
+	friz	fp1, fp1	/* The rounding instructions operate on doubles.  */
+	frsp	fp1, fp1	/* But we need to set overflow for float.  */
+ blr
+ END (__truncf)
+
+weak_alias (__truncf, truncf)
+
diff --git a/sysdeps/powerpc/powerpc64/power5/Implies b/sysdeps/powerpc/powerpc64/power5/Implies
new file mode 100644
index 0000000000..ac431fa96e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4
diff --git a/sysdeps/powerpc/powerpc64/power5/fpu/Implies b/sysdeps/powerpc/powerpc64/power5/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power5/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power6/Implies b/sysdeps/powerpc/powerpc64/power6/Implies
new file mode 100644
index 0000000000..0f35f37c77
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power5+
diff --git a/sysdeps/powerpc/powerpc64/power6/fpu/Implies b/sysdeps/powerpc/powerpc64/power6/fpu/Implies
new file mode 100644
index 0000000000..885e39c003
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/fpu/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/power5+/fpu
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power6/memcpy.S b/sysdeps/powerpc/powerpc64/power6/memcpy.S
new file mode 100644
index 0000000000..d105f8302e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/memcpy.S
@@ -0,0 +1,1170 @@
+/* Optimized memcpy implementation for POWER6 (PowerPC64).
+ Copyright (C) 2003, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'.
+
+   Memcpy handles short copies (< 32 bytes) using binary move blocks
+   (no loops) of lwz/stw.  The tail (remaining 1-3 bytes) is handled
+   with the appropriate combination of byte and halfword load/stores.
+   There is minimal effort to optimize the alignment of short moves.
+   The 64-bit implementations of POWER3 and POWER4 do a reasonable job
+   of handling unaligned load/stores that do not cross 32-byte boundaries.
+
+   Longer moves (>= 32 bytes) justify the effort to get at least the
+   destination doubleword (8-byte) aligned.  Further optimization is
+   possible when both source and destination are doubleword aligned.
+   Each case has an optimized unrolled loop.
+
+   For POWER6 unaligned loads will take a 20+ cycle hiccup for any
+   L1 cache miss that crosses a 32- or 128-byte boundary.  Stores
+   are more forgiving and do not take a hiccup until page or
+   segment boundaries are crossed.  So we require doubleword alignment
+   for the source but may take a risk and only require word alignment
+   for the destination.  */
+
+ .machine "power6"
+EALIGN (BP_SYM (memcpy), 7, 0)
+ CALL_MCOUNT 3
+
+ cmpldi cr1,5,31
+ neg 0,3
+ std 3,-16(1)
+ std 31,-8(1)
+	andi. 11,3,7	/* check alignment of dst.  */
+	clrldi 0,0,61	/* Number of bytes until the 1st doubleword of dst.  */
+	clrldi 10,4,61	/* check alignment of src.  */
+ cmpldi cr6,5,8
+ ble- cr1,.L2 /* If move < 32 bytes use short move code. */
+ mtcrf 0x01,0
+ cmpld cr6,10,11
+ srdi 9,5,3 /* Number of full double words remaining. */
+ beq .L0
+
+ subf 5,0,5
+  /* Move 0-7 bytes as needed to get the destination doubleword aligned.
+     Duplicate some code to maximize fall-through and minimize AGEN delays.  */
+1: bf 31,2f
+ lbz 6,0(4)
+ stb 6,0(3)
+ bf 30,5f
+ lhz 6,1(4)
+ sth 6,1(3)
+ bf 29,0f
+ lwz 6,3(4)
+ stw 6,3(3)
+ b 0f
+5:
+ bf 29,0f
+ lwz 6,1(4)
+ stw 6,1(3)
+ b 0f
+
+2: bf 30,4f
+ lhz 6,0(4)
+ sth 6,0(3)
+ bf 29,0f
+ lwz 6,2(4)
+ stw 6,2(3)
+ b 0f
+
+4: bf 29,0f
+ lwz 6,0(4)
+ stw 6,0(3)
+0:
+/* Add the number of bytes until the 1st doubleword of dst to src and dst. */
+ add 4,4,0
+ add 3,3,0
+
+	clrldi 10,4,61	/* check alignment of src again.  */
+ srdi 9,5,3 /* Number of full double words remaining. */
+
+   /* Copy doublewords from source to destination, assuming the
+      destination is aligned on a doubleword boundary.
+
+      At this point we know there are at least 25 bytes left (32-7) to copy.
+      The next step is to determine if the source is also doubleword aligned.
+      If not, branch to the unaligned move code at .L6, which uses
+      a load, shift, store strategy.
+
+      Otherwise source and destination are doubleword aligned, and we can
+      use the optimized doubleword copy loop.  */
+ .align 4
+.L0:
+ clrldi 11,5,61
+ andi. 0,5,0x78
+ srdi 12,5,7 /* Number of 128-byte blocks to move. */
+ cmpldi cr1,11,0 /* If the tail is 0 bytes */
+ bne- cr6,.L6 /* If source is not DW aligned. */
+
+ /* Move doublewords where destination and source are DW aligned.
+     Use an unrolled loop to copy 16 doublewords (128 bytes) per iteration.
+     If the copy is not an exact multiple of 128 bytes, 1-15
+     doublewords are copied as needed to set up the main loop.  After
+     the main loop exits there may be a tail of 1-7 bytes.  These bytes
+     are copied a word/halfword/byte at a time as needed to preserve
+ alignment.
+
+ For POWER6 the L1 is store-through and the L2 is store-in. The
+ L2 is clocked at half CPU clock so we can store 16 bytes every
+ other cycle. POWER6 also has a load/store bypass so we can do
+ load, load, store, store every 2 cycles.
+
+ The following code is sensitive to cache line alignment. Do not
+     make any change without first making sure they don't result in
+ splitting ld/std pairs across a cache line. */
+
+ mtcrf 0x02,5
+ mtcrf 0x01,5
+ cmpldi cr5,12,1
+ beq L(das_loop)
+
+ bf 25,4f
+ .align 3
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 11,4
+ mr 10,3
+ std 6,0(3)
+ std 7,8(3)
+ ld 6,16(4)
+ ld 7,24(4)
+ std 6,16(3)
+ std 7,24(3)
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ addi 4,4,64
+ addi 3,3,64
+ std 6,0+32(10)
+ std 7,8+32(10)
+ ld 6,16+32(11)
+ ld 7,24+32(11)
+ std 6,16+32(10)
+ std 7,24+32(10)
+4:
+ mr 10,3
+ bf 26,2f
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 11,4
+ nop
+ std 6,0(3)
+ std 7,8(3)
+ ld 6,16(4)
+ ld 7,24(4)
+ addi 4,4,32
+ std 6,16(3)
+ std 7,24(3)
+ addi 3,3,32
+6:
+ nop
+ bf 27,5f
+ ld 6,0+32(11)
+ ld 7,8+32(11)
+ addi 4,4,16
+ addi 3,3,16
+ std 6,0+32(10)
+ std 7,8+32(10)
+ bf 28,L(das_loop_s)
+ ld 0,16+32(11)
+ addi 4,4,8
+ addi 3,3,8
+ std 0,16+32(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+5:
+ nop
+ bf 28,L(das_loop_s)
+ ld 6,32(11)
+ addi 4,4,8
+ addi 3,3,8
+ std 6,32(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+2:
+ mr 11,4
+ bf 27,1f
+ ld 6,0(4)
+ ld 7,8(4)
+ addi 4,4,16
+ addi 3,3,16
+ std 6,0(10)
+ std 7,8(10)
+ bf 28,L(das_loop_s)
+ ld 0,16(11)
+ addi 4,11,24
+ addi 3,10,24
+ std 0,16(10)
+ blt cr5,L(das_tail)
+ b L(das_loop)
+ .align 3
+1:
+ nop
+ bf 28,L(das_loop_s)
+ ld 6,0(4)
+ addi 4,4,8
+ addi 3,3,8
+ std 6,0(10)
+L(das_loop_s):
+ nop
+ blt cr5,L(das_tail)
+ .align 4
+L(das_loop):
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 10,3
+ mr 11,4
+ std 6,0(3)
+ std 7,8(3)
+ addi 12,12,-1
+ nop
+ ld 8,16(4)
+ ld 0,24(4)
+ std 8,16(3)
+ std 0,24(3)
+
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ std 6,0+32(3)
+ std 7,8+32(3)
+ ld 8,16+32(4)
+ ld 0,24+32(4)
+ std 8,16+32(3)
+ std 0,24+32(3)
+
+ ld 6,0+64(11)
+ ld 7,8+64(11)
+ std 6,0+64(10)
+ std 7,8+64(10)
+ ld 8,16+64(11)
+ ld 0,24+64(11)
+ std 8,16+64(10)
+ std 0,24+64(10)
+
+ ld 6,0+96(11)
+ ld 7,8+96(11)
+ addi 4,4,128
+ addi 3,3,128
+ std 6,0+96(10)
+ std 7,8+96(10)
+ ld 8,16+96(11)
+ ld 0,24+96(11)
+ std 8,16+96(10)
+ std 0,24+96(10)
+ ble cr5,L(das_loop_e)
+
+ mtctr 12
+ .align 4
+L(das_loop2):
+ ld 6,0(4)
+ ld 7,8(4)
+ mr 10,3
+ mr 11,4
+ std 6,0(3)
+ std 7,8(3)
+ ld 8,16(4)
+ ld 0,24(4)
+ std 8,16(3)
+ std 0,24(3)
+
+ ld 6,0+32(4)
+ ld 7,8+32(4)
+ std 6,0+32(3)
+ std 7,8+32(3)
+ ld 8,16+32(4)
+ ld 0,24+32(4)
+ std 8,16+32(3)
+ std 0,24+32(3)
+
+ ld 6,0+64(11)
+ ld 7,8+64(11)
+ std 6,0+64(10)
+ std 7,8+64(10)
+ ld 8,16+64(11)
+ ld 0,24+64(11)
+ std 8,16+64(10)
+ std 0,24+64(10)
+
+ ld 6,0+96(11)
+ ld 7,8+96(11)
+ addi 4,4,128
+ addi 3,3,128
+ std 6,0+96(10)
+ std 7,8+96(10)
+ ld 8,16+96(11)
+ ld 0,24+96(11)
+ std 8,16+96(10)
+ std 0,24+96(10)
+ bdnz L(das_loop2)
+L(das_loop_e):
+/* Check for a 1-7 byte tail; return if none.  */
+ bne cr1,L(das_tail2)
+/* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(das_tail):
+ beq cr1,0f
+
+L(das_tail2):
+/* At this point we have a tail of 0-7 bytes and we know that the
+   destination is doubleword aligned.  */
+4: bf 29,2f
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,5f
+ lhz 6,4(4)
+ sth 6,4(3)
+ bf 31,0f
+ lbz 6,6(4)
+ stb 6,6(3)
+ b 0f
+5: bf 31,0f
+ lbz 6,4(4)
+ stb 6,4(3)
+ b 0f
+
+2: bf 30,1f
+ lhz 6,0(4)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 6,2(4)
+ stb 6,2(3)
+ b 0f
+
+1: bf 31,0f
+ lbz 6,0(4)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Copy up to 31 bytes.  This is divided into two cases: 0-8 bytes and
+   9-31 bytes.  Each case is handled without loops, using binary
+   (1,2,4,8) tests.
+
+   In the short (0-8 byte) case no attempt is made to force alignment
+   of either source or destination.  The hardware will handle the
+   unaligned load/stores with small delays for crossing 32-, 128-,
+   and 4096-byte boundaries.  Since these short moves are unlikely to be
+   unaligned or cross these boundaries, the overhead to force
+   alignment is not justified.
+
+   The longer (9-31 byte) move is more likely to cross 32- or 128-byte
+   boundaries.  Since only loads are sensitive to the 32-/128-byte
+   boundaries it is more important to align the source than the
+   destination.  If the source is not already word aligned, we first
+   move 1-3 bytes as needed.  Since we are only word aligned we don't
+   use doubleword load/stores, ensuring that all loads are aligned.
+ While the destination and stores may still be unaligned, this
+ is only an issue for page (4096 byte boundary) crossing, which
+ should be rare for these short moves. The hardware handles this
+ case automatically with a small (~20 cycle) delay. */
+ .align 4
+.L2:
+ mtcrf 0x01,5
+ neg 8,4
+ clrrdi 11,4,2
+ andi. 0,8,3
+ ble cr6,.LE8 /* Handle moves of 0-8 bytes. */
+/* At least 9 bytes left. Get the source word aligned. */
+ cmpldi cr1,5,16
+ mr 10,5
+ mr 12,4
+ cmpldi cr6,0,2
+ beq L(dus_tail) /* If the source is already word aligned skip this. */
+/* Copy 1-3 bytes to get source address word aligned. */
+ lwz 6,0(11)
+ subf 10,0,5
+ add 12,4,0
+ blt cr6,5f
+ srdi 7,6,16
+ bgt cr6,3f
+ sth 6,0(3)
+ b 7f
+ .align 4
+3:
+ stb 7,0(3)
+ sth 6,1(3)
+ b 7f
+ .align 4
+5:
+ stb 6,0(3)
+7:
+ cmpldi cr1,10,16
+ add 3,3,0
+ mtcrf 0x01,10
+ .align 4
+L(dus_tail):
+/* At least 6 bytes left and the source is word aligned. This allows
+ some speculative loads up front. */
+/* We need to special case the fall-through because the biggest delays
+ are due to address computation not being ready in time for the
+ AGEN. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ blt cr1,L(dus_tail8)
+ cmpldi cr0,10,24
+L(dus_tail16): /* Move 16 bytes. */
+ stw 6,0(3)
+ stw 7,4(3)
+ lwz 6,8(12)
+ lwz 7,12(12)
+ stw 6,8(3)
+ stw 7,12(3)
+/* Move 8 bytes more. */
+ bf 28,L(dus_tail16p8)
+ cmpldi cr1,10,28
+ lwz 6,16(12)
+ lwz 7,20(12)
+ stw 6,16(3)
+ stw 7,20(3)
+/* Move 4 bytes more. */
+ bf 29,L(dus_tail16p4)
+ lwz 6,24(12)
+ stw 6,24(3)
+ addi 12,12,28
+ addi 3,3,28
+ bgt cr1,L(dus_tail2)
+ /* exactly 28 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p8):  /* less than 8 bytes left.  */
+ beq cr1,L(dus_tailX) /* exactly 16 bytes, early exit. */
+ cmpldi cr1,10,20
+ bf 29,L(dus_tail16p2)
+/* Move 4 bytes more. */
+ lwz 6,16(12)
+ stw 6,16(3)
+ addi 12,12,20
+ addi 3,3,20
+ bgt cr1,L(dus_tail2)
+ /* exactly 20 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p4):  /* less than 4 bytes left.  */
+ addi 12,12,24
+ addi 3,3,24
+ bgt cr0,L(dus_tail2)
+ /* exactly 24 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail16p2):  /* 16 bytes moved, less than 4 bytes left.  */
+ addi 12,12,16
+ addi 3,3,16
+ b L(dus_tail2)
+
+ .align 4
+L(dus_tail8): /* Move 8 bytes. */
+/* r6, r7 already loaded speculatively. */
+ cmpldi cr1,10,8
+ cmpldi cr0,10,12
+ bf 28,L(dus_tail4)
+ .align 2
+ stw 6,0(3)
+ stw 7,4(3)
+/* Move 4 bytes more. */
+ bf 29,L(dus_tail8p4)
+ lwz 6,8(12)
+ stw 6,8(3)
+ addi 12,12,12
+ addi 3,3,12
+ bgt cr0,L(dus_tail2)
+ /* exactly 12 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_tail8p4):  /* less than 4 bytes left.  */
+ addi 12,12,8
+ addi 3,3,8
+ bgt cr1,L(dus_tail2)
+ /* exactly 8 bytes. Return original dst pointer and exit. */
+ ld 3,-16(1)
+ blr
+
+ .align 4
+L(dus_tail4): /* Move 4 bytes. */
+/* r6 already loaded speculatively.  If we are here we know there are
+   more than 4 bytes left, so there is no need to test.  */
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+L(dus_tail2): /* Move 2-3 bytes. */
+ bf 30,L(dus_tail1)
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,L(dus_tailX)
+ lbz 7,2(12)
+ stb 7,2(3)
+ ld 3,-16(1)
+ blr
+L(dus_tail1): /* Move 1 byte. */
+ bf 31,L(dus_tailX)
+ lbz 6,0(12)
+ stb 6,0(3)
+L(dus_tailX):
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+/* Special case to copy 0-8 bytes. */
+ .align 4
+.LE8:
+ mr 12,4
+ bne cr6,L(dus_4)
+/* Exactly 8 bytes.  We may cross a 32-/128-byte boundary and take a ~20
+   cycle delay.  This case should be rare and any attempt to avoid this
+   would take most of 20 cycles anyway.  */
+ ld 6,0(4)
+ std 6,0(3)
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_4):
+ bf 29,L(dus_tail2)
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,L(dus_5)
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,L(dus_0)
+ lbz 8,6(4)
+ stb 8,6(3)
+ ld 3,-16(1)
+ blr
+ .align 4
+L(dus_5):
+ bf 31,L(dus_0)
+ lbz 6,4(4)
+ stb 6,4(3)
+L(dus_0):
+ /* Return original dst pointer. */
+ ld 3,-16(1)
+ blr
+
+ .align 4
+.L6:
+ cfi_offset(31,-8)
+ mr 12,4
+ mr 31,5
+ /* Copy doublewords where the destination is aligned but the source is
+ not. Use aligned doubleword loads from the source, shifted to realign
+ the data, to allow aligned destination stores. */
+ addi 11,9,-1 /* loop DW count is one less than total */
+ subf 5,10,12 /* Move source addr to previous full double word. */
+ cmpldi cr5, 10, 2
+ cmpldi cr0, 10, 4
+ mr 4,3
+ srdi 8,11,2 /* calculate the 32 byte loop count */
+ ld 6,0(5) /* pre load 1st full doubleword. */
+ mtcrf 0x01,11
+ cmpldi cr6,9,4
+ mtctr 8
+ ld 7,8(5) /* pre load 2nd full doubleword. */
+ bge cr0, L(du4_do)
+ blt cr5, L(du1_do)
+ beq cr5, L(du2_do)
+ b L(du3_do)
+
+ .align 4
+L(du1_do):
+ bf 30,L(du1_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 8
+ srdi 8,6, 64-8
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du1_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du1_fini) /* if total DWs = 4, then bypass loop */
+ b L(du1_loop)
+ .align 4
+L(du1_1dw):
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du1_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du1_loop):
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 8
+ srdi 8,6, 64-8
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 8
+ srdi 8,6, 64-8
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du1_loop)
+ .align 4
+L(du1_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 8
+ srdi 8,7, 64-8
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du2_do):
+ bf 30,L(du2_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 16
+ srdi 8,6, 64-16
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du2_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du2_fini) /* if total DWs = 4, then bypass loop */
+ b L(du2_loop)
+ .align 4
+L(du2_1dw):
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du2_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du2_loop):
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 16
+ srdi 8,6, 64-16
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 16
+ srdi 8,6, 64-16
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du2_loop)
+ .align 4
+L(du2_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 16
+ srdi 8,7, 64-16
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du3_do):
+ bf 30,L(du3_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 24
+ srdi 8,6, 64-24
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du3_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du3_fini) /* if total DWs = 4, then bypass loop */
+ b L(du3_loop)
+ .align 4
+L(du3_1dw):
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du3_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du3_loop):
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 24
+ srdi 8,6, 64-24
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 24
+ srdi 8,6, 64-24
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du3_loop)
+ .align 4
+L(du3_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 24
+ srdi 8,7, 64-24
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du4_do):
+ cmpldi cr5, 10, 6
+ beq cr0, L(du4_dox)
+ blt cr5, L(du5_do)
+ beq cr5, L(du6_do)
+ b L(du7_do)
+L(du4_dox):
+ bf 30,L(du4_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 32
+ srdi 8,6, 64-32
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du4_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du4_fini) /* if total DWs = 4, then bypass loop */
+ b L(du4_loop)
+ .align 4
+L(du4_1dw):
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du4_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du4_loop):
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 32
+ srdi 8,6, 64-32
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 32
+ srdi 8,6, 64-32
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du4_loop)
+ .align 4
+L(du4_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 32
+ srdi 8,7, 64-32
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du5_do):
+ bf 30,L(du5_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 40
+ srdi 8,6, 64-40
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du5_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du5_fini) /* if total DWs = 4, then bypass loop */
+ b L(du5_loop)
+ .align 4
+L(du5_1dw):
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du5_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du5_loop):
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 40
+ srdi 8,6, 64-40
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 40
+ srdi 8,6, 64-40
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du5_loop)
+ .align 4
+L(du5_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 40
+ srdi 8,7, 64-40
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du6_do):
+ bf 30,L(du6_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 48
+ srdi 8,6, 64-48
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du6_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du6_fini) /* if total DWs = 4, then bypass loop */
+ b L(du6_loop)
+ .align 4
+L(du6_1dw):
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du6_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du6_loop):
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 48
+ srdi 8,6, 64-48
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 48
+ srdi 8,6, 64-48
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du6_loop)
+ .align 4
+L(du6_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 48
+ srdi 8,7, 64-48
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du7_do):
+ bf 30,L(du7_1dw)
+
+ /* there are at least two DWs to copy */
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ ld 6,16(5)
+ std 0,0(4)
+ sldi 0,7, 56
+ srdi 8,6, 64-56
+ or 0,0,8
+ ld 7,24(5)
+ std 0,8(4)
+ addi 4,4,16
+ addi 5,5,32
+ blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */
+ bf 31,L(du7_loop)
+ /* there is a third DW to copy */
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ std 0,0(4)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ addi 4,4,8
+ beq cr6,L(du7_fini) /* if total DWs = 4, then bypass loop */
+ b L(du7_loop)
+ .align 4
+L(du7_1dw):
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ addi 5,5,16
+ or 0,0,8
+ bf 31,L(du7_loop)
+ mr 6,7
+ ld 7,0(5)
+ addi 5,5,8
+ std 0,0(4)
+ addi 4,4,8
+ .align 4
+/* copy 32 bytes at a time */
+L(du7_loop):
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ ld 6,0(5)
+ std 0,0(4)
+ sldi 0,7, 56
+ srdi 8,6, 64-56
+ or 0,0,8
+ ld 7,8(5)
+ std 0,8(4)
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ ld 6,16(5)
+ std 0,16(4)
+ sldi 0,7, 56
+ srdi 8,6, 64-56
+ or 0,0,8
+ ld 7,24(5)
+ std 0,24(4)
+ addi 5,5,32
+ addi 4,4,32
+ bdnz+ L(du7_loop)
+ .align 4
+L(du7_fini):
+ /* calculate and store the final DW */
+ sldi 0,6, 56
+ srdi 8,7, 64-56
+ or 0,0,8
+ std 0,0(4)
+ b L(du_done)
+
+ .align 4
+L(du_done):
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+ beq cr1,0f /* If the tail is 0 bytes we are done! */
+
+ add 3,3,0
+ add 12,12,0
+/* At this point we have a tail of 0-7 bytes and we know that the
+   destination is doubleword aligned.  */
+4: bf 29,2f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: bf 30,1f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,0f
+ lbz 6,0(12)
+ stb 6,0(3)
+0:
+ /* Return original dst pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
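
The du4..du7 blocks above are all instances of one shift-and-merge scheme for a source pointer that is 4-7 bytes past a doubleword boundary: two registers hold consecutive aligned doublewords, and each store combines the tail of one with the head of the next via sldi/srdi/or. A minimal C sketch of the idea, assuming a big-endian 64-bit target; dw_shift_copy and its arguments are hypothetical names, not part of glibc:

#include <stdint.h>
#include <stddef.h>

/* Copy n doublewords to an aligned dst from a src that is off bytes
   (0 < off < 8) past an 8-byte boundary.  Mirrors the sldi/srdi/or
   sequence in the loops above.  */
static void
dw_shift_copy (uint64_t *dst, const unsigned char *src, size_t n, int off)
{
  const uint64_t *s = (const uint64_t *) (src - off); /* round down to a DW */
  int sh_1 = 8 * off;      /* bits of the current DW already consumed */
  int sh_2 = 64 - sh_1;    /* bits to pull in from the next DW */
  uint64_t a = *s++;       /* current doubleword (register 6 above) */

  while (n-- > 0)
    {
      uint64_t b = *s++;                   /* next doubleword (register 7) */
      *dst++ = (a << sh_1) | (b >> sh_2);  /* sldi / srdi / or / std */
      a = b;
    }
}
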
diff --git a/sysdeps/powerpc/powerpc64/power6/memset.S b/sysdeps/powerpc/powerpc64/power6/memset.S
new file mode 100644
index 0000000000..ea74c117dd
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/memset.S
@@ -0,0 +1,419 @@
+/* Optimized 64-bit memset implementation for POWER6.
+ Copyright (C) 1997, 1999, 2000, 2002, 2003, 2007
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), doubleword (64 bits),
+ and cache line (128 bytes). There is a special case for setting whole
+ cache lines to 0, to take advantage of the dcbz instruction. */
+
+ .machine power6
+EALIGN (BP_SYM (memset), 7, 0)
+ CALL_MCOUNT 3
+
+#define rTMP r0
+#define rRTN r3 /* Initial value of 1st argument. */
+#if __BOUNDED_POINTERS__
+# define rMEMP0 r4 /* Original value of 1st arg. */
+# define rCHR r5 /* Char to set in each byte. */
+# define rLEN r6 /* Length of region to set. */
+# define rMEMP r10 /* Address at which we are storing. */
+#else
+# define rMEMP0 r3 /* Original value of 1st arg. */
+# define rCHR r4 /* Char to set in each byte. */
+# define rLEN r5 /* Length of region to set. */
+# define rMEMP r6 /* Address at which we are storing. */
+#endif
+#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */
+#define rMEMP2 r8
+#define rMEMP3 r9 /* Alt mem pointer. */
+L(_memset):
+#if __BOUNDED_POINTERS__
+ cmpldi cr1, rRTN, 0
+ CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
+ beq cr1, L(b0)
+ STORE_RETURN_VALUE (rMEMP0)
+ STORE_RETURN_BOUNDS (rTMP, rTMP2)
+L(b0):
+#endif
+/* Take care of case for size <= 8. */
+ cmpldi cr1, rLEN, 8
+ andi. rALIGN, rMEMP0, 7
+ mr rMEMP, rMEMP0
+ ble cr1, L(small)
+
+/* Align to doubleword boundary. */
+ cmpldi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ beq+ L(aligned2)
+ mtcrf 0x01, rMEMP0
+ subfic rALIGN, rALIGN, 8
+ cror 28,30,31 /* Detect odd word aligned. */
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ bt 29, L(g4)
+/* Process the even word of doubleword. */
+ bf+ 31, L(g2)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(g4x)
+L(g2):
+ sth rCHR, -6(rMEMP)
+L(g4x):
+ stw rCHR, -4(rMEMP)
+ b L(aligned)
+/* Process the odd word of doubleword. */
+L(g4):
+ bf 28, L(g4x) /* If false, word aligned on odd word. */
+ bf+ 31, L(g0)
+ stb rCHR, 0(rMEMP0)
+ bt 30, L(aligned)
+L(g0):
+ sth rCHR, -2(rMEMP)
+
+/* Handle the case of size <= 31. */
+L(aligned2):
+ rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+L(aligned):
+ mtcrf 0x01, rLEN
+ ble cr5, L(medium)
+/* Align to 32-byte boundary. */
+ andi. rALIGN, rMEMP, 0x18
+ subfic rALIGN, rALIGN, 0x20
+ insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stdu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ std rCHR, -8(rMEMP2)
+ stdu rCHR, -16(rMEMP2)
+L(a2):
+
+/* Now aligned to a 32-byte boundary. */
+ .align 4
+L(caligned):
+ cmpldi cr1, rCHR, 0
+ clrrdi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN
+ beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */
+ beq L(medium) /* We may not actually get to do a full line. */
+ .align 4
+/* Storing a non-zero "c" value. We are aligned at a sector (32-byte)
+ boundary but may not be at a cache line (128-byte) boundary. */
+L(nzloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+ If rLEN is less than the distance to the next cache-line boundary, use
+ the cacheAligned1 code to finish the tail. */
+ cmpldi cr1,rLEN,128
+
+ andi. rTMP,rMEMP,127
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ std rCHR,-8(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ addi rMEMP,rMEMP,32
+ std rCHR,8(rMEMP3)
+ andi. rTMP,rMEMP,127
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+
+ beq L(nzCacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,128
+ std rCHR,40(rMEMP3)
+ cmpldi cr6,rLEN,256
+ li rMEMP2,128
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ b L(nzCacheAligned128)
+
+/* Now we are aligned to the cache line and can use dcbtst. */
+ .align 4
+L(nzCacheAligned):
+ cmpldi cr1,rLEN,128
+ blt cr1,L(cacheAligned1)
+ b L(nzCacheAligned128)
+ .align 5
+L(nzCacheAligned128):
+ cmpldi cr1,rLEN,256
+ addi rMEMP3,rMEMP,64
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ std rCHR,24(rMEMP)
+ std rCHR,32(rMEMP)
+ std rCHR,40(rMEMP)
+ std rCHR,48(rMEMP)
+ std rCHR,56(rMEMP)
+ addi rMEMP,rMEMP3,64
+ addi rLEN,rLEN,-128
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+ std rCHR,32(rMEMP3)
+ std rCHR,40(rMEMP3)
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ bge cr1,L(nzCacheAligned128)
+ dcbtst 0,rMEMP
+ b L(cacheAligned1)
+ .align 5
+/* Storing a zero "c" value. We are aligned at a sector (32-byte)
+ boundary but may not be at a cache line (128-byte) boundary. If the
+ remaining length spans a full cache line we can use the Data cache
+ block zero instruction. */
+L(zloopstart):
+/* memset in 32-byte chunks until we get to a cache line boundary.
+ If rLEN is less than the distance to the next cache-line boundary, use
+ the cacheAligned1 code to finish the tail. */
+ cmpldi cr1,rLEN,128
+ beq L(medium)
+L(getCacheAligned):
+ andi. rTMP,rMEMP,127
+ nop
+ blt cr1,L(cacheAligned1)
+ addi rMEMP3,rMEMP,32
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP3,127
+ std rCHR,-8(rMEMP3)
+L(getCacheAligned2):
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ addi rMEMP,rMEMP,32
+ andi. rTMP,rMEMP,127
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+L(getCacheAligned3):
+ beq L(cacheAligned)
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,128
+ std rCHR,40(rMEMP3)
+ cmpldi cr6,rLEN,256
+ li rMEMP2,128
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAlignedx)
+
+/* Now we are aligned to the cache line and can use dcbz. */
+ .align 5
+L(cacheAligned):
+ cmpldi cr1,rLEN,128
+ cmpldi cr6,rLEN,256
+ blt cr1,L(cacheAligned1)
+ li rMEMP2,128
+L(cacheAlignedx):
+ cmpldi cr5,rLEN,640
+ blt cr6,L(cacheAligned128)
+ bgt cr5,L(cacheAligned512)
+ cmpldi cr6,rLEN,512
+ dcbz 0,rMEMP
+ cmpldi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+ blt cr1,L(cacheAligned1)
+ blt cr6,L(cacheAligned128)
+ b L(cacheAligned256)
+ .align 5
+/* A simple loop for the longer (>640 bytes) lengths. This form limits
+ the number of mispredicted branches to exactly one, at loop exit.*/
+L(cacheAligned512):
+	cmpldi	cr1,rLEN,128
+ blt cr1,L(cacheAligned1)
+ dcbz 0,rMEMP
+ addi rLEN,rLEN,-128
+ addi rMEMP,rMEMP,128
+ b L(cacheAligned512)
+ .align 5
+L(cacheAligned256):
+
+ cmpldi cr6,rLEN,512
+
+ dcbz 0,rMEMP
+ cmpldi cr1,rLEN,384
+ dcbz rMEMP2,rMEMP
+ addi rMEMP,rMEMP,256
+ addi rLEN,rLEN,-256
+
+ bge cr6,L(cacheAligned256)
+
+ blt cr1,L(cacheAligned1)
+ .align 4
+L(cacheAligned128):
+ dcbz 0,rMEMP
+ addi rMEMP,rMEMP,128
+ addi rLEN,rLEN,-128
+ nop
+L(cacheAligned1):
+ cmpldi cr1,rLEN,32
+ blt cr1,L(handletail32)
+ addi rMEMP3,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP)
+ std rCHR,8(rMEMP)
+ std rCHR,16(rMEMP)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,32
+ std rCHR,-8(rMEMP3)
+L(cacheAligned2):
+ blt cr1,L(handletail32)
+ addi rLEN,rLEN,-32
+ std rCHR,0(rMEMP3)
+ std rCHR,8(rMEMP3)
+ addi rMEMP,rMEMP,32
+ cmpldi cr1,rLEN,32
+ std rCHR,16(rMEMP3)
+ std rCHR,24(rMEMP3)
+ nop
+L(cacheAligned3):
+ blt cr1,L(handletail32)
+ addi rMEMP,rMEMP,32
+ addi rLEN,rLEN,-32
+ std rCHR,32(rMEMP3)
+ std rCHR,40(rMEMP3)
+ std rCHR,48(rMEMP3)
+ std rCHR,56(rMEMP3)
+
+/* We are here because the length or remainder (rLEN) is less than the
+ cache line/sector size and does not justify aggressive loop unrolling.
+ So set up the preconditions for L(medium) and go there. */
+ .align 3
+L(handletail32):
+ cmpldi cr1,rLEN,0
+ beqlr cr1
+ b L(medium)
+
+ .align 5
+L(small):
+/* Memset of 8 bytes or less. */
+ cmpldi cr6, rLEN, 4
+ cmpldi cr5, rLEN, 1
+ ble cr6,L(le4)
+ subi rLEN, rLEN, 4
+ stb rCHR,0(rMEMP)
+ stb rCHR,1(rMEMP)
+ stb rCHR,2(rMEMP)
+ stb rCHR,3(rMEMP)
+ addi rMEMP,rMEMP, 4
+ cmpldi cr5, rLEN, 1
+L(le4):
+ cmpldi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
+ stb rCHR, 3(rMEMP)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ cmpldi cr1, rLEN, 16
+L(medium_tail2):
+ add rMEMP, rMEMP, rLEN
+L(medium_tail):
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
+L(medium_30f):
+ bt 29, L(medium_29t)
+L(medium_29f):
+ bge cr1, L(medium_27t)
+ bflr 28
+ std rCHR, -8(rMEMP)
+ blr
+
+L(medium_31t):
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
+L(medium_30t):
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
+L(medium_29t):
+ stwu rCHR, -4(rMEMP)
+ blt cr1, L(medium_27f)
+L(medium_27t):
+ std rCHR, -8(rMEMP)
+ stdu rCHR, -16(rMEMP)
+L(medium_27f):
+ bflr 28
+L(medium_28t):
+ std rCHR, -8(rMEMP)
+ blr
+END_GEN_TB (BP_SYM (memset),TB_TOCLESS)
+libc_hidden_builtin_def (memset)
+
+/* Copied from bzero.S to prevent the linker from inserting a stub
+ between bzero and memset. */
+ENTRY (BP_SYM (__bzero))
+ CALL_MCOUNT 3
+#if __BOUNDED_POINTERS__
+ mr r6,r4
+ li r5,0
+ mr r4,r3
+ /* Tell memset that we don't want a return value. */
+ li r3,0
+ b L(_memset)
+#else
+ mr r5,r4
+ li r4,0
+ b L(_memset)
+#endif
+END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS)
+
+weak_alias (BP_SYM (__bzero), BP_SYM (bzero))
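
Taken together, the paths above amount to a three-tier dispatch: byte stores for n <= 8, doubleword stores in 32-byte sectors once the destination is aligned, and dcbz to clear whole 128-byte cache lines when the fill value is zero. A rough C rendering of that control flow; memset_sketch and its structure are illustrative only, not the real implementation, and the inline dcbz is PowerPC-specific:

#include <stdint.h>
#include <stddef.h>

void *
memset_sketch (void *s, int c, size_t n)
{
  unsigned char *p = s;

  if (n <= 8)                                  /* L(small) path */
    { while (n-- > 0) *p++ = (unsigned char) c; return s; }

  while (((uintptr_t) p & 31) != 0 && n > 0)   /* align to a 32-byte sector */
    { *p++ = (unsigned char) c; n--; }

  uint64_t v = (unsigned char) c * 0x0101010101010101ULL;
  if (v == 0)
    {
      while (((uintptr_t) p & 127) != 0 && n >= 32)  /* reach a 128B line */
        { uint64_t *q = (uint64_t *) p;
          q[0] = q[1] = q[2] = q[3] = 0; p += 32; n -= 32; }
      while (n >= 128)                         /* L(cacheAligned): dcbz */
        { __asm__ ("dcbz 0,%0" : : "r" (p) : "memory"); p += 128; n -= 128; }
    }
  while (n >= 32)                              /* 32B sectors of DW stores */
    { uint64_t *q = (uint64_t *) p;
      q[0] = q[1] = q[2] = q[3] = v; p += 32; n -= 32; }
  while (n-- > 0) *p++ = (unsigned char) c;    /* 0-31 byte tail */
  return s;
}
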
diff --git a/sysdeps/powerpc/powerpc64/power6/wordcopy.c b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
new file mode 100644
index 0000000000..faddd945b3
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
@@ -0,0 +1,410 @@
+/* _memcopy.c -- subroutines for memory copy functions.
+ Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = a0;
+ ((op_t *) dstp)[1] = a1;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a0 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+
+ if (len == 1)
+ return;
+
+ a0 = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ switch (align)
+ {
+ case 1:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
+ ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 2:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
+ ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 3:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
+ ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 4:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
+ ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 5:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
+ ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 6:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
+ ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 7:
+ do
+ {
+ a1 = ((op_t *) srcp)[1];
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
+ ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
+ a0 = a2;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ }
+
+}
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = a1;
+ ((op_t *) dstp)[0] = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make srcp aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a2 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ if (len == 1)
+ return;
+
+ a2 = a1;
+ len -= 1;
+ }
+
+ switch (align)
+ {
+ case 1:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
+ ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 2:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
+ ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 3:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
+ ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 4:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
+ ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 5:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
+ ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 6:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
+ ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ case 7:
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
+ ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
+ a2 = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+ break;
+ }
+}
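
Every case in the two switches above is the same loop with the shift pair specialized per source misalignment, so sh_1 and sh_2 become immediate shift counts instead of variable ones. A rough expansion of what that relies on, for big-endian 64-bit targets (the exact MERGE definition lives in memcopy.h):

/* Roughly, for big-endian targets (see memcopy.h):
     #define MERGE(w0, sh_1, w1, sh_2)  (((w0) << (sh_1)) | ((w1) >> (sh_2)))

   Worked example for the case 3 loop: align == 3 and OPSIZ == 8, so
   sh_1 = 8 * 3 = 24 and sh_2 = 64 - 24 = 40, giving the literal
   MERGE (a0, 24, a1, 64-24) stores seen above.  */
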
diff --git a/sysdeps/powerpc/powerpc64/power6x/Implies b/sysdeps/powerpc/powerpc64/power6x/Implies
new file mode 100644
index 0000000000..d993b0cd73
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6x/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/power6
+powerpc/powerpc64/power5+
diff --git a/sysdeps/powerpc/powerpc64/power6x/fpu/Implies b/sysdeps/powerpc/powerpc64/power6x/fpu/Implies
new file mode 100644
index 0000000000..84510991b2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6x/fpu/Implies
@@ -0,0 +1,3 @@
+powerpc/powerpc64/power6/fpu
+powerpc/powerpc64/power5+/fpu
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S b/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S
new file mode 100644
index 0000000000..4a4a5f8855
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S
@@ -0,0 +1,45 @@
+/* Round double to long int. POWER6x PowerPC64 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .machine "power6"
+/* long long int[r3] __llrint (double x[fp1]) */
+ENTRY (__llrint)
+ CALL_MCOUNT 0
+ fctid fp13,fp1
+ mftgpr r3,fp13
+ blr
+ END (__llrint)
+
+strong_alias (__llrint, __lrint)
+weak_alias (__llrint, llrint)
+weak_alias (__lrint, lrint)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__llrint, __llrintl)
+weak_alias (__llrint, llrintl)
+strong_alias (__lrint, __lrintl)
+weak_alias (__lrint, lrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llrint, llrintl, GLIBC_2_1)
+compat_symbol (libm, __lrint, lrintl, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S
new file mode 100644
index 0000000000..149feaf965
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S
@@ -0,0 +1,55 @@
+/* llround function. POWER6x PowerPC64 version.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long long [r3] llround (double x [fp1])
+ IEEE 1003.1 llround function. IEEE specifies "round to the nearest
+ integer value, rounding halfway cases away from zero, regardless of
+ the current rounding mode." However, the PowerPC Architecture defines
+ "Round to Nearest" as "Choose the best approximation. In case of a
+ tie, choose the one that is even (least significant bit 0)."
+ So we pre-round using the V2.02 Floating Round to Integer Nearest
+ instruction before we use the Floating Convert to Integer Doubleword
+ with round toward Zero instruction. */
+
+ .machine "power6"
+ENTRY (__llround)
+ CALL_MCOUNT 0
+ frin fp2,fp1 /* Round to nearest +-0.5. */
+ fctidz fp3,fp2 /* Convert To Integer DW round toward 0. */
+ mftgpr r3,fp3 /* Transfer integer to R3. */
+ blr
+ END (__llround)
+
+strong_alias (__llround, __lround)
+weak_alias (__llround, llround)
+weak_alias (__lround, lround)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
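
The frin/fctidz pair implements exactly the required semantics: frin rounds to the nearest integral double with halfway cases going away from zero, and fctidz then converts the already-integral value with truncation, which at that point is exact. A portable C sketch of the same pre-round; llround_sketch is a hypothetical name, and the sketch glosses over double-rounding corner cases that the hardware pair avoids:

#include <math.h>

long long
llround_sketch (double x)
{
  /* Round half away from zero (what frin does), then truncate (fctidz). */
  double r = (x >= 0.0) ? floor (x + 0.5) : ceil (x - 0.5);
  return (long long) r;
}
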
diff --git a/sysdeps/powerpc/sched_cpucount.c b/sysdeps/powerpc/sched_cpucount.c
new file mode 100644
index 0000000000..f151eed231
--- /dev/null
+++ b/sysdeps/powerpc/sched_cpucount.c
@@ -0,0 +1,23 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef _ARCH_PWR5
+# define POPCNT(l) __builtin_popcountl (l)
+#endif
+
+#include <posix/sched_cpucount.c>
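
The generic posix/sched_cpucount.c walks the __cpu_mask words of the set and counts the set bits in each; when POPCNT is defined, as above for _ARCH_PWR5, it uses GCC's __builtin_popcountl, which the compiler can expand with the POWER5 population-count hardware. A simplified sketch of that counting loop (illustrative only; the real loop is in posix/sched_cpucount.c):

#define _GNU_SOURCE
#include <sched.h>
#include <stddef.h>

int
cpucount_sketch (size_t setsize, const cpu_set_t *setp)
{
  const unsigned long int *bits = (const unsigned long int *) setp;
  int count = 0;

  for (size_t i = 0; i < setsize / sizeof (unsigned long int); ++i)
    {
#ifdef POPCNT
      count += POPCNT (bits[i]);        /* hardware population count */
#else
      unsigned long int w = bits[i];
      while (w != 0)                    /* clear the lowest set bit */
        {
          w &= w - 1;
          ++count;
        }
#endif
    }
  return count;
}
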
diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c
new file mode 100644
index 0000000000..32c6aef951
--- /dev/null
+++ b/sysdeps/s390/dl-procinfo.c
@@ -0,0 +1,80 @@
+/* Data for s390 version of processor capability information.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* This information must be kept in sync with the _DL_HWCAP_COUNT and
+ _DL_PLATFORM_COUNT definitions in procinfo.h.
+
+ If anything is added here, check whether the size of each string
+ still fits within the given array size.
+
+ All the #ifdefs in the definitions are quite irritating but
+ necessary if we want to avoid duplicating the information. There
+ are three different modes:
+
+ - PROCINFO_DECL is defined. This means we are only interested in
+ declarations.
+
+ - PROCINFO_DECL is not defined:
+
+ + if SHARED is defined the file is included in an array
+ initializer. The .element = { ... } syntax is needed.
+
+ + if SHARED is not defined a normal array initialization is
+ needed.
+ */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !defined PROCINFO_DECL && defined SHARED
+ ._dl_s390_cap_flags
+#else
+PROCINFO_CLASS const char _dl_s390_cap_flags[7][6]
+#endif
+#ifndef PROCINFO_DECL
+= {
+ "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
+ }
+#endif
+#if !defined SHARED || defined PROCINFO_DECL
+;
+#else
+,
+#endif
+
+#if !defined PROCINFO_DECL && defined SHARED
+ ._dl_s390_platforms
+#else
+PROCINFO_CLASS const char _dl_s390_platforms[4][7]
+#endif
+#ifndef PROCINFO_DECL
+= {
+ "g5", "z900", "z990", "z9-109"
+ }
+#endif
+#if !defined SHARED || defined PROCINFO_DECL
+;
+#else
+,
+#endif
+
+#undef PROCINFO_DECL
+#undef PROCINFO_CLASS
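
The #ifdef dance above lets one file expand three different ways depending on who includes it. Roughly, the _dl_s390_cap_flags part expands as follows (illustrative expansions, not literal preprocessor output):

/* 1. PROCINFO_DECL defined -- declaration only (e.g. from ldsodefs.h):  */
PROCINFO_CLASS const char _dl_s390_cap_flags[7][6];

/* 2. PROCINFO_DECL undefined, SHARED defined -- a designated member
      initializer inside the rtld globals initializer:  */
._dl_s390_cap_flags = { "esan3", "zarch", "stfle", "msa",
                        "ldisp", "eimm", "dfp" },

/* 3. PROCINFO_DECL undefined, SHARED undefined -- a plain definition:  */
const char _dl_s390_cap_flags[7][6] =
  { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp" };
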
diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h
new file mode 100644
index 0000000000..178d7cc017
--- /dev/null
+++ b/sysdeps/s390/dl-procinfo.h
@@ -0,0 +1,99 @@
+/* s390 version of processor capability information handling macros.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _DL_PROCINFO_H
+#define _DL_PROCINFO_H 1
+#include <ldsodefs.h>
+
+#define _DL_HWCAP_COUNT 7
+
+#define _DL_PLATFORMS_COUNT 4
+
+/* The kernel provides up to 32 capability bits with elf_hwcap. */
+#define _DL_FIRST_PLATFORM 32
+/* Mask to filter out platforms. */
+#define _DL_HWCAP_PLATFORM (((1ULL << _DL_PLATFORMS_COUNT) - 1) \
+ << _DL_FIRST_PLATFORM)
+
+/* Hardware capability bit numbers are derived directly from the
+ facility indications as stored by the "store facility list" (STFL)
+ instruction. */
+
+enum
+{
+ HWCAP_S390_ESAN3 = 1 << 0,
+ HWCAP_S390_ZARCH = 1 << 1,
+ HWCAP_S390_STFLE = 1 << 2,
+ HWCAP_S390_MSA = 1 << 3,
+ HWCAP_S390_LDISP = 1 << 4,
+ HWCAP_S390_EIMM = 1 << 5,
+ HWCAP_S390_DFP = 1 << 6,
+};
+
+#define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \
+ | HWCAP_S390_EIMM | HWCAP_S390_DFP)
+
+/* We cannot provide a general printing function. */
+#define _dl_procinfo(word) -1
+
+static inline const char *
+__attribute__ ((unused))
+_dl_hwcap_string (int idx)
+{
+ return GLRO(dl_s390_cap_flags)[idx];
+}
+
+static inline const char *
+__attribute__ ((unused))
+_dl_platform_string (int idx)
+{
+ return GLRO(dl_s390_platforms)[idx - _DL_FIRST_PLATFORM];
+}
+
+static inline int
+__attribute__ ((unused, always_inline))
+_dl_string_hwcap (const char *str)
+{
+ int i;
+
+ for (i = 0; i < _DL_HWCAP_COUNT; i++)
+ {
+ if (strcmp (str, GLRO(dl_s390_cap_flags)[i]) == 0)
+ return i;
+ }
+ return -1;
+}
+
+static inline int
+__attribute__ ((unused, always_inline))
+_dl_string_platform (const char *str)
+{
+ int i;
+
+ if (str != NULL)
+ for (i = 0; i < _DL_PLATFORMS_COUNT; ++i)
+ {
+ if (strcmp (str, GLRO(dl_s390_platforms)[i]) == 0)
+ return _DL_FIRST_PLATFORM + i;
+ }
+ return -1;
+}
+
+#endif /* dl-procinfo.h */
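
For concreteness, the platform mask above works out as follows (a worked expansion of the macro, nothing more):

/* _DL_PLATFORMS_COUNT == 4 and _DL_FIRST_PLATFORM == 32, hence
   _DL_HWCAP_PLATFORM == ((1ULL << 4) - 1) << 32 == 0xf00000000ULL:
   the four platform bits sit at positions 32-35, just above the up to
   32 hwcap bits the kernel supplies in elf_hwcap.  */
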
diff --git a/sysdeps/sh/bsd-_setjmp.S b/sysdeps/sh/bsd-_setjmp.S
index 2811fcfd1b..dbfd95259b 100644
--- a/sysdeps/sh/bsd-_setjmp.S
+++ b/sysdeps/sh/bsd-_setjmp.S
@@ -1,5 +1,5 @@
/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. SH version.
- Copyright (C) 1999, 2000, 2002 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2002, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -36,14 +36,14 @@ ENTRY (_setjmp)
mov.l 3f, r1
mov.l @(r0,r1), r1
jmp @r1
- mov #0, r0
+ mov #0, r5
.align 2
3:
.long C_SYMBOL_NAME(__sigsetjmp@GOT)
#else
mov.l 1f, r1
jmp @r1
- mov #0, r0
+ mov #0, r5
.align 2
1:
.long C_SYMBOL_NAME(__sigsetjmp)
diff --git a/sysdeps/sh/bsd-setjmp.S b/sysdeps/sh/bsd-setjmp.S
index e076284428..de226e1284 100644
--- a/sysdeps/sh/bsd-setjmp.S
+++ b/sysdeps/sh/bsd-setjmp.S
@@ -1,5 +1,5 @@
/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. SH version.
- Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -36,14 +36,14 @@ ENTRY (setjmp)
mov.l 3f, r1
mov.l @(r0,r1), r1
jmp @r1
- mov #1, r0
+ mov #1, r5
.align 2
3:
.long C_SYMBOL_NAME(__sigsetjmp@GOT)
#else
mov.l 1f, r1
jmp @r1
- mov #1, r0
+ mov #1, r5
.align 2
1:
.long C_SYMBOL_NAME(__sigsetjmp)
diff --git a/sysdeps/unix/clock_gettime.c b/sysdeps/unix/clock_gettime.c
index f698f0151b..fbaaf301e4 100644
--- a/sysdeps/unix/clock_gettime.c
+++ b/sysdeps/unix/clock_gettime.c
@@ -1,5 +1,5 @@
/* clock_gettime -- Get the current time from a POSIX clockid_t. Unix version.
- Copyright (C) 1999-2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 1999-2004, 2005, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -93,7 +93,6 @@ int
clock_gettime (clockid_t clock_id, struct timespec *tp)
{
int retval = -1;
- struct timeval tv;
switch (clock_id)
{
@@ -103,9 +102,12 @@ clock_gettime (clockid_t clock_id, struct timespec *tp)
#ifndef HANDLED_REALTIME
case CLOCK_REALTIME:
- retval = gettimeofday (&tv, NULL);
- if (retval == 0)
- TIMEVAL_TO_TIMESPEC (&tv, tp);
+ {
+ struct timeval tv;
+ retval = gettimeofday (&tv, NULL);
+ if (retval == 0)
+ TIMEVAL_TO_TIMESPEC (&tv, tp);
+ }
break;
#endif
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index f59e340f8e..119e37b345 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -137,7 +137,7 @@ endif
ifeq ($(subdir),io)
sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \
- sync_file_range
+ sync_file_range open_2
endif
ifeq ($(subdir),elf)
diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h
index 31db66f8ac..5eaa2fe528 100644
--- a/sysdeps/unix/sysv/linux/bits/sched.h
+++ b/sysdeps/unix/sysv/linux/bits/sched.h
@@ -104,7 +104,7 @@ struct __sched_param
# define __CPU_SETSIZE 1024
# define __NCPUBITS (8 * sizeof (__cpu_mask))
-/* Type for array elements in 'cpu_set'. */
+/* Type for array elements in 'cpu_set_t'. */
typedef unsigned long int __cpu_mask;
/* Basic access functions. */
@@ -118,20 +118,72 @@ typedef struct
} cpu_set_t;
/* Access functions for CPU masks. */
-# define __CPU_ZERO(cpusetp) \
+# if __GNUC_PREREQ (2, 91)
+# define __CPU_ZERO_S(setsize, cpusetp) \
+ do __builtin_memset (cpusetp, '\0', setsize); while (0)
+# else
+# define __CPU_ZERO_S(setsize, cpusetp) \
do { \
- unsigned int __i; \
+ size_t __i; \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
cpu_set_t *__arr = (cpusetp); \
- for (__i = 0; __i < sizeof (cpu_set_t) / sizeof (__cpu_mask); ++__i) \
+ for (__i = 0; __i < __imax; ++__i) \
__arr->__bits[__i] = 0; \
} while (0)
-# define __CPU_SET(cpu, cpusetp) \
- ((cpusetp)->__bits[__CPUELT (cpu)] |= __CPUMASK (cpu))
-# define __CPU_CLR(cpu, cpusetp) \
- ((cpusetp)->__bits[__CPUELT (cpu)] &= ~__CPUMASK (cpu))
-# define __CPU_ISSET(cpu, cpusetp) \
- (((cpusetp)->__bits[__CPUELT (cpu)] & __CPUMASK (cpu)) != 0)
-extern int __sched_cpucount (size_t __setsize, cpu_set_t *__setp) __THROW;
-# define __CPU_COUNT(cpusetp) \
- __sched_cpucount (sizeof (cpu_set_t), cpusetp)
+# endif
+# define __CPU_SET_S(cpu, setsize, cpusetp) \
+ ({ size_t __cpu = (cpu); \
+ __cpu < 8 * (setsize) \
+ ? ((cpusetp)->__bits[__CPUELT (__cpu)] |= __CPUMASK (__cpu)) : 0; })
+# define __CPU_CLR_S(cpu, setsize, cpusetp) \
+ ({ size_t __cpu = (cpu); \
+ __cpu < 8 * (setsize) \
+ ? ((cpusetp)->__bits[__CPUELT (__cpu)] &= ~__CPUMASK (__cpu)) : 0; })
+# define __CPU_ISSET_S(cpu, setsize, cpusetp) \
+ ({ size_t __cpu = (cpu); \
+ __cpu < 8 * (setsize) \
+ ? (((cpusetp)->__bits[__CPUELT (__cpu)] & __CPUMASK (__cpu))) != 0 : 0; })
+
+# define __CPU_COUNT_S(setsize, cpusetp) \
+ __sched_cpucount (setsize, cpusetp)
+
+# if __GNUC_PREREQ (2, 91)
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+ (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0)
+# else
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+ ({ cpu_set_t *__arr1 = (cpusetp1); \
+ cpu_set_t *__arr2 = (cpusetp2); \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
+ size_t __i; \
+ for (__i = 0; __i < __imax; ++__i) \
+ if (__arr1->__bits[__i] != __arr2->__bits[__i]) \
+ break; \
+ __i == __imax; })
+# endif
+
+# define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \
+ ({ cpu_set_t *__dest = (destset); \
+ cpu_set_t *__arr1 = (srcset1); \
+ cpu_set_t *__arr2 = (srcset2); \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
+ size_t __i; \
+ for (__i = 0; __i < __imax; ++__i) \
+ __dest->__bits[__i] = __arr1->__bits[__i] op __arr2->__bits[__i]; \
+ __dest; })
+
+# define __CPU_ALLOC_SIZE(count) \
+ ((((count) + __NCPUBITS - 1) / __NCPUBITS) * 8)
+# define __CPU_ALLOC(count) __sched_cpualloc (count)
+# define __CPU_FREE(cpuset) __sched_cpufree (cpuset)
+
+__BEGIN_DECLS
+
+extern int __sched_cpucount (size_t __setsize, const cpu_set_t *__setp)
+ __THROW;
+extern cpu_set_t *__sched_cpualloc (size_t __count) __THROW __wur;
+extern void __sched_cpufree (cpu_set_t *__set) __THROW;
+
+__END_DECLS
+
#endif
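
The new *_S macros take the set size explicitly so a set can be sized for more CPUs than the 1024 baked into cpu_set_t, with __CPU_ALLOC_SIZE rounding the byte count up to whole __cpu_mask words. Through the public CPU_* wrappers that <sched.h> layers on top of these macros, the intended use looks roughly like this (error handling elided):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int
main (void)
{
  int ncpus = 2048;                    /* more than cpu_set_t's 1024 bits */
  cpu_set_t *set = CPU_ALLOC (ncpus);
  size_t size = CPU_ALLOC_SIZE (ncpus);

  CPU_ZERO_S (size, set);
  CPU_SET_S (0, size, set);
  CPU_SET_S (1536, size, set);         /* out of range for the old CPU_SET */
  printf ("%d CPUs set\n", CPU_COUNT_S (size, set));

  CPU_FREE (set);
  return 0;
}
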
diff --git a/sysdeps/unix/sysv/linux/bits/socket.h b/sysdeps/unix/sysv/linux/bits/socket.h
index 377f589bbd..89a9106b2e 100644
--- a/sysdeps/unix/sysv/linux/bits/socket.h
+++ b/sysdeps/unix/sysv/linux/bits/socket.h
@@ -63,7 +63,7 @@ enum __socket_type
/* Protocol families. */
#define PF_UNSPEC 0 /* Unspecified. */
#define PF_LOCAL 1 /* Local to host (pipes and file-domain). */
-#define PF_UNIX PF_LOCAL /* Old BSD name for PF_LOCAL. */
+#define PF_UNIX PF_LOCAL /* POSIX name for PF_LOCAL. */
#define PF_FILE PF_LOCAL /* Another non-standard name for PF_LOCAL. */
#define PF_INET 2 /* IP protocol family. */
#define PF_AX25 3 /* Amateur Radio AX.25. */
@@ -90,7 +90,9 @@ enum __socket_type
#define PF_PPPOX 24 /* PPPoX sockets. */
#define PF_WANPIPE 25 /* Wanpipe API sockets. */
#define PF_BLUETOOTH 31 /* Bluetooth sockets. */
-#define PF_MAX 32 /* For now.. */
+#define PF_IUCV 32 /* IUCV sockets. */
+#define PF_RXRPC 33 /* RxRPC sockets. */
+#define PF_MAX 34 /* For now.. */
/* Address families. */
#define AF_UNSPEC PF_UNSPEC
@@ -122,6 +124,8 @@ enum __socket_type
#define AF_PPPOX PF_PPPOX
#define AF_WANPIPE PF_WANPIPE
#define AF_BLUETOOTH PF_BLUETOOTH
+#define AF_IUCV PF_IUCV
+#define AF_RXRPC PF_RXRPC
#define AF_MAX PF_MAX
/* Socket level values. Others are defined in the appropriate headers.
@@ -206,8 +210,13 @@ enum
#define MSG_ERRQUEUE MSG_ERRQUEUE
MSG_NOSIGNAL = 0x4000, /* Do not generate SIGPIPE. */
#define MSG_NOSIGNAL MSG_NOSIGNAL
- MSG_MORE = 0x8000 /* Sender will send more. */
+ MSG_MORE = 0x8000, /* Sender will send more. */
#define MSG_MORE MSG_MORE
+
+ MSG_CMSG_CLOEXEC = 0x40000000 /* Set close_on_exit for file
+ descriptor received through
+ SCM_RIGHTS. */
+#define MSG_CMSG_CLOEXEC MSG_CMSG_CLOEXEC
};
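
MSG_CMSG_CLOEXEC matters when a file descriptor arrives via SCM_RIGHTS: passing the flag to recvmsg makes the kernel set close-on-exec on the received descriptor atomically, closing the race with a concurrent fork/exec. A short usage sketch; recv_fd_cloexec is a hypothetical helper and error handling is minimal:

#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* Receive one descriptor over an AF_UNIX socket with close-on-exec
   set atomically by the kernel.  */
static int
recv_fd_cloexec (int sock)
{
  char byte;
  char cbuf[CMSG_SPACE (sizeof (int))];
  struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
  struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
                        .msg_control = cbuf, .msg_controllen = sizeof cbuf };
  int fd = -1;

  if (recvmsg (sock, &msg, MSG_CMSG_CLOEXEC) < 0)
    return -1;

  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
  if (cmsg != NULL && cmsg->cmsg_level == SOL_SOCKET
      && cmsg->cmsg_type == SCM_RIGHTS)
    memcpy (&fd, CMSG_DATA (cmsg), sizeof fd);
  return fd;
}
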
diff --git a/sysdeps/unix/sysv/linux/check_pf.c b/sysdeps/unix/sysv/linux/check_pf.c
index 3312da3af8..fbe805bf3b 100644
--- a/sysdeps/unix/sysv/linux/check_pf.c
+++ b/sysdeps/unix/sysv/linux/check_pf.c
@@ -136,40 +136,72 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
if (nlmh->nlmsg_type == RTM_NEWADDR)
{
struct ifaddrmsg *ifam = (struct ifaddrmsg *) NLMSG_DATA (nlmh);
+ struct rtattr *rta = IFA_RTA (ifam);
+ size_t len = nlmh->nlmsg_len - NLMSG_LENGTH (sizeof (*ifam));
switch (ifam->ifa_family)
{
+ const void *local;
+ const void *address;
+
case AF_INET:
- *seen_ipv4 = true;
+ local = NULL;
+ address = NULL;
+ while (RTA_OK (rta, len))
+ {
+ switch (rta->rta_type)
+ {
+ case IFA_LOCAL:
+ local = RTA_DATA (rta);
+ break;
+
+ case IFA_ADDRESS:
+ address = RTA_DATA (rta);
+ goto out_v4;
+ }
+
+ rta = RTA_NEXT (rta, len);
+ }
+
+ if (local != NULL)
+ {
+ out_v4:
+ if (*(const in_addr_t *) (address ?: local)
+ != htonl (INADDR_LOOPBACK))
+ *seen_ipv4 = true;
+ }
break;
+
case AF_INET6:
- *seen_ipv6 = true;
+ local = NULL;
+ address = NULL;
+ while (RTA_OK (rta, len))
+ {
+ switch (rta->rta_type)
+ {
+ case IFA_LOCAL:
+ local = RTA_DATA (rta);
+ break;
+
+ case IFA_ADDRESS:
+ address = RTA_DATA (rta);
+ goto out_v6;
+ }
+
+ rta = RTA_NEXT (rta, len);
+ }
+
+ if (local != NULL)
+ {
+ out_v6:
+ if (!IN6_IS_ADDR_LOOPBACK (address ?: local))
+ *seen_ipv6 = true;
+ }
if (ifam->ifa_flags & (IFA_F_DEPRECATED
| IFA_F_TEMPORARY
| IFA_F_HOMEADDRESS))
{
- struct rtattr *rta = IFA_RTA (ifam);
- size_t len = (nlmh->nlmsg_len
- - NLMSG_LENGTH (sizeof (*ifam)));
- void *local = NULL;
- void *address = NULL;
- while (RTA_OK (rta, len))
- {
- switch (rta->rta_type)
- {
- case IFA_LOCAL:
- local = RTA_DATA (rta);
- break;
-
- case IFA_ADDRESS:
- address = RTA_DATA (rta);
- break;
- }
-
- rta = RTA_NEXT (rta, len);
- }
-
struct in6ailist *newp = alloca (sizeof (*newp));
newp->info.flags = (((ifam->ifa_flags & IFA_F_DEPRECATED)
? in6ai_deprecated : 0)
@@ -200,7 +232,7 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
close_not_cancel_no_status (fd);
- if (in6ailist != NULL)
+ if (*seen_ipv6 && in6ailist != NULL)
{
*in6ai = malloc (in6ailistlen * sizeof (**in6ai));
if (*in6ai == NULL)
diff --git a/sysdeps/unix/sysv/linux/futimes.c b/sysdeps/unix/sysv/linux/futimes.c
index 610f1deb29..272b83eb05 100644
--- a/sysdeps/unix/sysv/linux/futimes.c
+++ b/sysdeps/unix/sysv/linux/futimes.c
@@ -1,5 +1,5 @@
/* futimes -- change access and modification times of open file. Linux version.
- Copyright (C) 2002,2003,2005,2006 Free Software Foundation, Inc.
+ Copyright (C) 2002,2003,2005,2006,2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -29,7 +29,7 @@
#include <kernel-features.h>
-#ifndef __ASSUME_UTIMENSAT
+#if defined __NR_utimensat && !defined __ASSUME_UTIMENSAT
static int miss_utimensat;
#endif
diff --git a/sysdeps/unix/sysv/linux/i386/bits/fcntl.h b/sysdeps/unix/sysv/linux/i386/bits/fcntl.h
index 6de33302ee..83ca3c2861 100644
--- a/sysdeps/unix/sysv/linux/i386/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/i386/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2004, 2006
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2004, 2006, 2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -50,6 +50,7 @@
# define O_DIRECTORY 0200000 /* Must be a directory. */
# define O_NOFOLLOW 0400000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
 /* For now Linux has synchronicity options for data and read operations.
diff --git a/sysdeps/unix/sysv/linux/i386/sysconf.c b/sysdeps/unix/sysv/linux/i386/sysconf.c
index 2ffbd5227b..78877fb2a1 100644
--- a/sysdeps/unix/sysv/linux/i386/sysconf.c
+++ b/sysdeps/unix/sysv/linux/i386/sysconf.c
@@ -1,5 +1,5 @@
/* Get file-specific information about a file. Linux version.
- Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -97,11 +97,13 @@ static const struct intel_02_cache_info
{ 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 },
{ 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 },
{ 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 },
+ { 0x48, _SC_LEVEL2_CACHE_SIZE, 3145728, 12, 64 },
{ 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 },
{ 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 },
{ 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
{ 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
{ 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
+ { 0x4e, _SC_LEVEL2_CACHE_SIZE, 6291456, 24, 64 },
{ 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 },
{ 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 },
{ 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },
diff --git a/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h b/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h
index ed8c2da9e2..8fa96e4e26 100644
--- a/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux/IA64.
- Copyright (C) 1999, 2000, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2004, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -49,6 +49,7 @@
# define O_DIRECTORY 0200000 /* must be a directory */
# define O_NOFOLLOW 0400000 /* don't follow links */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
#ifdef __USE_LARGEFILE64
diff --git a/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h b/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h
index ff77627a83..c824be2da7 100644
--- a/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux/ia64 version.
- Copyright (C) 2001, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -127,6 +127,28 @@ struct pt_all_user_regs
struct ia64_fpreg fr[128];
};
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
index ed32001544..f8116d8885 100644
--- a/sysdeps/unix/sysv/linux/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/kernel-features.h
@@ -1,6 +1,6 @@
/* Set flags signalling availability of kernel features based on given
kernel version number.
- Copyright (C) 1999-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1999-2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -463,3 +463,13 @@
#if __LINUX_KERNEL_VERSION >= 0x020616
# define __ASSUME_UTIMENSAT 1
#endif
+
+/* Support for private futexes was added in 2.6.22. */
+#if __LINUX_KERNEL_VERSION >= 0x020616
+# define __ASSUME_PRIVATE_FUTEX 1
+#endif
+
+/* Support for fallocate was added in 2.6.23. */
+#if __LINUX_KERNEL_VERSION >= 0x020617
+# define __ASSUME_FALLOCATE 1
+#endif
diff --git a/sysdeps/unix/sysv/linux/nscd_setup_thread.c b/sysdeps/unix/sysv/linux/nscd_setup_thread.c
index 1589c24ea9..56e23dc831 100644
--- a/sysdeps/unix/sysv/linux/nscd_setup_thread.c
+++ b/sysdeps/unix/sysv/linux/nscd_setup_thread.c
@@ -4,8 +4,9 @@
Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2 as
- published by the Free Software Foundation.
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/sysdeps/unix/sysv/linux/open64.c b/sysdeps/unix/sysv/linux/open64.c
index 5fb5363e1e..c52ce39db1 100644
--- a/sysdeps/unix/sysv/linux/open64.c
+++ b/sysdeps/unix/sysv/linux/open64.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991,1995-1997,1999,2000,2002 Free Software Foundation, Inc.
+/* Copyright (C) 1991,1995-1997,1999,2000,2002,2007
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,7 +20,7 @@
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
-#include <bp-sym.h>
+#include <stdio.h>
#include <sysdep-cancel.h>
/* Open FILE with access OFLAG. If OFLAG includes O_CREAT,
@@ -48,6 +49,29 @@ __libc_open64 (const char *file, int oflag, ...)
return result;
}
-weak_alias (__libc_open64, BP_SYM (__open64))
-libc_hidden_weak (BP_SYM (__open64))
-weak_alias (__libc_open64, BP_SYM (open64))
+weak_alias (__libc_open64, __open64)
+libc_hidden_weak (__open64)
+weak_alias (__libc_open64, open64)
+
+
+#ifndef PTW
+int
+__open64_2 (file, oflag)
+ const char *file;
+ int oflag;
+{
+ if (oflag & O_CREAT)
+ __fortify_fail ("invalid open64 call: O_CREAT without mode");
+
+ if (SINGLE_THREAD_P)
+ return INLINE_SYSCALL (open, 2, file, oflag | O_LARGEFILE);
+
+ int oldtype = LIBC_CANCEL_ASYNC ();
+
+ int result = INLINE_SYSCALL (open, 2, file, oflag | O_LARGEFILE);
+
+ LIBC_CANCEL_RESET (oldtype);
+
+ return result;
+}
+#endif
diff --git a/sysdeps/unix/sysv/linux/open_2.c b/sysdeps/unix/sysv/linux/open_2.c
new file mode 100644
index 0000000000..0a6dc28b43
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/open_2.c
@@ -0,0 +1,32 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <fcntl.h>
+#include <stdio.h>
+
+
+int
+__open_2 (file, oflag)
+ const char *file;
+ int oflag;
+{
+ if (oflag & O_CREAT)
+ __fortify_fail ("invalid open call: O_CREAT without mode");
+
+ return __open (file, oflag);
+}
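
__fortify_fail aborts with the given message, so a fortified open call that passes O_CREAT but no mode dies loudly instead of picking up an indeterminate mode from the varargs. The headers installed for _FORTIFY_SOURCE route mode-less open calls here when O_CREAT cannot be excluded at compile time; from the caller's side that looks roughly like the following (a hypothetical illustration; fortification requires compiling with optimization):

#define _FORTIFY_SOURCE 2
#include <fcntl.h>

int
open_config (const char *path)
{
  /* Fine: O_CREAT is absent, so no mode argument is required.  */
  return open (path, O_RDONLY);

  /* By contrast, open (path, O_CREAT) with no mode would be rejected
     at compile time where provable, or routed to __open_2 and aborted
     at run time.  */
}
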
diff --git a/sysdeps/unix/sysv/linux/openat.c b/sysdeps/unix/sysv/linux/openat.c
index df53b6cf2c..45b566f2d0 100644
--- a/sysdeps/unix/sysv/linux/openat.c
+++ b/sysdeps/unix/sysv/linux/openat.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -30,6 +30,7 @@
#ifndef OPENAT
# define OPENAT openat
+# define __OPENAT_2 __openat_2
# ifndef __ASSUME_ATFCTS
/* Set errno after a failed call. If BUF is not null,
@@ -173,3 +174,18 @@ __OPENAT (fd, file, oflag)
}
libc_hidden_def (__OPENAT)
weak_alias (__OPENAT, OPENAT)
+
+
+int
+__OPENAT_2 (fd, file, oflag)
+ int fd;
+ const char *file;
+ int oflag;
+{
+ if (oflag & O_CREAT)
+#define MSG(s) MSG2 (s)
+#define MSG2(s) "invalid " #s " call: O_CREAT without mode"
+ __fortify_fail (MSG (OPENAT));
+
+ return __OPENAT (fd, file, oflag);
+}
diff --git a/sysdeps/unix/sysv/linux/openat64.c b/sysdeps/unix/sysv/linux/openat64.c
index 9e7a2b3737..013a13effa 100644
--- a/sysdeps/unix/sysv/linux/openat64.c
+++ b/sysdeps/unix/sysv/linux/openat64.c
@@ -1,4 +1,5 @@
#define OPENAT openat64
+#define __OPENAT_2 __openat64_2
#define MORE_OFLAGS O_LARGEFILE
#include "openat.c"
diff --git a/sysdeps/unix/sysv/linux/posix_fallocate.c b/sysdeps/unix/sysv/linux/posix_fallocate.c
new file mode 100644
index 0000000000..9cfade60ca
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/posix_fallocate.c
@@ -0,0 +1,60 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <fcntl.h>
+#include <kernel-features.h>
+#include <sysdep.h>
+
+#define posix_fallocate static internal_fallocate
+#include <sysdeps/posix/posix_fallocate.c>
+#undef posix_fallocate
+
+#if !defined __ASSUME_FALLOCATE && defined __NR_fallocate
+int __have_fallocate attribute_hidden;
+#endif
+
+
+/* Reserve storage for the data of the file associated with FD. */
+int
+posix_fallocate (int fd, __off_t offset, __off_t len)
+{
+#ifdef __NR_fallocate
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (__have_fallocate >= 0, 1))
+# endif
+ {
+ INTERNAL_SYSCALL_DECL (err);
+ int res = INTERNAL_SYSCALL (fallocate, err, 4, fd, 0,
+ __LONG_LONG_PAIR (offset >> 31, offset),
+ __LONG_LONG_PAIR (len >> 31, len));
+
+ if (! INTERNAL_SYSCALL_ERROR_P (res, err))
+ return 0;
+
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (INTERNAL_SYSCALL_ERRNO (res, err) == ENOSYS, 0))
+ __have_fallocate = -1;
+ else
+# endif
+ if (INTERNAL_SYSCALL_ERRNO (res, err) != EOPNOTSUPP)
+ return INTERNAL_SYSCALL_ERRNO (res, err);
+ }
+#endif
+
+ return internal_fallocate (fd, offset, len);
+}
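
The offset >> 31 in the syscall arguments is easy to misread as an off-by-one against the >> 32 used in the 64-bit sibling below. It is deliberate: here off_t is the 32-bit type, shifting a 32-bit value by 32 is undefined, and an arithmetic shift by 31 produces exactly the sign-extension word (0 or -1) that the kernel expects as the high half. A tiny illustration, assuming the usual arithmetic right shift on negative values:

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int32_t offset = -4096;                 /* a 32-bit off_t value */
  int32_t hi = offset >> 31;              /* sign word: 0 or -1 */
  printf ("hi=%d lo=%d\n", hi, offset);   /* prints hi=-1 lo=-4096 */

  offset = 4096;
  printf ("hi=%d lo=%d\n", offset >> 31, offset);  /* hi=0 lo=4096 */
  return 0;
}
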
diff --git a/sysdeps/unix/sysv/linux/posix_fallocate64.c b/sysdeps/unix/sysv/linux/posix_fallocate64.c
new file mode 100644
index 0000000000..c5b8a34494
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/posix_fallocate64.c
@@ -0,0 +1,64 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <fcntl.h>
+#include <kernel-features.h>
+#include <sysdep.h>
+
+extern int __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len);
+#define __posix_fallocate64_l64 static internal_fallocate64
+#include <sysdeps/posix/posix_fallocate64.c>
+#undef __posix_fallocate64_l64
+
+#if !defined __ASSUME_FALLOCATE && defined __NR_fallocate
+/* Defined in posix_fallocate.c. */
+extern int __have_fallocate attribute_hidden;
+#endif
+
+
+/* Reserve storage for the data of the file associated with FD. */
+int
+__posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len)
+{
+#ifdef __NR_fallocate
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (__have_fallocate >= 0, 1))
+# endif
+ {
+ INTERNAL_SYSCALL_DECL (err);
+ int res = INTERNAL_SYSCALL (fallocate, err, 6, fd, 0,
+ __LONG_LONG_PAIR ((long int) (offset >> 32),
+ (long int) offset),
+ __LONG_LONG_PAIR ((long int) (len >> 32),
+ (long int) len));
+
+ if (! INTERNAL_SYSCALL_ERROR_P (res, err))
+ return 0;
+
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (INTERNAL_SYSCALL_ERRNO (res, err) == ENOSYS, 0))
+ __have_fallocate = -1;
+ else
+# endif
+ if (INTERNAL_SYSCALL_ERRNO (res, err) != EOPNOTSUPP)
+ return INTERNAL_SYSCALL_ERRNO (res, err);
+ }
+#endif
+
+ return internal_fallocate64 (fd, offset, len);
+}
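
A side note on the __LONG_LONG_PAIR calls above: on 32-bit targets each 64-bit argument is passed to the kernel as two long words, ordered according to endianness. A standalone sketch of the split (the variable names are illustrative):

/* How a 64-bit offset is split into the two words that
   __LONG_LONG_PAIR orders for the fallocate syscall; the kernel
   reassembles ((uint64_t) hi << 32) | lo.  */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int64_t offset = 0x123456789abcdef0LL;
  uint32_t hi = (uint32_t) (offset >> 32);      /* 0x12345678 */
  uint32_t lo = (uint32_t) offset;              /* 0x9abcdef0 */
  printf ("hi=%" PRIx32 " lo=%" PRIx32 "\n", hi, lo);
  return 0;
}
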
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h b/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
index c4964e0fd8..68015dbca3 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux/PowerPC.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2003, 2004, 2006
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2003, 2004, 2006, 2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -50,6 +50,7 @@
# define O_DIRECTORY 040000 /* Must be a directory. */
# define O_NOFOLLOW 0100000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
#ifdef __USE_LARGEFILE64
diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
index 923eab99ef..a71cfa5b06 100644
--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
+++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
@@ -20,6 +20,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <ldsodefs.h>
+#include <sysdep.h>
#include <bp-start.h>
#include <bp-sym.h>
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/970/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/970/fpu/Implies
new file mode 100644
index 0000000000..52993ae71b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/970/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/970/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power4/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power4/fpu/Implies
new file mode 100644
index 0000000000..3c0690e2fe
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power4/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power4/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5+/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5+/fpu/Implies
new file mode 100644
index 0000000000..37d43bb6fc
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5+/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power5+/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5/fpu/Implies
new file mode 100644
index 0000000000..d379a2dd12
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power5/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power5/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/Implies
new file mode 100644
index 0000000000..9fb457db93
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power6/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6x/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6x/fpu/Implies
new file mode 100644
index 0000000000..ffca13c0ef
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power6x/fpu/Implies
@@ -0,0 +1,4 @@
+# Make sure this comes before the powerpc/powerpc32/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power6x/fpu
+powerpc/powerpc32/power6/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/970/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/970/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/970/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power4/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power4/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power4/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5+/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5+/fpu/Implies
new file mode 100644
index 0000000000..cf5913dec3
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5+/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc64/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/power5+/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5/fpu/Implies
new file mode 100644
index 0000000000..558a5fb174
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power5/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/power4/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/Implies
new file mode 100644
index 0000000000..9451147267
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6/fpu/Implies
@@ -0,0 +1,3 @@
+# Make sure this comes before the powerpc/powerpc64/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/power6/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6x/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6x/fpu/Implies
new file mode 100644
index 0000000000..fd74b8341c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power6x/fpu/Implies
@@ -0,0 +1,4 @@
+# Make sure this comes before the powerpc/powerpc64/fpu that's
+# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/power6x/fpu
+powerpc/powerpc64/power6/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h b/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
index 5d055f67fe..23e75fbcff 100644
--- a/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux version.
- Copyright (C) 2001, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -99,6 +99,28 @@ enum __ptrace_request
#define PT_SETSIGINFO PTRACE_SETSIGINFO
};
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
diff --git a/sysdeps/unix/sysv/linux/s390/bits/fcntl.h b/sysdeps/unix/sysv/linux/s390/bits/fcntl.h
index c611028f29..848568532f 100644
--- a/sysdeps/unix/sysv/linux/s390/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/s390/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux.
- Copyright (C) 2000, 2001, 2002, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2000,2001,2002,2004,2006,2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -50,6 +50,7 @@
# define O_DIRECTORY 0200000 /* Must be a directory. */
# define O_NOFOLLOW 0400000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
#ifdef __USE_LARGEFILE64
diff --git a/sysdeps/unix/sysv/linux/s390/dl-procinfo.h b/sysdeps/unix/sysv/linux/s390/dl-procinfo.h
new file mode 100644
index 0000000000..3eeb529053
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/s390/dl-procinfo.h
@@ -0,0 +1,43 @@
+/* Linux/s390 version of processor capability information handling macros.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdeps/s390/dl-procinfo.h>
+#include <ldsodefs.h>
+
+
+#undef _dl_procinfo
+static inline int
+__attribute__ ((unused))
+_dl_procinfo (int word)
+{
+ /* This table should match the information from arch/s390/kernel/setup.c
+ in the kernel sources. */
+ int i;
+
+ _dl_printf ("AT_HWCAP: ");
+
+ for (i = 0; i < _DL_HWCAP_COUNT; ++i)
+ if (word & (1UL << i))
+ _dl_printf (" %s", GLRO(dl_s390_cap_flags)[i]);
+
+ _dl_printf ("\n");
+
+ return 0;
+}
diff --git a/sysdeps/unix/sysv/linux/s390/sys/ptrace.h b/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
index 70eb4f8222..ac186387fc 100644
--- a/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/s390/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux version.
- Copyright (C) 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2006, 2007 Free Software Foundation, Inc.
Contributed by Denis Joseph Barrow (djbarrow@de.ibm.com).
This file is part of the GNU C Library.
@@ -138,6 +138,28 @@ enum __ptrace_request
#define PT_SETSIGINFO PTRACE_SETSIGINFO
};
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
diff --git a/sysdeps/unix/sysv/linux/sh/bits/fcntl.h b/sysdeps/unix/sysv/linux/sh/bits/fcntl.h
index 6de33302ee..83ca3c2861 100644
--- a/sysdeps/unix/sysv/linux/sh/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/sh/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux.
- Copyright (C) 1995, 1996, 1997, 1998, 2000, 2004, 2006
+ Copyright (C) 1995, 1996, 1997, 1998, 2000, 2004, 2006, 2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -50,6 +50,7 @@
# define O_DIRECTORY 0200000 /* Must be a directory. */
# define O_NOFOLLOW 0400000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
/* For now Linux has synchronisity options for data and read operations.
diff --git a/sysdeps/unix/sysv/linux/sh/clone.S b/sysdeps/unix/sysv/linux/sh/clone.S
index 7941c6b3ad..f892c475dd 100644
--- a/sysdeps/unix/sysv/linux/sh/clone.S
+++ b/sysdeps/unix/sysv/linux/sh/clone.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 1999, 2000, 2003, 2004, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -32,12 +32,12 @@
ENTRY(__clone)
/* sanity check arguments. */
tst r4, r4
- bf/s 1f
+ bt/s 0f
tst r5, r5
- bf/s 1f
- mov #-EINVAL,r0
+ bf 1f
+0:
bra .Lsyscall_error
- nop
+ mov #-EINVAL,r0
1:
/* insert the args onto the new stack */
mov.l r7, @-r5
diff --git a/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h b/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
index 17907c4a38..a7b204b33a 100644
--- a/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux/SPARC version.
- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2006
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2006, 2007
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -183,6 +183,28 @@ enum __ptrace_request
#define PT_SETSIGINFO PTRACE_SETSIGINFO
};
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
diff --git a/sysdeps/unix/sysv/linux/sys/ptrace.h b/sysdeps/unix/sysv/linux/sys/ptrace.h
index 44284cb307..08658f9764 100644
--- a/sysdeps/unix/sysv/linux/sys/ptrace.h
+++ b/sysdeps/unix/sysv/linux/sys/ptrace.h
@@ -1,5 +1,5 @@
/* `ptrace' debugger support interface. Linux version.
- Copyright (C) 1996-1999,2000,2006 Free Software Foundation, Inc.
+ Copyright (C) 1996-1999,2000,2006,2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -129,6 +129,29 @@ enum __ptrace_request
#define PT_SETSIGINFO PTRACE_SETSIGINFO
};
+
+/* Options set using PTRACE_SETOPTIONS. */
+enum __ptrace_setoptions {
+ PTRACE_O_TRACESYSGOOD = 0x00000001,
+ PTRACE_O_TRACEFORK = 0x00000002,
+ PTRACE_O_TRACEVFORK = 0x00000004,
+ PTRACE_O_TRACECLONE = 0x00000008,
+ PTRACE_O_TRACEEXEC = 0x00000010,
+ PTRACE_O_TRACEVFORKDONE = 0x00000020,
+ PTRACE_O_TRACEEXIT = 0x00000040,
+ PTRACE_O_MASK = 0x0000007f
+};
+
+/* Wait extended result codes for the above trace options. */
+enum __ptrace_eventcodes {
+ PTRACE_EVENT_FORK = 1,
+ PTRACE_EVENT_VFORK = 2,
+ PTRACE_EVENT_CLONE = 3,
+ PTRACE_EVENT_EXEC = 4,
+ PTRACE_EVENT_VFORK_DONE = 5,
+ PTRACE_EVENT_EXIT = 6
+};
+
/* Perform process tracing functions. REQUEST is one of the values
above, and determines the action to be taken.
For all requests except PTRACE_TRACEME, PID specifies the process to be
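
The same option and event enums are added to each per-architecture copy of sys/ptrace.h above. A minimal tracer-side sketch of how they are meant to be used (watch_forks is an illustrative helper; PTRACE_SETOPTIONS and PTRACE_GETEVENTMSG are pre-existing requests):

/* Ask for a stop on fork and decode the extended event code, which
   the kernel places in bits 16..23 of the wait status.  */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

void
watch_forks (pid_t tracee)
{
  /* TRACEE is assumed to be a child already stopped under ptrace.  */
  ptrace (PTRACE_SETOPTIONS, tracee, 0,
          (void *) (long) PTRACE_O_TRACEFORK);
  ptrace (PTRACE_CONT, tracee, 0, 0);

  int status;
  if (waitpid (tracee, &status, 0) == -1)
    return;

  if (WIFSTOPPED (status)
      && ((status >> 16) & 0xff) == PTRACE_EVENT_FORK)
    {
      unsigned long new_pid;
      ptrace (PTRACE_GETEVENTMSG, tracee, 0, &new_pid);
      /* new_pid now holds the pid of the forked child.  */
    }
}
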
diff --git a/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate.c b/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate.c
new file mode 100644
index 0000000000..151174b472
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate.c
@@ -0,0 +1,59 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <fcntl.h>
+#include <kernel-features.h>
+#include <sysdep.h>
+
+#define posix_fallocate static internal_fallocate
+#include <sysdeps/posix/posix_fallocate.c>
+#undef posix_fallocate
+
+#if !defined __ASSUME_FALLOCATE && defined __NR_fallocate
+static int __have_fallocate;
+#endif
+
+
+/* Reserve storage for the data of the file associated with FD. */
+int
+posix_fallocate (int fd, __off_t offset, __off_t len)
+{
+#ifdef __NR_fallocate
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (__have_fallocate >= 0, 1))
+# endif
+ {
+ INTERNAL_SYSCALL_DECL (err);
+ int res = INTERNAL_SYSCALL (fallocate, err, 4, fd, 0, offset, len);
+
+ if (! INTERNAL_SYSCALL_ERROR_P (res, err))
+ return 0;
+
+# ifndef __ASSUME_FALLOCATE
+ if (__builtin_expect (INTERNAL_SYSCALL_ERRNO (res, err) == ENOSYS, 0))
+ __have_fallocate = -1;
+ else
+# endif
+ if (INTERNAL_SYSCALL_ERRNO (res, err) != EOPNOTSUPP)
+ return INTERNAL_SYSCALL_ERRNO (res, err);
+ }
+#endif
+
+ return internal_fallocate (fd, offset, len);
+}
+strong_alias (posix_fallocate, posix_fallocate64)
diff --git a/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate64.c b/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate64.c
new file mode 100644
index 0000000000..f466f13e45
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/wordsize-64/posix_fallocate64.c
@@ -0,0 +1 @@
+/* posix_fallocate64 is in posix_fallocate.c */
diff --git a/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h b/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h
index fa1d02bc1f..a918a0725b 100644
--- a/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h
@@ -1,5 +1,5 @@
/* O_*, F_*, FD_* bit values for Linux/x86-64.
- Copyright (C) 2001, 2002, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2002, 2004, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -50,6 +50,7 @@
# define O_DIRECTORY 0200000 /* Must be a directory. */
# define O_NOFOLLOW 0400000 /* Do not follow links. */
# define O_NOATIME 01000000 /* Do not set atime. */
+# define O_CLOEXEC 02000000 /* Set close_on_exec. */
#endif
/* For now Linux has synchronisity options for data and read operations.
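
O_CLOEXEC, added to each architecture's bits/fcntl.h in this patch, sets the close-on-exec flag at open time, closing the window in which a concurrently forking thread could exec and leak the descriptor. A caller-side sketch (open_private is an illustrative name):

/* Atomic equivalent of open followed by
   fcntl (fd, F_SETFD, FD_CLOEXEC).  */
#include <fcntl.h>

int
open_private (const char *path)
{
  return open (path, O_RDONLY | O_CLOEXEC);  /* -1 + errno on failure */
}
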
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index f8217a1757..8855b6d45f 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -55,11 +55,13 @@ static const struct intel_02_cache_info
{ 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 },
{ 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 },
{ 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 },
+ { 0x48, _SC_LEVEL2_CACHE_SIZE, 3145728, 12, 64 },
{ 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 },
{ 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 },
{ 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
{ 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
{ 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
+ { 0x4e, _SC_LEVEL2_CACHE_SIZE, 6291456, 24, 64 },
{ 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 },
{ 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 },
{ 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },
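
The 0x48 and 0x4e entries added above are CPUID leaf-2 cache descriptors (a 3 MB 12-way L2 and a 6 MB 24-way L2, respectively); cacheinfo.c feeds this table into the _SC_LEVEL*_CACHE_* sysconf parameters. A consumer-side sketch using those GNU-specific sysconf names:

/* Query the L2 geometry derived from the table above; sysconf
   reports 0 or -1 when the information is unavailable.  */
#include <stdio.h>
#include <unistd.h>

int
main (void)
{
  printf ("L2: %ld bytes, %ld-way, %ld-byte lines\n",
          sysconf (_SC_LEVEL2_CACHE_SIZE),
          sysconf (_SC_LEVEL2_CACHE_ASSOC),
          sysconf (_SC_LEVEL2_CACHE_LINESIZE));
  return 0;
}
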
diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c
new file mode 100644
index 0000000000..63d84a62e8
--- /dev/null
+++ b/sysdeps/x86_64/sched_cpucount.c
@@ -0,0 +1,26 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef __amdfam10
+# define POPCNT(l) \
+ ({ __cpu_mask r; \
+ asm ("popcntq %1, %0" : "=r" (r) : "0" (l)); \
+ r; })
+#endif
+
+#include <posix/sched_cpucount.c>
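
The POPCNT override above takes effect only when glibc itself is compiled for AMD family 10h (__amdfam10), which introduced the hardware popcnt instruction; otherwise the generic bit-counting loop in posix/sched_cpucount.c is used. Either way the result is __sched_cpucount, normally reached through the CPU_COUNT macro; a usage sketch:

/* Count the CPUs in this process's affinity mask via CPU_COUNT,
   which calls the __sched_cpucount built from this file.  */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int
main (void)
{
  cpu_set_t set;
  if (sched_getaffinity (0, sizeof (set), &set) != 0)
    return 1;
  printf ("runnable on %d CPUs\n", CPU_COUNT (&set));
  return 0;
}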