author     Andreas Schwab <schwab@redhat.com>        2010-02-22 16:34:12 +0100
committer  Andreas Schwab <schwab@redhat.com>        2010-02-22 16:41:22 +0100
commit     2b6e16c5e2fa8ad12b02a218ba32d63a4dffda18 (patch)
tree       b931e143f212d5b656497bb0c3f3e3a413fc78e1
parent     0c3f133ed106996172dd2c106b22e38ce695e63d (diff)
parent     199428c19774c12b3c4b6e6486ea9d4a021288af (diff)
Merge remote branch 'origin/master' into fedora/master
-rw-r--r--  ChangeLog | 181
-rw-r--r--  README | 21
-rw-r--r--  elf/elf.h | 25
-rw-r--r--  elf/tls-macros.h | 234
-rw-r--r--  hurd/hurd/ioctl.h | 2
-rw-r--r--  hurd/hurdioctl.c | 44
-rw-r--r--  include/fenv.h | 1
-rw-r--r--  io/ftw.c | 3
-rw-r--r--  math/fegetenv.c | 1
-rw-r--r--  sysdeps/i386/fpu/fegetenv.c | 1
-rw-r--r--  sysdeps/i386/i686/multiarch/Makefile | 4
-rw-r--r--  sysdeps/i386/i686/multiarch/memcmp-sse4.S | 1004
-rw-r--r--  sysdeps/i386/i686/multiarch/memcmp-ssse3.S | 1966
-rw-r--r--  sysdeps/i386/i686/multiarch/memcmp.S | 88
-rw-r--r--  sysdeps/i386/i686/multiarch/strcmp-sse4.S | 378
-rw-r--r--  sysdeps/i386/i686/multiarch/strcmp-ssse3.S | 2220
-rw-r--r--  sysdeps/i386/i686/multiarch/strcmp.S | 115
-rw-r--r--  sysdeps/i386/i686/multiarch/strncmp-c.c | 8
-rw-r--r--  sysdeps/i386/i686/multiarch/strncmp-sse4.S | 5
-rw-r--r--  sysdeps/i386/i686/multiarch/strncmp-ssse3.S | 5
-rw-r--r--  sysdeps/i386/i686/multiarch/strncmp.S | 3
-rw-r--r--  sysdeps/i386/lshift.S | 4
-rw-r--r--  sysdeps/i386/rshift.S | 8
-rw-r--r--  sysdeps/ia64/fpu/fegetenv.c | 1
-rw-r--r--  sysdeps/powerpc/fpu/fegetenv.c | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/configure | 9
-rw-r--r--  sysdeps/powerpc/powerpc32/configure.in | 4
-rw-r--r--  sysdeps/powerpc/powerpc32/dl-machine.h | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/dl-start.S | 5
-rw-r--r--  sysdeps/powerpc/powerpc32/elf/start.S | 10
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S | 5
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_ceil.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_ceilf.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_floor.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_floorf.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_lround.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_rint.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_round.S | 6
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_roundf.S | 6
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_trunc.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/s_truncf.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/fpu/setjmp-common.S | 5
-rw-r--r--  sysdeps/powerpc/powerpc32/memset.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S | 6
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S | 7
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S | 89
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S | 88
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S | 1
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S | 92
-rw-r--r--  sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/Implies | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S | 68
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S | 71
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S | 1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S | 69
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S | 1
-rw-r--r--  sysdeps/s390/fpu/fegetenv.c | 5
-rw-r--r--  sysdeps/s390/s390-64/utf16-utf32-z9.c | 11
-rw-r--r--  sysdeps/s390/s390-64/utf8-utf16-z9.c | 9
-rw-r--r--  sysdeps/sh/sh4/fpu/fegetenv.c | 1
-rw-r--r--  sysdeps/sparc/fpu/fegetenv.c | 1
-rw-r--r--  sysdeps/sparc/sparc32/dl-irel.h | 55
-rw-r--r--  sysdeps/sparc/sparc32/dl-machine.h | 81
-rw-r--r--  sysdeps/sparc/sparc32/dl-plt.h | 62
-rw-r--r--  sysdeps/sparc/sparc64/dl-irel.h | 58
-rw-r--r--  sysdeps/sparc/sparc64/dl-machine.h | 172
-rw-r--r--  sysdeps/sparc/sparc64/dl-plt.h | 144
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S | 7
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S | 7
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S | 5
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies | 1
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S | 5
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S | 5
-rw-r--r--  sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies | 1
-rw-r--r--  sysdeps/x86_64/fpu/fegetenv.c | 1
84 files changed, 7061 insertions, 541 deletions
diff --git a/ChangeLog b/ChangeLog
index cc4855fc27..d5b67e804f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,184 @@
+2010-02-19 Carl Fredrik Hammar <hammy.lite@gmail.com>
+
+ * hurd/hurdioctl.c (tiocsctty): Call `do_tiocsctty' instead of
+ non-existent `tiocsctty_port'.
+
+2010-02-16 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/multiarch/memcmp.S (memcmp): Use CPUID_OFFSET
+ instead of FEATURE_OFFSET.
+ * sysdeps/i386/i686/multiarch/strcmp.S (strcmp): Likewise.
+
+ * sysdeps/i386/i686/multiarch/memcmp-sse4.S: Add alignments.
+ Fix one unwind info problem.
+
+ * sysdeps/i386/i686/multiarch/memcmp-ssse3.S (less1bytes): Add CFI_POP.
+
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Simplify unwind info.
+
+2010-02-17 H.J. Lu <hongjiu.lu@intel.com>
+ Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Fix typo in unwind info.
+ Clean up a bit.
+
+2010-02-17 Carl Fredrik Hammar <hammy.lite@gmail.com>
+
+ * hurd/hurdioctl.c (tiocsctty): Only get FD ports, do work in...
+ (tiocsctty_port): ...this new function.
+
+ * hurd/hurd/ioctl.h (_HURD_HANDLE_IOCTLS_1): Cast to
+ `ioctl_handler_t'.
+
+2010-02-15 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/i386/i686/multiarch/memcmp-sse4.S: Fix unwind info.
+ * sysdeps/i386/i686/multiarch/memcmp-ssse3.S: Likewise.
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Likewise.
+ * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Likewise.
+
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Don't fall through to
+ undefined code.
+
+2010-02-12 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+ strcmp-ssse3, strcmp-sse4, strncmp-c, strncmp-ssse3, strncmp-sse4,
+ memcmp-c, memcmp-ssse3, and memcmp-sse4.
+ * sysdeps/i386/i686/multiarch/memcmp-sse4.S: New file.
+ * sysdeps/i386/i686/multiarch/memcmp-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/memcmp.S: New file.
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: New file.
+ * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/strcmp.S: New file.
+ * sysdeps/i386/i686/multiarch/strncmp-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strncmp-sse4.S: New file.
+ * sysdeps/i386/i686/multiarch/strncmp-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/strncmp.S: New file.
+
+2010-02-12 Luis Machado <luisgpm@br.ibm.com>
+
+ * sysdeps/powerpc/powerpc32/dl-machine.h: Removed old PPC_REL16 check.
+ * sysdeps/powerpc/powerpc32/dl-machine.h: Likewise.
+ * sysdeps/powerpc/powerpc32/elf/start.S: Likewise.
+ * sysdeps/powerpc/powerpc32/memset.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/configure.in: Fail if R_PPC_REL16
+ is not supported.
+ * sysdeps/powerpc/powerpc32/fpu/s_round.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_truncf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_floorf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_ceilf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_ceil.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_floor.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_roundf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_rintf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_trunc.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_lround.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_rint.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/dl-start.S: Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S:
+ Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S:
+ Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S:
+ Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S:
+ Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S: Likewise.
+
+2010-02-12 Alan Modra <amodra@gmail.com>
+
+ * elf/tls-macros.h [__powerpc__] (__TLS_CALL_CLOBBERS): Remove r3.
+ Define and use for __powerpc64__ too.
+ [__powerpc__] (TLS_LD): Add r3 to clobbers.
+ [__powerpc__] (TLS_GD): Set asm output. Make __result r3 reg.
+ [__powerpc64__] (TLS_GD): Make __result r3 reg.
+ [__powerpc64__] (TLS_IE): Relax output constraint.
+
+2010-02-11 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
+
+ * sysdeps/s390/s390-64/utf8-utf16-z9.c: Disable hardware
+ instructions cu21 and cu24. Add well-formedness checking
+ parameter and adjust the software implementation.
+ * sysdeps/s390/s390-64/utf16-utf32-z9.c: Likewise.
+
+2010-02-10 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #11271]
+ * io/ftw.c (ftw_startup): Close descriptor for initial directory
+ after changing back to it.
+
+2010-02-05 David S. Miller <davem@davemloft.net>
+
+ * elf/elf.h (R_SPARC_JMP_IREL, R_SPARC_IRELATIVE): Define.
+ * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_rela): Handle new
+ ifunc relocs.
+ (elf_machine_lazy_rel): Likewise.
+ (sparc_fixup_plt): Pull out to...
+ * sysdeps/sparc/sparc32/dl-plt.h: ...here.
+ * sysdeps/sparc/sparc32/dl-irel.h: New file.
+ * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_rela): Handle new
+ ifunc relocs.
+ (elf_machine_lazy_rel): Likewise.
+ (sparc64_fixup_plt): Pull out to...
+ * sysdeps/sparc/sparc64/dl-plt.h: ...here.
+ * sysdeps/sparc/sparc64/dl-irel.h: New file.
+
+2010-02-09 Maxim Kuvyrkov <maxim@codesourcery.com>
+
+ * elf/elf.h: Define m68k TLS relocations.
+
+2010-02-10 Luis Machado <luisgpm@br.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power7/Implies: Removed.
+ * sysdeps/powerpc/powerpc64/power7/fpu/Implies: Removed.
+ * sysdeps/powerpc/powerpc32/power7/Implies: Removed.
+ * sysdeps/powerpc/powerpc32/power7/fpu/Implies: Removed.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies: Add
+ 64-bit power7 fpu path.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies: Add
+ 32-bit power7 fpu path.
+
+2010-02-09 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/i386/rshift.S: More compact unwind information.
+
+ * sysdeps/i386/lshift.S: Fix unwind information.
+
+2010-02-08 Luis Machado <luisgpm@br.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S: New file.
+
+2010-02-08 Andreas Schwab <schwab@redhat.com>
+
+ * include/fenv.h: Add hidden proto for fegetenv.
+ * math/fegetenv.c: Add hidden alias.
+ * sysdeps/i386/fpu/fegetenv.c: Likewise.
+ * sysdeps/ia64/fpu/fegetenv.c: Likewise.
+ * sysdeps/powerpc/fpu/fegetenv.c: Likewise.
+ * sysdeps/sh/sh4/fpu/fegetenv.c: Likewise.
+ * sysdeps/sparc/fpu/fegetenv.c: Likewise.
+ * sysdeps/x86_64/fpu/fegetenv.c: Likewise.
+ * sysdeps/s390/fpu/fegetenv.c: Likewise. Remove unused headers.
+
2010-02-12 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S: Use unsigned
diff --git a/README b/README
index 322b4dac4a..7c27e3652a 100644
--- a/README
+++ b/README
@@ -14,20 +14,23 @@ In GNU/Hurd systems, it works with a microkernel and Hurd servers.
The GNU C Library implements much of the POSIX.1 functionality in the
GNU/Hurd system, using configurations i[34567]86-*-gnu.
-When working with Linux kernels, the GNU C Library version 2.4 is
-intended primarily for use with Linux kernel version 2.6.0 and later.
-We only support using the NPTL implementation of pthreads, which is now
-the default configuration. Most of the C library will continue to work
-on older Linux kernels and many programs will not require a 2.6 kernel
-to run correctly. However, pthreads and related functionality will not
-work at all on old kernels and we do not recommend using glibc 2.4 with
-any Linux kernel prior to 2.6.
+When working with Linux kernels, the GNU C Library version from
+version 2.4 on is intended primarily for use with Linux kernel version
+2.6.0 and later. We only support using the NPTL implementation of
+pthreads, which is now the default configuration. Most of the C
+library will continue to work on older Linux kernels and many programs
+will not require a 2.6 kernel to run correctly. However, pthreads and
+related functionality will not work at all on old kernels and we do
+not recommend using glibc 2.4 with any Linux kernel prior to 2.6.
All Linux kernel versions prior to 2.6.16 are known to have some bugs that
may cause some of the tests related to pthreads in "make check" to fail.
If you see such problems, please try the test suite on the most recent
Linux kernel version that you can use, before pursuing those bugs further.
+Also note that the shared version of the libgcc_s library must be
+installed for the pthread library to work correctly.
+
The old LinuxThreads add-on implementation of pthreads for older Linux
kernels is no longer supported, and we are not distributing it with this
release. Someone has volunteered to revive its maintenance unofficially
@@ -48,7 +51,6 @@ The GNU C Library supports these configurations for using Linux kernels:
sparc*-*-linux-gnu
sparc64*-*-linux-gnu
- alpha*-*-linux-gnu Requires Linux 2.6.9 for NPTL
sh[34]-*-linux-gnu Requires Linux 2.6.11
The code for other CPU configurations supported by volunteers outside of
@@ -57,6 +59,7 @@ add-on. You can find glibc-ports-VERSION distributed separately in the
same place where you got the main glibc distribution files.
Currently these configurations are known to work using the `ports' add-on:
+ alpha*-*-linux-gnu Requires Linux 2.6.9 for NPTL
arm-*-linux-gnu Requires Linux 2.6.15 for NPTL, no SMP support
arm-*-linux-gnueabi Requires Linux 2.6.16-rc1 for NPTL, no SMP
mips-*-linux-gnu Requires Linux 2.6.12 for NPTL
diff --git a/elf/elf.h b/elf/elf.h
index 8af7c177ce..204a0f9e19 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -1123,8 +1123,29 @@ typedef struct
#define R_68K_GLOB_DAT 20 /* Create GOT entry */
#define R_68K_JMP_SLOT 21 /* Create PLT entry */
#define R_68K_RELATIVE 22 /* Adjust by program base */
+#define R_68K_TLS_GD32 25 /* 32 bit GOT offset for GD */
+#define R_68K_TLS_GD16 26 /* 16 bit GOT offset for GD */
+#define R_68K_TLS_GD8 27 /* 8 bit GOT offset for GD */
+#define R_68K_TLS_LDM32 28 /* 32 bit GOT offset for LDM */
+#define R_68K_TLS_LDM16 29 /* 16 bit GOT offset for LDM */
+#define R_68K_TLS_LDM8 30 /* 8 bit GOT offset for LDM */
+#define R_68K_TLS_LDO32 31 /* 32 bit module-relative offset */
+#define R_68K_TLS_LDO16 32 /* 16 bit module-relative offset */
+#define R_68K_TLS_LDO8 33 /* 8 bit module-relative offset */
+#define R_68K_TLS_IE32 34 /* 32 bit GOT offset for IE */
+#define R_68K_TLS_IE16 35 /* 16 bit GOT offset for IE */
+#define R_68K_TLS_IE8 36 /* 8 bit GOT offset for IE */
+#define R_68K_TLS_LE32 37 /* 32 bit offset relative to
+ static TLS block */
+#define R_68K_TLS_LE16 38 /* 16 bit offset relative to
+ static TLS block */
+#define R_68K_TLS_LE8 39 /* 8 bit offset relative to
+ static TLS block */
+#define R_68K_TLS_DTPMOD32 40 /* 32 bit module number */
+#define R_68K_TLS_DTPREL32 41 /* 32 bit module-relative offset */
+#define R_68K_TLS_TPREL32 42 /* 32 bit TP-relative offset */
/* Keep this the last entry. */
-#define R_68K_NUM 23
+#define R_68K_NUM 43
/* Intel 80386 specific definitions. */
@@ -1303,6 +1324,8 @@ typedef struct
#define R_SPARC_H34 85
#define R_SPARC_SIZE32 86
#define R_SPARC_SIZE64 87
+#define R_SPARC_JMP_IREL 248
+#define R_SPARC_IRELATIVE 249
#define R_SPARC_GNU_VTINHERIT 250
#define R_SPARC_GNU_VTENTRY 251
#define R_SPARC_REV32 252
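The two new R_SPARC_JMP_IREL / R_SPARC_IRELATIVE values back the SPARC ifunc support listed in the ChangeLog above. For orientation only, a minimal GNU indirect function looks like the hypothetical sketch below (the names and the hwcap comment are assumptions, not part of this patch); the dynamic linker runs the resolver during relocation and records its result in the slot these relocations describe.

    /* Hypothetical IFUNC example, not part of the patch.  The resolver is
       run by the dynamic linker and its return value becomes the target of
       the IRELATIVE/JMP_IREL slot.  */
    static int add_generic (int x, int y) { return x + y; }
    static int add_optimized (int x, int y) { return x + y; }  /* stand-in variant */

    static __typeof__ (add_generic) *
    resolve_add (void)
    {
      /* A real resolver would inspect hardware capability bits here.  */
      return add_generic;
    }

    int add (int, int) __attribute__ ((ifunc ("resolve_add")));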
diff --git a/elf/tls-macros.h b/elf/tls-macros.h
index 6463a6c3f9..781256db1e 100644
--- a/elf/tls-macros.h
+++ b/elf/tls-macros.h
@@ -701,154 +701,146 @@ register void *__gp __asm__("$29");
(int *) (__builtin_thread_pointer() + __offset); })
# endif
-#elif defined __powerpc__ && !defined __powerpc64__
+#elif defined __powerpc__
-#include "config.h"
-
-# define __TLS_CALL_CLOBBERS \
- "0", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", \
+# define __TLS_CALL_CLOBBERS \
+ "0", "4", "5", "6", "7", "8", "9", "10", "11", "12", \
"lr", "ctr", "cr0", "cr1", "cr5", "cr6", "cr7"
+# ifndef __powerpc64__
+
+# include "config.h"
+
/* PowerPC32 Local Exec TLS access. */
-# define TLS_LE(x) \
- ({ int *__result; \
- asm ("addi %0,2," #x "@tprel" \
- : "=r" (__result)); \
+# define TLS_LE(x) \
+ ({ int *__result; \
+ asm ("addi %0,2," #x "@tprel" \
+ : "=r" (__result)); \
__result; })
/* PowerPC32 Initial Exec TLS access. */
-# ifdef HAVE_ASM_PPC_REL16
-# define TLS_IE(x) \
- ({ int *__result; \
- asm ("bcl 20,31,1f\n1:\t" \
- "mflr %0\n\t" \
- "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
- "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
- "lwz %0," #x "@got@tprel(%0)\n\t" \
- "add %0,%0," #x "@tls" \
- : "=b" (__result) : \
- : "lr"); \
+# ifdef HAVE_ASM_PPC_REL16
+# define TLS_IE(x) \
+ ({ int *__result; \
+ asm ("bcl 20,31,1f\n1:\t" \
+ "mflr %0\n\t" \
+ "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
+ "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
+ "lwz %0," #x "@got@tprel(%0)\n\t" \
+ "add %0,%0," #x "@tls" \
+ : "=b" (__result) : \
+ : "lr"); \
__result; })
-# else
-# define TLS_IE(x) \
- ({ int *__result; \
- asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
- "mflr %0\n\t" \
- "lwz %0," #x "@got@tprel(%0)\n\t" \
- "add %0,%0," #x "@tls" \
- : "=b" (__result) : \
- : "lr"); \
+# else
+# define TLS_IE(x) \
+ ({ int *__result; \
+ asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
+ "mflr %0\n\t" \
+ "lwz %0," #x "@got@tprel(%0)\n\t" \
+ "add %0,%0," #x "@tls" \
+ : "=b" (__result) : \
+ : "lr"); \
__result; })
-# endif
+# endif
/* PowerPC32 Local Dynamic TLS access. */
-# ifdef HAVE_ASM_PPC_REL16
-# define TLS_LD(x) \
- ({ int *__result; \
- asm ("bcl 20,31,1f\n1:\t" \
- "mflr 3\n\t" \
- "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
- "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
- "addi 3,3," #x "@got@tlsld\n\t" \
- "bl __tls_get_addr@plt\n\t" \
- "addi %0,3," #x "@dtprel" \
- : "=r" (__result) : \
- : __TLS_CALL_CLOBBERS); \
+# ifdef HAVE_ASM_PPC_REL16
+# define TLS_LD(x) \
+ ({ int *__result; \
+ asm ("bcl 20,31,1f\n1:\t" \
+ "mflr 3\n\t" \
+ "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
+ "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
+ "addi 3,3," #x "@got@tlsld\n\t" \
+ "bl __tls_get_addr@plt\n\t" \
+ "addi %0,3," #x "@dtprel" \
+ : "=r" (__result) : \
+ : "3", __TLS_CALL_CLOBBERS); \
__result; })
-# else
-# define TLS_LD(x) \
- ({ int *__result; \
- asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
- "mflr 3\n\t" \
- "addi 3,3," #x "@got@tlsld\n\t" \
- "bl __tls_get_addr@plt\n\t" \
- "addi %0,3," #x "@dtprel" \
- : "=r" (__result) : \
- : __TLS_CALL_CLOBBERS); \
+# else
+# define TLS_LD(x) \
+ ({ int *__result; \
+ asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
+ "mflr 3\n\t" \
+ "addi 3,3," #x "@got@tlsld\n\t" \
+ "bl __tls_get_addr@plt\n\t" \
+ "addi %0,3," #x "@dtprel" \
+ : "=r" (__result) : \
+ : "3", __TLS_CALL_CLOBBERS); \
__result; })
-# endif
+# endif
/* PowerPC32 General Dynamic TLS access. */
-# ifdef HAVE_ASM_PPC_REL16
-# define TLS_GD(x) \
- ({ register int *__result __asm__ ("r3"); \
- asm ("bcl 20,31,1f\n1:\t" \
- "mflr 3\n\t" \
- "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
- "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
- "addi 3,3," #x "@got@tlsgd\n\t" \
- "bl __tls_get_addr@plt" \
- : : \
- : __TLS_CALL_CLOBBERS); \
+# ifdef HAVE_ASM_PPC_REL16
+# define TLS_GD(x) \
+ ({ register int *__result __asm__ ("r3"); \
+ asm ("bcl 20,31,1f\n1:\t" \
+ "mflr 3\n\t" \
+ "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
+ "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
+ "addi 3,3," #x "@got@tlsgd\n\t" \
+ "bl __tls_get_addr@plt" \
+ : "=r" (__result) : \
+ : __TLS_CALL_CLOBBERS); \
__result; })
-# else
-# define TLS_GD(x) \
- ({ register int *__result __asm__ ("r3"); \
- asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
- "mflr 3\n\t" \
- "addi 3,3," #x "@got@tlsgd\n\t" \
- "bl __tls_get_addr@plt" \
- : : \
- : __TLS_CALL_CLOBBERS); \
+# else
+# define TLS_GD(x) \
+ ({ register int *__result __asm__ ("r3"); \
+ asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
+ "mflr 3\n\t" \
+ "addi 3,3," #x "@got@tlsgd\n\t" \
+ "bl __tls_get_addr@plt" \
+ : "=r" (__result) : \
+ : __TLS_CALL_CLOBBERS); \
__result; })
-# endif
+# endif
-#elif defined __powerpc__ && defined __powerpc64__
+# else
/* PowerPC64 Local Exec TLS access. */
-# define TLS_LE(x) \
- ({ int * __result; \
- asm ( \
- " addis %0,13," #x "@tprel@ha\n" \
- " addi %0,%0," #x "@tprel@l\n" \
- : "=b" (__result) ); \
- __result; \
+# define TLS_LE(x) \
+ ({ int * __result; \
+ asm ("addis %0,13," #x "@tprel@ha\n\t" \
+ "addi %0,%0," #x "@tprel@l" \
+ : "=b" (__result) ); \
+ __result; \
})
/* PowerPC64 Initial Exec TLS access. */
-# define TLS_IE(x) \
- ({ int * __result; \
- asm ( \
- " ld %0," #x "@got@tprel(2)\n" \
- " add %0,%0," #x "@tls\n" \
- : "=b" (__result) ); \
- __result; \
+# define TLS_IE(x) \
+ ({ int * __result; \
+ asm ("ld %0," #x "@got@tprel(2)\n\t" \
+ "add %0,%0," #x "@tls" \
+ : "=r" (__result) ); \
+ __result; \
})
-# ifdef HAVE_ASM_GLOBAL_DOT_NAME
-# define __TLS_GET_ADDR ".__tls_get_addr"
-# else
-# define __TLS_GET_ADDR "__tls_get_addr"
-# endif
+# ifdef HAVE_ASM_GLOBAL_DOT_NAME
+# define __TLS_GET_ADDR ".__tls_get_addr"
+# else
+# define __TLS_GET_ADDR "__tls_get_addr"
+# endif
/* PowerPC64 Local Dynamic TLS access. */
-# define TLS_LD(x) \
- ({ int * __result; \
- asm ( \
- " addi 3,2," #x "@got@tlsld\n" \
- " bl " __TLS_GET_ADDR "\n" \
- " nop \n" \
- " addis %0,3," #x "@dtprel@ha\n" \
- " addi %0,%0," #x "@dtprel@l\n" \
- : "=b" (__result) : \
- : "0", "3", "4", "5", "6", "7", \
- "8", "9", "10", "11", "12", \
- "lr", "ctr", \
- "cr0", "cr1", "cr5", "cr6", "cr7"); \
- __result; \
+# define TLS_LD(x) \
+ ({ int * __result; \
+ asm ("addi 3,2," #x "@got@tlsld\n\t" \
+ "bl " __TLS_GET_ADDR "\n\t" \
+ "nop \n\t" \
+ "addis %0,3," #x "@dtprel@ha\n\t" \
+ "addi %0,%0," #x "@dtprel@l" \
+ : "=b" (__result) : \
+ : "3", __TLS_CALL_CLOBBERS); \
+ __result; \
})
/* PowerPC64 General Dynamic TLS access. */
-# define TLS_GD(x) \
- ({ int * __result; \
- asm ( \
- " addi 3,2," #x "@got@tlsgd\n" \
- " bl " __TLS_GET_ADDR "\n" \
- " nop \n" \
- " mr %0,3\n" \
- : "=b" (__result) : \
- : "0", "3", "4", "5", "6", "7", \
- "8", "9", "10", "11", "12", \
- "lr", "ctr", \
- "cr0", "cr1", "cr5", "cr6", "cr7"); \
- __result; \
+# define TLS_GD(x) \
+ ({ register int *__result __asm__ ("r3"); \
+ asm ("addi 3,2," #x "@got@tlsgd\n\t" \
+ "bl " __TLS_GET_ADDR "\n\t" \
+ "nop " \
+ : "=r" (__result) : \
+ : __TLS_CALL_CLOBBERS); \
+ __result; \
})
+# endif
#elif !defined TLS_LE || !defined TLS_IE \
|| !defined TLS_LD || !defined TLS_GD
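For context, these TLS_* macros are used by the glibc TLS test programs; the hunk above reworks the PowerPC variants so that the 32-bit and 64-bit paths share __TLS_CALL_CLOBBERS and so that TLS_GD really hands the result back in r3. A minimal usage sketch, assuming a hypothetical test variable `foo` (illustrative only, not part of the patch):

    #include "tls-macros.h"

    __thread int foo;            /* thread-local variable accessed via the macros */

    static int
    check_tls_models (void)
    {
      int *gd = TLS_GD (foo);    /* general-dynamic access; address returned in r3 */
      int *le = TLS_LE (foo);    /* local-exec access to the same object */
      *gd = 42;
      return *le == 42;          /* both pointers should name the same TLS slot */
    }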
diff --git a/hurd/hurd/ioctl.h b/hurd/hurd/ioctl.h
index ee156f02f9..e5ab3dc965 100644
--- a/hurd/hurd/ioctl.h
+++ b/hurd/hurd/ioctl.h
@@ -57,7 +57,7 @@ extern int hurd_register_ioctl_handler (int first_request, int last_request,
static const struct ioctl_handler handler##_ioctl_handler##moniker \
__attribute__ ((__unused__)) = \
{ _IOC_NOTYPE (first), _IOC_NOTYPE (last), \
- (int (*) (int, int, void *)) (handler), NULL }; \
+ (ioctl_handler_t) (handler), NULL }; \
text_set_element (_hurd_ioctl_handler_lists, \
handler##_ioctl_handler##moniker)
#define _HURD_HANDLE_IOCTLS(handler, first, last) \
diff --git a/hurd/hurdioctl.c b/hurd/hurdioctl.c
index 7c689841ca..04d98629ef 100644
--- a/hurd/hurdioctl.c
+++ b/hurd/hurdioctl.c
@@ -1,5 +1,5 @@
/* ioctl commands which must be done in the C library.
- Copyright (C) 1994,95,96,97,99,2001,2002,2009
+ Copyright (C) 1994,95,96,97,99,2001,2002,2009,2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -239,34 +239,40 @@ _hurd_setcttyid (mach_port_t cttyid)
}
-/* Make FD be the controlling terminal.
- This function is called for `ioctl (fd, TCIOSCTTY)'. */
-
-static int
-tiocsctty (int fd,
- int request) /* Always TIOCSCTTY. */
+static inline error_t
+do_tiocsctty (io_t port, io_t ctty)
{
mach_port_t cttyid;
error_t err;
- /* Get FD's cttyid port, unless it is already ours. */
- err = HURD_DPORT_USE (fd, ctty != MACH_PORT_NULL ? EADDRINUSE :
- __term_getctty (port, &cttyid));
- if (err == EADDRINUSE)
- /* FD is already the ctty. Nothing to do. */
+ if (ctty != MACH_PORT_NULL)
+ /* PORT is already the ctty. Nothing to do. */
return 0;
- else if (err)
- return __hurd_fail (err);
+
+ /* Get PORT's cttyid port. */
+ err = __term_getctty (port, &cttyid);
+ if (err)
+ return err;
/* Change the terminal's pgrp to ours. */
- err = HURD_DPORT_USE (fd, __tioctl_tiocspgrp (port, _hurd_pgrp));
+ err = __tioctl_tiocspgrp (port, _hurd_pgrp);
if (err)
- return __hurd_fail (err);
+ __mach_port_deallocate (__mach_task_self (), cttyid);
+ else
+ /* Make it our own. */
+ install_ctty (cttyid);
- /* Make it our own. */
- install_ctty (cttyid);
+ return err;
+}
- return 0;
+/* Make FD be the controlling terminal.
+ This function is called for `ioctl (fd, TCIOSCTTY)'. */
+
+static int
+tiocsctty (int fd,
+ int request) /* Always TIOCSCTTY. */
+{
+ return __hurd_fail (HURD_DPORT_USE (fd, do_tiocsctty (port, ctty)));
}
_HURD_HANDLE_IOCTL (tiocsctty, TIOCSCTTY);
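A note on the pattern above: HURD_DPORT_USE evaluates its second argument with the descriptor's `port' and `ctty' variables in scope, which is why the rewritten tiocsctty can pass them straight to do_tiocsctty. A hypothetical handler following the same shape (do_example and example_handler are made up for this sketch, not part of the patch):

    /* Hypothetical ioctl handler mirroring the structure of tiocsctty above.  */
    static error_t
    do_example (io_t port, io_t ctty)
    {
      return ctty == MACH_PORT_NULL ? EINVAL : 0;
    }

    static int
    example_handler (int fd, int request)
    {
      return __hurd_fail (HURD_DPORT_USE (fd, do_example (port, ctty)));
    }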
diff --git a/include/fenv.h b/include/fenv.h
index 3aec7e52bb..254162d45c 100644
--- a/include/fenv.h
+++ b/include/fenv.h
@@ -13,6 +13,7 @@ extern int __fesetenv (__const fenv_t *__envp);
extern int __feupdateenv (__const fenv_t *__envp);
libm_hidden_proto (feraiseexcept)
+libm_hidden_proto (fegetenv)
libm_hidden_proto (fesetenv)
libm_hidden_proto (fesetround)
libm_hidden_proto (feholdexcept)
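The hidden proto added here lets internal libm callers reach fegetenv without going through the PLT; the exported behaviour is unchanged. As a reminder of what the function does, a small self-contained user-level example using only the standard C99 fenv API:

    #include <fenv.h>

    /* Run FN with the caller's floating-point environment saved and restored.  */
    static void
    with_saved_fp_env (void (*fn) (void))
    {
      fenv_t env;
      fegetenv (&env);   /* capture rounding mode, exception state, etc. */
      fn ();
      fesetenv (&env);   /* put the saved environment back */
    }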
diff --git a/io/ftw.c b/io/ftw.c
index 9cc09077ed..bb7dba8ca8 100644
--- a/io/ftw.c
+++ b/io/ftw.c
@@ -1,5 +1,5 @@
/* File tree walker functions.
- Copyright (C) 1996-2004, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996-2004, 2006-2008, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -790,6 +790,7 @@ ftw_startup (const char *dir, int is_nftw, void *func, int descriptors,
{
int save_err = errno;
__fchdir (cwdfd);
+ close_not_cancel_no_status (cwdfd);
__set_errno (save_err);
}
else if (cwd != NULL)
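The added close_not_cancel_no_status call fixes a descriptor leak (BZ #11271): the descriptor opened on the starting directory was used to fchdir back but never closed. The same pattern in plain POSIX terms, as a hedged illustration (run_in_dir and its names are made up for this sketch; it is not the glibc code):

    #include <errno.h>
    #include <fcntl.h>
    #include <unistd.h>

    /* Run FN inside DIR, then return to and release the starting directory.  */
    static int
    run_in_dir (const char *dir, int (*fn) (void))
    {
      int cwdfd = open (".", O_RDONLY | O_DIRECTORY);
      if (cwdfd < 0)
        return -1;
      if (chdir (dir) < 0)
        {
          close (cwdfd);
          return -1;
        }
      int r = fn ();
      int save_err = errno;
      fchdir (cwdfd);
      close (cwdfd);        /* the step the patch above adds to ftw_startup */
      errno = save_err;
      return r;
    }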
diff --git a/math/fegetenv.c b/math/fegetenv.c
index 4a878cc41b..5b524307db 100644
--- a/math/fegetenv.c
+++ b/math/fegetenv.c
@@ -32,6 +32,7 @@ __fegetenv (fenv_t *envp)
strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, BP_SYM (__old_fegetenv), BP_SYM (fegetenv), GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, BP_SYM (__fegetenv), BP_SYM (fegetenv), GLIBC_2_2);
stub_warning (fegetenv)
diff --git a/sysdeps/i386/fpu/fegetenv.c b/sysdeps/i386/fpu/fegetenv.c
index fb955cf565..ddb67e5d83 100644
--- a/sysdeps/i386/fpu/fegetenv.c
+++ b/sysdeps/i386/fpu/fegetenv.c
@@ -40,4 +40,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, BP_SYM (__old_fegetenv), BP_SYM (fegetenv), GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, BP_SYM (__fegetenv), BP_SYM (fegetenv), GLIBC_2_2);
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index fbad9ae734..e8847d6fc4 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -7,7 +7,9 @@ ifeq ($(subdir),string)
sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
- memset-sse2-rep bzero-sse2-rep
+ memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
+ strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
+ memcmp-ssse3 memcmp-sse4
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-strcspn-c.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
new file mode 100644
index 0000000000..b1ed778f1f
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -0,0 +1,1004 @@
+/* memcmp with SSE4.2
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#ifndef MEMCMP
+# define MEMCMP __memcmp_sse4_2
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#define PARMS 4
+#define BLK1 PARMS
+#define BLK2 BLK1+4
+#define LEN BLK2+4
+#define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
+
+
+#ifdef SHARED
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. INDEX is a register that contains the
+ index into the jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EBX. */ \
+ call __i686.get_pc_thunk.bx; \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table and adjusted EDX/ESI. Go. */ \
+ jmp *%ebx
+
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ ALIGN (4)
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+ movl (%esp), %ebx
+ ret
+#else
+# define JMPTBL(I, B) I
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. INDEX is a register that contains the
+ index into the jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+#endif
+
+ .section .text.sse4.2,"ax",@progbits
+ENTRY (MEMCMP)
+ movl BLK1(%esp), %eax
+ movl BLK2(%esp), %edx
+ movl LEN(%esp), %ecx
+ cmp $1, %ecx
+ jbe L(less1bytes)
+ pxor %xmm0, %xmm0
+ cmp $64, %ecx
+ ja L(64bytesormore)
+ cmp $8, %ecx
+ PUSH (%ebx)
+ jb L(less8bytes)
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
+
+ ALIGN (4)
+L(less8bytes):
+ mov (%eax), %bl
+ cmpb (%edx), %bl
+ jne L(nonzero)
+
+ mov 1(%eax), %bl
+ cmpb 1(%edx), %bl
+ jne L(nonzero)
+
+ cmp $2, %ecx
+ jz L(0bytes)
+
+ mov 2(%eax), %bl
+ cmpb 2(%edx), %bl
+ jne L(nonzero)
+
+ cmp $3, %ecx
+ jz L(0bytes)
+
+ mov 3(%eax), %bl
+ cmpb 3(%edx), %bl
+ jne L(nonzero)
+
+ cmp $4, %ecx
+ jz L(0bytes)
+
+ mov 4(%eax), %bl
+ cmpb 4(%edx), %bl
+ jne L(nonzero)
+
+ cmp $5, %ecx
+ jz L(0bytes)
+
+ mov 5(%eax), %bl
+ cmpb 5(%edx), %bl
+ jne L(nonzero)
+
+ cmp $6, %ecx
+ jz L(0bytes)
+
+ mov 6(%eax), %bl
+ cmpb 6(%edx), %bl
+ je L(0bytes)
+L(nonzero):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(above)
+ neg %eax
+L(above):
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(0bytes):
+ POP (%ebx)
+ xor %eax, %eax
+ ret
+
+ ALIGN (4)
+L(less1bytes):
+ jb L(0bytesend)
+ movzbl (%eax), %eax
+ movzbl (%edx), %edx
+ sub %edx, %eax
+ ret
+
+ ALIGN (4)
+L(0bytesend):
+ xor %eax, %eax
+ ret
+
+ ALIGN (4)
+L(64bytesormore):
+ PUSH (%ebx)
+ mov %ecx, %ebx
+ mov $64, %ecx
+ sub $64, %ebx
+L(64bytesormore_loop):
+ movdqu (%eax), %xmm1
+ movdqu (%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_16diff)
+
+ movdqu 16(%eax), %xmm1
+ movdqu 16(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_32diff)
+
+ movdqu 32(%eax), %xmm1
+ movdqu 32(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_48diff)
+
+ movdqu 48(%eax), %xmm1
+ movdqu 48(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_64diff)
+ add %ecx, %eax
+ add %ecx, %edx
+ sub %ecx, %ebx
+ jae L(64bytesormore_loop)
+ add %ebx, %ecx
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
+
+ ALIGN (4)
+L(find_16diff):
+ sub $16, %ecx
+L(find_32diff):
+ sub $16, %ecx
+L(find_48diff):
+ sub $16, %ecx
+L(find_64diff):
+ add %ecx, %edx
+ add %ecx, %eax
+ jmp L(16bytes)
+
+ ALIGN (4)
+L(16bytes):
+ mov -16(%eax), %ecx
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(12bytes):
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(8bytes):
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(4bytes):
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(49bytes):
+ movdqu -49(%eax), %xmm1
+ movdqu -49(%edx), %xmm2
+ mov $-49, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(33bytes):
+ movdqu -33(%eax), %xmm1
+ movdqu -33(%edx), %xmm2
+ mov $-33, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(17bytes):
+ mov -17(%eax), %ecx
+ mov -17(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(13bytes):
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(9bytes):
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(5bytes):
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(50bytes):
+ mov $-50, %ebx
+ movdqu -50(%eax), %xmm1
+ movdqu -50(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(34bytes):
+ mov $-34, %ebx
+ movdqu -34(%eax), %xmm1
+ movdqu -34(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(18bytes):
+ mov -18(%eax), %ecx
+ mov -18(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(14bytes):
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(10bytes):
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(6bytes):
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(2bytes):
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(51bytes):
+ mov $-51, %ebx
+ movdqu -51(%eax), %xmm1
+ movdqu -51(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(35bytes):
+ mov $-35, %ebx
+ movdqu -35(%eax), %xmm1
+ movdqu -35(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(19bytes):
+ movl -19(%eax), %ecx
+ movl -19(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(15bytes):
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(11bytes):
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(7bytes):
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(3bytes):
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+L(1bytes):
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(52bytes):
+ movdqu -52(%eax), %xmm1
+ movdqu -52(%edx), %xmm2
+ mov $-52, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(36bytes):
+ movdqu -36(%eax), %xmm1
+ movdqu -36(%edx), %xmm2
+ mov $-36, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(20bytes):
+ movdqu -20(%eax), %xmm1
+ movdqu -20(%edx), %xmm2
+ mov $-20, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(53bytes):
+ movdqu -53(%eax), %xmm1
+ movdqu -53(%edx), %xmm2
+ mov $-53, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(37bytes):
+ mov $-37, %ebx
+ movdqu -37(%eax), %xmm1
+ movdqu -37(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(21bytes):
+ mov $-21, %ebx
+ movdqu -21(%eax), %xmm1
+ movdqu -21(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(54bytes):
+ movdqu -54(%eax), %xmm1
+ movdqu -54(%edx), %xmm2
+ mov $-54, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(38bytes):
+ mov $-38, %ebx
+ movdqu -38(%eax), %xmm1
+ movdqu -38(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(22bytes):
+ mov $-22, %ebx
+ movdqu -22(%eax), %xmm1
+ movdqu -22(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(55bytes):
+ movdqu -55(%eax), %xmm1
+ movdqu -55(%edx), %xmm2
+ mov $-55, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(39bytes):
+ mov $-39, %ebx
+ movdqu -39(%eax), %xmm1
+ movdqu -39(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(23bytes):
+ mov $-23, %ebx
+ movdqu -23(%eax), %xmm1
+ movdqu -23(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(56bytes):
+ movdqu -56(%eax), %xmm1
+ movdqu -56(%edx), %xmm2
+ mov $-56, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(40bytes):
+ mov $-40, %ebx
+ movdqu -40(%eax), %xmm1
+ movdqu -40(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(24bytes):
+ mov $-24, %ebx
+ movdqu -24(%eax), %xmm1
+ movdqu -24(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(57bytes):
+ movdqu -57(%eax), %xmm1
+ movdqu -57(%edx), %xmm2
+ mov $-57, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(41bytes):
+ mov $-41, %ebx
+ movdqu -41(%eax), %xmm1
+ movdqu -41(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(25bytes):
+ mov $-25, %ebx
+ movdqu -25(%eax), %xmm1
+ movdqu -25(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(58bytes):
+ movdqu -58(%eax), %xmm1
+ movdqu -58(%edx), %xmm2
+ mov $-58, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(42bytes):
+ mov $-42, %ebx
+ movdqu -42(%eax), %xmm1
+ movdqu -42(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(26bytes):
+ mov $-26, %ebx
+ movdqu -26(%eax), %xmm1
+ movdqu -26(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(59bytes):
+ movdqu -59(%eax), %xmm1
+ movdqu -59(%edx), %xmm2
+ mov $-59, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(43bytes):
+ mov $-43, %ebx
+ movdqu -43(%eax), %xmm1
+ movdqu -43(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(27bytes):
+ mov $-27, %ebx
+ movdqu -27(%eax), %xmm1
+ movdqu -27(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(60bytes):
+ movdqu -60(%eax), %xmm1
+ movdqu -60(%edx), %xmm2
+ mov $-60, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(44bytes):
+ mov $-44, %ebx
+ movdqu -44(%eax), %xmm1
+ movdqu -44(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(28bytes):
+ mov $-28, %ebx
+ movdqu -28(%eax), %xmm1
+ movdqu -28(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(61bytes):
+ movdqu -61(%eax), %xmm1
+ movdqu -61(%edx), %xmm2
+ mov $-61, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(45bytes):
+ mov $-45, %ebx
+ movdqu -45(%eax), %xmm1
+ movdqu -45(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(29bytes):
+ mov $-29, %ebx
+ movdqu -29(%eax), %xmm1
+ movdqu -29(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(62bytes):
+ movdqu -62(%eax), %xmm1
+ movdqu -62(%edx), %xmm2
+ mov $-62, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(46bytes):
+ mov $-46, %ebx
+ movdqu -46(%eax), %xmm1
+ movdqu -46(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(30bytes):
+ mov $-30, %ebx
+ movdqu -30(%eax), %xmm1
+ movdqu -30(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(63bytes):
+ movdqu -63(%eax), %xmm1
+ movdqu -63(%edx), %xmm2
+ mov $-63, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(47bytes):
+ mov $-47, %ebx
+ movdqu -47(%eax), %xmm1
+ movdqu -47(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(31bytes):
+ mov $-31, %ebx
+ movdqu -31(%eax), %xmm1
+ movdqu -31(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(64bytes):
+ movdqu -64(%eax), %xmm1
+ movdqu -64(%edx), %xmm2
+ mov $-64, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(48bytes):
+ movdqu -48(%eax), %xmm1
+ movdqu -48(%edx), %xmm2
+ mov $-48, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(32bytes):
+ movdqu -32(%eax), %xmm1
+ movdqu -32(%edx), %xmm2
+ mov $-32, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -16(%eax), %ecx
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(less16bytes):
+ add %ebx, %eax
+ add %ebx, %edx
+
+ mov (%eax), %ecx
+ mov (%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 4(%eax), %ecx
+ mov 4(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 8(%eax), %ecx
+ mov 8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 12(%eax), %ecx
+ mov 12(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(find_diff):
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ shr $16,%ecx
+ shr $16,%ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+L(end):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(bigger)
+ neg %eax
+L(bigger):
+ ret
+END (MEMCMP)
+
+ .section .rodata.sse4.2,"a",@progbits
+ ALIGN (2)
+ .type L(table_64bytes), @object
+L(table_64bytes):
+ .int JMPTBL (L(0bytes), L(table_64bytes))
+ .int JMPTBL (L(1bytes), L(table_64bytes))
+ .int JMPTBL (L(2bytes), L(table_64bytes))
+ .int JMPTBL (L(3bytes), L(table_64bytes))
+ .int JMPTBL (L(4bytes), L(table_64bytes))
+ .int JMPTBL (L(5bytes), L(table_64bytes))
+ .int JMPTBL (L(6bytes), L(table_64bytes))
+ .int JMPTBL (L(7bytes), L(table_64bytes))
+ .int JMPTBL (L(8bytes), L(table_64bytes))
+ .int JMPTBL (L(9bytes), L(table_64bytes))
+ .int JMPTBL (L(10bytes), L(table_64bytes))
+ .int JMPTBL (L(11bytes), L(table_64bytes))
+ .int JMPTBL (L(12bytes), L(table_64bytes))
+ .int JMPTBL (L(13bytes), L(table_64bytes))
+ .int JMPTBL (L(14bytes), L(table_64bytes))
+ .int JMPTBL (L(15bytes), L(table_64bytes))
+ .int JMPTBL (L(16bytes), L(table_64bytes))
+ .int JMPTBL (L(17bytes), L(table_64bytes))
+ .int JMPTBL (L(18bytes), L(table_64bytes))
+ .int JMPTBL (L(19bytes), L(table_64bytes))
+ .int JMPTBL (L(20bytes), L(table_64bytes))
+ .int JMPTBL (L(21bytes), L(table_64bytes))
+ .int JMPTBL (L(22bytes), L(table_64bytes))
+ .int JMPTBL (L(23bytes), L(table_64bytes))
+ .int JMPTBL (L(24bytes), L(table_64bytes))
+ .int JMPTBL (L(25bytes), L(table_64bytes))
+ .int JMPTBL (L(26bytes), L(table_64bytes))
+ .int JMPTBL (L(27bytes), L(table_64bytes))
+ .int JMPTBL (L(28bytes), L(table_64bytes))
+ .int JMPTBL (L(29bytes), L(table_64bytes))
+ .int JMPTBL (L(30bytes), L(table_64bytes))
+ .int JMPTBL (L(31bytes), L(table_64bytes))
+ .int JMPTBL (L(32bytes), L(table_64bytes))
+ .int JMPTBL (L(33bytes), L(table_64bytes))
+ .int JMPTBL (L(34bytes), L(table_64bytes))
+ .int JMPTBL (L(35bytes), L(table_64bytes))
+ .int JMPTBL (L(36bytes), L(table_64bytes))
+ .int JMPTBL (L(37bytes), L(table_64bytes))
+ .int JMPTBL (L(38bytes), L(table_64bytes))
+ .int JMPTBL (L(39bytes), L(table_64bytes))
+ .int JMPTBL (L(40bytes), L(table_64bytes))
+ .int JMPTBL (L(41bytes), L(table_64bytes))
+ .int JMPTBL (L(42bytes), L(table_64bytes))
+ .int JMPTBL (L(43bytes), L(table_64bytes))
+ .int JMPTBL (L(44bytes), L(table_64bytes))
+ .int JMPTBL (L(45bytes), L(table_64bytes))
+ .int JMPTBL (L(46bytes), L(table_64bytes))
+ .int JMPTBL (L(47bytes), L(table_64bytes))
+ .int JMPTBL (L(48bytes), L(table_64bytes))
+ .int JMPTBL (L(49bytes), L(table_64bytes))
+ .int JMPTBL (L(50bytes), L(table_64bytes))
+ .int JMPTBL (L(51bytes), L(table_64bytes))
+ .int JMPTBL (L(52bytes), L(table_64bytes))
+ .int JMPTBL (L(53bytes), L(table_64bytes))
+ .int JMPTBL (L(54bytes), L(table_64bytes))
+ .int JMPTBL (L(55bytes), L(table_64bytes))
+ .int JMPTBL (L(56bytes), L(table_64bytes))
+ .int JMPTBL (L(57bytes), L(table_64bytes))
+ .int JMPTBL (L(58bytes), L(table_64bytes))
+ .int JMPTBL (L(59bytes), L(table_64bytes))
+ .int JMPTBL (L(60bytes), L(table_64bytes))
+ .int JMPTBL (L(61bytes), L(table_64bytes))
+ .int JMPTBL (L(62bytes), L(table_64bytes))
+ .int JMPTBL (L(63bytes), L(table_64bytes))
+ .int JMPTBL (L(64bytes), L(table_64bytes))
+ .size L(table_64bytes), .-L(table_64bytes)
+#endif
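For orientation, the routine above compares the buffers sixteen bytes at a time with pxor + ptest and dispatches the remaining tail through the length-indexed jump table. A rough C analogue of the vector loop, using the SSE4.1 intrinsic spelling of ptest (illustrative only; the real implementation is the assembly above):

    #include <smmintrin.h>
    #include <stddef.h>

    static int
    memcmp_sse4_sketch (const unsigned char *s1, const unsigned char *s2, size_t n)
    {
      while (n >= 16)
        {
          __m128i a = _mm_loadu_si128 ((const __m128i *) s1);
          __m128i b = _mm_loadu_si128 ((const __m128i *) s2);
          __m128i x = _mm_xor_si128 (a, b);
          if (!_mm_test_all_zeros (x, x))   /* ptest: some byte differs */
            break;
          s1 += 16; s2 += 16; n -= 16;
        }
      for (; n > 0; ++s1, ++s2, --n)        /* scalar tail: pinpoint the difference */
        if (*s1 != *s2)
          return *s1 < *s2 ? -1 : 1;
      return 0;
    }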
diff --git a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
new file mode 100644
index 0000000000..d2f852f726
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
@@ -0,0 +1,1966 @@
+/* memcmp with SSSE3
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#ifndef MEMCMP
+# define MEMCMP __memcmp_ssse3
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#define PARMS 4
+#define BLK1 PARMS
+#define BLK2 BLK1+4
+#define LEN BLK2+4
+#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
+#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
+
+ .section .text.ssse3,"ax",@progbits
+ENTRY (MEMCMP)
+ movl LEN(%esp), %ecx
+ movl BLK1(%esp), %eax
+ cmp $48, %ecx
+ movl BLK2(%esp), %edx
+ jae L(48bytesormore)
+ cmp $1, %ecx
+ jbe L(less1bytes)
+ PUSH (%ebx)
+ add %ecx, %edx
+ add %ecx, %eax
+ jmp L(less48bytes)
+
+ ALIGN (4)
+ CFI_POP (%ebx)
+L(less1bytes):
+ jb L(zero)
+ movb (%eax), %cl
+ cmp (%edx), %cl
+ je L(zero)
+ mov $1, %eax
+ ja L(1bytesend)
+ neg %eax
+L(1bytesend):
+ ret
+
+ ALIGN (4)
+L(zero):
+ mov $0, %eax
+ ret
+
+ ALIGN (4)
+L(48bytesormore):
+ PUSH (%ebx)
+ PUSH (%esi)
+ PUSH (%edi)
+ cfi_remember_state
+ movdqu (%eax), %xmm3
+ movdqu (%edx), %xmm0
+ movl %eax, %edi
+ movl %edx, %esi
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 16(%edi), %edi
+
+ sub $0xffff, %edx
+ lea 16(%esi), %esi
+ jnz L(less16bytes)
+ mov %edi, %edx
+ and $0xf, %edx
+ xor %edx, %edi
+ sub %edx, %esi
+ add %edx, %ecx
+ mov %esi, %edx
+ and $0xf, %edx
+ jz L(shr_0)
+ xor %edx, %esi
+
+ cmp $8, %edx
+ jae L(next_unaligned_table)
+ cmp $0, %edx
+ je L(shr_0)
+ cmp $1, %edx
+ je L(shr_1)
+ cmp $2, %edx
+ je L(shr_2)
+ cmp $3, %edx
+ je L(shr_3)
+ cmp $4, %edx
+ je L(shr_4)
+ cmp $5, %edx
+ je L(shr_5)
+ cmp $6, %edx
+ je L(shr_6)
+ jmp L(shr_7)
+
+ ALIGN (4)
+L(next_unaligned_table):
+ cmp $8, %edx
+ je L(shr_8)
+ cmp $9, %edx
+ je L(shr_9)
+ cmp $10, %edx
+ je L(shr_10)
+ cmp $11, %edx
+ je L(shr_11)
+ cmp $12, %edx
+ je L(shr_12)
+ cmp $13, %edx
+ je L(shr_13)
+ cmp $14, %edx
+ je L(shr_14)
+ jmp L(shr_15)
+
+ ALIGN (4)
+L(shr_0):
+ cmp $80, %ecx
+ jae L(shr_0_gobble)
+ lea -48(%ecx), %ecx
+ xor %eax, %eax
+ movaps (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+ movaps 16(%esi), %xmm2
+ pcmpeqb 16(%edi), %xmm2
+ pand %xmm1, %xmm2
+ pmovmskb %xmm2, %edx
+ add $32, %edi
+ add $32, %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea (%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_0_gobble):
+ lea -48(%ecx), %ecx
+ movdqa (%esi), %xmm0
+ xor %eax, %eax
+ pcmpeqb (%edi), %xmm0
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm2
+ pcmpeqb 16(%edi), %xmm2
+L(shr_0_gobble_loop):
+ pand %xmm0, %xmm2
+ sub $32, %ecx
+ pmovmskb %xmm2, %edx
+ movdqa %xmm0, %xmm1
+ movdqa 32(%esi), %xmm0
+ movdqa 48(%esi), %xmm2
+ sbb $0xffff, %edx
+ pcmpeqb 32(%edi), %xmm0
+ pcmpeqb 48(%edi), %xmm2
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ jz L(shr_0_gobble_loop)
+
+ pand %xmm0, %xmm2
+ cmp $0, %ecx
+ jge L(shr_0_gobble_loop_next)
+ inc %edx
+ add $32, %ecx
+L(shr_0_gobble_loop_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm2, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea (%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_1):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_1_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $1,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $1,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 1(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_1_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $1,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $1,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_1_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $1,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $1,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_1_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_1_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_1_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 1(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_2):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_2_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $2,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $2,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 2(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_2_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $2,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $2,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_2_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $2,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $2,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_2_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_2_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_2_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 2(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_3):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_3_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $3,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $3,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 3(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_3_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $3,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $3,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_3_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $3,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $3,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_3_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_3_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_3_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 3(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_4):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_4_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $4,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $4,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 4(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_4_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $4,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $4,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_4_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $4,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $4,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_4_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_4_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_4_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 4(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_5):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_5_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $5,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $5,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 5(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_5_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $5,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $5,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_5_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $5,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $5,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_5_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_5_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_5_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 5(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_6):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_6_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $6,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $6,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 6(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_6_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $6,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $6,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_6_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $6,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $6,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_6_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_6_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_6_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 6(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_7):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_7_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $7,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $7,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 7(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_7_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $7,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $7,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_7_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $7,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $7,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_7_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_7_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_7_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 7(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_8):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_8_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $8,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $8,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 8(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_8_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $8,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $8,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_8_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $8,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $8,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_8_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_8_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_8_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 8(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_9):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_9_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $9,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $9,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 9(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_9_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $9,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $9,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_9_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $9,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $9,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_9_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_9_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_9_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 9(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_10):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_10_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $10, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $10,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 10(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_10_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $10, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $10, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_10_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $10,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $10,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_10_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_10_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_10_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 10(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_11):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_11_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $11, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $11, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 11(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_11_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $11, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $11, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_11_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $11,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $11,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_11_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_11_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_11_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 11(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_12):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_12_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $12, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $12, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 12(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_12_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $12, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $12, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_12_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $12,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $12,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_12_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_12_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_12_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 12(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_13):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_13_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $13, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $13, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 13(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_13_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $13, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $13, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_13_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $13,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $13,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_13_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_13_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_13_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 13(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_14):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_14_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $14, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $14, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 14(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_14_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $14, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $14, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_14_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $14,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $14,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_14_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_14_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_14_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 14(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_15):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_15_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $15, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $15, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 15(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_15_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $15, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $15, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_15_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $15,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $15,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_15_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_15_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_15_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 15(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(exit):
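+/* The 32 bytes just examined contained a difference.  %xmm1 still holds
+   the comparison mask of their first 16 bytes: if that mask is all ones
+   the difference lies in the second half, otherwise back up 16 bytes and
+   use the first-half mask instead.  */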
+ pmovmskb %xmm1, %ebx
+ sub $0xffff, %ebx
+ jz L(first16bytes)
+ lea -16(%esi), %esi
+ lea -16(%edi), %edi
+ mov %ebx, %edx
+L(first16bytes):
+ add %eax, %esi
+L(less16bytes):
+ test %dl, %dl
+ jz L(next_24_bytes)
+
+ test $0x01, %dl
+ jnz L(Byte16)
+
+ test $0x02, %dl
+ jnz L(Byte17)
+
+ test $0x04, %dl
+ jnz L(Byte18)
+
+ test $0x08, %dl
+ jnz L(Byte19)
+
+ test $0x10, %dl
+ jnz L(Byte20)
+
+ test $0x20, %dl
+ jnz L(Byte21)
+
+ test $0x40, %dl
+ jnz L(Byte22)
+L(Byte23):
+ movzbl -9(%edi), %eax
+ movzbl -9(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte16):
+ movzbl -16(%edi), %eax
+ movzbl -16(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte17):
+ movzbl -15(%edi), %eax
+ movzbl -15(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte18):
+ movzbl -14(%edi), %eax
+ movzbl -14(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte19):
+ movzbl -13(%edi), %eax
+ movzbl -13(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte20):
+ movzbl -12(%edi), %eax
+ movzbl -12(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte21):
+ movzbl -11(%edi), %eax
+ movzbl -11(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte22):
+ movzbl -10(%edi), %eax
+ movzbl -10(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(next_24_bytes):
+ lea 8(%edi), %edi
+ lea 8(%esi), %esi
+ test $0x01, %dh
+ jnz L(Byte16)
+
+ test $0x02, %dh
+ jnz L(Byte17)
+
+ test $0x04, %dh
+ jnz L(Byte18)
+
+ test $0x08, %dh
+ jnz L(Byte19)
+
+ test $0x10, %dh
+ jnz L(Byte20)
+
+ test $0x20, %dh
+ jnz L(Byte21)
+
+ test $0x40, %dh
+ jnz L(Byte22)
+
+ ALIGN (4)
+L(Byte31):
+ movzbl -9(%edi), %eax
+ movzbl -9(%esi), %edx
+ sub %edx, %eax
+ RETURN_END
+
+ CFI_PUSH (%ebx)
+ ALIGN (4)
+L(more8bytes):
+ cmp $16, %ecx
+ jae L(more16bytes)
+ cmp $8, %ecx
+ je L(8bytes)
+ cmp $9, %ecx
+ je L(9bytes)
+ cmp $10, %ecx
+ je L(10bytes)
+ cmp $11, %ecx
+ je L(11bytes)
+ cmp $12, %ecx
+ je L(12bytes)
+ cmp $13, %ecx
+ je L(13bytes)
+ cmp $14, %ecx
+ je L(14bytes)
+ jmp L(15bytes)
+
+ ALIGN (4)
+L(more16bytes):
+ cmp $24, %ecx
+ jae L(more24bytes)
+ cmp $16, %ecx
+ je L(16bytes)
+ cmp $17, %ecx
+ je L(17bytes)
+ cmp $18, %ecx
+ je L(18bytes)
+ cmp $19, %ecx
+ je L(19bytes)
+ cmp $20, %ecx
+ je L(20bytes)
+ cmp $21, %ecx
+ je L(21bytes)
+ cmp $22, %ecx
+ je L(22bytes)
+ jmp L(23bytes)
+
+ ALIGN (4)
+L(more24bytes):
+ cmp $32, %ecx
+ jae L(more32bytes)
+ cmp $24, %ecx
+ je L(24bytes)
+ cmp $25, %ecx
+ je L(25bytes)
+ cmp $26, %ecx
+ je L(26bytes)
+ cmp $27, %ecx
+ je L(27bytes)
+ cmp $28, %ecx
+ je L(28bytes)
+ cmp $29, %ecx
+ je L(29bytes)
+ cmp $30, %ecx
+ je L(30bytes)
+ jmp L(31bytes)
+
+ ALIGN (4)
+L(more32bytes):
+ cmp $40, %ecx
+ jae L(more40bytes)
+ cmp $32, %ecx
+ je L(32bytes)
+ cmp $33, %ecx
+ je L(33bytes)
+ cmp $34, %ecx
+ je L(34bytes)
+ cmp $35, %ecx
+ je L(35bytes)
+ cmp $36, %ecx
+ je L(36bytes)
+ cmp $37, %ecx
+ je L(37bytes)
+ cmp $38, %ecx
+ je L(38bytes)
+ jmp L(39bytes)
+
+ ALIGN (4)
+L(more40bytes):
+ cmp $40, %ecx
+ je L(40bytes)
+ cmp $41, %ecx
+ je L(41bytes)
+ cmp $42, %ecx
+ je L(42bytes)
+ cmp $43, %ecx
+ je L(43bytes)
+ cmp $44, %ecx
+ je L(44bytes)
+ cmp $45, %ecx
+ je L(45bytes)
+ cmp $46, %ecx
+ je L(46bytes)
+ jmp L(47bytes)
+
+ ALIGN (4)
+L(less48bytes):
+ cmp $8, %ecx
+ jae L(more8bytes)
+ cmp $2, %ecx
+ je L(2bytes)
+ cmp $3, %ecx
+ je L(3bytes)
+ cmp $4, %ecx
+ je L(4bytes)
+ cmp $5, %ecx
+ je L(5bytes)
+ cmp $6, %ecx
+ je L(6bytes)
+ jmp L(7bytes)
+
+ ALIGN (4)
+L(44bytes):
+ mov -44(%eax), %ecx
+ mov -44(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(40bytes):
+ mov -40(%eax), %ecx
+ mov -40(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(36bytes):
+ mov -36(%eax), %ecx
+ mov -36(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(32bytes):
+ mov -32(%eax), %ecx
+ mov -32(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(28bytes):
+ mov -28(%eax), %ecx
+ mov -28(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(24bytes):
+ mov -24(%eax), %ecx
+ mov -24(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(20bytes):
+ mov -20(%eax), %ecx
+ mov -20(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(16bytes):
+ mov -16(%eax), %ecx
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(12bytes):
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(8bytes):
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(4bytes):
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(45bytes):
+ mov -45(%eax), %ecx
+ mov -45(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(41bytes):
+ mov -41(%eax), %ecx
+ mov -41(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(37bytes):
+ mov -37(%eax), %ecx
+ mov -37(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(33bytes):
+ mov -33(%eax), %ecx
+ mov -33(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(29bytes):
+ mov -29(%eax), %ecx
+ mov -29(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(25bytes):
+ mov -25(%eax), %ecx
+ mov -25(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(21bytes):
+ mov -21(%eax), %ecx
+ mov -21(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(17bytes):
+ mov -17(%eax), %ecx
+ mov -17(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(13bytes):
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(9bytes):
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(5bytes):
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(46bytes):
+ mov -46(%eax), %ecx
+ mov -46(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(42bytes):
+ mov -42(%eax), %ecx
+ mov -42(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(38bytes):
+ mov -38(%eax), %ecx
+ mov -38(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(34bytes):
+ mov -34(%eax), %ecx
+ mov -34(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(30bytes):
+ mov -30(%eax), %ecx
+ mov -30(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(26bytes):
+ mov -26(%eax), %ecx
+ mov -26(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(22bytes):
+ mov -22(%eax), %ecx
+ mov -22(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(18bytes):
+ mov -18(%eax), %ecx
+ mov -18(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(14bytes):
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(10bytes):
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(6bytes):
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(2bytes):
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(47bytes):
+ movl -47(%eax), %ecx
+ movl -47(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(43bytes):
+ movl -43(%eax), %ecx
+ movl -43(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(39bytes):
+ movl -39(%eax), %ecx
+ movl -39(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(35bytes):
+ movl -35(%eax), %ecx
+ movl -35(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(31bytes):
+ movl -31(%eax), %ecx
+ movl -31(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(27bytes):
+ movl -27(%eax), %ecx
+ movl -27(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(23bytes):
+ movl -23(%eax), %ecx
+ movl -23(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(19bytes):
+ movl -19(%eax), %ecx
+ movl -19(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(15bytes):
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(11bytes):
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(7bytes):
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(3bytes):
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(find_diff):
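+/* %ecx and %ebx hold the first pair of corresponding 4-byte words that
+   differ.  Compare them low byte first so that the flags of the last
+   compare describe the first byte that actually differs; its unsigned
+   comparison decides the sign of the result below.  */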
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ shr $16,%ecx
+ shr $16,%ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+L(end):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(bigger)
+ neg %eax
+L(bigger):
+ ret
+
+END (MEMCMP)
+
+#endif
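The long unrolled blocks above all reduce to one core step: compare two
16-byte chunks with pcmpeqb, collapse the result into a 16-bit mask with
pmovmskb, and only fall into the byte-wise exit code when that mask is not
all ones.  A minimal C/intrinsics sketch of that step (an illustration, not
part of the patch; the chunk_diff name is invented and SSE2 support is
assumed):

	#include <emmintrin.h>	/* SSE2: _mm_loadu_si128, _mm_cmpeq_epi8, _mm_movemask_epi8 */
	#include <stdio.h>

	/* Compare one 16-byte chunk of s1 and s2: 0 if identical, otherwise
	   the memcmp-style difference of the first byte that differs.  */
	static int
	chunk_diff (const unsigned char *s1, const unsigned char *s2)
	{
	  __m128i a = _mm_loadu_si128 ((const __m128i *) s1);
	  __m128i b = _mm_loadu_si128 ((const __m128i *) s2);
	  int mask = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, b));
	  /* 0xffff means "all 16 bytes equal", the same test the assembly
	     performs with "sub $0xffff, %edx; jnz L(exit)".  */
	  if (mask == 0xffff)
	    return 0;
	  int i = __builtin_ctz (~mask & 0xffff);	/* first differing byte */
	  return s1[i] - s2[i];
	}

	int
	main (void)
	{
	  unsigned char x[16] = "0123456789abcdef";
	  unsigned char y[16] = "0123456789abcdeg";
	  printf ("%d\n", chunk_diff (x, y));	/* 'f' - 'g' == -1 */
	  return 0;
	}

The assembly earns its length by handling two such chunks per iteration, by
providing a palignr variant for every possible misalignment between the two
inputs (the shr_1 ... shr_15 blocks), and by dispatching short tails through
the jump tables above.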
diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S
new file mode 100644
index 0000000000..cf606a5959
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcmp.S
@@ -0,0 +1,88 @@
+/* Multiple versions of memcmp
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+ .text
+ENTRY(memcmp)
+ .type memcmp, @gnu_indirect_function
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jne 1f
+ call __init_cpu_features
+1: leal __memcmp_ia32@GOTOFF(%ebx), %eax
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __memcmp_ssse3@GOTOFF(%ebx), %eax
+ testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __memcmp_sse4_2@GOTOFF(%ebx), %eax
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ ret
+END(memcmp)
+# else
+ .text
+ENTRY(memcmp)
+ .type memcmp, @gnu_indirect_function
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal __memcmp_ia32, %eax
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+ jz 2f
+ leal __memcmp_ssse3, %eax
+ testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
+ jz 2f
+ leal __memcmp_sse4_2, %eax
+2: ret
+END(memcmp)
+# endif
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memcmp_ia32, @function; \
+ .p2align 4; \
+ __memcmp_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __memcmp_ia32, .-__memcmp_ia32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC does not work with the hidden functions in a shared library,
+   since they would be called without setting up EBX, which the PLT
+   used by IFUNC requires.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memcmp; __GI_memcmp = __memcmp_ia32
+# endif
+#endif
+
+#include "../memcmp.S"
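Stripped of the PIC plumbing, the resolver above is just a feature-gated
function-pointer selection.  A rough C equivalent (a sketch only: the local
stubs stand in for __memcmp_ia32, __memcmp_ssse3 and __memcmp_sse4_2, and
GCC's __builtin_cpu_supports replaces the __cpu_features bit tests):

	#include <stdio.h>
	#include <string.h>

	/* Stand-ins for the three implementations the resolver chooses from.  */
	static int memcmp_ia32   (const void *a, const void *b, size_t n) { return memcmp (a, b, n); }
	static int memcmp_ssse3  (const void *a, const void *b, size_t n) { return memcmp (a, b, n); }
	static int memcmp_sse4_2 (const void *a, const void *b, size_t n) { return memcmp (a, b, n); }

	typedef int (*memcmp_fn) (const void *, const void *, size_t);

	/* Mirrors the fall-through in the assembly: default to the ia32
	   version, upgrade to SSSE3 if present, then consider SSE4.2.  */
	static memcmp_fn
	resolve_memcmp (void)
	{
	  memcmp_fn fn = memcmp_ia32;
	  if (!__builtin_cpu_supports ("ssse3"))
	    return fn;
	  fn = memcmp_ssse3;
	  if (!__builtin_cpu_supports ("sse4.2"))
	    return fn;
	  return memcmp_sse4_2;
	}

	int
	main (void)
	{
	  memcmp_fn fn = resolve_memcmp ();
	  printf ("%d\n", fn ("abc", "abd", 3));
	  return 0;
	}

The SHARED variant additionally has to set up EBX itself so it can reach
__cpu_features and the candidate functions through the GOT, which is what the
get_pc_thunk/GOTOFF sequence above is doing.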
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
new file mode 100644
index 0000000000..d5fd23e15c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -0,0 +1,378 @@
+/* strcmp with SSE4.2
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifndef USE_AS_STRNCMP
+# ifndef STRCMP
+# define STRCMP __strcmp_sse4_2
+# endif
+# define STR1 4
+# define STR2 STR1+4
+# define RETURN ret; .p2align 4
+#else
+# ifndef STRCMP
+# define STRCMP __strncmp_sse4_2
+# endif
+# define STR1 8
+# define STR2 STR1+4
+# define CNT STR2+4
+# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
+#endif
+
+ .section .text.sse4.2,"ax",@progbits
+ENTRY (STRCMP)
+#ifdef USE_AS_STRNCMP
+ PUSH (%ebp)
+#endif
+ mov STR1(%esp), %edx
+ mov STR2(%esp), %eax
+#ifdef USE_AS_STRNCMP
+ movl CNT(%esp), %ebp
+ test %ebp, %ebp
+ je L(eq)
+#endif
+ mov %dx, %cx
+ and $0xfff, %cx
+ cmp $0xff0, %cx
+ ja L(first4bytes)
+ movdqu (%edx), %xmm2
+ mov %eax, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(first4bytes)
+ movd %xmm2, %ecx
+ cmp (%eax), %ecx
+ jne L(less4bytes)
+ movdqu (%eax), %xmm1
+ pxor %xmm2, %xmm1
+ pxor %xmm0, %xmm0
+ ptest %xmm1, %xmm0
+ jnc L(less16bytes)
+ pcmpeqb %xmm0, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %ebp
+ jbe L(eq)
+#endif
+ add $16, %edx
+ add $16, %eax
+L(first4bytes):
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $1, %ebp
+ je L(eq)
+#endif
+
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $2, %ebp
+ je L(eq)
+#endif
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $3, %ebp
+ je L(eq)
+#endif
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ je L(eq)
+#endif
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $5, %ebp
+ je L(eq)
+#endif
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $6, %ebp
+ je L(eq)
+#endif
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ je L(eq)
+#endif
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ sub $8, %ebp
+ je L(eq)
+#endif
+ add $8, %eax
+ add $8, %edx
+
+ PUSH (%ebx)
+ PUSH (%edi)
+ PUSH (%esi)
+ cfi_remember_state
+ mov %edx, %edi
+ mov %eax, %esi
+ xorl %eax, %eax
+L(check_offset):
+ movl %edi, %ebx
+ movl %esi, %ecx
+ andl $0xfff, %ebx
+ andl $0xfff, %ecx
+ cmpl %ebx, %ecx
+ cmovl %ebx, %ecx
+ lea -0xff0(%ecx), %edx
+ sub %edx, %edi
+ sub %edx, %esi
+ testl %edx, %edx
+ jg L(crosspage)
+L(loop):
+ movdqu (%esi,%edx), %xmm2
+ movdqu (%edi,%edx), %xmm1
+ pcmpistri $0x1a, %xmm2, %xmm1
+ jbe L(end)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %ebp
+ jbe L(more16byteseq)
+#endif
+
+ add $16, %edx
+ jle L(loop)
+L(crosspage):
+ movzbl (%edi,%edx), %eax
+ movzbl (%esi,%edx), %ebx
+ subl %ebx, %eax
+ jne L(ret)
+ testl %ebx, %ebx
+ je L(ret)
+#ifdef USE_AS_STRNCMP
+ sub $1, %ebp
+ jbe L(more16byteseq)
+#endif
+ inc %edx
+ cmp $15, %edx
+ jle L(crosspage)
+ add $16, %edi
+ add $16, %esi
+ jmp L(check_offset)
+
+ .p2align 4
+L(end):
+ jnc L(ret)
+#ifdef USE_AS_STRNCMP
+ sub %ecx, %ebp
+ jbe L(more16byteseq)
+#endif
+ lea (%ecx,%edx), %ebx
+ movzbl (%edi,%ebx), %eax
+ movzbl (%esi,%ebx), %ecx
+ subl %ecx, %eax
+L(ret):
+ POP (%esi)
+ POP (%edi)
+ POP (%ebx)
+#ifdef USE_AS_STRNCMP
+ POP (%ebp)
+#endif
+ ret
+
+ .p2align 4
+ cfi_restore_state
+#ifdef USE_AS_STRNCMP
+L(more16byteseq):
+ POP (%esi)
+ POP (%edi)
+ POP (%ebx)
+#endif
+L(eq):
+ xorl %eax, %eax
+ RETURN
+
+L(neq):
+ mov $1, %eax
+ ja L(neq_bigger)
+ neg %eax
+L(neq_bigger):
+ RETURN
+
+L(less16bytes):
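+/* The first four bytes of both strings are already known to be equal.
+   Check whether they contain the terminating NUL using the usual
+   carry/magic-constant trick, and take the byte-wise path whenever a
+   NUL may be present.  */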
+ add $0xfefefeff, %ecx
+ jnc L(less4bytes)
+ xor (%edx), %ecx
+ or $0xfefefeff, %ecx
+ add $1, %ecx
+ jnz L(less4bytes)
+
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ jbe L(eq)
+#endif
+ mov 4(%edx), %ecx
+ cmp 4(%eax), %ecx
+ jne L(more4bytes)
+ add $0xfefefeff, %ecx
+ jnc L(more4bytes)
+ xor 4(%edx), %ecx
+ or $0xfefefeff, %ecx
+ add $1, %ecx
+ jnz L(more4bytes)
+
+#ifdef USE_AS_STRNCMP
+ sub $8, %ebp
+ jbe L(eq)
+#endif
+
+ add $8, %edx
+ add $8, %eax
+L(less4bytes):
+
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $1, %ebp
+ je L(eq)
+#endif
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $2, %ebp
+ je L(eq)
+#endif
+
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $3, %ebp
+ je L(eq)
+#endif
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+L(more4bytes):
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ je L(eq)
+#endif
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+
+#ifdef USE_AS_STRNCMP
+ cmp $5, %ebp
+ je L(eq)
+#endif
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $6, %ebp
+ je L(eq)
+#endif
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ je L(eq)
+#endif
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ jmp L(eq)
+
+END (STRCMP)
+
+#endif
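The core of this variant is the pcmpistri $0x1a step: a single instruction
that compares 16 bytes of each string and checks for a terminating NUL,
leaving the index of the first interesting byte in %ecx and the verdict in
the flags.  The same step in C intrinsics (a sketch, not part of the patch:
cmp16 is an invented name, and both 16-byte loads are assumed to be safe,
which is exactly what the page-crossing logic in the assembly has to
guarantee):

	#include <nmmintrin.h>	/* SSE4.2: _mm_cmpistri, _mm_cmpistrc, _mm_cmpistrz */

	/* One 16-byte step of an SSE4.2 strcmp.  Sets *done when a difference
	   or a terminator was found in this block; the return value is then
	   the final strcmp-style result.  */
	static int
	cmp16 (const char *a, const char *b, int *done)
	{
	  __m128i va = _mm_loadu_si128 ((const __m128i *) a);
	  __m128i vb = _mm_loadu_si128 ((const __m128i *) b);
	  /* Mode 0x1a: byte elements, "equal each" aggregation, negative
	     polarity -- the index is the first position where the strings
	     stop agreeing, counting a one-sided NUL as a disagreement.  */
	  int idx  = _mm_cmpistri (va, vb, 0x1a);
	  int diff = _mm_cmpistrc (va, vb, 0x1a);	/* CF: some position disagrees */
	  int end  = _mm_cmpistrz (va, vb, 0x1a);	/* ZF: the second operand holds a NUL */
	  if (diff)
	    {
	      *done = 1;
	      return (unsigned char) a[idx] - (unsigned char) b[idx];
	    }
	  *done = end;		/* equal so far; finished if the strings ended here */
	  return 0;
	}

	int
	main (void)
	{
	  int done;
	  /* Differ at index 15 ('f' vs 'g'); both literals are longer than
	     16 bytes, so the unaligned loads stay inside the objects.  */
	  return cmp16 ("0123456789abcdef-tail", "0123456789abcdeg-tail", &done) < 0 ? 0 : 1;
	}

Everything else in the file -- the byte-at-a-time prologue, the
L(check_offset) arithmetic and the USE_AS_STRNCMP counter -- exists to keep
those 16-byte loads from straying across an unmapped page and to honour the
length limit.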
diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
new file mode 100644
index 0000000000..40994c05b1
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
@@ -0,0 +1,2220 @@
+/* strcmp with SSSE3
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifndef USE_AS_STRNCMP
+# ifndef STRCMP
+# define STRCMP __strcmp_ssse3
+# endif
+# define STR1 4
+# define STR2 STR1+4
+# define RETURN ret; .p2align 4
+# define UPDATE_STRNCMP_COUNTER
+#else
+# ifndef STRCMP
+# define STRCMP __strncmp_ssse3
+# endif
+# define STR1 8
+# define STR2 STR1+4
+# define CNT STR2+4
+# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
+# define UPDATE_STRNCMP_COUNTER \
+ /* Calculate the number of bytes left to compare.  */ \
+ mov $16, %esi; \
+ sub %ecx, %esi; \
+ cmp %esi, %ebp; \
+ jbe L(more8byteseq); \
+ sub %esi, %ebp
+#endif
+
+ .section .text.ssse3,"ax",@progbits
+ENTRY (STRCMP)
+#ifdef USE_AS_STRNCMP
+ PUSH (%ebp)
+#endif
+ movl STR1(%esp), %edx
+ movl STR2(%esp), %eax
+#ifdef USE_AS_STRNCMP
+ movl CNT(%esp), %ebp
+ cmp $16, %ebp
+ jb L(less16bytes_sncmp)
+ jmp L(more16bytes)
+#endif
+
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ add $8, %edx
+ add $8, %eax
+#ifdef USE_AS_STRNCMP
+ cmp $8, %ebp
+ lea -8(%ebp), %ebp
+ je L(eq)
+L(more16bytes):
+#endif
+ movl %edx, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(crosspage)
+ mov %eax, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(crosspage)
+ pxor %xmm0, %xmm0
+ movlpd (%eax), %xmm1
+ movlpd (%edx), %xmm2
+ movhpd 8(%eax), %xmm1
+ movhpd 8(%edx), %xmm2
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %ecx
+ sub $0xffff, %ecx
+ jnz L(less16bytes)
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(eq)
+#endif
+ add $16, %eax
+ add $16, %edx
+
+L(crosspage):
+
+ PUSH (%ebx)
+ PUSH (%edi)
+ PUSH (%esi)
+#ifdef USE_AS_STRNCMP
+ cfi_remember_state
+#endif
+
+ movl %edx, %edi
+ movl %eax, %ecx
+ and $0xf, %ecx
+ and $0xf, %edi
+ xor %ecx, %eax
+ xor %edi, %edx
+ xor %ebx, %ebx
+ cmp %edi, %ecx
+ je L(ashr_0)
+ ja L(bigger)
+ or $0x20, %ebx
+ xchg %edx, %eax
+ xchg %ecx, %edi
+L(bigger):
+ lea 15(%edi), %edi
+ sub %ecx, %edi
+ cmp $8, %edi
+ jle L(ashr_less_8)
+ cmp $14, %edi
+ je L(ashr_15)
+ cmp $13, %edi
+ je L(ashr_14)
+ cmp $12, %edi
+ je L(ashr_13)
+ cmp $11, %edi
+ je L(ashr_12)
+ cmp $10, %edi
+ je L(ashr_11)
+ cmp $9, %edi
+ je L(ashr_10)
+L(ashr_less_8):
+ je L(ashr_9)
+ cmp $7, %edi
+ je L(ashr_8)
+ cmp $6, %edi
+ je L(ashr_7)
+ cmp $5, %edi
+ je L(ashr_6)
+ cmp $4, %edi
+ je L(ashr_5)
+ cmp $3, %edi
+ je L(ashr_4)
+ cmp $2, %edi
+ je L(ashr_3)
+ cmp $1, %edi
+ je L(ashr_2)
+ cmp $0, %edi
+ je L(ashr_1)
+
+/*
+ * The following cases will be handled by ashr_0
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(0~15) n(0~15) 15(15+ n-n) ashr_0
+ */
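+/* Worked example: if both strings begin, say, 5 bytes into their aligned
+   16-byte blocks, then ecx = 5 and edi = 5, the relative offset is
+   15 + (5 - 5) = 15 and the ashr_0 case applies: the sources remain
+   mutually aligned, so every iteration can compare two whole-register
+   loads directly with no palignr shuffling.  */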
+ .p2align 4
+L(ashr_0):
+ mov $0xffff, %esi
+ movdqa (%eax), %xmm1
+ pxor %xmm0, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb (%edx), %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ mov %ecx, %edi
+ jne L(less32bytes)
+ UPDATE_STRNCMP_COUNTER
+ mov $0x10, %ebx
+ mov $0x10, %ecx
+ pxor %xmm0, %xmm0
+ .p2align 4
+L(loop_ashr_0):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ jmp L(loop_ashr_0)
+
+/*
+ * The following cases will be handled by ashr_1
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(15) n -15 0(15 +(n-15) - n) ashr_1
+ */
+ .p2align 4
+L(ashr_1):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $15, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -15(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $1, %ebx
+ lea 1(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_1):
+ add $16, %edi
+ jg L(nibble_ashr_1)
+
+L(gobble_ashr_1):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $1, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_1)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $1, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_1)
+
+ .p2align 4
+L(nibble_ashr_1):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfffe, %esi
+ jnz L(ashr_1_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $15, %ebp
+ jbe L(ashr_1_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_1)
+
+ .p2align 4
+L(ashr_1_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $1, %xmm0
+ psrldq $1, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_2
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
+ */
+ .p2align 4
+L(ashr_2):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $14, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -14(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $2, %ebx
+ lea 2(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_2):
+ add $16, %edi
+ jg L(nibble_ashr_2)
+
+L(gobble_ashr_2):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $2, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_2)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $2, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_2)
+
+ .p2align 4
+L(nibble_ashr_2):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfffc, %esi
+ jnz L(ashr_2_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $14, %ebp
+ jbe L(ashr_2_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_2)
+
+ .p2align 4
+L(ashr_2_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $2, %xmm0
+ psrldq $2, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_3
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
+ */
+ .p2align 4
+L(ashr_3):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $13, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -13(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $3, %ebx
+ lea 3(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_3):
+ add $16, %edi
+ jg L(nibble_ashr_3)
+
+L(gobble_ashr_3):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $3, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_3)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $3, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_3)
+
+ .p2align 4
+L(nibble_ashr_3):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfff8, %esi
+ jnz L(ashr_3_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $13, %ebp
+ jbe L(ashr_3_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_3)
+
+ .p2align 4
+L(ashr_3_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $3, %xmm0
+ psrldq $3, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_4
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
+ */
+ .p2align 4
+L(ashr_4):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $12, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -12(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $4, %ebx
+ lea 4(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_4):
+ add $16, %edi
+ jg L(nibble_ashr_4)
+
+L(gobble_ashr_4):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $4, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_4)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $4, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_4)
+
+ .p2align 4
+L(nibble_ashr_4):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfff0, %esi
+ jnz L(ashr_4_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $12, %ebp
+ jbe L(ashr_4_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_4)
+
+ .p2align 4
+L(ashr_4_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $4, %xmm0
+ psrldq $4, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_5
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
+ */
+ .p2align 4
+L(ashr_5):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $11, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -11(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $5, %ebx
+ lea 5(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_5):
+ add $16, %edi
+ jg L(nibble_ashr_5)
+
+L(gobble_ashr_5):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $5, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_5)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $5, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_5)
+
+ .p2align 4
+L(nibble_ashr_5):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xffe0, %esi
+ jnz L(ashr_5_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $11, %ebp
+ jbe L(ashr_5_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_5)
+
+ .p2align 4
+L(ashr_5_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $5, %xmm0
+ psrldq $5, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_6
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
+ */
+
+ .p2align 4
+L(ashr_6):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $10, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -10(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $6, %ebx
+ lea 6(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_6):
+ add $16, %edi
+ jg L(nibble_ashr_6)
+
+L(gobble_ashr_6):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $6, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_6)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $6, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_6)
+
+ .p2align 4
+L(nibble_ashr_6):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xffc0, %esi
+ jnz L(ashr_6_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $10, %ebp
+ jbe L(ashr_6_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_6)
+
+ .p2align 4
+L(ashr_6_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $6, %xmm0
+ psrldq $6, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_7
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
+ */
+
+ .p2align 4
+L(ashr_7):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $9, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -9(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $7, %ebx
+ lea 8(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_7):
+ add $16, %edi
+ jg L(nibble_ashr_7)
+
+L(gobble_ashr_7):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $7, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_7)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $7, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_7)
+
+ .p2align 4
+L(nibble_ashr_7):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xff80, %esi
+ jnz L(ashr_7_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $9, %ebp
+ jbe L(ashr_7_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_7)
+
+ .p2align 4
+L(ashr_7_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $7, %xmm0
+ psrldq $7, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_8
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
+ */
+ .p2align 4
+L(ashr_8):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $8, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -8(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $8, %ebx
+ lea 8(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_8):
+ add $16, %edi
+ jg L(nibble_ashr_8)
+
+L(gobble_ashr_8):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $8, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_8)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $8, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_8)
+
+ .p2align 4
+L(nibble_ashr_8):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xff00, %esi
+ jnz L(ashr_8_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $8, %ebp
+ jbe L(ashr_8_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_8)
+
+ .p2align 4
+L(ashr_8_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $8, %xmm0
+ psrldq $8, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_9
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
+ */
+ .p2align 4
+L(ashr_9):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $7, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -7(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $9, %ebx
+ lea 9(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_9):
+ add $16, %edi
+ jg L(nibble_ashr_9)
+
+L(gobble_ashr_9):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $9, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_9)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $9, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_9)
+
+ .p2align 4
+L(nibble_ashr_9):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfe00, %esi
+ jnz L(ashr_9_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ jbe L(ashr_9_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_9)
+
+ .p2align 4
+L(ashr_9_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $9, %xmm0
+ psrldq $9, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_10
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
+ */
+ .p2align 4
+L(ashr_10):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $6, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -6(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $10, %ebx
+ lea 10(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_10):
+ add $16, %edi
+ jg L(nibble_ashr_10)
+
+L(gobble_ashr_10):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $10, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_10)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $10, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_10)
+
+ .p2align 4
+L(nibble_ashr_10):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfc00, %esi
+ jnz L(ashr_10_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $6, %ebp
+ jbe L(ashr_10_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_10)
+
+ .p2align 4
+L(ashr_10_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $10, %xmm0
+ psrldq $10, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_11
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
+ */
+ .p2align 4
+L(ashr_11):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $5, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -5(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $11, %ebx
+ lea 11(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_11):
+ add $16, %edi
+ jg L(nibble_ashr_11)
+
+L(gobble_ashr_11):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $11, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_11)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $11, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_11)
+
+ .p2align 4
+L(nibble_ashr_11):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xf800, %esi
+ jnz L(ashr_11_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $5, %ebp
+ jbe L(ashr_11_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_11)
+
+ .p2align 4
+L(ashr_11_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $11, %xmm0
+ psrldq $11, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_12
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
+ */
+ .p2align 4
+L(ashr_12):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $4, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -4(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $12, %ebx
+ lea 12(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_12):
+ add $16, %edi
+ jg L(nibble_ashr_12)
+
+L(gobble_ashr_12):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $12, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_12)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $12, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_12)
+
+ .p2align 4
+L(nibble_ashr_12):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xf000, %esi
+ jnz L(ashr_12_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ jbe L(ashr_12_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_12)
+
+ .p2align 4
+L(ashr_12_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $12, %xmm0
+ psrldq $12, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_13
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
+ */
+ .p2align 4
+L(ashr_13):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $3, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -3(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $13, %ebx
+ lea 13(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_13):
+ add $16, %edi
+ jg L(nibble_ashr_13)
+
+L(gobble_ashr_13):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $13, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_13)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $13, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_13)
+
+ .p2align 4
+L(nibble_ashr_13):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xe000, %esi
+ jnz L(ashr_13_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $3, %ebp
+ jbe L(ashr_13_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_13)
+
+ .p2align 4
+L(ashr_13_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $13, %xmm0
+ psrldq $13, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_14
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
+ */
+ .p2align 4
+L(ashr_14):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $2, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -2(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $14, %ebx
+ lea 14(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_14):
+ add $16, %edi
+ jg L(nibble_ashr_14)
+
+L(gobble_ashr_14):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $14, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_14)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $14, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_14)
+
+ .p2align 4
+L(nibble_ashr_14):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xc000, %esi
+ jnz L(ashr_14_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $2, %ebp
+ jbe L(ashr_14_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_14)
+
+ .p2align 4
+L(ashr_14_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $14, %xmm0
+ psrldq $14, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_15
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
+ */
+
+ .p2align 4
+L(ashr_15):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $1, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -1(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $15, %ebx
+ lea 15(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_15):
+ add $16, %edi
+ jg L(nibble_ashr_15)
+
+L(gobble_ashr_15):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $15, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_15)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $15, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_15)
+
+ .p2align 4
+L(nibble_ashr_15):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0x8000, %esi
+ jnz L(ashr_15_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $1, %ebp
+ jbe L(ashr_15_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_15)
+
+ .p2align 4
+L(ashr_15_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $15, %xmm0
+ psrldq $15, %xmm3
+ jmp L(aftertail)
+
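+/*
+ * %ebx carries two pieces of loop state: its low bits hold the shift
+ * amount N that was or-ed in before each L(loop_ashr_N), and bit 0x20
+ * records whether the two string pointers were exchanged during setup.
+ * L(exit) turns N and the block offset in %ecx back into per-string
+ * byte offsets, L(less32bytes) applies them to %edx and %eax, and the
+ * xchg undoes the earlier pointer swap so the final byte comparison
+ * returns a difference with the correct sign.
+ */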
+ .p2align 4
+L(aftertail):
+ pcmpeqb %xmm3, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ not %esi
+L(exit):
+ mov %ebx, %edi
+ and $0x1f, %edi
+ lea -16(%edi, %ecx), %edi
+L(less32bytes):
+ add %edi, %edx
+ add %ecx, %eax
+ test $0x20, %ebx
+ jz L(ret2)
+ xchg %eax, %edx
+
+ .p2align 4
+L(ret2):
+ mov %esi, %ecx
+ POP (%esi)
+ POP (%edi)
+ POP (%ebx)
+L(less16bytes):
+ test %cl, %cl
+ jz L(2next_8_bytes)
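+
+/*
+ * The low 16 bits of %ecx encode the result for one 16-byte block: the
+ * lowest set bit marks the first byte that differs or ends the string.
+ * %cl covers bytes 0-7 and %ch bytes 8-15, so the cascade below walks
+ * the bits in order and compares the corresponding byte pair.
+ */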
+
+ test $0x01, %cl
+ jnz L(Byte0)
+
+ test $0x02, %cl
+ jnz L(Byte1)
+
+ test $0x04, %cl
+ jnz L(Byte2)
+
+ test $0x08, %cl
+ jnz L(Byte3)
+
+ test $0x10, %cl
+ jnz L(Byte4)
+
+ test $0x20, %cl
+ jnz L(Byte5)
+
+ test $0x40, %cl
+ jnz L(Byte6)
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ jbe L(eq)
+#endif
+
+ movzx 7(%eax), %ecx
+ movzx 7(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte0):
+#ifdef USE_AS_STRNCMP
+ cmp $0, %ebp
+ jbe L(eq)
+#endif
+ movzx (%eax), %ecx
+ movzx (%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte1):
+#ifdef USE_AS_STRNCMP
+ cmp $1, %ebp
+ jbe L(eq)
+#endif
+ movzx 1(%eax), %ecx
+ movzx 1(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte2):
+#ifdef USE_AS_STRNCMP
+ cmp $2, %ebp
+ jbe L(eq)
+#endif
+ movzx 2(%eax), %ecx
+ movzx 2(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte3):
+#ifdef USE_AS_STRNCMP
+ cmp $3, %ebp
+ jbe L(eq)
+#endif
+ movzx 3(%eax), %ecx
+ movzx 3(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte4):
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ jbe L(eq)
+#endif
+ movzx 4(%eax), %ecx
+ movzx 4(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte5):
+#ifdef USE_AS_STRNCMP
+ cmp $5, %ebp
+ jbe L(eq)
+#endif
+ movzx 5(%eax), %ecx
+ movzx 5(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte6):
+#ifdef USE_AS_STRNCMP
+ cmp $6, %ebp
+ jbe L(eq)
+#endif
+ movzx 6(%eax), %ecx
+ movzx 6(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(2next_8_bytes):
+ add $8, %eax
+ add $8, %edx
+#ifdef USE_AS_STRNCMP
+ cmp $8, %ebp
+ lea -8(%ebp), %ebp
+ jbe L(eq)
+#endif
+
+ test $0x01, %ch
+ jnz L(Byte0)
+
+ test $0x02, %ch
+ jnz L(Byte1)
+
+ test $0x04, %ch
+ jnz L(Byte2)
+
+ test $0x08, %ch
+ jnz L(Byte3)
+
+ test $0x10, %ch
+ jnz L(Byte4)
+
+ test $0x20, %ch
+ jnz L(Byte5)
+
+ test $0x40, %ch
+ jnz L(Byte6)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ jbe L(eq)
+#endif
+ movzx 7(%eax), %ecx
+ movzx 7(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(neq):
+ mov $1, %eax
+ ja L(neq_bigger)
+ neg %eax
+L(neq_bigger):
+#ifdef USE_AS_STRNCMP
+ POP (%ebp)
+#endif
+ ret
+
+#ifdef USE_AS_STRNCMP
+ .p2align 4
+ cfi_restore_state
+L(more8byteseq):
+ POP (%esi)
+ POP (%edi)
+ POP (%ebx)
+#endif
+
+L(eq):
+
+#ifdef USE_AS_STRNCMP
+ POP (%ebp)
+#endif
+ xorl %eax, %eax
+ ret
+
+#ifdef USE_AS_STRNCMP
+ .p2align 4
+ CFI_PUSH (%ebp)
+L(less16bytes_sncmp):
+ test %ebp, %ebp
+ jz L(eq)
+
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $1, %ebp
+ je L(eq)
+
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $2, %ebp
+ je L(eq)
+
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $3, %ebp
+ je L(eq)
+
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $4, %ebp
+ je L(eq)
+
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $5, %ebp
+ je L(eq)
+
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $6, %ebp
+ je L(eq)
+
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $7, %ebp
+ je L(eq)
+
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $8, %ebp
+ je L(eq)
+
+ movzbl 8(%eax), %ecx
+ cmpb %cl, 8(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $9, %ebp
+ je L(eq)
+
+ movzbl 9(%eax), %ecx
+ cmpb %cl, 9(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $10, %ebp
+ je L(eq)
+
+ movzbl 10(%eax), %ecx
+ cmpb %cl, 10(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $11, %ebp
+ je L(eq)
+
+ movzbl 11(%eax), %ecx
+ cmpb %cl, 11(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $12, %ebp
+ je L(eq)
+
+ movzbl 12(%eax), %ecx
+ cmpb %cl, 12(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $13, %ebp
+ je L(eq)
+
+ movzbl 13(%eax), %ecx
+ cmpb %cl, 13(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $14, %ebp
+ je L(eq)
+
+ movzbl 14(%eax), %ecx
+ cmpb %cl, 14(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $15, %ebp
+ je L(eq)
+
+ movzbl 15(%eax), %ecx
+ cmpb %cl, 15(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ POP (%ebp)
+ xor %eax, %eax
+ ret
+#endif
+
+END (STRCMP)
+
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
new file mode 100644
index 0000000000..7136d47e85
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcmp.S
@@ -0,0 +1,115 @@
+/* Multiple versions of strcmp
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef USE_AS_STRNCMP
+# define STRCMP strcmp
+# define __GI_STRCMP __GI_strcmp
+# define __STRCMP_IA32 __strcmp_ia32
+# define __STRCMP_SSSE3 __strcmp_ssse3
+# define __STRCMP_SSE4_2 __strcmp_sse4_2
+#else
+# define STRCMP strncmp
+# define __GI_STRCMP __GI_strncmp
+# define __STRCMP_IA32 __strncmp_ia32
+# define __STRCMP_SSSE3 __strncmp_ssse3
+# define __STRCMP_SSE4_2 __strncmp_sse4_2
+#endif
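+
+/* strncmp.S includes this file with USE_AS_STRNCMP defined, so this
+   dispatch source serves both strcmp and strncmp.  */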
+
+/* Define multiple versions only for the definition in libc.  Don't
+   define multiple versions for strncmp in the static library, since we
+   need strncmp before the initialization has happened.  */
+#if (defined SHARED || !defined USE_AS_STRNCMP) && !defined NOT_IN_libc
+# ifdef SHARED
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+ movl (%esp), %ebx
+ ret
+
+ .text
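+/* IFUNC resolver: the dynamic linker calls this routine while resolving
+   strcmp/strncmp and binds the symbol to the address it returns, so
+   subsequent calls go directly to the SSE4.2, SSSE3 or ia32 variant
+   selected from __cpu_features.  */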
+ENTRY(STRCMP)
+ .type STRCMP, @gnu_indirect_function
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jne 1f
+ call __init_cpu_features
+1: leal __STRCMP_IA32@GOTOFF(%ebx), %eax
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __STRCMP_SSSE3@GOTOFF(%ebx), %eax
+ testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __STRCMP_SSE4_2@GOTOFF(%ebx), %eax
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ ret
+END(STRCMP)
+# else
+ .text
+ENTRY(STRCMP)
+ .type STRCMP, @gnu_indirect_function
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal __STRCMP_IA32, %eax
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+ jz 2f
+ leal __STRCMP_SSSE3, %eax
+ testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
+ jz 2f
+ leal __STRCMP_SSE4_2, %eax
+2: ret
+END(STRCMP)
+# endif
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __STRCMP_IA32, @function; \
+ .p2align 4; \
+ __STRCMP_IA32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __STRCMP_IA32, .-__STRCMP_IA32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in a shared library,
+   since they will be called without setting up EBX, which is needed
+   for the PLT that IFUNC uses.  */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCMP; __GI_STRCMP = __STRCMP_IA32
+# endif
+#endif
+
+#ifndef USE_AS_STRNCMP
+# include "../strcmp.S"
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strncmp-c.c b/sysdeps/i386/i686/multiarch/strncmp-c.c
new file mode 100644
index 0000000000..cc059da494
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strncmp-c.c
@@ -0,0 +1,8 @@
+#ifdef SHARED
+# define STRNCMP __strncmp_ia32
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+ __hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32);
+#endif
+
+#include "string/strncmp.c"
diff --git a/sysdeps/i386/i686/multiarch/strncmp-sse4.S b/sysdeps/i386/i686/multiarch/strncmp-sse4.S
new file mode 100644
index 0000000000..cf14dfaf6c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strncmp-sse4.S
@@ -0,0 +1,5 @@
+#ifdef SHARED
+# define USE_AS_STRNCMP
+# define STRCMP __strncmp_sse4_2
+# include "strcmp-sse4.S"
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S b/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
new file mode 100644
index 0000000000..536c8685f2
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
@@ -0,0 +1,5 @@
+#ifdef SHARED
+# define USE_AS_STRNCMP
+# define STRCMP __strncmp_ssse3
+# include "strcmp-ssse3.S"
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strncmp.S b/sysdeps/i386/i686/multiarch/strncmp.S
new file mode 100644
index 0000000000..b6814315fb
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strncmp.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNCMP
+#define STRCMP strncmp
+#include "strcmp.S"
diff --git a/sysdeps/i386/lshift.S b/sysdeps/i386/lshift.S
index 536d9878eb..398cf038c7 100644
--- a/sysdeps/i386/lshift.S
+++ b/sysdeps/i386/lshift.S
@@ -1,5 +1,5 @@
/* i80386 __mpn_lshift --
- Copyright (C) 1992, 1994, 1997-2000, 2005 Free Software Foundation, Inc.
+ Copyright (C) 1992,1994,1997-2000,2005,2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -55,6 +55,7 @@ ENTRY (BP_SYM (__mpn_lshift))
movl (%esi,%edx,4),%ebx /* read most significant limb */
cfi_rel_offset (ebx, 0)
+ cfi_remember_state
xorl %eax,%eax
shldl %cl,%ebx,%eax /* compute carry limb */
decl %edx
@@ -95,6 +96,7 @@ L(1): movl (%esi,%edx,4),%eax
LEAVE
ret
+ cfi_restore_state
L(end): shll %cl,%ebx /* compute least significant limb */
movl %ebx,(%edi) /* store it */
diff --git a/sysdeps/i386/rshift.S b/sysdeps/i386/rshift.S
index 3fd0afe822..332c4d09e7 100644
--- a/sysdeps/i386/rshift.S
+++ b/sysdeps/i386/rshift.S
@@ -1,5 +1,5 @@
/* i80386 __mpn_rshift --
- Copyright (C) 1992,1994,1997-2000,2005 Free Software Foundation, Inc.
+ Copyright (C) 1992,1994,1997-2000,2005,2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -57,6 +57,7 @@ ENTRY (BP_SYM (__mpn_rshift))
movl (%esi,%edx,4),%ebx /* read least significant limb */
cfi_rel_offset (ebx, 0)
+ cfi_remember_state
xorl %eax,%eax
shrdl %cl,%ebx,%eax /* compute carry limb */
incl %edx
@@ -97,10 +98,7 @@ L(1): movl (%esi,%edx,4),%eax
LEAVE
ret
- cfi_adjust_cfa_offset (12)
- cfi_rel_offset (edi, 8)
- cfi_rel_offset (esi, 4)
- cfi_rel_offset (ebx, 0)
+ cfi_restore_state
L(end): shrl %cl,%ebx /* compute most significant limb */
movl %ebx,(%edi) /* store it */
diff --git a/sysdeps/ia64/fpu/fegetenv.c b/sysdeps/ia64/fpu/fegetenv.c
index 5446b16494..e240f75e43 100644
--- a/sysdeps/ia64/fpu/fegetenv.c
+++ b/sysdeps/ia64/fpu/fegetenv.c
@@ -27,3 +27,4 @@ fegetenv (fenv_t *envp)
return 0;
}
+libm_hidden_def (fegetenv)
diff --git a/sysdeps/powerpc/fpu/fegetenv.c b/sysdeps/powerpc/fpu/fegetenv.c
index 53953454cb..3d21abb529 100644
--- a/sysdeps/powerpc/fpu/fegetenv.c
+++ b/sysdeps/powerpc/fpu/fegetenv.c
@@ -35,4 +35,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, BP_SYM (__old_fegetenv), BP_SYM (fegetenv), GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, BP_SYM (__fegetenv), BP_SYM (fegetenv), GLIBC_2_2);
diff --git a/sysdeps/powerpc/powerpc32/configure b/sysdeps/powerpc/powerpc32/configure
index 9b76c57886..da8ec0b87c 100644
--- a/sysdeps/powerpc/powerpc32/configure
+++ b/sysdeps/powerpc/powerpc32/configure
@@ -25,11 +25,10 @@ rm -f conftest*
fi
{ $as_echo "$as_me:$LINENO: result: $libc_cv_ppc_rel16" >&5
$as_echo "$libc_cv_ppc_rel16" >&6; }
-if test $libc_cv_ppc_rel16 = yes; then
- cat >>confdefs.h <<\_ACEOF
-#define HAVE_ASM_PPC_REL16 1
-_ACEOF
-
+if test $libc_cv_ppc_rel16 = no; then
+ { { $as_echo "$as_me:$LINENO: error: R_PPC_REL16 is not supported. Binutils is too old." >&5
+$as_echo "$as_me: error: R_PPC_REL16 is not supported. Binutils is too old." >&2;}
+ { (exit 1); exit 1; }; }
fi
# See whether GCC uses -msecure-plt.
diff --git a/sysdeps/powerpc/powerpc32/configure.in b/sysdeps/powerpc/powerpc32/configure.in
index 7219ad993e..21d3f5ee5b 100644
--- a/sysdeps/powerpc/powerpc32/configure.in
+++ b/sysdeps/powerpc/powerpc32/configure.in
@@ -13,8 +13,8 @@ else
libc_cv_ppc_rel16=no
fi
rm -f conftest*])
-if test $libc_cv_ppc_rel16 = yes; then
- AC_DEFINE(HAVE_ASM_PPC_REL16)
+if test $libc_cv_ppc_rel16 = no; then
+ AC_MSG_ERROR(R_PPC_REL16 is not supported. Binutils is too old.)
fi
# See whether GCC uses -msecure-plt.
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h
index 6f8d0f506e..5351d9691d 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.h
+++ b/sysdeps/powerpc/powerpc32/dl-machine.h
@@ -41,16 +41,13 @@ static inline Elf32_Addr * __attribute__ ((const))
ppc_got (void)
{
Elf32_Addr *got;
-#ifdef HAVE_ASM_PPC_REL16
+
asm ("bcl 20,31,1f\n"
"1: mflr %0\n"
" addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n"
" addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n"
: "=b" (got) : : "lr");
-#else
- asm (" bl _GLOBAL_OFFSET_TABLE_-4@local"
- : "=l" (got));
-#endif
+
return got;
}
diff --git a/sysdeps/powerpc/powerpc32/dl-start.S b/sysdeps/powerpc/powerpc32/dl-start.S
index c77c4de198..ae41f47ede 100644
--- a/sysdeps/powerpc/powerpc32/dl-start.S
+++ b/sysdeps/powerpc/powerpc32/dl-start.S
@@ -47,15 +47,10 @@ _dl_start_user:
passed by value!). */
/* Put our GOT pointer in r31, */
-#ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r31
addis r31,r31,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r31,r31,_GLOBAL_OFFSET_TABLE_-1b@l
-#else
- bl _GLOBAL_OFFSET_TABLE_-4@local
- mflr r31
-#endif
/* the address of _start in r30, */
mr r30,r3
/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */
diff --git a/sysdeps/powerpc/powerpc32/elf/start.S b/sysdeps/powerpc/powerpc32/elf/start.S
index a8abdca0c6..dc89a5e109 100644
--- a/sysdeps/powerpc/powerpc32/elf/start.S
+++ b/sysdeps/powerpc/powerpc32/elf/start.S
@@ -53,10 +53,6 @@ L(start_addresses):
ASM_SIZE_DIRECTIVE(L(start_addresses))
.section ".text"
-#if defined PIC && !defined HAVE_ASM_PPC_REL16
-L(start_addressesp):
- .long L(start_addresses)-L(branch)
-#endif
ENTRY(_start)
/* Save the stack pointer, in case we're statically linked under Linux. */
mr r9,r1
@@ -77,16 +73,10 @@ L(branch):
start_addresses in r8. Also load the GOT pointer so that new PLT
calls work, like the one to __libc_start_main. */
#ifdef PIC
-# ifdef HAVE_ASM_PPC_REL16
addis r30,r13,_GLOBAL_OFFSET_TABLE_-L(branch)@ha
addis r8,r13,L(start_addresses)-L(branch)@ha
addi r30,r30,_GLOBAL_OFFSET_TABLE_-L(branch)@l
lwzu r13,L(start_addresses)-L(branch)@l(r8)
-# else
- lwz r8,L(start_addressesp)-L(branch)(r13)
- add r8,r13,r8
- lwz r13,0(r8)
-# endif
#else
lis r8,L(start_addresses)@ha
lwzu r13,L(start_addresses)@l(r8)
diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
index 04ed6da68b..e1ac064a59 100644
--- a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
+++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
@@ -34,15 +34,10 @@ ENTRY (BP_SYM (__longjmp))
# ifdef PIC
mflr r6
cfi_register (lr,r6)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r5
addis r5,r5,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r5,r5,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r5
-# endif
# ifdef SHARED
lwz r5,_rtld_global_ro@got(r5)
mtlr r6
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_ceil.S b/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
index bc74d302fb..80e72ca2bd 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
@@ -31,17 +31,10 @@ ENTRY (__ceil)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
index 47a75ec0c3..ce6d71e4f8 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
@@ -30,17 +30,10 @@ ENTRY (__ceilf)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_floor.S b/sysdeps/powerpc/powerpc32/fpu/s_floor.S
index a29e4791ea..0dd0dbe6c0 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_floor.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_floor.S
@@ -31,17 +31,10 @@ ENTRY (__floor)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_floorf.S b/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
index 99fbdc5f86..98a47458bc 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
@@ -30,17 +30,10 @@ ENTRY (__floorf)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/fpu/s_lround.S
index d73749e134..3bf1ffaea1 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_lround.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_lround.S
@@ -45,17 +45,10 @@ ENTRY (__lround)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp10,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp10,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index c8dca313ae..93133718ad 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -33,17 +33,10 @@ ENTRY (__rint)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index 7771cb2bc8..1e0fbb1f0d 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -29,17 +29,10 @@ ENTRY (__rintf)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_round.S b/sysdeps/powerpc/powerpc32/fpu/s_round.S
index 590c87ad8c..48b346e651 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_round.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_round.S
@@ -43,16 +43,10 @@ ENTRY (__round)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
addi r9,r9,.LC0-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
-# endif
mtlr r11
cfi_same_value (lr)
lfs fp13,0(r9)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
index 7e99bca315..88125aad06 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -42,16 +42,10 @@ ENTRY (__roundf )
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
addi r9,r9,.LC0-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
-# endif
mtlr r11
cfi_same_value (lr)
lfs fp13,0(r9)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_trunc.S b/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
index 5bc0856b9f..c3c021716a 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
@@ -38,17 +38,10 @@ ENTRY (__trunc)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_truncf.S b/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
index e2e3bd6740..eddef070cd 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
@@ -37,17 +37,10 @@ ENTRY (__truncf)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
index b7d1abc00d..131e7a332e 100644
--- a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
+++ b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
@@ -85,15 +85,10 @@ ENTRY (BP_SYM (__sigsetjmp))
# ifdef PIC
mflr r6
cfi_register(lr,r6)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r5
addis r5,r5,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r5,r5,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r5
-# endif
mtlr r6
cfi_same_value (lr)
# ifdef SHARED
diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
index 454abb2b65..b4ce218e24 100644
--- a/sysdeps/powerpc/powerpc32/memset.S
+++ b/sysdeps/powerpc/powerpc32/memset.S
@@ -256,17 +256,10 @@ L(checklinesize):
beq L(medium)
/* Establishes GOT addressability so we can load __cache_line_size
from static. This value was set from the aux vector during startup. */
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr rGOT
addis rGOT,rGOT,__cache_line_size-1b@ha
lwz rCLS,__cache_line_size-1b@l(rGOT)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr rGOT
- lwz rGOT,__cache_line_size@got(rGOT)
- lwz rCLS,0(rGOT)
-# endif
mtlr rTMP
#else
/* Load __cache_line_size from static. This value was set from the
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
index e10a37977a..b03e041d8a 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
@@ -53,16 +53,10 @@ ENTRY (__llround)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
addi r9,r9,.LC0-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
-# endif
mtlr r11
cfi_same_value (lr)
lfd fp9,0(r9)
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S
index 95a0b3915d..8be3cf1848 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S
@@ -63,7 +63,6 @@ EALIGN (__sqrt, 5, 0)
cfi_offset(lr,20-16)
cfi_offset(r30,8-16)
#ifdef SHARED
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,.LCF1
.LCF1:
mflr r30
@@ -71,12 +70,6 @@ EALIGN (__sqrt, 5, 0)
addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l
lwz r9,_LIB_VERSION@got(30)
lwz r0,0(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r30
- lwz r9,_LIB_VERSION@got(30)
- lwz r0,0(r9)
-# endif
#else
lis r9,_LIB_VERSION@ha
lwz r0,_LIB_VERSION@l(r9)
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S
index c31555194b..9fa282c162 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S
@@ -63,7 +63,6 @@ EALIGN (__sqrtf, 5, 0)
cfi_offset(lr,20-16)
cfi_offset(r30,8-16)
#ifdef SHARED
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,.LCF1
.LCF1:
mflr r30
@@ -71,12 +70,6 @@ EALIGN (__sqrtf, 5, 0)
addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l
lwz r9,_LIB_VERSION@got(30)
lwz r0,0(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r30
- lwz r9,_LIB_VERSION@got(30)
- lwz r0,0(r9)
-# endif
#else
lis r9,_LIB_VERSION@ha
lwz r0,_LIB_VERSION@l(r9)
diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S
index 105b5912a1..27a1a0dcbb 100644
--- a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S
+++ b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S
@@ -63,7 +63,6 @@ EALIGN (__sqrt, 5, 0)
cfi_offset(lr,20-16)
cfi_offset(r30,8-16)
#ifdef SHARED
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,.LCF1
.LCF1:
mflr r30
@@ -71,12 +70,6 @@ EALIGN (__sqrt, 5, 0)
addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l
lwz r9,_LIB_VERSION@got(30)
lwz r0,0(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r30
- lwz r9,_LIB_VERSION@got(30)
- lwz r0,0(r9)
-# endif
#else
lis r9,_LIB_VERSION@ha
lwz r0,_LIB_VERSION@l(r9)
diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S
index 14bc0a2ceb..8914855542 100644
--- a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S
+++ b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S
@@ -63,7 +63,6 @@ EALIGN (__sqrtf, 5, 0)
cfi_offset(lr,20-16)
cfi_offset(r30,8-16)
#ifdef SHARED
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,.LCF1
.LCF1:
mflr r30
@@ -71,12 +70,6 @@ EALIGN (__sqrtf, 5, 0)
addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l
lwz r9,_LIB_VERSION@got(30)
lwz r0,0(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r30
- lwz r9,_LIB_VERSION@got(30)
- lwz r0,0(r9)
-# endif
#else
lis r9,_LIB_VERSION@ha
lwz r0,_LIB_VERSION@l(r9)
diff --git a/sysdeps/powerpc/powerpc32/power7/Implies b/sysdeps/powerpc/powerpc32/power7/Implies
deleted file mode 100644
index 03899d8a3c..0000000000
--- a/sysdeps/powerpc/powerpc32/power7/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc32/power5
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/Implies b/sysdeps/powerpc/powerpc32/power7/fpu/Implies
deleted file mode 100644
index 819a7d7979..0000000000
--- a/sysdeps/powerpc/powerpc32/power7/fpu/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc32/power5/fpu
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
new file mode 100644
index 0000000000..5b0d950c74
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
@@ -0,0 +1,89 @@
+/* finite(). PowerPC32/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __finite(x) */
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 3
+.LC0: /* 1.0 */
+ .quad 0x3ff0000000000000
+
+ .section ".text"
+ .type __finite, @function
+ .machine power7
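+/* The fast path relies on the POWER7 ftdiv instruction: testing fp1
+   against the constant 1.0 sets cr7 flags that identify special
+   operands (NaN, +/-Inf, denormals), so ordinary finite values return
+   immediately without inspecting the bit pattern.  Only the special
+   cases fall through to the store/reload sequence that examines the
+   exponent field in a GPR; the ori 2,2,0 presumably keeps that store
+   and the dependent load in separate dispatch groups.  */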
+ENTRY (__finite)
+#ifdef SHARED
+ mflr r11
+ cfi_register(lr,r11)
+
+ bcl 20,31,1f
+1: mflr r9
+ addis r9,r9,.LC0-1b@ha
+ lfd fp0,.LC0-1b@l(r9)
+
+ mtlr r11
+ cfi_same_value (lr)
+#else
+ lis r9,.LC0@ha
+ lfd fp0,.LC0@l(r9)
+#endif
+ ftdiv cr7,fp1,fp0
+ li r3,1
+ bflr 30
+
+ /* We have -INF/+INF/NaN or a denormal. */
+
+ stwu r1,-16(r1) /* Allocate stack space. */
+ stfd fp1,8(r1) /* Transfer FP to GPR's. */
+
+ ori 2,2,0 /* Force a new dispatch group. */
+ lhz r0,8(r1) /* Fetch the upper portion of the high word of
+ the FP value (where the exponent and sign bits
+ are). */
+ clrlwi r0,r0,17 /* r0 = abs(r0). */
+ addi r1,r1,16 /* Reset the stack pointer. */
+ cmpwi cr7,r0,0x7ff0 /* r0 == 0x7ff0? */
+ bltlr cr7 /* LT means we have a denormal. */
+ li r3,0
+ blr
+ END (__finite)
+
+hidden_def (__finite)
+weak_alias (__finite, finite)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__finite, __finitef)
+hidden_def (__finitef)
+weak_alias (__finitef, finitef)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__finite, __finitel)
+weak_alias (__finite, finitel)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libc, finite, finitel, GLIBC_2_0);
+# endif
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S
new file mode 100644
index 0000000000..54bd94176d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_finite.S. */
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
new file mode 100644
index 0000000000..2979534911
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
@@ -0,0 +1,88 @@
+/* isinf(). PowerPC32/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isinf(x) */
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 3
+.LC0: /* 1.0 */
+ .quad 0x3ff0000000000000
+
+ .section ".text"
+ .type __isinf, @function
+ .machine power7
+ENTRY (__isinf)
+#ifdef SHARED
+ mflr r11
+ cfi_register(lr,r11)
+
+ bcl 20,31,1f
+1: mflr r9
+ addis r9,r9,.LC0-1b@ha
+ lfd fp0,.LC0-1b@l(r9)
+
+ mtlr r11
+ cfi_same_value (lr)
+#else
+ lis r9,.LC0@ha
+ lfd fp0,.LC0@l(r9)
+#endif
+ ftdiv cr7,fp1,fp0
+ li r3,0
+ bflr 29 /* If not INF, return. */
+
+ /* Either we have -INF/+INF or a denormal. */
+
+ stwu r1,-16(r1) /* Allocate stack space. */
+ stfd fp1,8(r1) /* Transfer FP to GPR's. */
+ ori 2,2,0 /* Force a new dispatch group. */
+ lhz r4,8(r1) /* Fetch the upper portion of the high word of
+ the FP value (where the exponent and sign bits
+ are). */
+ addi r1,r1,16 /* Reset the stack pointer. */
+ cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
+ li r3,1
+ beqlr cr7 /* EQ means INF, otherwise -INF. */
+ li r3,-1
+ blr
+ END (__isinf)
+
+hidden_def (__isinf)
+weak_alias (__isinf, isinf)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isinf, __isinff)
+hidden_def (__isinff)
+weak_alias (__isinff, isinff)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isinf, __isinfl)
+weak_alias (__isinf, isinfl)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
+compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
+# endif
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S
new file mode 100644
index 0000000000..be759e091e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isinf.S. */
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
new file mode 100644
index 0000000000..852539f24b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
@@ -0,0 +1,92 @@
+/* isnan(). PowerPC32/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isnan(x) */
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 3
+.LC0: /* 1.0 */
+ .quad 0x3ff0000000000000
+
+ .section ".text"
+ .type __isnan, @function
+ .machine power7
+ENTRY (__isnan)
+#ifdef SHARED
+ mflr r11
+ cfi_register(lr,r11)
+
+ bcl 20,31,1f
+1: mflr r9
+ addis r9,r9,.LC0-1b@ha
+ lfd fp0,.LC0-1b@l(r9)
+
+ mtlr r11
+ cfi_same_value (lr)
+#else
+ lis r9,.LC0@ha
+ lfd fp0,.LC0@l(r9)
+#endif
+ ftdiv cr7,fp1,fp0
+ li r3,0
+ bflr 30 /* If not NaN or Inf, finish. */
+
+ /* We have -INF/+INF/NaN or a denormal. */
+
+ stwu r1,-16(r1) /* Allocate stack space. */
+ stfd fp1,8(r1) /* Transfer FP to GPR's. */
+ ori 2,2,0 /* Force a new dispatch group. */
+ lwz r4,8(r1) /* Load the upper half of the FP value. */
+ lwz r5,12(r1) /* Load the lower half of the FP value. */
+ addi r1,r1,16 /* Reset the stack pointer. */
+ lis r0,0x7ff0 /* Load the upper portion for an INF/NaN. */
+ clrlwi r4,r4,1 /* r4 = abs(r4). */
+ cmpw cr7,r4,r0 /* if (abs(r4) <= inf). */
+ cmpwi cr6,r5,0 /* r5 == 0x00000000? */
+ bltlr cr7 /* LT means we have a denormal. */
+ bgt cr7,L(NaN) /* GT means we have a NaN. */
+ beqlr cr6 /* EQ means we have +/-INF. */
+L(NaN):
+ li r3,1 /* x == NaN? */
+ blr
+ END (__isnan)
+
+hidden_def (__isnan)
+weak_alias (__isnan, isnan)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isnan, __isnanf)
+hidden_def (__isnanf)
+weak_alias (__isnanf, isnanf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isnan, __isnanl)
+weak_alias (__isnan, isnanl)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
+compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
+# endif
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S
new file mode 100644
index 0000000000..b48c85e0d3
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isnan.S. */
diff --git a/sysdeps/powerpc/powerpc64/power7/Implies b/sysdeps/powerpc/powerpc64/power7/Implies
deleted file mode 100644
index 13b03309fb..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power5
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/Implies b/sysdeps/powerpc/powerpc64/power7/fpu/Implies
deleted file mode 100644
index 13b03309fb..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power5
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
new file mode 100644
index 0000000000..6763d1adc8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
@@ -0,0 +1,68 @@
+/* finite(). PowerPC64/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __finite(x) */
+ .section ".toc","aw"
+.LC0: /* 1.0 */
+ .tc FD_ONE[TC],0x3ff0000000000000
+ .section ".text"
+ .type __finite, @function
+ .machine power7
+EALIGN (__finite, 4, 0)
+ CALL_MCOUNT 0
+ lfd fp0,.LC0@toc(r2)
+ ftdiv cr7,fp1,fp0
+ li r3,1
+ bflr 30
+
+ /* If we are here, we either have +/-INF,
+ NaN or denormal. */
+
+ stfd fp1,-16(r1) /* Transfer FP to GPR's. */
+ ori 2,2,0 /* Force a new dispatch group. */
+
+ lhz r4,-16(r1) /* Fetch the upper portion of the high word of
+ the FP value (where the exponent and sign bits
+ are). */
+ clrlwi r4,r4,17 /* r4 = abs(r4). */
+ cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
+ bltlr cr7 /* LT means finite, otherwise non-finite. */
+ li r3,0
+ blr
+ END (__finite)
+
+hidden_def (__finite)
+weak_alias (__finite, finite)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__finite, __finitef)
+hidden_def (__finitef)
+weak_alias (__finitef, finitef)
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libc, finite, finitel, GLIBC_2_0);
+# endif
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S
new file mode 100644
index 0000000000..54bd94176d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_finite.S. */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
new file mode 100644
index 0000000000..f896d38026
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
@@ -0,0 +1,71 @@
+/* isinf(). PowerPC64/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isinf(x) */
+ .section ".toc","aw"
+.LC0: /* 1.0 */
+ .tc FD_ONE[TC],0x3ff0000000000000
+ .section ".text"
+ .type __isinf, @function
+ .machine power7
+EALIGN (__isinf, 4, 0)
+ CALL_MCOUNT 0
+ lfd fp0,.LC0@toc(r2)
+ ftdiv cr7,fp1,fp0
+ li r3,0
+ bflr 29 /* If not INF, return. */
+
+ /* Either we have -INF/+INF or a denormal. */
+
+ stfd fp1,-16(r1) /* Transfer FP to GPRs. */
+ ori 2,2,0 /* Force a new dispatch group. */
+ lhz r4,-16(r1) /* Fetch the upper portion of the high word of
+ the FP value (where the exponent and sign bits
+ are). */
+ cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
+ li r3,1
+ beqlr cr7 /* EQ means +INF, otherwise -INF. */
+ li r3,-1
+ blr
+ END (__isinf)
+
+hidden_def (__isinf)
+weak_alias (__isinf, isinf)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isinf, __isinff)
+hidden_def (__isinff)
+weak_alias (__isinff, isinff)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isinf, __isinfl)
+weak_alias (__isinf, isinfl)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
+compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
+# endif
+#endif
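
As in s_finite.S, the ftdiv result only filters out the ordinary finite case; the sign/exponent half-word then distinguishes +INF (return 1) from -INF (return -1). The equivalent whole-word test, as a hedged C sketch rather than the library code:

    #include <stdint.h>
    #include <string.h>

    /* isinf(x): 1 for +INF, -1 for -INF, 0 otherwise.  */
    static int isinf_sketch (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);
      if ((bits & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL)
        return 0;                       /* finite, NaN or denormal */
      return (bits >> 63) ? -1 : 1;     /* sign bit picks the infinity */
    }
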
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S
new file mode 100644
index 0000000000..be759e091e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isinf.S. */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S
new file mode 100644
index 0000000000..8877012598
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S
@@ -0,0 +1,69 @@
+/* isnan(). PowerPC64/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isnan(x) */
+ .section ".toc","aw"
+.LC0: /* 1.0 */
+ .tc FD_ONE[TC],0x3ff0000000000000
+ .section ".text"
+ .type __isnan, @function
+ .machine power7
+EALIGN (__isnan, 4, 0)
+ CALL_MCOUNT 0
+ lfd fp0,.LC0@toc(r2)
+ ftdiv cr7,fp1,fp0
+ li r3,0
+ bflr 30 /* If not NaN, finish. */
+
+ stfd fp1,-16(r1) /* Transfer FP to GPRs. */
+ ori 2,2,0 /* Force a new dispatch group. */
+ ld r4,-16(r1) /* Load FP into GPR. */
+ lis r0,0x7ff0
+ sldi r0,r0,32 /* r0 = 0x7ff0000000000000 (bit pattern of +INF). */
+ clrldi r4,r4,1 /* x = fabs(x) */
+ cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */
+ blelr cr7 /* LE means not NaN. */
+ li r3,1 /* else return 1 */
+ blr
+ END (__isnan)
+
+hidden_def (__isnan)
+weak_alias (__isnan, isnan)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isnan, __isnanf)
+hidden_def (__isnanf)
+weak_alias (__isnanf, isnanf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isnan, __isnanl)
+weak_alias (__isnan, isnanl)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
+compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
+# endif
+#endif
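
Here the fallback path compares the full 64-bit pattern: after clearing the sign bit, anything strictly greater than the +INF pattern has an all-ones exponent with a non-zero mantissa, i.e. a NaN. A compact C sketch of that comparison (illustrative only):

    #include <stdint.h>
    #include <string.h>

    /* isnan(x): the absolute bit pattern exceeds that of +INF.  */
    static int isnan_sketch (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);
      return (bits & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
    }
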
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S
new file mode 100644
index 0000000000..b48c85e0d3
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isnan.S. */
diff --git a/sysdeps/s390/fpu/fegetenv.c b/sysdeps/s390/fpu/fegetenv.c
index a244f2ca8b..04da54c94c 100644
--- a/sysdeps/s390/fpu/fegetenv.c
+++ b/sysdeps/s390/fpu/fegetenv.c
@@ -20,10 +20,6 @@
#include <fenv_libc.h>
#include <fpu_control.h>
-#include <stddef.h>
-#include <asm/ptrace.h>
-#include <sys/ptrace.h>
-#include <unistd.h>
int
fegetenv (fenv_t *envp)
@@ -33,3 +29,4 @@ fegetenv (fenv_t *envp)
/* Success. */
return 0;
}
+libm_hidden_def (fegetenv)
diff --git a/sysdeps/s390/s390-64/utf16-utf32-z9.c b/sysdeps/s390/s390-64/utf16-utf32-z9.c
index 868dea68ca..14daf2118f 100644
--- a/sysdeps/s390/s390-64/utf16-utf32-z9.c
+++ b/sysdeps/s390/s390-64/utf16-utf32-z9.c
@@ -203,7 +203,10 @@ gconv_end (struct __gconv_step *data)
swapping). */
#define BODY \
{ \
- if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ /* The hardware instruction currently fails to report an error for \
+ isolated low surrogates, so we have to disable the instruction \
+ until this gets resolved. */ \
+ if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \
{ \
HARDWARE_CONVERT ("cu24 %0, %1, 1"); \
if (inptr != inend) \
@@ -229,6 +232,12 @@ gconv_end (struct __gconv_step *data)
} \
else \
{ \
+ /* An isolated low-surrogate was found. This has to be \
+ considered ill-formed. */ \
+ if (__builtin_expect (u1 >= 0xdc00, 0)) \
+ { \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
+ } \
/* It's a surrogate character. At least the first word says \
it is. */ \
if (__builtin_expect (inptr + 4 > inend, 0)) \
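
The added check rejects a low surrogate that appears without a preceding high surrogate, which UTF-16 defines as ill-formed. A standalone sketch of the classification the software loop now performs on the first 16-bit unit (the function name and return convention are illustrative only):

    #include <stdint.h>

    /* 0: complete BMP character; 1: high surrogate, a low surrogate
       must follow; -1: isolated low surrogate, ill-formed input.  */
    static int utf16_lead_class (uint16_t u1)
    {
      if (u1 >= 0xdc00 && u1 <= 0xdfff)
        return -1;
      if (u1 >= 0xd800 && u1 <= 0xdbff)
        return 1;
      return 0;
    }
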
diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c
index 531d3ebd4b..5f73f3c59e 100644
--- a/sysdeps/s390/s390-64/utf8-utf16-z9.c
+++ b/sysdeps/s390/s390-64/utf8-utf16-z9.c
@@ -345,9 +345,12 @@ gconv_end (struct __gconv_step *data)
Operation. */
#define BODY \
{ \
- if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ /* The hardware instruction currently fails to report an error for \
+ isolated low surrogates, so we have to disable the instruction \
+ until this gets resolved. */ \
+ if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \
{ \
- HARDWARE_CONVERT ("cu21 %0, %1"); \
+ HARDWARE_CONVERT ("cu21 %0, %1, 1"); \
if (inptr != inend) \
{ \
/* Check if the third byte is \
@@ -388,7 +391,7 @@ gconv_end (struct __gconv_step *data)
\
outptr += 2; \
} \
- else if (c >= 0x0800 && c <= 0xd7ff) \
+ else if ((c >= 0x0800 && c <= 0xd7ff) || c > 0xdfff) \
{ \
/* Three byte UTF-8 char. */ \
\
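
The widened condition admits every BMP code point that needs a three-byte UTF-8 sequence, i.e. U+0800..U+FFFF minus the surrogate range; previously code points above U+DFFF were not matched here. A small C sketch of the predicate, assuming c is already known to be at most 0xffff at this point in the loop:

    #include <stdint.h>

    /* True when code point c (<= 0xffff) encodes as three UTF-8 bytes.  */
    static int utf8_needs_three_bytes (uint32_t c)
    {
      return (c >= 0x0800 && c <= 0xd7ff) || (c > 0xdfff && c <= 0xffff);
    }
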
diff --git a/sysdeps/sh/sh4/fpu/fegetenv.c b/sysdeps/sh/sh4/fpu/fegetenv.c
index c07b32af30..683939b52d 100644
--- a/sysdeps/sh/sh4/fpu/fegetenv.c
+++ b/sysdeps/sh/sh4/fpu/fegetenv.c
@@ -29,3 +29,4 @@ fegetenv (fenv_t *envp)
return 0;
}
+libm_hidden_def (fegetenv)
diff --git a/sysdeps/sparc/fpu/fegetenv.c b/sysdeps/sparc/fpu/fegetenv.c
index 36486f5973..c606a9cac0 100644
--- a/sysdeps/sparc/fpu/fegetenv.c
+++ b/sysdeps/sparc/fpu/fegetenv.c
@@ -34,4 +34,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/sysdeps/sparc/sparc32/dl-irel.h b/sysdeps/sparc/sparc32/dl-irel.h
new file mode 100644
index 0000000000..1891938d6d
--- /dev/null
+++ b/sysdeps/sparc/sparc32/dl-irel.h
@@ -0,0 +1,55 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+ SPARC 32-bit version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <dl-plt.h>
+
+#define ELF_MACHINE_IRELA 1
+
+static inline void
+__attribute ((always_inline))
+elf_irela (const Elf32_Rela *reloc)
+{
+ unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
+
+ if (__builtin_expect (r_type == R_SPARC_IRELATIVE, 1))
+ {
+ Elf32_Addr *const reloc_addr = (void *) reloc->r_offset;
+ Elf32_Addr value = ((Elf32_Addr (*) (void)) reloc->r_addend) ();
+ *reloc_addr = value;
+ }
+ else if (__builtin_expect (r_type == R_SPARC_JMP_IREL, 1))
+ {
+ Elf32_Addr *const reloc_addr = (void *) reloc->r_offset;
+ Elf32_Addr value = ((Elf32_Addr (*) (void)) reloc->r_addend) ();
+
+ sparc_fixup_plt (reloc, reloc_addr, value, 0, 1);
+ }
+ else if (r_type == R_SPARC_NONE)
+ ;
+ else
+ __libc_fatal ("unexpected reloc type in static binary");
+}
+
+#endif /* dl-irel.h */
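
For static binaries, elf_irela resolves an IRELATIVE relocation by calling the resolver whose address is stored in r_addend and writing the returned address to r_offset. The same idea in portable C, with hypothetical names (this is not glibc API):

    #include <stdint.h>

    typedef uintptr_t (*ifunc_resolver_t) (void);

    /* Store the implementation chosen by the resolver at *where.  */
    static void apply_irelative (uintptr_t *where, uintptr_t resolver_addr)
    {
      ifunc_resolver_t resolver = (ifunc_resolver_t) resolver_addr;
      *where = resolver ();
    }
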
diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h
index b3b7852d87..e1385f7aca 100644
--- a/sysdeps/sparc/sparc32/dl-machine.h
+++ b/sysdeps/sparc/sparc32/dl-machine.h
@@ -1,5 +1,5 @@
/* Machine-dependent ELF dynamic relocation inline functions. SPARC version.
- Copyright (C) 1996-2003, 2004, 2005, 2006, 2007
+ Copyright (C) 1996-2003, 2004, 2005, 2006, 2007, 2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -27,20 +27,13 @@
#include <sys/param.h>
#include <ldsodefs.h>
#include <tls.h>
+#include <dl-plt.h>
#ifndef VALIDX
# define VALIDX(tag) (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGNUM \
+ DT_EXTRANUM + DT_VALTAGIDX (tag))
#endif
-/* Some SPARC opcodes we need to use for self-modifying code. */
-#define OPCODE_NOP 0x01000000 /* nop */
-#define OPCODE_CALL 0x40000000 /* call ?; add PC-rel word address */
-#define OPCODE_SETHI_G1 0x03000000 /* sethi ?, %g1; add value>>10 */
-#define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */
-#define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */
-#define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */
-
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
elf_machine_matches_host (const Elf32_Ehdr *ehdr)
@@ -312,41 +305,6 @@ _dl_start_user:\n\
.size _dl_start_user, . - _dl_start_user\n\
.previous");
-static inline __attribute__ ((always_inline)) Elf32_Addr
-sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr,
- Elf32_Addr value, int t, int do_flush)
-{
- Elf32_Sword disp = value - (Elf32_Addr) reloc_addr;
-
- if (0 && disp >= -0x800000 && disp < 0x800000)
- {
- /* Don't need to worry about thread safety. We're writing just one
- instruction. */
-
- reloc_addr[0] = OPCODE_BA | ((disp >> 2) & 0x3fffff);
- if (do_flush)
- __asm __volatile ("flush %0" : : "r"(reloc_addr));
- }
- else
- {
- /* For thread safety, write the instructions from the bottom and
- flush before we overwrite the critical "b,a". This of course
- need not be done during bootstrapping, since there are no threads.
- But we also can't tell if we _can_ use flush, so don't. */
-
- reloc_addr += t;
- reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff);
- if (do_flush)
- __asm __volatile ("flush %0+4" : : "r"(reloc_addr));
-
- reloc_addr[0] = OPCODE_SETHI_G1 | (value >> 10);
- if (do_flush)
- __asm __volatile ("flush %0" : : "r"(reloc_addr));
- }
-
- return value;
-}
-
static inline Elf32_Addr
elf_machine_fixup_plt (struct link_map *map, lookup_t t,
const Elf32_Rela *reloc,
@@ -433,6 +391,13 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
value += reloc->r_addend; /* Assume copy relocs have zero addend. */
+ if (sym != NULL
+ && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)
+ && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1))
+ {
+ value = ((Elf32_Addr (*) (void)) value) ();
+ }
+
switch (r_type)
{
#if !defined RTLD_BOOTSTRAP && !defined RESOLVE_CONFLICT_FIND_MAP
@@ -460,6 +425,13 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
case R_SPARC_32:
*reloc_addr = value;
break;
+ case R_SPARC_IRELATIVE:
+ value = ((Elf32_Addr (*) (void)) value) ();
+ *reloc_addr = value;
+ break;
+ case R_SPARC_JMP_IREL:
+ value = ((Elf32_Addr (*) (void)) value) ();
+ /* Fall through. */
case R_SPARC_JMP_SLOT:
{
#if !defined RTLD_BOOTSTRAP && !defined __sparc_v9__
@@ -578,16 +550,21 @@ __attribute__ ((always_inline))
elf_machine_lazy_rel (struct link_map *map,
Elf32_Addr l_addr, const Elf32_Rela *reloc)
{
- switch (ELF32_R_TYPE (reloc->r_info))
+ Elf32_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+ const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
+
+ if (__builtin_expect (r_type == R_SPARC_JMP_SLOT, 1))
+ ;
+ else if (r_type == R_SPARC_JMP_IREL)
{
- case R_SPARC_NONE:
- break;
- case R_SPARC_JMP_SLOT:
- break;
- default:
- _dl_reloc_bad_type (map, ELFW(R_TYPE) (reloc->r_info), 1);
- break;
+ Elf32_Addr value = map->l_addr + reloc->r_addend;
+ value = ((Elf32_Addr (*) (void)) value) ();
+ sparc_fixup_plt (reloc, reloc_addr, value, 0, 1);
}
+ else if (r_type == R_SPARC_NONE)
+ ;
+ else
+ _dl_reloc_bad_type (map, r_type, 1);
}
#endif /* RESOLVE_MAP */
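
The new STT_GNU_IFUNC and R_SPARC_*IREL handling lets the dynamic linker call a resolver function and bind the address it returns instead of the symbol's own address. At the source level such symbols typically come from GCC's ifunc attribute; a hedged example with hypothetical names (my_memcpy, my_memcpy_resolver):

    #include <stddef.h>
    #include <string.h>

    static void *memcpy_generic (void *d, const void *s, size_t n)
    {
      return memcpy (d, s, n);
    }

    /* The resolver runs at relocation time and returns the implementation
       to bind; a real one would inspect hardware capabilities here.  */
    static void *(*my_memcpy_resolver (void)) (void *, const void *, size_t)
    {
      return memcpy_generic;
    }

    void *my_memcpy (void *, const void *, size_t)
      __attribute__ ((ifunc ("my_memcpy_resolver")));
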
diff --git a/sysdeps/sparc/sparc32/dl-plt.h b/sysdeps/sparc/sparc32/dl-plt.h
new file mode 100644
index 0000000000..edcc5c1374
--- /dev/null
+++ b/sysdeps/sparc/sparc32/dl-plt.h
@@ -0,0 +1,62 @@
+/* PLT fixups. Sparc 32-bit version.
+ Copyright (C) 1996-2003, 2004, 2005, 2006, 2007, 2010
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Some SPARC opcodes we need to use for self-modifying code. */
+#define OPCODE_NOP 0x01000000 /* nop */
+#define OPCODE_CALL 0x40000000 /* call ?; add PC-rel word address */
+#define OPCODE_SETHI_G1 0x03000000 /* sethi ?, %g1; add value>>10 */
+#define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */
+#define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */
+#define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */
+
+static inline __attribute__ ((always_inline)) Elf32_Addr
+sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr,
+ Elf32_Addr value, int t, int do_flush)
+{
+ Elf32_Sword disp = value - (Elf32_Addr) reloc_addr;
+
+ if (0 && disp >= -0x800000 && disp < 0x800000)
+ {
+ /* Don't need to worry about thread safety. We're writing just one
+ instruction. */
+
+ reloc_addr[0] = OPCODE_BA | ((disp >> 2) & 0x3fffff);
+ if (do_flush)
+ __asm __volatile ("flush %0" : : "r"(reloc_addr));
+ }
+ else
+ {
+ /* For thread safety, write the instructions from the bottom and
+ flush before we overwrite the critical "b,a". This of course
+ need not be done during bootstrapping, since there are no threads.
+ But we also can't tell if we _can_ use flush, so don't. */
+
+ reloc_addr += t;
+ reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff);
+ if (do_flush)
+ __asm __volatile ("flush %0+4" : : "r"(reloc_addr));
+
+ reloc_addr[0] = OPCODE_SETHI_G1 | (value >> 10);
+ if (do_flush)
+ __asm __volatile ("flush %0" : : "r"(reloc_addr));
+ }
+
+ return value;
+}
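
sparc_fixup_plt leaves the patched PLT entry as a two-instruction sethi/jmp pair, and it deliberately stores (and flushes) the second word before the first so a concurrently executing thread never sees a half-updated entry. A small C sketch of just the instruction encodings being written, mirroring the opcode constants above:

    #include <stdint.h>

    /* Words written into a patched 32-bit SPARC PLT entry for `value'.  */
    static void encode_plt_pair (uint32_t value, uint32_t out[2])
    {
      out[1] = 0x81c06000u | (value & 0x3ff);  /* jmp   %g1 + %lo(value) */
      out[0] = 0x03000000u | (value >> 10);    /* sethi %hi(value), %g1  */
    }
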
diff --git a/sysdeps/sparc/sparc64/dl-irel.h b/sysdeps/sparc/sparc64/dl-irel.h
new file mode 100644
index 0000000000..1a2a0a3dd5
--- /dev/null
+++ b/sysdeps/sparc/sparc64/dl-irel.h
@@ -0,0 +1,58 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+ SPARC 64-bit version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <dl-plt.h>
+
+#define ELF_MACHINE_IRELA 1
+
+static inline void
+__attribute ((always_inline))
+elf_irela (const Elf64_Rela *reloc)
+{
+ unsigned int r_type = (reloc->r_info & 0xff);
+
+ if (__builtin_expect (r_type == R_SPARC_IRELATIVE, 1))
+ {
+ Elf64_Addr *const reloc_addr = (void *) reloc->r_offset;
+ Elf64_Addr value = ((Elf64_Addr (*) (void)) reloc->r_addend) ();
+ *reloc_addr = value;
+ }
+ else if (__builtin_expect (r_type == R_SPARC_JMP_IREL, 1))
+ {
+ Elf64_Addr *const reloc_addr = (void *) reloc->r_offset;
+ Elf64_Addr value = ((Elf64_Addr (*) (void)) reloc->r_addend) ();
+ struct link_map map = { .l_addr = 0 };
+
+ /* 'high' is always zero; for large PLT entries the linker
+ emits an R_SPARC_IRELATIVE. */
+ sparc64_fixup_plt (&map, reloc, reloc_addr, value, 0, 0);
+ }
+ else if (r_type == R_SPARC_NONE)
+ ;
+ else
+ __libc_fatal ("unexpected reloc type in static binary");
+}
+
+#endif /* dl-irel.h */
diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h
index 3eee672912..b4f43e9cf5 100644
--- a/sysdeps/sparc/sparc64/dl-machine.h
+++ b/sysdeps/sparc/sparc64/dl-machine.h
@@ -1,6 +1,6 @@
/* Machine-dependent ELF dynamic relocation inline functions. Sparc64 version.
- Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -27,6 +27,7 @@
#include <sys/param.h>
#include <ldsodefs.h>
#include <sysdep.h>
+#include <dl-plt.h>
#ifndef VALIDX
# define VALIDX(tag) (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGNUM \
@@ -89,132 +90,6 @@ elf_machine_load_address (void)
return (Elf64_Addr) got - *got + (Elf32_Sword) ((pc[2] - pc[3]) * 4) - 4;
}
-/* We have 4 cases to handle. And we code different code sequences
- for each one. I love V9 code models... */
-static inline void __attribute__ ((always_inline))
-sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
- Elf64_Addr *reloc_addr, Elf64_Addr value,
- Elf64_Addr high, int t)
-{
- unsigned int *insns = (unsigned int *) reloc_addr;
- Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr;
- Elf64_Sxword disp = value - plt_vaddr;
-
- /* Now move plt_vaddr up to the call instruction. */
- plt_vaddr += ((t + 1) * 4);
-
- /* PLT entries .PLT32768 and above look always the same. */
- if (__builtin_expect (high, 0) != 0)
- {
- *reloc_addr = value - map->l_addr;
- }
- /* Near destination. */
- else if (disp >= -0x800000 && disp < 0x800000)
- {
- /* As this is just one instruction, it is thread safe and so
- we can avoid the unnecessary sethi FOO, %g1.
- b,a target */
- insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff);
- __asm __volatile ("flush %0" : : "r" (insns));
- }
- /* 32-bit Sparc style, the target is in the lower 32-bits of
- address space. */
- else if (insns += t, (value >> 32) == 0)
- {
- /* sethi %hi(target), %g1
- jmpl %g1 + %lo(target), %g0 */
-
- insns[1] = 0x81c06000 | (value & 0x3ff);
- __asm __volatile ("flush %0 + 4" : : "r" (insns));
-
- insns[0] = 0x03000000 | ((unsigned int)(value >> 10));
- __asm __volatile ("flush %0" : : "r" (insns));
- }
- /* We can also get somewhat simple sequences if the distance between
- the target and the PLT entry is within +/- 2GB. */
- else if ((plt_vaddr > value
- && ((plt_vaddr - value) >> 31) == 0)
- || (value > plt_vaddr
- && ((value - plt_vaddr) >> 31) == 0))
- {
- unsigned int displacement;
-
- if (plt_vaddr > value)
- displacement = (0 - (plt_vaddr - value));
- else
- displacement = value - plt_vaddr;
-
- /* mov %o7, %g1
- call displacement
- mov %g1, %o7 */
-
- insns[2] = 0x9e100001;
- __asm __volatile ("flush %0 + 8" : : "r" (insns));
-
- insns[1] = 0x40000000 | (displacement >> 2);
- __asm __volatile ("flush %0 + 4" : : "r" (insns));
-
- insns[0] = 0x8210000f;
- __asm __volatile ("flush %0" : : "r" (insns));
- }
- /* Worst case, ho hum... */
- else
- {
- unsigned int high32 = (value >> 32);
- unsigned int low32 = (unsigned int) value;
-
- /* ??? Some tricks can be stolen from the sparc64 egcs backend
- constant formation code I wrote. -DaveM */
-
- if (__builtin_expect (high32 & 0x3ff, 0))
- {
- /* sethi %hh(value), %g1
- sethi %lm(value), %g5
- or %g1, %hm(value), %g1
- or %g5, %lo(value), %g5
- sllx %g1, 32, %g1
- jmpl %g1 + %g5, %g0
- nop */
-
- insns[5] = 0x81c04005;
- __asm __volatile ("flush %0 + 20" : : "r" (insns));
-
- insns[4] = 0x83287020;
- __asm __volatile ("flush %0 + 16" : : "r" (insns));
-
- insns[3] = 0x8a116000 | (low32 & 0x3ff);
- __asm __volatile ("flush %0 + 12" : : "r" (insns));
-
- insns[2] = 0x82106000 | (high32 & 0x3ff);
- }
- else
- {
- /* sethi %hh(value), %g1
- sethi %lm(value), %g5
- sllx %g1, 32, %g1
- or %g5, %lo(value), %g5
- jmpl %g1 + %g5, %g0
- nop */
-
- insns[4] = 0x81c04005;
- __asm __volatile ("flush %0 + 16" : : "r" (insns));
-
- insns[3] = 0x8a116000 | (low32 & 0x3ff);
- __asm __volatile ("flush %0 + 12" : : "r" (insns));
-
- insns[2] = 0x83287020;
- }
-
- __asm __volatile ("flush %0 + 8" : : "r" (insns));
-
- insns[1] = 0x0b000000 | (low32 >> 10);
- __asm __volatile ("flush %0 + 4" : : "r" (insns));
-
- insns[0] = 0x03000000 | (high32 >> 10);
- __asm __volatile ("flush %0" : : "r" (insns));
- }
-}
-
static inline Elf64_Addr __attribute__ ((always_inline))
elf_machine_fixup_plt (struct link_map *map, lookup_t t,
const Elf64_Rela *reloc,
@@ -549,6 +424,11 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
value += reloc->r_addend; /* Assume copy relocs have zero addend. */
+ if (sym != NULL
+ && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)
+ && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1))
+ value = ((Elf64_Addr (*) (void)) value) ();
+
switch (r_type)
{
#if !defined RTLD_BOOTSTRAP && !defined RESOLVE_CONFLICT_FIND_MAP
@@ -576,6 +456,13 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
case R_SPARC_GLOB_DAT:
*reloc_addr = value;
break;
+ case R_SPARC_IRELATIVE:
+ value = ((Elf64_Addr (*) (void)) value) ();
+ *reloc_addr = value;
+ break;
+ case R_SPARC_JMP_IREL:
+ value = ((Elf64_Addr (*) (void)) value) ();
+ /* Fall through. */
case R_SPARC_JMP_SLOT:
#ifdef RESOLVE_CONFLICT_FIND_MAP
/* R_SPARC_JMP_SLOT conflicts against .plt[32768+]
@@ -757,16 +644,29 @@ __attribute__ ((always_inline))
elf_machine_lazy_rel (struct link_map *map,
Elf64_Addr l_addr, const Elf64_Rela *reloc)
{
- switch (ELF64_R_TYPE (reloc->r_info))
+ Elf64_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+ const unsigned int r_type = ELF64_R_TYPE (reloc->r_info);
+
+ if (__builtin_expect (r_type == R_SPARC_JMP_SLOT, 1))
+ ;
+ else if (r_type == R_SPARC_JMP_IREL
+ || r_type == R_SPARC_IRELATIVE)
{
- case R_SPARC_NONE:
- break;
- case R_SPARC_JMP_SLOT:
- break;
- default:
- _dl_reloc_bad_type (map, ELFW(R_TYPE) (reloc->r_info), 1);
- break;
+ Elf64_Addr value = map->l_addr + reloc->r_addend;
+ value = ((Elf64_Addr (*) (void)) value) ();
+ if (r_type == R_SPARC_JMP_IREL)
+ {
+ /* 'high' is always zero; for large PLT entries the linker
+ emits an R_SPARC_IRELATIVE. */
+ sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 0);
+ }
+ else
+ *reloc_addr = value;
}
+ else if (r_type == R_SPARC_NONE)
+ ;
+ else
+ _dl_reloc_bad_type (map, r_type, 1);
}
#endif /* RESOLVE_MAP */
diff --git a/sysdeps/sparc/sparc64/dl-plt.h b/sysdeps/sparc/sparc64/dl-plt.h
new file mode 100644
index 0000000000..e06be43a0a
--- /dev/null
+++ b/sysdeps/sparc/sparc64/dl-plt.h
@@ -0,0 +1,144 @@
+/* PLT fixups. Sparc 64-bit version.
+ Copyright (C) 1997-2006, 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* We have 4 cases to handle, and we generate a different code sequence
+ for each one. I love V9 code models... */
+static inline void __attribute__ ((always_inline))
+sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
+ Elf64_Addr *reloc_addr, Elf64_Addr value,
+ Elf64_Addr high, int t)
+{
+ unsigned int *insns = (unsigned int *) reloc_addr;
+ Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr;
+ Elf64_Sxword disp = value - plt_vaddr;
+
+ /* Now move plt_vaddr up to the call instruction. */
+ plt_vaddr += ((t + 1) * 4);
+
+ /* PLT entries .PLT32768 and above look always the same. */
+ if (__builtin_expect (high, 0) != 0)
+ {
+ *reloc_addr = value - map->l_addr;
+ }
+ /* Near destination. */
+ else if (disp >= -0x800000 && disp < 0x800000)
+ {
+ /* As this is just one instruction, it is thread safe and so
+ we can avoid the unnecessary sethi FOO, %g1.
+ b,a target */
+ insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff);
+ __asm __volatile ("flush %0" : : "r" (insns));
+ }
+ /* 32-bit Sparc style, the target is in the lower 32-bits of
+ address space. */
+ else if (insns += t, (value >> 32) == 0)
+ {
+ /* sethi %hi(target), %g1
+ jmpl %g1 + %lo(target), %g0 */
+
+ insns[1] = 0x81c06000 | (value & 0x3ff);
+ __asm __volatile ("flush %0 + 4" : : "r" (insns));
+
+ insns[0] = 0x03000000 | ((unsigned int)(value >> 10));
+ __asm __volatile ("flush %0" : : "r" (insns));
+ }
+ /* We can also get somewhat simple sequences if the distance between
+ the target and the PLT entry is within +/- 2GB. */
+ else if ((plt_vaddr > value
+ && ((plt_vaddr - value) >> 31) == 0)
+ || (value > plt_vaddr
+ && ((value - plt_vaddr) >> 31) == 0))
+ {
+ unsigned int displacement;
+
+ if (plt_vaddr > value)
+ displacement = (0 - (plt_vaddr - value));
+ else
+ displacement = value - plt_vaddr;
+
+ /* mov %o7, %g1
+ call displacement
+ mov %g1, %o7 */
+
+ insns[2] = 0x9e100001;
+ __asm __volatile ("flush %0 + 8" : : "r" (insns));
+
+ insns[1] = 0x40000000 | (displacement >> 2);
+ __asm __volatile ("flush %0 + 4" : : "r" (insns));
+
+ insns[0] = 0x8210000f;
+ __asm __volatile ("flush %0" : : "r" (insns));
+ }
+ /* Worst case, ho hum... */
+ else
+ {
+ unsigned int high32 = (value >> 32);
+ unsigned int low32 = (unsigned int) value;
+
+ /* ??? Some tricks can be stolen from the sparc64 egcs backend
+ constant formation code I wrote. -DaveM */
+
+ if (__builtin_expect (high32 & 0x3ff, 0))
+ {
+ /* sethi %hh(value), %g1
+ sethi %lm(value), %g5
+ or %g1, %hm(value), %g1
+ or %g5, %lo(value), %g5
+ sllx %g1, 32, %g1
+ jmpl %g1 + %g5, %g0
+ nop */
+
+ insns[5] = 0x81c04005;
+ __asm __volatile ("flush %0 + 20" : : "r" (insns));
+
+ insns[4] = 0x83287020;
+ __asm __volatile ("flush %0 + 16" : : "r" (insns));
+
+ insns[3] = 0x8a116000 | (low32 & 0x3ff);
+ __asm __volatile ("flush %0 + 12" : : "r" (insns));
+
+ insns[2] = 0x82106000 | (high32 & 0x3ff);
+ }
+ else
+ {
+ /* sethi %hh(value), %g1
+ sethi %lm(value), %g5
+ sllx %g1, 32, %g1
+ or %g5, %lo(value), %g5
+ jmpl %g1 + %g5, %g0
+ nop */
+
+ insns[4] = 0x81c04005;
+ __asm __volatile ("flush %0 + 16" : : "r" (insns));
+
+ insns[3] = 0x8a116000 | (low32 & 0x3ff);
+ __asm __volatile ("flush %0 + 12" : : "r" (insns));
+
+ insns[2] = 0x83287020;
+ }
+
+ __asm __volatile ("flush %0 + 8" : : "r" (insns));
+
+ insns[1] = 0x0b000000 | (low32 >> 10);
+ __asm __volatile ("flush %0 + 4" : : "r" (insns));
+
+ insns[0] = 0x03000000 | (high32 >> 10);
+ __asm __volatile ("flush %0" : : "r" (insns));
+ }
+}
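
The long if/else chain above picks among several code sequences depending on where the target lies relative to the PLT entry. A hedged C sketch of just that case selection (it ignores the small plt_vaddr adjustment to the call instruction and uses illustrative names):

    #include <stdint.h>

    enum plt64_seq { SEQ_DATA_SLOT, SEQ_NEAR_BA, SEQ_LOW32,
                     SEQ_PCREL32, SEQ_FULL64 };

    static enum plt64_seq pick_sequence (uint64_t value, uint64_t plt_vaddr,
                                         uint64_t high)
    {
      int64_t disp = (int64_t) (value - plt_vaddr);

      if (high != 0)
        return SEQ_DATA_SLOT;                    /* .PLT32768 and above   */
      if (disp >= -0x800000 && disp < 0x800000)
        return SEQ_NEAR_BA;                      /* single "b,a" branch   */
      if ((value >> 32) == 0)
        return SEQ_LOW32;                        /* sethi/jmpl pair       */
      if (disp >= INT32_MIN && disp <= INT32_MAX)
        return SEQ_PCREL32;                      /* mov/call/mov sequence */
      return SEQ_FULL64;                         /* full 64-bit constant  */
    }
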
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S
index 4cb968505d..cfd9864f63 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S
@@ -28,19 +28,12 @@
#define __longjmp ____longjmp_chk
#ifdef PIC
-# ifdef HAVE_ASM_PPC_REL16
# define LOAD_ARG \
bcl 20,31,1f; \
1: mflr r3; \
addis r3,r3,_GLOBAL_OFFSET_TABLE_-1b@ha; \
addi r3,r3,_GLOBAL_OFFSET_TABLE_-1b@l; \
lwz r3,.LC0@got(r3)
-# else
-# define LOAD_ARG \
- bl _GLOBAL_OFFSET_TABLE_-4@local; \
- mflr r3; \
- lwz r3,.LC0@got(r3)
-# endif
#else
# define LOAD_ARG \
lis r3,.LC0@ha; \
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S
index e945834945..4c8c6b433b 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S
@@ -36,17 +36,10 @@ ENTRY (BP_SYM (__brk))
DO_CALL(SYS_ify(brk))
lwz r6,8(r1)
#ifdef PIC
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r5
addis r5,r5,__curbrk-1b@ha
stw r3,__curbrk-1b@l(r5)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r5
- lwz r5,__curbrk@got(r5)
- stw r3,0(r5)
-# endif
#else
lis r4,__curbrk@ha
stw r3,__curbrk@l(r4)
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
index 63e1773e22..27285ed4a5 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
@@ -145,15 +145,10 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef __CONTEXT_ENABLE_VRS
# ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r7
-# endif
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies
index d379a2dd12..af946119aa 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies
@@ -1,3 +1,4 @@
# Make sure this comes before the powerpc/powerpc32/fpu that's
# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power7/fpu
powerpc/powerpc32/power5/fpu
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
index 127c9e4581..f304090868 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
@@ -73,15 +73,10 @@ ENTRY(__CONTEXT_FUNC_NAME)
#ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r7
-# endif
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S b/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
index 89b1a61954..62efee2dce 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
@@ -146,15 +146,10 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r7
-# endif
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies
index c46b3d42af..ca112208d1 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies
@@ -1,3 +1,4 @@
# Make sure this comes before the powerpc/powerpc64/fpu that's
# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/power7/fpu
powerpc/powerpc64/power5/fpu
diff --git a/sysdeps/x86_64/fpu/fegetenv.c b/sysdeps/x86_64/fpu/fegetenv.c
index fa5a8dadcb..2159a1fab1 100644
--- a/sysdeps/x86_64/fpu/fegetenv.c
+++ b/sysdeps/x86_64/fpu/fegetenv.c
@@ -28,3 +28,4 @@ fegetenv (fenv_t *envp)
/* Success. */
return 0;
}
+libm_hidden_def (fegetenv)