author     Andreas Schwab <schwab@redhat.com>   2009-07-27 10:51:39 +0200
committer  Andreas Schwab <schwab@redhat.com>   2009-07-27 10:51:39 +0200
commit     e1a51361e3ac137c56adc6083d095c68e9471795 (patch)
tree       bfec41199e84a74265d8e77b0e8444b06bc9ff00
parent     9285e82ab3a0d1bdaa63fc740165d6a300ad0cc5 (diff)
parent     16d2ea4c821502948d193a152c8b151f5497a0d3 (diff)
Merge commit 'origin/master' into fedora/master
-rw-r--r--  ChangeLog | 106
-rw-r--r--  elf/elf.h | 3
-rw-r--r--  include/unistd.h | 4
-rw-r--r--  nptl/ChangeLog | 14
-rw-r--r--  nptl/pthread_mutex_lock.c | 12
-rw-r--r--  nptl/pthread_mutex_timedlock.c | 12
-rw-r--r--  nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S | 4
-rw-r--r--  nptl/sysdeps/x86_64/configure | 36
-rw-r--r--  nptl/sysdeps/x86_64/configure.in | 23
-rw-r--r--  posix/tst-rfc3484-2.c | 2
-rw-r--r--  posix/tst-rfc3484-3.c | 2
-rw-r--r--  posix/tst-rfc3484.c | 2
-rw-r--r--  resolv/res_send.c | 6
-rw-r--r--  sysdeps/posix/getaddrinfo.c | 2
-rw-r--r--  sysdeps/s390/dl-procinfo.c | 10
-rw-r--r--  sysdeps/s390/dl-procinfo.h | 9
-rw-r--r--  sysdeps/s390/s390-64/Makefile | 67
-rw-r--r--  sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c | 238
-rw-r--r--  sysdeps/s390/s390-64/utf16-utf32-z9.c | 325
-rw-r--r--  sysdeps/s390/s390-64/utf8-utf16-z9.c | 463
-rw-r--r--  sysdeps/s390/s390-64/utf8-utf32-z9.c | 508
-rw-r--r--  sysdeps/unix/sysv/linux/configure | 11
-rw-r--r--  sysdeps/unix/sysv/linux/configure.in | 13
-rw-r--r--  sysdeps/unix/sysv/linux/eventfd.c | 15
-rw-r--r--  sysdeps/unix/sysv/linux/i386/sysconf.c | 3
-rw-r--r--  sysdeps/unix/sysv/linux/kernel-features.h | 2
-rw-r--r--  sysdeps/unix/sysv/linux/signalfd.c | 15
-rw-r--r--  sysdeps/x86_64/Makefile | 4
-rw-r--r--  sysdeps/x86_64/cacheinfo.c | 53
-rw-r--r--  sysdeps/x86_64/multiarch/Makefile | 2
-rw-r--r--  sysdeps/x86_64/multiarch/init-arch.c | 8
-rw-r--r--  sysdeps/x86_64/multiarch/rtld-rawmemchr.c | 1
-rw-r--r--  sysdeps/x86_64/multiarch/rtld-strlen.S | 1
-rw-r--r--  sysdeps/x86_64/multiarch/strcmp.S | 369
-rw-r--r--  sysdeps/x86_64/multiarch/strcpy.S | 13
-rw-r--r--  sysdeps/x86_64/multiarch/strncmp-c.c | 8
-rw-r--r--  sysdeps/x86_64/rtld-memchr.c | 1
-rw-r--r--  sysdeps/x86_64/rtld-memcmp.c | 1
-rw-r--r--  sysdeps/x86_64/rtld-rawmemchr.c | 1
-rw-r--r--  sysdeps/x86_64/rtld-strchr.S | 291
-rw-r--r--  sysdeps/x86_64/rtld-strcmp.S | 28
-rw-r--r--  sysdeps/x86_64/rtld-strlen.S | 139
-rw-r--r--  sysdeps/x86_64/strcmp.S | 1948
-rw-r--r--  sysdeps/x86_64/strncmp.S | 3
-rwxr-xr-x  sysdeps/x86_64/tst-xmmymm.sh | 17
45 files changed, 4437 insertions, 358 deletions
diff --git a/ChangeLog b/ChangeLog
index 6f7026debf..a2fbd61fe6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,111 @@
+2009-07-26 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/x86_64/tst-xmmymm.sh: New file. Check whether any of the
+ functions used in ld.so modify xmm/ymm registers.
+ * sysdeps/x86_64/Makefile: Hook new test up.
+ * sysdeps/x86_64/rtld-memchr.c: New file.
+ * sysdeps/x86_64/rtld-memcmp.c: New file.
+ * sysdeps/x86_64/rtld-rawmemchr.c: New file.
+ * sysdeps/x86_64/rtld-strchr.S: New file.
+ * sysdeps/x86_64/rtld-strcmp.S: New file.
+ * sysdeps/x86_64/rtld-strlen.S: New file.
+ * sysdeps/x86_64/multiarch/rtld-rawmemchr.c: New file.
+ * sysdeps/x86_64/multiarch/rtld-strlen.S: New file.
+
+2009-07-26 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Remove
+ strncmp-c.
+ * sysdeps/x86_64/multiarch/strcmp.S (aftertail): Removed.
+ (exit): Likewise.
+ (Byte1): Likewise.
+ (Byte2): Likewise.
+ (Byte3): Likewise.
+ (Byte4): Likewise.
+ (Byte5): Likewise.
+ (Byte6): Likewise.
+ (next_8_bytes): Likewise.
+ (Byte0): Remove commented out codes.
+ (unaligned_table): Align jump table at 8 bytes.
+ Add _sse4_2 to all labels. Always include "../strcmp.S".
+ * sysdeps/x86_64/multiarch/strncmp-c.c: Removed.
+ * sysdeps/x86_64/strcmp.S: Add SSE2 support.
+ * sysdeps/x86_64/strncmp.S: New file.
+
+2009-07-26 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #10422]
+ * sysdeps/unix/sysv/linux/eventfd.c: Add compatibility for old
+ kernels, dropped when eventfd2 support was added.
+ * sysdeps/unix/sysv/linux/signalfd.c: Add compatibility for old
+ kernels, dropped when signalfd4 support was added.
+ * sysdeps/unix/sysv/linux/kernel-features.h: More CLOEXEC syscalls
+ added, name them.
+
+ [BZ #10452]
+ * resolv/res_send.c (send_dg): Pass full SERVFAIL, NOTIMP, REFUSED
+ replies up.
+
+ * elf/elf.h: Define NT_GNU_GOLD_VERSION.
+
+2009-07-25 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/x86_64/multiarch/strcmp.S: Some more optimizations for
+ modern processor versions. Patch by H.J. Lu <hongjiu.lu@intel.com>.
+
+ [BZ #10448]
+ * sysdeps/posix/getaddrinfo.c (gaih_inet): If NSS module contains no
+ callback we must touch the status to avoid using stale value.
+
+ * sysdeps/x86_64/multiarch/strcmp.S: Exclude unused code from being
+ compiled in.
+
+2009-07-24 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/unix/sysv/linux/configure.in: Don't automatically include
+ /lib/modules/* headers anymore. We have sane headers in the standard
+ place now.
+
+2009-06-16 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
+
+ * sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags): "hpage",
+ "etf3enh" and "highgprs" added.
+ (_dl_s390_platforms): "z10" added.
+ * sysdeps/s390/dl-procinfo.h (_DL_HWCAP_COUNT, _DL_PLATFORMS_COUNT):
+ Increased for the new entries.
+ (HWCAP enum): HWCAP_S390_HPAGE, HWCAP_S390_ETF3EH and
+ HWCAP_S390_HIGH_GPRS added.
+
+ * sysdeps/s390/s390-64/Makefile: Adjusted to build the new modules.
+ * sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c: New file.
+ * sysdeps/s390/s390-64/utf16-utf32-z9.c: New file.
+ * sysdeps/s390/s390-64/utf8-utf16-z9.c: New file.
+ * sysdeps/s390/s390-64/utf8-utf32-z9.c: New file.
+
2009-07-23 Ulrich Drepper <drepper@redhat.com>
+ * sysdeps/x86_64/cacheinfo.c [USE_MULTIARCH]: Rearrange code to
+ avoid additional cpuid instructions. Most of the information is
+ stored somewhere.
+
+ * sysdeps/unix/sysv/linux/i386/sysconf.c (intel_02_known): Add more
+ cache descriptors.
+ * sysdeps/x86_64/cacheinfo.c (intel_02_known): Likewise.
+
+ * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Reset
+ SSSE3 bit for Atoms.
+ * sysdeps/x86_64/multiarch/strcpy.S: No need to perform Atom test
+ here anymore.
+
+ * posix/tst-rfc3484.c (do_test): Initialize entire sockaddr_in
+ structure before copying it to avoid warning.
+ * posix/tst-rfc3484-2.c (do_test): Likewise.
+ * posix/tst-rfc3484-3.c (do_test): Likewise.
+
+ [BZ #10416]
+ * include/unistd.h: Make header file suitable for C++ test cases.
+ Patch by Duncan Simpson <dps@simpson.demon.co.uk>.
+
* sysdeps/unix/sysv/linux/i386/makecontext.S: Ensure we preserve the
stack alignment in the exit code.
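
The BZ #10422 entry above restores run-time compatibility with kernels that predate the eventfd2 and signalfd4 system calls. The usual shape of such a wrapper is sketched below for eventfd; this only illustrates the fallback pattern (the actual change lives in the eventfd.c and signalfd.c files listed in the diffstat, whose hunks are not reproduced on this page) and relies on glibc's internal INLINE_SYSCALL and __set_errno helpers.

#include <errno.h>
#include <sys/eventfd.h>
#include <sysdep.h>

/* Sketch of the old-kernel fallback: try the new syscall first and
   only fall back to the flag-less original one when the running
   kernel does not know eventfd2.  */
int
eventfd (unsigned int count, int flags)
{
#ifdef __NR_eventfd2
  int fd = INLINE_SYSCALL (eventfd2, 2, count, flags);
  if (fd != -1 || errno != ENOSYS)
    return fd;
#endif
  /* The old syscall cannot encode any flags.  */
  if (flags != 0)
    {
      __set_errno (EINVAL);
      return -1;
    }
#ifdef __NR_eventfd
  return INLINE_SYSCALL (eventfd, 1, count);
#else
  __set_errno (ENOSYS);
  return -1;
#endif
}
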
diff --git a/elf/elf.h b/elf/elf.h
index 7efdedefb4..ce6de07e91 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -1054,6 +1054,9 @@ typedef struct
The descriptor consists of any nonzero number of bytes. */
#define NT_GNU_BUILD_ID 3
+/* Version note generated by GNU gold containing a version string. */
+#define NT_GNU_GOLD_VERSION 4
+
/* Move records. */
typedef struct
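
NT_GNU_GOLD_VERSION uses the standard ELF note layout: an Elf64_Nhdr giving name size, descriptor size and type, the owner name "GNU", then the descriptor, which for this note type is gold's version string. A minimal sketch of reading it, assuming the contents of the .note.gnu.gold-version section have already been mapped into buf/len (the helper name and that section name are assumptions, not part of this patch):

#include <elf.h>
#include <stdio.h>
#include <string.h>

/* Walk a buffer of ELF notes and print the version string carried by
   an NT_GNU_GOLD_VERSION note, if present.  */
static void
print_gold_version (const unsigned char *buf, size_t len)
{
  size_t off = 0;
  while (off + sizeof (Elf64_Nhdr) <= len)
    {
      const Elf64_Nhdr *nhdr = (const Elf64_Nhdr *) (buf + off);
      const char *name = (const char *) (nhdr + 1);
      /* Name and descriptor are padded to 4-byte boundaries.  */
      size_t namesz = (nhdr->n_namesz + 3) & ~3u;
      size_t descsz = (nhdr->n_descsz + 3) & ~3u;

      if (nhdr->n_type == NT_GNU_GOLD_VERSION
          && nhdr->n_namesz == sizeof "GNU"
          && memcmp (name, "GNU", sizeof "GNU") == 0)
        printf ("linked with gold %.*s\n", (int) nhdr->n_descsz,
                name + namesz);

      off += sizeof (Elf64_Nhdr) + namesz + descsz;
    }
}
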
diff --git a/include/unistd.h b/include/unistd.h
index 72d7e2e88c..ccba893abe 100644
--- a/include/unistd.h
+++ b/include/unistd.h
@@ -1,6 +1,8 @@
#ifndef _UNISTD_H
# include <posix/unistd.h>
+__BEGIN_DECLS
+
libc_hidden_proto (_exit, __noreturn__)
libc_hidden_proto (alarm)
libc_hidden_proto (confstr)
@@ -174,4 +176,6 @@ extern int __have_sock_cloexec;
unless it is really necessary. */
#define __have_pipe2 __have_sock_cloexec
+__END_DECLS
+
#endif
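
Wrapping the internal prototypes in __BEGIN_DECLS/__END_DECLS is what makes this header usable from the C++ test cases mentioned in the BZ #10416 ChangeLog entry: in glibc's <sys/cdefs.h> the pair expands roughly as shown below, so the declarations get C linkage under a C++ compiler and vanish into plain C otherwise.

/* Rough expansion of the bracketing macros (see <sys/cdefs.h>).  */
#ifdef __cplusplus
# define __BEGIN_DECLS  extern "C" {
# define __END_DECLS    }
#else
# define __BEGIN_DECLS
# define __END_DECLS
#endif
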
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 1f24aa5849..c485435e82 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,17 @@
+2009-07-26 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #10418]
+ * pthread_mutex_lock.c (pthread_mutex_lock): Use _rel instead of
+ _acq variants of cmpxchg.
+ * pthread_mutex_timedlock.c (pthread_mutex_timedlock): Likewise.
+
+2009-07-23 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/x86_64/configure.in: New file.
+
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Fix error
+ path when not using absolute timeout futex.
+
2009-07-20 Ulrich Drepper <drepper@redhat.com>
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Minor
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index 406e588fdb..a0ff881faf 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -160,7 +160,7 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
#endif
newval
- = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ = atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
newval, oldval);
if (newval != oldval)
@@ -285,7 +285,7 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
#ifdef NO_INCR
newval |= FUTEX_WAITERS;
#endif
- oldval = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ oldval = atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
newval, 0);
if (oldval != 0)
@@ -420,7 +420,7 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
oldprio = ceiling;
oldval
- = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ = atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
#ifdef NO_INCR
ceilval | 2,
#else
@@ -434,7 +434,7 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
do
{
oldval
- = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ = atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
ceilval | 2,
ceilval | 1);
@@ -445,7 +445,7 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
lll_futex_wait (&mutex->__data.__lock, ceilval | 2,
PTHREAD_MUTEX_PSHARED (mutex));
}
- while (atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ while (atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
ceilval | 2, ceilval)
!= ceilval);
}
diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
index 8d0db79d58..2c6ff114da 100644
--- a/nptl/pthread_mutex_timedlock.c
+++ b/nptl/pthread_mutex_timedlock.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -126,7 +126,7 @@ pthread_mutex_timedlock (mutex, abstime)
int newval = id | (oldval & FUTEX_WAITERS);
newval
- = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ = atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
newval, oldval);
if (newval != oldval)
{
@@ -246,7 +246,7 @@ pthread_mutex_timedlock (mutex, abstime)
}
}
- oldval = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ oldval = atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
id, 0);
if (oldval != 0)
@@ -404,7 +404,7 @@ pthread_mutex_timedlock (mutex, abstime)
oldprio = ceiling;
oldval
- = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ = atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
ceilval | 1, ceilval);
if (oldval == ceilval)
@@ -413,7 +413,7 @@ pthread_mutex_timedlock (mutex, abstime)
do
{
oldval
- = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ = atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
ceilval | 2,
ceilval | 1);
@@ -456,7 +456,7 @@ pthread_mutex_timedlock (mutex, abstime)
PTHREAD_MUTEX_PSHARED (mutex));
}
}
- while (atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+ while (atomic_compare_and_exchange_val_rel (&mutex->__data.__lock,
ceilval | 2, ceilval)
!= ceilval);
}
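
The BZ #10418 hunks above switch these compare-and-exchange calls from the _acq to the _rel variant, i.e. the CAS in the robust, PI and priority-ceiling lock paths now carries release rather than acquire ordering. The glibc-internal atomic_compare_and_exchange_val_* macros are architecture-specific; in C11 atomics terms the difference looks roughly like the following sketch (function names are illustrative only):

#include <stdatomic.h>

/* Both helpers return the old value, like the glibc _val_ macros.  */
static int
cas_acquire (atomic_int *lock, int expected, int desired)
{
  /* Later loads/stores of this thread may not move before the CAS.  */
  atomic_compare_exchange_strong_explicit (lock, &expected, desired,
                                           memory_order_acquire,
                                           memory_order_relaxed);
  return expected;
}

static int
cas_release (atomic_int *lock, int expected, int desired)
{
  /* Earlier writes of this thread become visible to any thread that
     later observes the new value of *lock.  */
  atomic_compare_exchange_strong_explicit (lock, &expected, desired,
                                           memory_order_release,
                                           memory_order_relaxed);
  return expected;
}
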
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index e12790cb96..7486825d5f 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -551,12 +551,12 @@ __pthread_cond_timedwait:
jne 53b
cmpq 24(%rsp), %r9
- jbe 45b
+ jbe 15f
cmpq %rax, %r9
ja 39b
- cmpq $-ETIMEDOUT, %r14
+15: cmpq $-ETIMEDOUT, %r14
jne 8b
jmp 99b
diff --git a/nptl/sysdeps/x86_64/configure b/nptl/sysdeps/x86_64/configure
new file mode 100644
index 0000000000..b959168843
--- /dev/null
+++ b/nptl/sysdeps/x86_64/configure
@@ -0,0 +1,36 @@
+# This file is generated from configure.in by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/i386.
+
+{ echo "$as_me:$LINENO: checking for .cfi_personality and .cfi_lsda pseudo-ops" >&5
+echo $ECHO_N "checking for .cfi_personality and .cfi_lsda pseudo-ops... $ECHO_C" >&6; }
+if test "${libc_cv_asm_cfi_personality+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat > conftest.s <<EOF
+${libc_cv_dot_text}
+foo:
+ .cfi_startproc
+ .cfi_personality 0, foo
+ .cfi_lsda 0, foo
+ .cfi_endproc
+EOF
+ if { ac_try='${CC-cc} $ASFLAGS -c conftest.s 1>&5'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ libc_cv_asm_cfi_personality=yes
+ else
+ libc_cv_asm_cfi_personality=no
+ fi
+ rm -f conftest*
+
+fi
+{ echo "$as_me:$LINENO: result: $libc_cv_asm_cfi_personality" >&5
+echo "${ECHO_T}$libc_cv_asm_cfi_personality" >&6; }
+if test x"$libc_cv_asm_cfi_personality" != xyes; then
+ { { echo "$as_me:$LINENO: error: assembler too old, .cfi_personality support missing" >&5
+echo "$as_me: error: assembler too old, .cfi_personality support missing" >&2;}
+ { (exit 1); exit 1; }; }
+fi
diff --git a/nptl/sysdeps/x86_64/configure.in b/nptl/sysdeps/x86_64/configure.in
new file mode 100644
index 0000000000..0ba0cc3726
--- /dev/null
+++ b/nptl/sysdeps/x86_64/configure.in
@@ -0,0 +1,23 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/i386.
+
+AC_CACHE_CHECK([for .cfi_personality and .cfi_lsda pseudo-ops],
+ libc_cv_asm_cfi_personality, [dnl
+ cat > conftest.s <<EOF
+${libc_cv_dot_text}
+foo:
+ .cfi_startproc
+ .cfi_personality 0, foo
+ .cfi_lsda 0, foo
+ .cfi_endproc
+EOF
+ if AC_TRY_COMMAND(${CC-cc} $ASFLAGS -c conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_asm_cfi_personality=yes
+ else
+ libc_cv_asm_cfi_personality=no
+ fi
+ rm -f conftest*
+])
+if test x"$libc_cv_asm_cfi_personality" != xyes; then
+ AC_MSG_ERROR([assembler too old, .cfi_personality support missing])
+fi
diff --git a/posix/tst-rfc3484-2.c b/posix/tst-rfc3484-2.c
index c85fdd0742..bf5f6cff7e 100644
--- a/posix/tst-rfc3484-2.c
+++ b/posix/tst-rfc3484-2.c
@@ -82,6 +82,8 @@ do_test (void)
struct sockaddr_in so1;
so1.sin_family = AF_INET;
so1.sin_addr.s_addr = h (0xc0a85f19);
+ /* Clear the rest of the structure to avoid warnings. */
+ memset (so1.sin_zero, '\0', sizeof (so1.sin_zero));
struct sockaddr_in sa1;
sa1.sin_family = AF_INET;
diff --git a/posix/tst-rfc3484-3.c b/posix/tst-rfc3484-3.c
index 3aa4563c0c..8eba74e48e 100644
--- a/posix/tst-rfc3484-3.c
+++ b/posix/tst-rfc3484-3.c
@@ -113,6 +113,8 @@ do_test (void)
struct sockaddr_in so;
so.sin_family = AF_INET;
so.sin_addr.s_addr = h (0x0aa85f19);
+ /* Clear the rest of the structure to avoid warnings. */
+ memset (so.sin_zero, '\0', sizeof (so.sin_zero));
for (int i = 0; i < naddrs; ++i)
{
diff --git a/posix/tst-rfc3484.c b/posix/tst-rfc3484.c
index 15d0c94a5e..26835cf8b2 100644
--- a/posix/tst-rfc3484.c
+++ b/posix/tst-rfc3484.c
@@ -102,6 +102,8 @@ do_test (void)
struct sockaddr_in so;
so.sin_family = AF_INET;
so.sin_addr.s_addr = h (0xc0a85f19);
+ /* Clear the rest of the structure to avoid warnings. */
+ memset (so.sin_zero, '\0', sizeof (so.sin_zero));
for (int i = 0; i < naddrs; ++i)
{
diff --git a/resolv/res_send.c b/resolv/res_send.c
index 971a4afb6f..4c14db1bf5 100644
--- a/resolv/res_send.c
+++ b/resolv/res_send.c
@@ -1278,14 +1278,10 @@ send_dg(res_state statp,
? *thisanssiz : *thisresplen);
if (recvresp1 || (buf2 != NULL && recvresp2))
- {
- *resplen2 = 1;
- return resplen;
- }
+ return resplen;
if (buf2 != NULL)
{
/* We are waiting for a possible second reply. */
- resplen = 1;
if (hp->id == anhp->id)
recvresp1 = 1;
else
diff --git a/sysdeps/posix/getaddrinfo.c b/sysdeps/posix/getaddrinfo.c
index d346c621fb..a788d18fee 100644
--- a/sysdeps/posix/getaddrinfo.c
+++ b/sysdeps/posix/getaddrinfo.c
@@ -833,6 +833,8 @@ gaih_inet (const char *name, const struct gaih_service *service,
&& inet6_status != NSS_STATUS_UNAVAIL)
status = inet6_status;
}
+ else
+ status = NSS_STATUS_UNAVAIL;
}
if (nss_next_action (nip, status) == NSS_ACTION_RETURN)
diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c
index 32c6aef951..d51d7b2379 100644
--- a/sysdeps/s390/dl-procinfo.c
+++ b/sysdeps/s390/dl-procinfo.c
@@ -1,5 +1,5 @@
/* Data for s390 version of processor capability information.
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
@@ -47,11 +47,11 @@
#if !defined PROCINFO_DECL && defined SHARED
._dl_s390_cap_flags
#else
-PROCINFO_CLASS const char _dl_s390_cap_flags[7][6]
+PROCINFO_CLASS const char _dl_s390_cap_flags[10][8]
#endif
#ifndef PROCINFO_DECL
= {
- "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
+ "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "hpage", "etf3enh", "highgprs"
}
#endif
#if !defined SHARED || defined PROCINFO_DECL
@@ -63,11 +63,11 @@ PROCINFO_CLASS const char _dl_s390_cap_flags[7][6]
#if !defined PROCINFO_DECL && defined SHARED
._dl_s390_platforms
#else
-PROCINFO_CLASS const char _dl_s390_platforms[4][7]
+PROCINFO_CLASS const char _dl_s390_platforms[5][7]
#endif
#ifndef PROCINFO_DECL
= {
- "g5", "z900", "z990", "z9-109"
+ "g5", "z900", "z990", "z9-109", "z10"
}
#endif
#if !defined SHARED || defined PROCINFO_DECL
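
The three new capability strings correspond to the hwcap bits added to dl-procinfo.h in the next hunk; the z9/z10 iconv modules added later in this patch test one of them at run time via GLRO (dl_hwcap). Outside the dynamic linker the same check is just a bit mask; a small illustrative helper (bit values copied from the enum below, the function name is an assumption):

/* New s390 hwcap bits, matching the enum in dl-procinfo.h below.  */
#define HWCAP_S390_HPAGE      (1 << 7)
#define HWCAP_S390_ETF3EH     (1 << 8)
#define HWCAP_S390_HIGH_GPRS  (1 << 9)

/* HWCAP is the AT_HWCAP bit mask the kernel passes in the aux vector.  */
static inline int
cpu_has_etf3_enhancement (unsigned long int hwcap)
{
  return (hwcap & HWCAP_S390_ETF3EH) != 0;
}
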
diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h
index 178d7cc017..0a7ebd3be9 100644
--- a/sysdeps/s390/dl-procinfo.h
+++ b/sysdeps/s390/dl-procinfo.h
@@ -1,5 +1,5 @@
/* s390 version of processor capability information handling macros.
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
@@ -22,9 +22,9 @@
#define _DL_PROCINFO_H 1
#include <ldsodefs.h>
-#define _DL_HWCAP_COUNT 7
+#define _DL_HWCAP_COUNT 10
-#define _DL_PLATFORMS_COUNT 4
+#define _DL_PLATFORMS_COUNT 5
/* The kernel provides up to 32 capability bits with elf_hwcap. */
#define _DL_FIRST_PLATFORM 32
@@ -45,6 +45,9 @@ enum
HWCAP_S390_LDISP = 1 << 4,
HWCAP_S390_EIMM = 1 << 5,
HWCAP_S390_DFP = 1 << 6,
+ HWCAP_S390_HPAGE = 1 << 7,
+ HWCAP_S390_ETF3EH = 1 << 8,
+ HWCAP_S390_HIGH_GPRS = 1 << 9,
};
#define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \
diff --git a/sysdeps/s390/s390-64/Makefile b/sysdeps/s390/s390-64/Makefile
index 0a5051449d..1814f37abd 100644
--- a/sysdeps/s390/s390-64/Makefile
+++ b/sysdeps/s390/s390-64/Makefile
@@ -9,3 +9,70 @@ CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused
CFLAGS-dl-load.c += -Wno-unused
CFLAGS-dl-reloc.c += -Wno-unused
endif
+
+ifeq ($(subdir),iconvdata)
+ISO-8859-1_CP037_Z900-routines := iso-8859-1_cp037_z900
+ISO-8859-1_CP037_Z900-map := gconv.map
+
+UTF8_UTF32_Z9-routines := utf8-utf32-z9
+UTF8_UTF32_Z9-map := gconv.map
+
+UTF16_UTF32_Z9-routines := utf16-utf32-z9
+UTF16_UTF32_Z9-map := gconv.map
+
+UTF8_UTF16_Z9-routines := utf8-utf16-z9
+UTF8_UTF16_Z9-map := gconv.map
+
+s390x-iconv-modules = ISO-8859-1_CP037_Z900 UTF8_UTF16_Z9 UTF16_UTF32_Z9 UTF8_UTF32_Z9
+
+extra-modules-left += $(s390x-iconv-modules)
+include extra-module.mk
+
+extra-objs += $(addsuffix .so, $(s390x-iconv-modules))
+install-others += $(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules))
+
+distribute += iso-8859-1_cp037_z900.c utf8-utf32-z9.c utf16-utf32-z9.c utf8-utf16-z9.c
+
+$(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules)) : \
+$(inst_gconvdir)/%.so: $(objpfx)%.so $(+force)
+ $(do-install-program)
+
+$(objpfx)gconv-modules-s390: gconv-modules $(+force)
+ cp $< $@
+ echo >> $@
+ echo "# S/390 hardware accelerated modules" >> $@
+ echo -n "module ISO-8859-1// IBM037// " >> $@
+ echo " ISO-8859-1_CP037_Z900 1" >> $@
+ echo -n "module IBM037// ISO-8859-1// " >> $@
+ echo " ISO-8859-1_CP037_Z900 1" >> $@
+ echo -n "module ISO-10646/UTF8/ UTF-32// " >> $@
+ echo " UTF8_UTF32_Z9 1" >> $@
+ echo -n "module UTF-32BE// ISO-10646/UTF8/ " >> $@
+ echo " UTF8_UTF32_Z9 1" >> $@
+ echo -n "module ISO-10646/UTF8/ UTF-32BE// " >> $@
+ echo " UTF8_UTF32_Z9 1" >> $@
+ echo -n "module UTF-16BE// UTF-32// " >> $@
+ echo " UTF16_UTF32_Z9 1" >> $@
+ echo -n "module UTF-32BE// UTF-16// " >> $@
+ echo " UTF16_UTF32_Z9 1" >> $@
+ echo -n "module INTERNAL UTF-16// " >> $@
+ echo " UTF16_UTF32_Z9 1" >> $@
+ echo -n "module UTF-32BE// UTF-16BE// " >> $@
+ echo " UTF16_UTF32_Z9 1" >> $@
+ echo -n "module INTERNAL UTF-16BE// " >> $@
+ echo " UTF16_UTF32_Z9 1" >> $@
+ echo -n "module UTF-16BE// UTF-32BE// " >> $@
+ echo " UTF16_UTF32_Z9 1" >> $@
+ echo -n "module UTF-16BE// INTERNAL " >> $@
+ echo " UTF16_UTF32_Z9 1" >> $@
+ echo -n "module UTF-16BE// ISO-10646/UTF8/ " >> $@
+ echo " UTF8_UTF16_Z9 1" >> $@
+ echo -n "module ISO-10646/UTF8/ UTF-16// " >> $@
+ echo " UTF8_UTF16_Z9 1" >> $@
+ echo -n "module ISO-10646/UTF8/ UTF-16BE// " >> $@
+ echo " UTF8_UTF16_Z9 1" >> $@
+
+$(inst_gconvdir)/gconv-modules: $(objpfx)gconv-modules-s390 $(+force)
+ $(do-install)
+
+endif
diff --git a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c
new file mode 100644
index 0000000000..d4c4931f22
--- /dev/null
+++ b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c
@@ -0,0 +1,238 @@
+/* Conversion between ISO 8859-1 and IBM037.
+
+ This module uses the Z900 variant of the Translate One To One
+ instruction.
+ Copyright (C) 1997-2009 Free Software Foundation, Inc.
+
+ Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
+ Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ Thanks to Daniel Appich who covered the relevant performance work
+ in his diploma thesis.
+
+ This is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+
+// conversion table from ISO-8859-1 to IBM037
+static const unsigned char table_iso8859_1_to_cp037[256]
+__attribute__ ((aligned (8))) =
+{
+ [0x00] = 0x00, [0x01] = 0x01, [0x02] = 0x02, [0x03] = 0x03,
+ [0x04] = 0x37, [0x05] = 0x2D, [0x06] = 0x2E, [0x07] = 0x2F,
+ [0x08] = 0x16, [0x09] = 0x05, [0x0A] = 0x25, [0x0B] = 0x0B,
+ [0x0C] = 0x0C, [0x0D] = 0x0D, [0x0E] = 0x0E, [0x0F] = 0x0F,
+ [0x10] = 0x10, [0x11] = 0x11, [0x12] = 0x12, [0x13] = 0x13,
+ [0x14] = 0x3C, [0x15] = 0x3D, [0x16] = 0x32, [0x17] = 0x26,
+ [0x18] = 0x18, [0x19] = 0x19, [0x1A] = 0x3F, [0x1B] = 0x27,
+ [0x1C] = 0x1C, [0x1D] = 0x1D, [0x1E] = 0x1E, [0x1F] = 0x1F,
+ [0x20] = 0x40, [0x21] = 0x5A, [0x22] = 0x7F, [0x23] = 0x7B,
+ [0x24] = 0x5B, [0x25] = 0x6C, [0x26] = 0x50, [0x27] = 0x7D,
+ [0x28] = 0x4D, [0x29] = 0x5D, [0x2A] = 0x5C, [0x2B] = 0x4E,
+ [0x2C] = 0x6B, [0x2D] = 0x60, [0x2E] = 0x4B, [0x2F] = 0x61,
+ [0x30] = 0xF0, [0x31] = 0xF1, [0x32] = 0xF2, [0x33] = 0xF3,
+ [0x34] = 0xF4, [0x35] = 0xF5, [0x36] = 0xF6, [0x37] = 0xF7,
+ [0x38] = 0xF8, [0x39] = 0xF9, [0x3A] = 0x7A, [0x3B] = 0x5E,
+ [0x3C] = 0x4C, [0x3D] = 0x7E, [0x3E] = 0x6E, [0x3F] = 0x6F,
+ [0x40] = 0x7C, [0x41] = 0xC1, [0x42] = 0xC2, [0x43] = 0xC3,
+ [0x44] = 0xC4, [0x45] = 0xC5, [0x46] = 0xC6, [0x47] = 0xC7,
+ [0x48] = 0xC8, [0x49] = 0xC9, [0x4A] = 0xD1, [0x4B] = 0xD2,
+ [0x4C] = 0xD3, [0x4D] = 0xD4, [0x4E] = 0xD5, [0x4F] = 0xD6,
+ [0x50] = 0xD7, [0x51] = 0xD8, [0x52] = 0xD9, [0x53] = 0xE2,
+ [0x54] = 0xE3, [0x55] = 0xE4, [0x56] = 0xE5, [0x57] = 0xE6,
+ [0x58] = 0xE7, [0x59] = 0xE8, [0x5A] = 0xE9, [0x5B] = 0xBA,
+ [0x5C] = 0xE0, [0x5D] = 0xBB, [0x5E] = 0xB0, [0x5F] = 0x6D,
+ [0x60] = 0x79, [0x61] = 0x81, [0x62] = 0x82, [0x63] = 0x83,
+ [0x64] = 0x84, [0x65] = 0x85, [0x66] = 0x86, [0x67] = 0x87,
+ [0x68] = 0x88, [0x69] = 0x89, [0x6A] = 0x91, [0x6B] = 0x92,
+ [0x6C] = 0x93, [0x6D] = 0x94, [0x6E] = 0x95, [0x6F] = 0x96,
+ [0x70] = 0x97, [0x71] = 0x98, [0x72] = 0x99, [0x73] = 0xA2,
+ [0x74] = 0xA3, [0x75] = 0xA4, [0x76] = 0xA5, [0x77] = 0xA6,
+ [0x78] = 0xA7, [0x79] = 0xA8, [0x7A] = 0xA9, [0x7B] = 0xC0,
+ [0x7C] = 0x4F, [0x7D] = 0xD0, [0x7E] = 0xA1, [0x7F] = 0x07,
+ [0x80] = 0x20, [0x81] = 0x21, [0x82] = 0x22, [0x83] = 0x23,
+ [0x84] = 0x24, [0x85] = 0x15, [0x86] = 0x06, [0x87] = 0x17,
+ [0x88] = 0x28, [0x89] = 0x29, [0x8A] = 0x2A, [0x8B] = 0x2B,
+ [0x8C] = 0x2C, [0x8D] = 0x09, [0x8E] = 0x0A, [0x8F] = 0x1B,
+ [0x90] = 0x30, [0x91] = 0x31, [0x92] = 0x1A, [0x93] = 0x33,
+ [0x94] = 0x34, [0x95] = 0x35, [0x96] = 0x36, [0x97] = 0x08,
+ [0x98] = 0x38, [0x99] = 0x39, [0x9A] = 0x3A, [0x9B] = 0x3B,
+ [0x9C] = 0x04, [0x9D] = 0x14, [0x9E] = 0x3E, [0x9F] = 0xFF,
+ [0xA0] = 0x41, [0xA1] = 0xAA, [0xA2] = 0x4A, [0xA3] = 0xB1,
+ [0xA4] = 0x9F, [0xA5] = 0xB2, [0xA6] = 0x6A, [0xA7] = 0xB5,
+ [0xA8] = 0xBD, [0xA9] = 0xB4, [0xAA] = 0x9A, [0xAB] = 0x8A,
+ [0xAC] = 0x5F, [0xAD] = 0xCA, [0xAE] = 0xAF, [0xAF] = 0xBC,
+ [0xB0] = 0x90, [0xB1] = 0x8F, [0xB2] = 0xEA, [0xB3] = 0xFA,
+ [0xB4] = 0xBE, [0xB5] = 0xA0, [0xB6] = 0xB6, [0xB7] = 0xB3,
+ [0xB8] = 0x9D, [0xB9] = 0xDA, [0xBA] = 0x9B, [0xBB] = 0x8B,
+ [0xBC] = 0xB7, [0xBD] = 0xB8, [0xBE] = 0xB9, [0xBF] = 0xAB,
+ [0xC0] = 0x64, [0xC1] = 0x65, [0xC2] = 0x62, [0xC3] = 0x66,
+ [0xC4] = 0x63, [0xC5] = 0x67, [0xC6] = 0x9E, [0xC7] = 0x68,
+ [0xC8] = 0x74, [0xC9] = 0x71, [0xCA] = 0x72, [0xCB] = 0x73,
+ [0xCC] = 0x78, [0xCD] = 0x75, [0xCE] = 0x76, [0xCF] = 0x77,
+ [0xD0] = 0xAC, [0xD1] = 0x69, [0xD2] = 0xED, [0xD3] = 0xEE,
+ [0xD4] = 0xEB, [0xD5] = 0xEF, [0xD6] = 0xEC, [0xD7] = 0xBF,
+ [0xD8] = 0x80, [0xD9] = 0xFD, [0xDA] = 0xFE, [0xDB] = 0xFB,
+ [0xDC] = 0xFC, [0xDD] = 0xAD, [0xDE] = 0xAE, [0xDF] = 0x59,
+ [0xE0] = 0x44, [0xE1] = 0x45, [0xE2] = 0x42, [0xE3] = 0x46,
+ [0xE4] = 0x43, [0xE5] = 0x47, [0xE6] = 0x9C, [0xE7] = 0x48,
+ [0xE8] = 0x54, [0xE9] = 0x51, [0xEA] = 0x52, [0xEB] = 0x53,
+ [0xEC] = 0x58, [0xED] = 0x55, [0xEE] = 0x56, [0xEF] = 0x57,
+ [0xF0] = 0x8C, [0xF1] = 0x49, [0xF2] = 0xCD, [0xF3] = 0xCE,
+ [0xF4] = 0xCB, [0xF5] = 0xCF, [0xF6] = 0xCC, [0xF7] = 0xE1,
+ [0xF8] = 0x70, [0xF9] = 0xDD, [0xFA] = 0xDE, [0xFB] = 0xDB,
+ [0xFC] = 0xDC, [0xFD] = 0x8D, [0xFE] = 0x8E, [0xFF] = 0xDF
+};
+
+// conversion table from IBM037 to ISO-8859-1
+static const unsigned char table_cp037_iso8859_1[256]
+__attribute__ ((aligned (8))) =
+{
+ [0x00] = 0x00, [0x01] = 0x01, [0x02] = 0x02, [0x03] = 0x03,
+ [0x04] = 0x9C, [0x05] = 0x09, [0x06] = 0x86, [0x07] = 0x7F,
+ [0x08] = 0x97, [0x09] = 0x8D, [0x0A] = 0x8E, [0x0B] = 0x0B,
+ [0x0C] = 0x0C, [0x0D] = 0x0D, [0x0E] = 0x0E, [0x0F] = 0x0F,
+ [0x10] = 0x10, [0x11] = 0x11, [0x12] = 0x12, [0x13] = 0x13,
+ [0x14] = 0x9D, [0x15] = 0x85, [0x16] = 0x08, [0x17] = 0x87,
+ [0x18] = 0x18, [0x19] = 0x19, [0x1A] = 0x92, [0x1B] = 0x8F,
+ [0x1C] = 0x1C, [0x1D] = 0x1D, [0x1E] = 0x1E, [0x1F] = 0x1F,
+ [0x20] = 0x80, [0x21] = 0x81, [0x22] = 0x82, [0x23] = 0x83,
+ [0x24] = 0x84, [0x25] = 0x0A, [0x26] = 0x17, [0x27] = 0x1B,
+ [0x28] = 0x88, [0x29] = 0x89, [0x2A] = 0x8A, [0x2B] = 0x8B,
+ [0x2C] = 0x8C, [0x2D] = 0x05, [0x2E] = 0x06, [0x2F] = 0x07,
+ [0x30] = 0x90, [0x31] = 0x91, [0x32] = 0x16, [0x33] = 0x93,
+ [0x34] = 0x94, [0x35] = 0x95, [0x36] = 0x96, [0x37] = 0x04,
+ [0x38] = 0x98, [0x39] = 0x99, [0x3A] = 0x9A, [0x3B] = 0x9B,
+ [0x3C] = 0x14, [0x3D] = 0x15, [0x3E] = 0x9E, [0x3F] = 0x1A,
+ [0x40] = 0x20, [0x41] = 0xA0, [0x42] = 0xE2, [0x43] = 0xE4,
+ [0x44] = 0xE0, [0x45] = 0xE1, [0x46] = 0xE3, [0x47] = 0xE5,
+ [0x48] = 0xE7, [0x49] = 0xF1, [0x4A] = 0xA2, [0x4B] = 0x2E,
+ [0x4C] = 0x3C, [0x4D] = 0x28, [0x4E] = 0x2B, [0x4F] = 0x7C,
+ [0x50] = 0x26, [0x51] = 0xE9, [0x52] = 0xEA, [0x53] = 0xEB,
+ [0x54] = 0xE8, [0x55] = 0xED, [0x56] = 0xEE, [0x57] = 0xEF,
+ [0x58] = 0xEC, [0x59] = 0xDF, [0x5A] = 0x21, [0x5B] = 0x24,
+ [0x5C] = 0x2A, [0x5D] = 0x29, [0x5E] = 0x3B, [0x5F] = 0xAC,
+ [0x60] = 0x2D, [0x61] = 0x2F, [0x62] = 0xC2, [0x63] = 0xC4,
+ [0x64] = 0xC0, [0x65] = 0xC1, [0x66] = 0xC3, [0x67] = 0xC5,
+ [0x68] = 0xC7, [0x69] = 0xD1, [0x6A] = 0xA6, [0x6B] = 0x2C,
+ [0x6C] = 0x25, [0x6D] = 0x5F, [0x6E] = 0x3E, [0x6F] = 0x3F,
+ [0x70] = 0xF8, [0x71] = 0xC9, [0x72] = 0xCA, [0x73] = 0xCB,
+ [0x74] = 0xC8, [0x75] = 0xCD, [0x76] = 0xCE, [0x77] = 0xCF,
+ [0x78] = 0xCC, [0x79] = 0x60, [0x7A] = 0x3A, [0x7B] = 0x23,
+ [0x7C] = 0x40, [0x7D] = 0x27, [0x7E] = 0x3D, [0x7F] = 0x22,
+ [0x80] = 0xD8, [0x81] = 0x61, [0x82] = 0x62, [0x83] = 0x63,
+ [0x84] = 0x64, [0x85] = 0x65, [0x86] = 0x66, [0x87] = 0x67,
+ [0x88] = 0x68, [0x89] = 0x69, [0x8A] = 0xAB, [0x8B] = 0xBB,
+ [0x8C] = 0xF0, [0x8D] = 0xFD, [0x8E] = 0xFE, [0x8F] = 0xB1,
+ [0x90] = 0xB0, [0x91] = 0x6A, [0x92] = 0x6B, [0x93] = 0x6C,
+ [0x94] = 0x6D, [0x95] = 0x6E, [0x96] = 0x6F, [0x97] = 0x70,
+ [0x98] = 0x71, [0x99] = 0x72, [0x9A] = 0xAA, [0x9B] = 0xBA,
+ [0x9C] = 0xE6, [0x9D] = 0xB8, [0x9E] = 0xC6, [0x9F] = 0xA4,
+ [0xA0] = 0xB5, [0xA1] = 0x7E, [0xA2] = 0x73, [0xA3] = 0x74,
+ [0xA4] = 0x75, [0xA5] = 0x76, [0xA6] = 0x77, [0xA7] = 0x78,
+ [0xA8] = 0x79, [0xA9] = 0x7A, [0xAA] = 0xA1, [0xAB] = 0xBF,
+ [0xAC] = 0xD0, [0xAD] = 0xDD, [0xAE] = 0xDE, [0xAF] = 0xAE,
+ [0xB0] = 0x5E, [0xB1] = 0xA3, [0xB2] = 0xA5, [0xB3] = 0xB7,
+ [0xB4] = 0xA9, [0xB5] = 0xA7, [0xB6] = 0xB6, [0xB7] = 0xBC,
+ [0xB8] = 0xBD, [0xB9] = 0xBE, [0xBA] = 0x5B, [0xBB] = 0x5D,
+ [0xBC] = 0xAF, [0xBD] = 0xA8, [0xBE] = 0xB4, [0xBF] = 0xD7,
+ [0xC0] = 0x7B, [0xC1] = 0x41, [0xC2] = 0x42, [0xC3] = 0x43,
+ [0xC4] = 0x44, [0xC5] = 0x45, [0xC6] = 0x46, [0xC7] = 0x47,
+ [0xC8] = 0x48, [0xC9] = 0x49, [0xCA] = 0xAD, [0xCB] = 0xF4,
+ [0xCC] = 0xF6, [0xCD] = 0xF2, [0xCE] = 0xF3, [0xCF] = 0xF5,
+ [0xD0] = 0x7D, [0xD1] = 0x4A, [0xD2] = 0x4B, [0xD3] = 0x4C,
+ [0xD4] = 0x4D, [0xD5] = 0x4E, [0xD6] = 0x4F, [0xD7] = 0x50,
+ [0xD8] = 0x51, [0xD9] = 0x52, [0xDA] = 0xB9, [0xDB] = 0xFB,
+ [0xDC] = 0xFC, [0xDD] = 0xF9, [0xDE] = 0xFA, [0xDF] = 0xFF,
+ [0xE0] = 0x5C, [0xE1] = 0xF7, [0xE2] = 0x53, [0xE3] = 0x54,
+ [0xE4] = 0x55, [0xE5] = 0x56, [0xE6] = 0x57, [0xE7] = 0x58,
+ [0xE8] = 0x59, [0xE9] = 0x5A, [0xEA] = 0xB2, [0xEB] = 0xD4,
+ [0xEC] = 0xD6, [0xED] = 0xD2, [0xEE] = 0xD3, [0xEF] = 0xD5,
+ [0xF0] = 0x30, [0xF1] = 0x31, [0xF2] = 0x32, [0xF3] = 0x33,
+ [0xF4] = 0x34, [0xF5] = 0x35, [0xF6] = 0x36, [0xF7] = 0x37,
+ [0xF8] = 0x38, [0xF9] = 0x39, [0xFA] = 0xB3, [0xFB] = 0xDB,
+ [0xFC] = 0xDC, [0xFD] = 0xD9, [0xFE] = 0xDA, [0xFF] = 0x9F
+};
+
+/* Definitions used in the body of the `gconv' function. */
+#define CHARSET_NAME "ISO-8859-1//"
+#define FROM_LOOP iso8859_1_to_cp037_z900
+#define TO_LOOP cp037_to_iso8859_1_z900
+#define DEFINE_INIT 1
+#define DEFINE_FINI 1
+#define MIN_NEEDED_FROM 1
+#define MIN_NEEDED_TO 1
+
+/* The Z900 variant of troo forces us to always specify a test
+ character which ends the translation. So if we run into the
+ situation where the translation has been interrupted due to the
+ test character we translate the character by hand and jump back
+ into the instruction. */
+
+#define TROO_LOOP(TABLE) \
+ { \
+ register const unsigned char test asm ("0") = 0; \
+ register const unsigned char *pTable asm ("1") = TABLE; \
+ register unsigned char *pOutput asm ("2") = outptr; \
+ register uint64_t length asm ("3"); \
+ const unsigned char* pInput = inptr; \
+ uint64_t tmp; \
+ \
+ length = (inend - inptr < outend - outptr \
+ ? inend - inptr : outend - outptr); \
+ \
+ asm volatile ("0: \n\t" \
+ " troo %0,%1 \n\t" \
+ " jz 1f \n\t" \
+ " jo 0b \n\t" \
+ " llgc %3,0(%1) \n\t" \
+ " la %3,0(%3,%4) \n\t" \
+ " mvc 0(1,%0),0(%3) \n\t" \
+ " aghi %1,1 \n\t" \
+ " aghi %0,1 \n\t" \
+ " aghi %2,-1 \n\t" \
+ " j 0b \n\t" \
+ "1: \n" \
+ \
+ : "+a" (pOutput), "+a" (pInput), "+d" (length), "=&a" (tmp) \
+ : "a" (pTable), "d" (test) \
+ : "cc"); \
+ \
+ inptr = pInput; \
+ outptr = pOutput; \
+ }
+
+/* First define the conversion function from ISO 8859-1 to CP037. */
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+#define BODY TROO_LOOP (table_iso8859_1_to_cp037)
+
+#include <iconv/loop.c>
+
+
+/* Next, define the conversion function from CP037 to ISO 8859-1. */
+#define MIN_NEEDED_INPUT MIN_NEEDED_TO
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
+#define LOOPFCT TO_LOOP
+#define BODY TROO_LOOP (table_cp037_iso8859_1);
+
+#include <iconv/loop.c>
+
+
+/* Now define the toplevel functions. */
+#include <iconv/skeleton.c>
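
The comment above TROO_LOOP describes the one quirk of the Z900 "translate one to one" instruction: it always takes a test character (0 here) that interrupts the translation, so the module translates the offending byte by hand and jumps back into the instruction. Stripped of the inline assembly, the per-byte work is a plain table lookup; a hedged C sketch of that behaviour (the function is illustrative, not part of the module):

#include <stddef.h>

/* Map each input byte through a 256-entry table until the buffers run
   out or the test byte is met; returns the number of bytes handled.
   The hardware loop above does the same, many bytes at a time.  */
static size_t
translate_one_to_one (unsigned char *out, const unsigned char *in,
                      size_t len, const unsigned char table[256],
                      unsigned char test)
{
  size_t i;
  for (i = 0; i < len; ++i)
    {
      unsigned char translated = table[in[i]];
      if (translated == test)
        break;            /* Point at which the module's fixup code runs.  */
      out[i] = translated;
    }
  return i;
}

With the two tables above this interruption effectively happens only for NUL bytes, since 0x00 is the only byte either table maps to 0x00.
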
diff --git a/sysdeps/s390/s390-64/utf16-utf32-z9.c b/sysdeps/s390/s390-64/utf16-utf32-z9.c
new file mode 100644
index 0000000000..868dea68ca
--- /dev/null
+++ b/sysdeps/s390/s390-64/utf16-utf32-z9.c
@@ -0,0 +1,325 @@
+/* Conversion between UTF-16 and UTF-32 BE/internal.
+
+ This module uses the Z9-109 variants of the Convert Unicode
+ instructions.
+ Copyright (C) 1997-2009 Free Software Foundation, Inc.
+
+ Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
+ Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ Thanks to Daniel Appich who covered the relevant performance work
+ in his diploma thesis.
+
+ This is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <dl-procinfo.h>
+#include <gconv.h>
+
+/* UTF-32 big endian byte order mark. */
+#define BOM_UTF32 0x0000feffu
+
+/* UTF-16 big endian byte order mark. */
+#define BOM_UTF16 0xfeff
+
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 2
+#define MAX_NEEDED_FROM 4
+#define MIN_NEEDED_TO 4
+#define FROM_LOOP from_utf16_loop
+#define TO_LOOP to_utf16_loop
+#define FROM_DIRECTION (dir == from_utf16)
+#define PREPARE_LOOP \
+ enum direction dir = ((struct utf16_data *) step->__data)->dir; \
+ int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \
+ \
+ if (emit_bom && !data->__internal_use \
+ && data->__invocation_counter == 0) \
+ { \
+ if (dir == to_utf16) \
+ { \
+ /* Emit the UTF-16 Byte Order Mark. */ \
+ if (__builtin_expect (outbuf + 2 > outend, 0)) \
+ return __GCONV_FULL_OUTPUT; \
+ \
+ put16u (outbuf, BOM_UTF16); \
+ outbuf += 2; \
+ } \
+ else \
+ { \
+ /* Emit the UTF-32 Byte Order Mark. */ \
+ if (__builtin_expect (outbuf + 4 > outend, 0)) \
+ return __GCONV_FULL_OUTPUT; \
+ \
+ put32u (outbuf, BOM_UTF32); \
+ outbuf += 4; \
+ } \
+ }
+
+/* Direction of the transformation. */
+enum direction
+{
+ illegal_dir,
+ to_utf16,
+ from_utf16
+};
+
+struct utf16_data
+{
+ enum direction dir;
+ int emit_bom;
+};
+
+
+extern int gconv_init (struct __gconv_step *step);
+int
+gconv_init (struct __gconv_step *step)
+{
+ /* Determine which direction. */
+ struct utf16_data *new_data;
+ enum direction dir = illegal_dir;
+ int emit_bom;
+ int result;
+
+ emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0
+ || __strcasecmp (step->__to_name, "UTF-16//") == 0);
+
+ if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
+ && (__strcasecmp (step->__to_name, "UTF-32//") == 0
+ || __strcasecmp (step->__to_name, "UTF-32BE//") == 0
+ || __strcasecmp (step->__to_name, "INTERNAL") == 0))
+ {
+ dir = from_utf16;
+ }
+ else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0
+ || __strcasecmp (step->__to_name, "UTF-16BE//") == 0)
+ && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
+ || __strcasecmp (step->__from_name, "INTERNAL") == 0))
+ {
+ dir = to_utf16;
+ }
+
+ result = __GCONV_NOCONV;
+ if (dir != illegal_dir)
+ {
+ new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
+
+ result = __GCONV_NOMEM;
+ if (new_data != NULL)
+ {
+ new_data->dir = dir;
+ new_data->emit_bom = emit_bom;
+ step->__data = new_data;
+
+ if (dir == from_utf16)
+ {
+ step->__min_needed_from = MIN_NEEDED_FROM;
+ step->__max_needed_from = MIN_NEEDED_FROM;
+ step->__min_needed_to = MIN_NEEDED_TO;
+ step->__max_needed_to = MIN_NEEDED_TO;
+ }
+ else
+ {
+ step->__min_needed_from = MIN_NEEDED_TO;
+ step->__max_needed_from = MIN_NEEDED_TO;
+ step->__min_needed_to = MIN_NEEDED_FROM;
+ step->__max_needed_to = MIN_NEEDED_FROM;
+ }
+
+ step->__stateful = 0;
+
+ result = __GCONV_OK;
+ }
+ }
+
+ return result;
+}
+
+
+extern void gconv_end (struct __gconv_step *data);
+void
+gconv_end (struct __gconv_step *data)
+{
+ free (data->__data);
+}
+
+/* The macro for the hardware loop. This is used for both
+ directions. */
+#define HARDWARE_CONVERT(INSTRUCTION) \
+ { \
+ register const unsigned char* pInput asm ("8") = inptr; \
+ register unsigned long long inlen asm ("9") = inend - inptr; \
+ register unsigned char* pOutput asm ("10") = outptr; \
+ register unsigned long long outlen asm("11") = outend - outptr; \
+ uint64_t cc = 0; \
+ \
+ asm volatile ("0: " INSTRUCTION " \n\t" \
+ " jo 0b \n\t" \
+ " ipm %2 \n" \
+ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \
+ "+d" (outlen), "+d" (inlen) \
+ : \
+ : "cc", "memory"); \
+ \
+ inptr = pInput; \
+ outptr = pOutput; \
+ cc >>= 28; \
+ \
+ if (cc == 1) \
+ { \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ else if (cc == 2) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ }
+
+/* Conversion function from UTF-16 to UTF-32 internal/BE. */
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+/* The software routine is copied from utf-16.c (minus bytes
+ swapping). */
+#define BODY \
+ { \
+ if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ { \
+ HARDWARE_CONVERT ("cu24 %0, %1, 1"); \
+ if (inptr != inend) \
+ { \
+ /* Check if the third byte is \
+ a valid start of a UTF-16 surrogate. */ \
+ if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc) \
+ STANDARD_FROM_LOOP_ERR_HANDLER (3); \
+ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ continue; \
+ } \
+ \
+ uint16_t u1 = get16 (inptr); \
+ \
+ if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \
+ { \
+ /* No surrogate. */ \
+ put32 (outptr, u1); \
+ inptr += 2; \
+ } \
+ else \
+ { \
+ /* It's a surrogate character. At least the first word says \
+ it is. */ \
+ if (__builtin_expect (inptr + 4 > inend, 0)) \
+ { \
+ /* We don't have enough input for another complete input \
+ character. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ \
+ inptr += 2; \
+ uint16_t u2 = get16 (inptr); \
+ if (__builtin_expect (u2 < 0xdc00, 0) \
+ || __builtin_expect (u2 > 0xdfff, 0)) \
+ { \
+ /* This is no valid second word for a surrogate. */ \
+ inptr -= 2; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
+ } \
+ \
+ put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \
+ inptr += 2; \
+ } \
+ outptr += 4; \
+ }
+#define LOOP_NEED_FLAGS
+#include <iconv/loop.c>
+
+/* Conversion from UTF-32 internal/BE to UTF-16. */
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_TO
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
+#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
+#define LOOPFCT TO_LOOP
+/* The software routine is copied from utf-16.c (minus bytes
+ swapping). */
+#define BODY \
+ { \
+ if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ { \
+ HARDWARE_CONVERT ("cu42 %0, %1"); \
+ \
+ if (inptr != inend) \
+ { \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ continue; \
+ } \
+ \
+ uint32_t c = get32 (inptr); \
+ \
+ if (__builtin_expect (c <= 0xd7ff, 1) \
+ || (c >=0xdc00 && c <= 0xffff)) \
+ { \
+ /* Two UTF-16 chars. */ \
+ put16 (outptr, c); \
+ } \
+ else if (__builtin_expect (c >= 0x10000, 1) \
+ && __builtin_expect (c <= 0x10ffff, 1)) \
+ { \
+ /* Four UTF-16 chars. */ \
+ uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \
+ uint16_t out; \
+ \
+ /* Generate a surrogate character. */ \
+ if (__builtin_expect (outptr + 4 > outend, 0)) \
+ { \
+ /* Overflow in the output buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
+ out = 0xd800; \
+ out |= (zabcd & 0xff) << 6; \
+ out |= (c >> 10) & 0x3f; \
+ put16 (outptr, out); \
+ outptr += 2; \
+ \
+ out = 0xdc00; \
+ out |= c & 0x3ff; \
+ put16 (outptr, out); \
+ } \
+ else \
+ { \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
+ } \
+ outptr += 2; \
+ inptr += 4; \
+ }
+#define LOOP_NEED_FLAGS
+#include <iconv/loop.c>
+
+#include <iconv/skeleton.c>
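
In the software fallback above, the expression ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00) is the usual UTF-16 surrogate decode written compactly: 0xd7c0 is 0xd800 - 0x40, so subtracting it both strips the high-surrogate tag and folds in the +0x10000 offset. A small self-contained check (the helper name is illustrative):

#include <assert.h>
#include <stdint.h>

/* Decode a UTF-16 surrogate pair into a code point, exactly as the
   from_utf16 loop above does.  HIGH is in [0xd800, 0xdbff], LOW in
   [0xdc00, 0xdfff].  */
static uint32_t
decode_surrogate_pair (uint16_t high, uint16_t low)
{
  return ((uint32_t) (high - 0xd7c0) << 10) + (low - 0xdc00);
}

int
main (void)
{
  /* U+1D11E MUSICAL SYMBOL G CLEF is encoded as D834 DD1E.  */
  assert (decode_surrogate_pair (0xd834, 0xdd1e) == 0x1d11e);
  return 0;
}
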
diff --git a/sysdeps/s390/s390-64/utf8-utf16-z9.c b/sysdeps/s390/s390-64/utf8-utf16-z9.c
new file mode 100644
index 0000000000..531d3ebd4b
--- /dev/null
+++ b/sysdeps/s390/s390-64/utf8-utf16-z9.c
@@ -0,0 +1,463 @@
+/* Conversion between UTF-8 and UTF-16 BE/internal.
+
+ This module uses the Z9-109 variants of the Convert Unicode
+ instructions.
+ Copyright (C) 1997-2009 Free Software Foundation, Inc.
+
+ Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
+ Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ Thanks to Daniel Appich who covered the relevant performance work
+ in his diploma thesis.
+
+ This is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <dl-procinfo.h>
+#include <gconv.h>
+
+/* UTF-16 big endian byte order mark. */
+#define BOM_UTF16 0xfeff
+
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+#define MIN_NEEDED_FROM 1
+#define MAX_NEEDED_FROM 4
+#define MIN_NEEDED_TO 2
+#define MAX_NEEDED_TO 4
+#define FROM_LOOP from_utf8_loop
+#define TO_LOOP to_utf8_loop
+#define FROM_DIRECTION (dir == from_utf8)
+#define PREPARE_LOOP \
+ enum direction dir = ((struct utf8_data *) step->__data)->dir; \
+ int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \
+ \
+ if (emit_bom && !data->__internal_use \
+ && data->__invocation_counter == 0) \
+ { \
+ /* Emit the UTF-16 Byte Order Mark. */ \
+ if (__builtin_expect (outbuf + 2 > outend, 0)) \
+ return __GCONV_FULL_OUTPUT; \
+ \
+ put16u (outbuf, BOM_UTF16); \
+ outbuf += 2; \
+ }
+
+/* Direction of the transformation. */
+enum direction
+{
+ illegal_dir,
+ to_utf8,
+ from_utf8
+};
+
+struct utf8_data
+{
+ enum direction dir;
+ int emit_bom;
+};
+
+
+extern int gconv_init (struct __gconv_step *step);
+int
+gconv_init (struct __gconv_step *step)
+{
+ /* Determine which direction. */
+ struct utf8_data *new_data;
+ enum direction dir = illegal_dir;
+ int emit_bom;
+ int result;
+
+ emit_bom = (__strcasecmp (step->__to_name, "UTF-16//") == 0);
+
+ if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0
+ && (__strcasecmp (step->__to_name, "UTF-16//") == 0
+ || __strcasecmp (step->__to_name, "UTF-16BE//") == 0))
+ {
+ dir = from_utf8;
+ }
+ else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
+ && __strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0)
+ {
+ dir = to_utf8;
+ }
+
+ result = __GCONV_NOCONV;
+ if (dir != illegal_dir)
+ {
+ new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data));
+
+ result = __GCONV_NOMEM;
+ if (new_data != NULL)
+ {
+ new_data->dir = dir;
+ new_data->emit_bom = emit_bom;
+ step->__data = new_data;
+
+ if (dir == from_utf8)
+ {
+ step->__min_needed_from = MIN_NEEDED_FROM;
+ step->__max_needed_from = MIN_NEEDED_FROM;
+ step->__min_needed_to = MIN_NEEDED_TO;
+ step->__max_needed_to = MIN_NEEDED_TO;
+ }
+ else
+ {
+ step->__min_needed_from = MIN_NEEDED_TO;
+ step->__max_needed_from = MIN_NEEDED_TO;
+ step->__min_needed_to = MIN_NEEDED_FROM;
+ step->__max_needed_to = MIN_NEEDED_FROM;
+ }
+
+ step->__stateful = 0;
+
+ result = __GCONV_OK;
+ }
+ }
+
+ return result;
+}
+
+
+extern void gconv_end (struct __gconv_step *data);
+void
+gconv_end (struct __gconv_step *data)
+{
+ free (data->__data);
+}
+
+/* The macro for the hardware loop. This is used for both
+ directions. */
+#define HARDWARE_CONVERT(INSTRUCTION) \
+ { \
+ register const unsigned char* pInput asm ("8") = inptr; \
+ register unsigned long long inlen asm ("9") = inend - inptr; \
+ register unsigned char* pOutput asm ("10") = outptr; \
+ register unsigned long long outlen asm("11") = outend - outptr; \
+ uint64_t cc = 0; \
+ \
+ asm volatile ("0: " INSTRUCTION " \n\t" \
+ " jo 0b \n\t" \
+ " ipm %2 \n" \
+ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \
+ "+d" (outlen), "+d" (inlen) \
+ : \
+ : "cc", "memory"); \
+ \
+ inptr = pInput; \
+ outptr = pOutput; \
+ cc >>= 28; \
+ \
+ if (cc == 1) \
+ { \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ else if (cc == 2) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ }
+
+/* Conversion function from UTF-8 to UTF-16. */
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+/* The software implementation is based on the code in gconv_simple.c. */
+#define BODY \
+ { \
+ if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ { \
+ HARDWARE_CONVERT ("cu12 %0, %1, 1"); \
+ \
+ if (inptr != inend) \
+ { \
+ int i; \
+ for (i = 1; inptr + i < inend; ++i) \
+ if ((inptr[i] & 0xc0) != 0x80) \
+ break; \
+ \
+ if (__builtin_expect (inptr + i == inend, 1)) \
+ { \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ STANDARD_FROM_LOOP_ERR_HANDLER (i); \
+ } \
+ continue; \
+ } \
+ \
+ /* Next input byte. */ \
+ uint16_t ch = *inptr; \
+ \
+ if (__builtin_expect (ch < 0x80, 1)) \
+ { \
+ /* One byte sequence. */ \
+ ++inptr; \
+ } \
+ else \
+ { \
+ uint_fast32_t cnt; \
+ uint_fast32_t i; \
+ \
+ if (ch >= 0xc2 && ch < 0xe0) \
+ { \
+ /* We expect two bytes. The first byte cannot be 0xc0 \
+ or 0xc1, otherwise the wide character could have been \
+ represented using a single byte. */ \
+ cnt = 2; \
+ ch &= 0x1f; \
+ } \
+ else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
+ { \
+ /* We expect three bytes. */ \
+ cnt = 3; \
+ ch &= 0x0f; \
+ } \
+ else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
+ { \
+ /* We expect four bytes. */ \
+ cnt = 4; \
+ ch &= 0x07; \
+ } \
+ else \
+ { \
+ /* Search the end of this ill-formed UTF-8 character. This \
+ is the next byte with (x & 0xc0) != 0x80. */ \
+ i = 0; \
+ do \
+ ++i; \
+ while (inptr + i < inend \
+ && (*(inptr + i) & 0xc0) == 0x80 \
+ && i < 5); \
+ \
+ errout: \
+ STANDARD_FROM_LOOP_ERR_HANDLER (i); \
+ } \
+ \
+ if (__builtin_expect (inptr + cnt > inend, 0)) \
+ { \
+ /* We don't have enough input. But before we report \
+ that check that all the bytes are correct. */ \
+ for (i = 1; inptr + i < inend; ++i) \
+ if ((inptr[i] & 0xc0) != 0x80) \
+ break; \
+ \
+ if (__builtin_expect (inptr + i == inend, 1)) \
+ { \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ \
+ goto errout; \
+ } \
+ \
+ if (cnt == 4) \
+ { \
+ /* For 4 byte UTF-8 chars two UTF-16 chars (high and \
+ low) are needed. */ \
+ uint16_t zabcd, high, low; \
+ \
+ if (__builtin_expect (outptr + 4 > outend, 0)) \
+ { \
+ /* Overflow in the output buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
+ /* See Principles of Operations cu12. */ \
+ zabcd = (((inptr[0] & 0x7) << 2) | \
+ ((inptr[1] & 0x30) >> 4)) - 1; \
+ \
+ /* z-bit must be zero after subtracting 1. */ \
+ if (zabcd & 0x10) \
+ STANDARD_FROM_LOOP_ERR_HANDLER (4) \
+ \
+ high = (uint16_t)(0xd8 << 8); /* high surrogate id */ \
+ high |= zabcd << 6; /* abcd bits */ \
+ high |= (inptr[1] & 0xf) << 2; /* efgh bits */ \
+ high |= (inptr[2] & 0x30) >> 4; /* ij bits */ \
+ \
+ low = (uint16_t)(0xdc << 8); /* low surrogate id */ \
+ low |= ((uint16_t)inptr[2] & 0xc) << 6; /* kl bits */ \
+ low |= (inptr[2] & 0x3) << 6; /* mn bits */ \
+ low |= inptr[3] & 0x3f; /* opqrst bits */ \
+ \
+ put16 (outptr, high); \
+ outptr += 2; \
+ put16 (outptr, low); \
+ outptr += 2; \
+ inptr += 4; \
+ continue; \
+ } \
+ else \
+ { \
+ /* Read the possible remaining bytes. */ \
+ for (i = 1; i < cnt; ++i) \
+ { \
+ uint16_t byte = inptr[i]; \
+ \
+ if ((byte & 0xc0) != 0x80) \
+ /* This is an illegal encoding. */ \
+ break; \
+ \
+ ch <<= 6; \
+ ch |= byte & 0x3f; \
+ } \
+ inptr += cnt; \
+ \
+ } \
+ } \
+ /* Now adjust the pointers and store the result. */ \
+ *((uint16_t *) outptr) = ch; \
+ outptr += sizeof (uint16_t); \
+ }
+
+#define LOOP_NEED_FLAGS
+#include <iconv/loop.c>
+
+/* Conversion from UTF-16 to UTF-8. */
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_TO
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
+#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
+#define LOOPFCT TO_LOOP
+/* The software routine is based on the functionality of the S/390
+ hardware instruction (cu21) as described in the Principles of
+ Operation. */
+#define BODY \
+ { \
+ if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ { \
+ HARDWARE_CONVERT ("cu21 %0, %1"); \
+ if (inptr != inend) \
+ { \
+ /* Check if the third byte is \
+ a valid start of a UTF-16 surrogate. */ \
+ if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc) \
+ STANDARD_TO_LOOP_ERR_HANDLER (3); \
+ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ continue; \
+ } \
+ \
+ uint16_t c = get16 (inptr); \
+ \
+ if (__builtin_expect (c <= 0x007f, 1)) \
+ { \
+ /* Single byte UTF-8 char. */ \
+ *outptr = c & 0xff; \
+ outptr++; \
+ } \
+ else if (c >= 0x0080 && c <= 0x07ff) \
+ { \
+ /* Two byte UTF-8 char. */ \
+ \
+ if (__builtin_expect (outptr + 2 > outend, 0)) \
+ { \
+ /* Overflow in the output buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
+ outptr[0] = 0xc0; \
+ outptr[0] |= c >> 6; \
+ \
+ outptr[1] = 0x80; \
+ outptr[1] |= c & 0x3f; \
+ \
+ outptr += 2; \
+ } \
+ else if (c >= 0x0800 && c <= 0xd7ff) \
+ { \
+ /* Three byte UTF-8 char. */ \
+ \
+ if (__builtin_expect (outptr + 3 > outend, 0)) \
+ { \
+ /* Overflow in the output buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ outptr[0] = 0xe0; \
+ outptr[0] |= c >> 12; \
+ \
+ outptr[1] = 0x80; \
+ outptr[1] |= (c >> 6) & 0x3f; \
+ \
+ outptr[2] = 0x80; \
+ outptr[2] |= c & 0x3f; \
+ \
+ outptr += 3; \
+ } \
+ else if (c >= 0xd800 && c <= 0xdbff) \
+ { \
+ /* Four byte UTF-8 char. */ \
+ uint16_t low, uvwxy; \
+ \
+ if (__builtin_expect (outptr + 4 > outend, 0)) \
+ { \
+ /* Overflow in the output buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ inptr += 2; \
+ if (__builtin_expect (inptr + 2 > inend, 0)) \
+ { \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ \
+ low = get16 (inptr); \
+ \
+ if ((low & 0xfc00) != 0xdc00) \
+ { \
+ inptr -= 2; \
+ STANDARD_TO_LOOP_ERR_HANDLER (2); \
+ } \
+ uvwxy = ((c >> 6) & 0xf) + 1; \
+ outptr[0] = 0xf0; \
+ outptr[0] |= uvwxy >> 2; \
+ \
+ outptr[1] = 0x80; \
+ outptr[1] |= (uvwxy << 4) & 0x30; \
+ outptr[1] |= (c >> 2) & 0x0f; \
+ \
+ outptr[2] = 0x80; \
+ outptr[2] |= (c & 0x03) << 4; \
+ outptr[2] |= (low >> 6) & 0x0f; \
+ \
+ outptr[3] = 0x80; \
+ outptr[3] |= low & 0x3f; \
+ \
+ outptr += 4; \
+ } \
+ else \
+ { \
+ STANDARD_TO_LOOP_ERR_HANDLER (2); \
+ } \
+ inptr += 2; \
+ }
+#define LOOP_NEED_FLAGS
+#include <iconv/loop.c>
+
+#include <iconv/skeleton.c>
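
The four-byte branch of the to_utf8 loop above rebuilds the code point's bits directly out of the surrogate pair, following the cu21 description in the Principles of Operation. Expressed on an already-decoded code point, the same four-byte UTF-8 encoding is simply (a hedged sketch, not the glibc routine):

#include <stdint.h>

/* Encode a code point in [0x10000, 0x10ffff] as four UTF-8 bytes;
   equivalent to what the surrogate-pair branch above emits.  */
static void
encode_utf8_4byte (uint32_t cp, unsigned char out[4])
{
  out[0] = 0xf0 | (cp >> 18);
  out[1] = 0x80 | ((cp >> 12) & 0x3f);
  out[2] = 0x80 | ((cp >> 6) & 0x3f);
  out[3] = 0x80 | (cp & 0x3f);
}

For example U+1D11E comes out as F0 9D 84 9E, matching what the surrogate pair D834 DD1E produces through the loop above.
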
diff --git a/sysdeps/s390/s390-64/utf8-utf32-z9.c b/sysdeps/s390/s390-64/utf8-utf32-z9.c
new file mode 100644
index 0000000000..17ef8bc890
--- /dev/null
+++ b/sysdeps/s390/s390-64/utf8-utf32-z9.c
@@ -0,0 +1,508 @@
+/* Conversion between UTF-8 and UTF-32 BE/internal.
+
+ This module uses the Z9-109 variants of the Convert Unicode
+ instructions.
+ Copyright (C) 1997-2009 Free Software Foundation, Inc.
+
+ Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
+ Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ Thanks to Daniel Appich who covered the relevant performance work
+ in his diploma thesis.
+
+ This is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <dl-procinfo.h>
+#include <gconv.h>
+
+/* UTF-32 big endian byte order mark. */
+#define BOM 0x0000feffu
+
+#define DEFINE_INIT 0
+#define DEFINE_FINI 0
+/* These definitions apply to the UTF-8 to UTF-32 direction. The
+ software implementation for UTF-8 still supports multibyte
+ characters up to 6 bytes whereas the hardware variant does not. */
+#define MIN_NEEDED_FROM 1
+#define MAX_NEEDED_FROM 6
+#define MIN_NEEDED_TO 4
+#define FROM_LOOP from_utf8_loop
+#define TO_LOOP to_utf8_loop
+#define FROM_DIRECTION (dir == from_utf8)
+#define PREPARE_LOOP \
+ enum direction dir = ((struct utf8_data *) step->__data)->dir; \
+ int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \
+ \
+ if (emit_bom && !data->__internal_use \
+ && data->__invocation_counter == 0) \
+ { \
+ /* Emit the Byte Order Mark. */ \
+ if (__builtin_expect (outbuf + 4 > outend, 0)) \
+ return __GCONV_FULL_OUTPUT; \
+ \
+ put32u (outbuf, BOM); \
+ outbuf += 4; \
+ }
+
+/* Direction of the transformation. */
+enum direction
+{
+ illegal_dir,
+ to_utf8,
+ from_utf8
+};
+
+struct utf8_data
+{
+ enum direction dir;
+ int emit_bom;
+};
+
+
+extern int gconv_init (struct __gconv_step *step);
+int
+gconv_init (struct __gconv_step *step)
+{
+ /* Determine which direction. */
+ struct utf8_data *new_data;
+ enum direction dir = illegal_dir;
+ int emit_bom;
+ int result;
+
+ emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0);
+
+ if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0
+ && (__strcasecmp (step->__to_name, "UTF-32//") == 0
+ || __strcasecmp (step->__to_name, "UTF-32BE//") == 0
+ || __strcasecmp (step->__to_name, "INTERNAL") == 0))
+ {
+ dir = from_utf8;
+ }
+ else if (__strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0
+ && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
+ || __strcasecmp (step->__from_name, "INTERNAL") == 0))
+ {
+ dir = to_utf8;
+ }
+
+ result = __GCONV_NOCONV;
+ if (dir != illegal_dir)
+ {
+ new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data));
+
+ result = __GCONV_NOMEM;
+ if (new_data != NULL)
+ {
+ new_data->dir = dir;
+ new_data->emit_bom = emit_bom;
+ step->__data = new_data;
+
+ if (dir == from_utf8)
+ {
+ step->__min_needed_from = MIN_NEEDED_FROM;
+ step->__max_needed_from = MIN_NEEDED_FROM;
+ step->__min_needed_to = MIN_NEEDED_TO;
+ step->__max_needed_to = MIN_NEEDED_TO;
+ }
+ else
+ {
+ step->__min_needed_from = MIN_NEEDED_TO;
+ step->__max_needed_from = MIN_NEEDED_TO;
+ step->__min_needed_to = MIN_NEEDED_FROM;
+ step->__max_needed_to = MIN_NEEDED_FROM;
+ }
+
+ step->__stateful = 0;
+
+ result = __GCONV_OK;
+ }
+ }
+
+ return result;
+}
+
+
+extern void gconv_end (struct __gconv_step *data);
+void
+gconv_end (struct __gconv_step *data)
+{
+ free (data->__data);
+}
+
+/* The macro for the hardware loop. This is used for both
+ directions. */
+#define HARDWARE_CONVERT(INSTRUCTION) \
+ { \
+ register const unsigned char* pInput asm ("8") = inptr; \
+ register unsigned long long inlen asm ("9") = inend - inptr; \
+ register unsigned char* pOutput asm ("10") = outptr; \
+ register unsigned long long outlen asm("11") = outend - outptr; \
+ uint64_t cc = 0; \
+ \
+ asm volatile ("0: " INSTRUCTION " \n\t" \
+ " jo 0b \n\t" \
+ " ipm %2 \n" \
+ : "+a" (pOutput), "+a" (pInput), "+d" (cc), \
+ "+d" (outlen), "+d" (inlen) \
+ : \
+ : "cc", "memory"); \
+ \
+ inptr = pInput; \
+ outptr = pOutput; \
+ cc >>= 28; \
+ \
+ if (cc == 1) \
+ { \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ else if (cc == 2) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ }
+
+/* Conversion function from UTF-8 to UTF-32 internal/BE. */
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+/* The software routine is copied from gconv_simple.c. */
+#define BODY \
+ { \
+ if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ { \
+ HARDWARE_CONVERT ("cu14 %0, %1, 1"); \
+ \
+ if (inptr != inend) \
+ { \
+ int i; \
+ for (i = 1; inptr + i < inend; ++i) \
+ if ((inptr[i] & 0xc0) != 0x80) \
+ break; \
+ \
+ if (__builtin_expect (inptr + i == inend, 1)) \
+ { \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ STANDARD_FROM_LOOP_ERR_HANDLER (i); \
+ } \
+ continue; \
+ } \
+ \
+ /* Next input byte. */ \
+ uint32_t ch = *inptr; \
+ \
+ if (__builtin_expect (ch < 0x80, 1)) \
+ { \
+ /* One byte sequence. */ \
+ ++inptr; \
+ } \
+ else \
+ { \
+ uint_fast32_t cnt; \
+ uint_fast32_t i; \
+ \
+ if (ch >= 0xc2 && ch < 0xe0) \
+ { \
+ /* We expect two bytes. The first byte cannot be 0xc0 or \
+ 0xc1, otherwise the wide character could have been \
+ represented using a single byte. */ \
+ cnt = 2; \
+ ch &= 0x1f; \
+ } \
+ else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
+ { \
+ /* We expect three bytes. */ \
+ cnt = 3; \
+ ch &= 0x0f; \
+ } \
+ else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
+ { \
+ /* We expect four bytes. */ \
+ cnt = 4; \
+ ch &= 0x07; \
+ } \
+ else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
+ { \
+ /* We expect five bytes. */ \
+ cnt = 5; \
+ ch &= 0x03; \
+ } \
+ else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
+ { \
+ /* We expect six bytes. */ \
+ cnt = 6; \
+ ch &= 0x01; \
+ } \
+ else \
+ { \
+ /* Search the end of this ill-formed UTF-8 character. This \
+ is the next byte with (x & 0xc0) != 0x80. */ \
+ i = 0; \
+ do \
+ ++i; \
+ while (inptr + i < inend \
+ && (*(inptr + i) & 0xc0) == 0x80 \
+ && i < 5); \
+ \
+ errout: \
+ STANDARD_FROM_LOOP_ERR_HANDLER (i); \
+ } \
+ \
+ if (__builtin_expect (inptr + cnt > inend, 0)) \
+ { \
+ /* We don't have enough input. But before we report \
+ that check that all the bytes are correct. */ \
+ for (i = 1; inptr + i < inend; ++i) \
+ if ((inptr[i] & 0xc0) != 0x80) \
+ break; \
+ \
+ if (__builtin_expect (inptr + i == inend, 1)) \
+ { \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ \
+ goto errout; \
+ } \
+ \
+ /* Read the possible remaining bytes. */ \
+ for (i = 1; i < cnt; ++i) \
+ { \
+ uint32_t byte = inptr[i]; \
+ \
+ if ((byte & 0xc0) != 0x80) \
+ /* This is an illegal encoding. */ \
+ break; \
+ \
+ ch <<= 6; \
+ ch |= byte & 0x3f; \
+ } \
+ \
+ /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
+ If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
+ have been represented with fewer than cnt bytes. */ \
+ if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
+ { \
+ /* This is an illegal encoding. */ \
+ goto errout; \
+ } \
+ \
+ inptr += cnt; \
+ } \
+ \
+ /* Now adjust the pointers and store the result. */ \
+ *((uint32_t *) outptr) = ch; \
+ outptr += sizeof (uint32_t); \
+ }
+#define LOOP_NEED_FLAGS
+
+#define STORE_REST \
+ { \
+ /* We store the remaining bytes while converting them into the UCS4 \
+ format. We can assume that the first byte in the buffer is \
+ correct and that it requires a larger number of bytes than there \
+ are in the input buffer. */ \
+ wint_t ch = **inptrp; \
+ size_t cnt, r; \
+ \
+ state->__count = inend - *inptrp; \
+ \
+ if (ch >= 0xc2 && ch < 0xe0) \
+ { \
+ /* We expect two bytes. The first byte cannot be 0xc0 or \
+ 0xc1, otherwise the wide character could have been \
+ represented using a single byte. */ \
+ cnt = 2; \
+ ch &= 0x1f; \
+ } \
+ else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
+ { \
+ /* We expect three bytes. */ \
+ cnt = 3; \
+ ch &= 0x0f; \
+ } \
+ else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
+ { \
+ /* We expect four bytes. */ \
+ cnt = 4; \
+ ch &= 0x07; \
+ } \
+ else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
+ { \
+ /* We expect five bytes. */ \
+ cnt = 5; \
+ ch &= 0x03; \
+ } \
+ else \
+ { \
+ /* We expect six bytes. */ \
+ cnt = 6; \
+ ch &= 0x01; \
+ } \
+ \
+ /* The first byte is already consumed. */ \
+ r = cnt - 1; \
+ while (++(*inptrp) < inend) \
+ { \
+ ch <<= 6; \
+ ch |= **inptrp & 0x3f; \
+ --r; \
+ } \
+ \
+ /* Shift for the so far missing bytes. */ \
+ ch <<= r * 6; \
+ \
+ /* Store the number of bytes expected for the entire sequence. */ \
+ state->__count |= cnt << 8; \
+ \
+ /* Store the value. */ \
+ state->__value.__wch = ch; \
+ }
+
+#define UNPACK_BYTES \
+ { \
+ static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
+ wint_t wch = state->__value.__wch; \
+ size_t ntotal = state->__count >> 8; \
+ \
+ inlen = state->__count & 255; \
+ \
+ bytebuf[0] = inmask[ntotal - 2]; \
+ \
+ do \
+ { \
+ if (--ntotal < inlen) \
+ bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
+ wch >>= 6; \
+ } \
+ while (ntotal > 1); \
+ \
+ bytebuf[0] |= wch; \
+ }
+
+#define CLEAR_STATE \
+ state->__count = 0
+
+#include <iconv/loop.c>
+
+/* Conversion from UTF-32 internal/BE to UTF-8. */
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_TO
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
+#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
+#define LOOPFCT TO_LOOP
+/* The software routine mimics the S/390 cu41 instruction. */
+#define BODY \
+ { \
+ if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ { \
+ HARDWARE_CONVERT ("cu41 %0, %1"); \
+ \
+ if (inptr != inend) \
+ { \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ continue; \
+ } \
+ \
+ uint32_t wc = *((const uint32_t *) inptr); \
+ \
+ if (__builtin_expect (wc <= 0x7f, 1)) \
+ { \
+ /* Single UTF-8 char. */ \
+ *outptr = (uint8_t)wc; \
+ outptr++; \
+ } \
+ else if (wc <= 0x7ff) \
+ { \
+ /* Two UTF-8 chars. */ \
+ if (__builtin_expect (outptr + 2 > outend, 0)) \
+ { \
+ /* Overflow in the output buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
+ outptr[0] = 0xc0; \
+ outptr[0] |= wc >> 6; \
+ \
+ outptr[1] = 0x80; \
+ outptr[1] |= wc & 0x3f; \
+ \
+ outptr += 2; \
+ } \
+ else if (wc <= 0xffff) \
+ { \
+ /* Three UTF-8 chars. */ \
+ if (__builtin_expect (outptr + 3 > outend, 0)) \
+ { \
+ /* Overflow in the output buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ outptr[0] = 0xe0; \
+ outptr[0] |= wc >> 12; \
+ \
+ outptr[1] = 0x80; \
+ outptr[1] |= (wc >> 6) & 0x3f; \
+ \
+ outptr[2] = 0x80; \
+ outptr[2] |= wc & 0x3f; \
+ \
+ outptr += 3; \
+ } \
+ else if (wc <= 0x10ffff) \
+ { \
+ /* Four UTF-8 chars. */ \
+ if (__builtin_expect (outptr + 4 > outend, 0)) \
+ { \
+ /* Overflow in the output buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ outptr[0] = 0xf0; \
+ outptr[0] |= wc >> 18; \
+ \
+ outptr[1] = 0x80; \
+ outptr[1] |= (wc >> 12) & 0x3f; \
+ \
+ outptr[2] = 0x80; \
+ outptr[2] |= (wc >> 6) & 0x3f; \
+ \
+ outptr[3] = 0x80; \
+ outptr[3] |= wc & 0x3f; \
+ \
+ outptr += 4; \
+ } \
+ else \
+ { \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
+ } \
+ inptr += 4; \
+ }
+#define LOOP_NEED_FLAGS
+#include <iconv/loop.c>
+
+#include <iconv/skeleton.c>
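
The software fallback in the FROM_LOOP body above is the usual UTF-8 decode with an
overlong-sequence check. A stand-alone C sketch of that path (illustrative only; the
real loop operates on gconv buffers and uses STANDARD_FROM_LOOP_ERR_HANDLER instead
of return codes):

    #include <stdint.h>
    #include <stddef.h>

    /* Decode one UTF-8 sequence from IN (AVAIL bytes available) into *OUT.
       Returns the number of bytes consumed, 0 for incomplete input, or
       -1 for an ill-formed sequence.  */
    static int
    decode_utf8_char (const unsigned char *in, size_t avail, uint32_t *out)
    {
      uint32_t ch = in[0];
      unsigned int cnt, i;

      if (ch < 0x80)                       /* one-byte sequence */
        { *out = ch; return 1; }
      else if (ch >= 0xc2 && ch < 0xe0) { cnt = 2; ch &= 0x1f; }
      else if ((ch & 0xf0) == 0xe0)     { cnt = 3; ch &= 0x0f; }
      else if ((ch & 0xf8) == 0xf0)     { cnt = 4; ch &= 0x07; }
      else if ((ch & 0xfc) == 0xf8)     { cnt = 5; ch &= 0x03; }
      else if ((ch & 0xfe) == 0xfc)     { cnt = 6; ch &= 0x01; }
      else
        return -1;                       /* invalid lead byte (0x80..0xc1, 0xfe, 0xff) */

      if (avail < cnt)
        return 0;                        /* incomplete input */

      for (i = 1; i < cnt; ++i)
        {
          if ((in[i] & 0xc0) != 0x80)    /* not a continuation byte */
            return -1;
          ch = (ch << 6) | (in[i] & 0x3f);
        }

      /* Overlong check from the loop above: a cnt-byte sequence whose value
         fits in 5 * cnt - 4 bits could have been encoded with fewer bytes.  */
      if (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)
        return -1;

      *out = ch;
      return (int) cnt;
    }
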
diff --git a/sysdeps/unix/sysv/linux/configure b/sysdeps/unix/sysv/linux/configure
index 253e9c57ff..199457a3ac 100644
--- a/sysdeps/unix/sysv/linux/configure
+++ b/sysdeps/unix/sysv/linux/configure
@@ -1,17 +1,6 @@
# This file is generated from configure.in by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/unix/sysv/linux.
-# The Linux kernel headers can be found in
-# /lib/modules/$(uname -r)/build/include
-# Check whether this directory is available.
-if test -z "$sysheaders" &&
- test "x$cross_compiling" = xno &&
- test -d /lib/modules/`uname -r`/build/include; then
- sysheaders="/lib/modules/`uname -r`/build/include"
- ccheaders=`$CC -print-file-name=include`
- SYSINCLUDES="-I $sysheaders"
-fi
-
# Don't bother trying to generate any glue code to be compatible with the
# existing system library, because we are the only system library.
inhibit_glue=yes
diff --git a/sysdeps/unix/sysv/linux/configure.in b/sysdeps/unix/sysv/linux/configure.in
index 5330e98c2d..8f00407a8b 100644
--- a/sysdeps/unix/sysv/linux/configure.in
+++ b/sysdeps/unix/sysv/linux/configure.in
@@ -1,19 +1,6 @@
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/unix/sysv/linux.
-# The Linux kernel headers can be found in
-# /lib/modules/$(uname -r)/build/include
-# Check whether this directory is available.
-if test -z "$sysheaders" &&
- test "x$cross_compiling" = xno &&
- test -d /lib/modules/`uname -r`/build/include; then
- sysheaders="/lib/modules/`uname -r`/build/include"
- ccheaders=`$CC -print-file-name=include`
- dnl We don't have to use -nostdinc. We just want one more directory
- dnl to be used.
- SYSINCLUDES="-I $sysheaders"
-fi
-
# Don't bother trying to generate any glue code to be compatible with the
# existing system library, because we are the only system library.
inhibit_glue=yes
diff --git a/sysdeps/unix/sysv/linux/eventfd.c b/sysdeps/unix/sysv/linux/eventfd.c
index 4cd557983e..7f69ecdb8c 100644
--- a/sysdeps/unix/sysv/linux/eventfd.c
+++ b/sysdeps/unix/sysv/linux/eventfd.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,14 +19,21 @@
#include <errno.h>
#include <sys/eventfd.h>
#include <sysdep.h>
+#include <kernel-features.h>
int
eventfd (int count, int flags)
{
#ifdef __NR_eventfd2
- return INLINE_SYSCALL (eventfd2, 2, count, flags);
-#else
+ int res = INLINE_SYSCALL (eventfd2, 2, count, flags);
+# ifndef __ASSUME_EVENTFD2
+ if (res != -1 || errno != ENOSYS)
+# endif
+ return res;
+#endif
+
+#ifndef __ASSUME_EVENTFD2
/* The old system call has no flag parameter which is bad. So we have
to wait until we have to support to pass additional values to the
kernel (sys_indirect) before implementing setting flags like
@@ -43,5 +50,7 @@ eventfd (int count, int flags)
__set_errno (ENOSYS);
return -1;
# endif
+#elif !defined __NR_eventfd2
+# error "__ASSUME_EVENTFD2 defined but not __NR_eventfd2"
#endif
}
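
For context, here is a minimal caller of this interface (not part of the patch; it uses
the standard <sys/eventfd.h> API with the EFD_* flags available since kernel 2.6.27).
It shows why the flags argument, and therefore the eventfd2 syscall, matters: the old
eventfd syscall cannot set close-on-exec or non-blocking mode atomically.

    #include <sys/eventfd.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main (void)
    {
      int fd = eventfd (0, EFD_CLOEXEC | EFD_NONBLOCK);
      if (fd == -1)
        {
          perror ("eventfd");
          return 1;
        }

      uint64_t v = 1;
      if (write (fd, &v, sizeof v) != sizeof v)   /* add 1 to the counter */
        perror ("write");
      if (read (fd, &v, sizeof v) == sizeof v)    /* read counter and reset it */
        printf ("counter was %llu\n", (unsigned long long) v);

      close (fd);
      return 0;
    }
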
diff --git a/sysdeps/unix/sysv/linux/i386/sysconf.c b/sysdeps/unix/sysv/linux/i386/sysconf.c
index efe1a639cd..ff3cf9f7c7 100644
--- a/sysdeps/unix/sysv/linux/i386/sysconf.c
+++ b/sysdeps/unix/sysv/linux/i386/sysconf.c
@@ -138,6 +138,9 @@ static const struct intel_02_cache_info
{ 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
{ 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
{ 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
+ { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
+ { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
+ { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
};
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known[0]))
diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
index 4562515790..ff065effb5 100644
--- a/sysdeps/unix/sysv/linux/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/kernel-features.h
@@ -516,6 +516,8 @@
# define __ASSUME_SOCK_CLOEXEC 1
# define __ASSUME_IN_NONBLOCK 1
# define __ASSUME_PIPE2 1
+# define __ASSUME_EVENTFD2 1
+# define __ASSUME_SIGNALFD4 1
#endif
/* Support for the accept4 syscall was added in 2.6.28. */
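
These __ASSUME_* macros turn the ENOSYS fallback in eventfd () and signalfd () into
dead code when the minimum supported kernel already provides the new syscalls. A
sketch of how such a block is typically keyed off the kernel version (the 2.6.27
bound and the version encoding below are assumptions for illustration, not quoted
from the header):

    /* Hypothetical fragment in the style of kernel-features.h.  */
    #if __LINUX_KERNEL_VERSION >= 0x02061b       /* 2.6.27 */
    # define __ASSUME_EVENTFD2   1
    # define __ASSUME_SIGNALFD4  1
    #endif
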
diff --git a/sysdeps/unix/sysv/linux/signalfd.c b/sysdeps/unix/sysv/linux/signalfd.c
index 9898f29231..c2d974a45d 100644
--- a/sysdeps/unix/sysv/linux/signalfd.c
+++ b/sysdeps/unix/sysv/linux/signalfd.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,14 +20,21 @@
#include <signal.h>
#include <sys/signalfd.h>
#include <sysdep.h>
+#include <kernel-features.h>
int
signalfd (int fd, const sigset_t *mask, int flags)
{
#ifdef __NR_signalfd4
- return INLINE_SYSCALL (signalfd4, 4, fd, mask, _NSIG / 8, flags);
-#else
+ int res = INLINE_SYSCALL (signalfd4, 4, fd, mask, _NSIG / 8, flags);
+# ifndef __ASSUME_SIGNALFD4
+ if (res != -1 || errno != ENOSYS)
+# endif
+ return res;
+#endif
+
+#ifndef __ASSUME_SIGNALFD4
/* The old system call has no flag parameter which is bad. So we have
to wait until we have to support to pass additional values to the
kernel (sys_indirect) before implementing setting flags like
@@ -44,5 +51,7 @@ signalfd (int fd, const sigset_t *mask, int flags)
__set_errno (ENOSYS);
return -1;
# endif
+#elif !defined __NR_signalfd4
+# error "__ASSUME_SIGNALFD4 defined but not __NR_signalfd4"
#endif
}
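
As with eventfd above, the flags argument is the point of the signalfd4 syscall. A
minimal caller (not part of the patch; standard <sys/signalfd.h> API), blocking SIGINT
and reading it through a close-on-exec descriptor:

    #include <sys/signalfd.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main (void)
    {
      sigset_t mask;
      sigemptyset (&mask);
      sigaddset (&mask, SIGINT);
      sigprocmask (SIG_BLOCK, &mask, NULL);   /* signals must be blocked first */

      int fd = signalfd (-1, &mask, SFD_CLOEXEC);
      if (fd == -1)
        {
          perror ("signalfd");
          return 1;
        }

      struct signalfd_siginfo si;
      if (read (fd, &si, sizeof si) == sizeof si)
        printf ("received signal %u\n", si.ssi_signo);

      close (fd);
      return 0;
    }
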
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 78fdb04fcb..57cd88432a 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -19,6 +19,10 @@ ifeq ($(subdir),elf)
sysdep-dl-routines += tlsdesc dl-tlsdesc
sysdep_routines += tlsdesc dl-tlsdesc
sysdep-rtld-routines += tlsdesc dl-tlsdesc
+
+tests: $(objpfx)tst-xmmymm.out
+$(objpfx)tst-xmmymm.out: ../sysdeps/x86_64/tst-xmmymm.sh $(objpfx)ld.so
+ $(SHELL) -e $< $(objpfx) > $@
endif
ifeq ($(subdir),csu)
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index 362687c181..75b81958dd 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -25,6 +25,17 @@
#ifdef USE_MULTIARCH
# include "multiarch/init-arch.h"
+
+# define is_intel __cpu_features.kind == arch_kind_intel
+# define is_amd __cpu_features.kind == arch_kind_amd
+# define max_cpuid __cpu_features.max_cpuid
+#else
+ /* This spells out "GenuineIntel". */
+# define is_intel \
+ ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
+ /* This spells out "AuthenticAMD". */
+# define is_amd \
+ ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
#endif
static const struct intel_02_cache_info
@@ -100,6 +111,9 @@ static const struct intel_02_cache_info
{ 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
{ 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
{ 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
+ { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
+ { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
+ { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
};
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
@@ -152,6 +166,12 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
/* Intel reused this value. For family 15, model 6 it
specifies the 3rd level cache. Otherwise the 2nd
level cache. */
+ unsigned int family;
+ unsigned int model;
+#ifdef USE_MULTIARCH
+ family = __cpu_features.family;
+ model = __cpu_features.model;
+#else
unsigned int eax;
unsigned int ebx;
unsigned int ecx;
@@ -160,9 +180,10 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "0" (1));
- unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
- unsigned int model = ((((eax >>16) & 0xf) << 4)
- + ((eax >> 4) & 0xf));
+ family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
+ model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
+#endif
+
if (family == 15 && model == 6)
{
/* The level 3 cache is encoded for this model like
@@ -394,21 +415,24 @@ long int
attribute_hidden
__cache_sysconf (int name)
{
+#ifdef USE_MULTIARCH
+ if (__cpu_features.kind == arch_kind_unknown)
+ __init_cpu_features ();
+#else
/* Find out what brand of processor. */
- unsigned int eax;
+ unsigned int max_cpuid;
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
asm volatile ("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "0" (0));
+#endif
- /* This spells out "GenuineIntel". */
- if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
- return handle_intel (name, eax);
+ if (is_intel)
+ return handle_intel (name, max_cpuid);
- /* This spells out "AuthenticAMD". */
- if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+ if (is_amd)
return handle_amd (name);
// XXX Fill in more vendors.
@@ -457,20 +481,11 @@ init_cacheinfo (void)
#ifdef USE_MULTIARCH
if (__cpu_features.kind == arch_kind_unknown)
__init_cpu_features ();
-# define is_intel __cpu_features.kind == arch_kind_intel
-# define is_amd __cpu_features.kind == arch_kind_amd
-# define max_cpuid __cpu_features.max_cpuid
#else
int max_cpuid;
asm volatile ("cpuid"
: "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "0" (0));
- /* This spells out "GenuineIntel". */
-# define is_intel \
- ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
- /* This spells out "AuthenticAMD". */
-# define is_amd \
- ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
#endif
if (is_intel)
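
The family/model values cached in __cpu_features (and computed inline in the
non-multiarch branch above) come from CPUID leaf 1 EAX. A small stand-alone
illustration of the same extraction:

    /* Extract the display family/model from a CPUID leaf 1 EAX value.  */
    static void
    decode_cpuid_signature (unsigned int eax,
                            unsigned int *family, unsigned int *model)
    {
      *family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
      *model = (((eax >> 16) & 0xf) << 4) + ((eax >> 4) & 0xf);
    }

    /* For example, an Atom signature such as 0x000106c2 yields family 0x6
       and model 0x1c, the combination special-cased in init-arch.c below.  */
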
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 5ce14aad8d..b066402204 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -4,7 +4,7 @@ gen-as-const-headers += ifunc-defines.sym
endif
ifeq ($(subdir),string)
-sysdep_routines += stpncpy-c strncpy-c strncmp-c
+sysdep_routines += stpncpy-c strncpy-c
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-strcspn-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 29e687344d..35fd19af0e 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -68,7 +68,13 @@ __init_cpu_features (void)
__cpu_features.model += extended_model;
}
else if (__cpu_features.family == 0x06)
- __cpu_features.model += extended_model;
+ {
+ __cpu_features.model += extended_model;
+
+ if (__cpu_features.model == 0x1c)
+ /* Avoid SSSE3 on Atom since it is slow. */
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~(1 << 9);
+ }
}
/* This spells out "AuthenticAMD". */
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
diff --git a/sysdeps/x86_64/multiarch/rtld-rawmemchr.c b/sysdeps/x86_64/multiarch/rtld-rawmemchr.c
new file mode 100644
index 0000000000..53a90675ab
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rtld-rawmemchr.c
@@ -0,0 +1 @@
+#include "../rtld-rawmemchr.c"
diff --git a/sysdeps/x86_64/multiarch/rtld-strlen.S b/sysdeps/x86_64/multiarch/rtld-strlen.S
new file mode 100644
index 0000000000..596e0549ea
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rtld-strlen.S
@@ -0,0 +1 @@
+#include "../rtld-strlen.S"
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index 37985036aa..1a315737af 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -28,9 +28,9 @@
/* calculate left number to compare */ \
lea -16(%rcx, %r11), %r9; \
cmp %r9, %r11; \
- jb LABEL(strcmp_exitz); \
+ jb LABEL(strcmp_exitz_sse4_2); \
test %r9, %r9; \
- je LABEL(strcmp_exitz); \
+ je LABEL(strcmp_exitz_sse4_2); \
mov %r9, %r11
#define STRCMP_SSE42 __strncmp_sse42
@@ -106,9 +106,9 @@ STRCMP_SSE42:
*/
#ifdef USE_AS_STRNCMP
test %rdx, %rdx
- je LABEL(strcmp_exitz)
+ je LABEL(strcmp_exitz_sse4_2)
cmp $1, %rdx
- je LABEL(Byte0)
+ je LABEL(Byte0_sse4_2)
mov %rdx, %r11
#endif
mov %esi, %ecx
@@ -117,23 +117,21 @@ STRCMP_SSE42:
and $0x3f, %rcx /* rsi alignment in cache line */
and $0x3f, %rax /* rdi alignment in cache line */
cmp $0x30, %ecx
- ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
+ ja LABEL(crosscache_sse4_2)/* rsi: 16-byte load will cross cache line */
cmp $0x30, %eax
- ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
- movlpd (%rdi), %xmm1
- movlpd (%rsi), %xmm2
- movhpd 8(%rdi), %xmm1
- movhpd 8(%rsi), %xmm2
+ ja LABEL(crosscache_sse4_2)/* rdi: 16-byte load will cross cache line */
+ movdqu (%rdi), %xmm1
+ movdqu (%rsi), %xmm2
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
- jnz LABEL(less16bytes) /* If not, find different value or null char */
+ jnz LABEL(less16bytes_sse4_2)/* If not, find different value or null char */
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz) /* finish comparision */
+	jbe	LABEL(strcmp_exitz_sse4_2)/* finish comparison */
#endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
@@ -144,7 +142,7 @@ STRCMP_SSE42:
* below to use.
*/
.p2align 4
-LABEL(crosscache):
+LABEL(crosscache_sse4_2):
and $0xfffffffffffffff0, %rsi /* force %rsi is 16 byte aligned */
and $0xfffffffffffffff0, %rdi /* force %rdi is 16 byte aligned */
mov $0xffff, %edx /* for equivalent offset */
@@ -152,15 +150,15 @@ LABEL(crosscache):
and $0xf, %ecx /* offset of rsi */
and $0xf, %eax /* offset of rdi */
cmp %eax, %ecx
- je LABEL(ashr_0) /* rsi and rdi relative offset same */
- ja LABEL(bigger)
+ je LABEL(ashr_0_sse4_2) /* rsi and rdi relative offset same */
+ ja LABEL(bigger_sse4_2)
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
-LABEL(bigger):
+LABEL(bigger_sse4_2):
lea 15(%rax), %r9
sub %rcx, %r9
- lea LABEL(unaligned_table)(%rip), %r10
+ lea LABEL(unaligned_table_sse4_2)(%rip), %r10
movslq (%r10, %r9,4), %r9
lea (%r10, %r9), %r10
jmp *%r10 /* jump to corresponding case */
@@ -171,7 +169,7 @@ LABEL(bigger):
* n(0~15) n(0~15) 15(15+ n-n) ashr_0
*/
.p2align 4
-LABEL(ashr_0):
+LABEL(ashr_0_sse4_2):
movdqa (%rsi), %xmm1
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
@@ -186,7 +184,7 @@ LABEL(ashr_0):
* edx must be the same with r9d if in left byte (16-rcx) is equal to
* the start from (16-rax) and no null char was seen.
*/
- jne LABEL(less32bytes) /* mismatch or null char */
+ jne LABEL(less32bytes_sse4_2) /* mismatch or null char */
UPDATE_STRNCMP_COUNTER
mov $16, %rcx
mov $16, %r9
@@ -205,7 +203,7 @@ LABEL(ashr_0_use_sse4_2):
jbe LABEL(ashr_0_use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
movdqa (%rdi,%rdx), %xmm0
@@ -214,17 +212,17 @@ LABEL(ashr_0_use_sse4_2):
jbe LABEL(ashr_0_use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
jmp LABEL(ashr_0_use_sse4_2)
.p2align 4
LABEL(ashr_0_use_sse4_2_exit):
- jnc LABEL(strcmp_exitz)
+ jnc LABEL(strcmp_exitz_sse4_2)
#ifdef USE_AS_STRNCMP
sub %rcx, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
lea -16(%rdx, %rcx), %rcx
movzbl (%rdi, %rcx), %eax
@@ -241,7 +239,7 @@ LABEL(ashr_0_use_sse4_2_exit):
* n(15) n -15 0(15 +(n-15) - n) ashr_1
*/
.p2align 4
-LABEL(ashr_1):
+LABEL(ashr_1_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -253,7 +251,7 @@ LABEL(ashr_1):
shr %cl, %edx /* adjust 0xffff for offset */
shr %cl, %r9d /* adjust for 16-byte offset */
sub %r9d, %edx
- jnz LABEL(less32bytes) /* mismatch or null char seen */
+ jnz LABEL(less32bytes_sse4_2)/* mismatch or null char seen */
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -281,7 +279,7 @@ LABEL(loop_ashr_1_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -294,7 +292,7 @@ LABEL(loop_ashr_1_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_1_use_sse4_2)
@@ -320,7 +318,7 @@ LABEL(nibble_ashr_1_use_sse4_2):
* n(14~15) n -14 1(15 +(n-14) - n) ashr_2
*/
.p2align 4
-LABEL(ashr_2):
+LABEL(ashr_2_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -332,7 +330,7 @@ LABEL(ashr_2):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -360,7 +358,7 @@ LABEL(loop_ashr_2_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -373,7 +371,7 @@ LABEL(loop_ashr_2_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_2_use_sse4_2)
@@ -399,7 +397,7 @@ LABEL(nibble_ashr_2_use_sse4_2):
* n(13~15) n -13 2(15 +(n-13) - n) ashr_3
*/
.p2align 4
-LABEL(ashr_3):
+LABEL(ashr_3_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -411,7 +409,7 @@ LABEL(ashr_3):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -439,7 +437,7 @@ LABEL(loop_ashr_3_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -452,7 +450,7 @@ LABEL(loop_ashr_3_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_3_use_sse4_2)
@@ -478,7 +476,7 @@ LABEL(nibble_ashr_3_use_sse4_2):
* n(12~15) n -12 3(15 +(n-12) - n) ashr_4
*/
.p2align 4
-LABEL(ashr_4):
+LABEL(ashr_4_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -490,7 +488,7 @@ LABEL(ashr_4):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -519,7 +517,7 @@ LABEL(loop_ashr_4_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -532,7 +530,7 @@ LABEL(loop_ashr_4_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_4_use_sse4_2)
@@ -558,7 +556,7 @@ LABEL(nibble_ashr_4_use_sse4_2):
* n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
*/
.p2align 4
-LABEL(ashr_5):
+LABEL(ashr_5_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -570,7 +568,7 @@ LABEL(ashr_5):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -599,7 +597,7 @@ LABEL(loop_ashr_5_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -613,7 +611,7 @@ LABEL(loop_ashr_5_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_5_use_sse4_2)
@@ -639,7 +637,7 @@ LABEL(nibble_ashr_5_use_sse4_2):
* n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
*/
.p2align 4
-LABEL(ashr_6):
+LABEL(ashr_6_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -651,7 +649,7 @@ LABEL(ashr_6):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -680,7 +678,7 @@ LABEL(loop_ashr_6_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -693,7 +691,7 @@ LABEL(loop_ashr_6_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_6_use_sse4_2)
@@ -719,7 +717,7 @@ LABEL(nibble_ashr_6_use_sse4_2):
* n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
*/
.p2align 4
-LABEL(ashr_7):
+LABEL(ashr_7_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -731,7 +729,7 @@ LABEL(ashr_7):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -760,7 +758,7 @@ LABEL(loop_ashr_7_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -773,7 +771,7 @@ LABEL(loop_ashr_7_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_7_use_sse4_2)
@@ -799,7 +797,7 @@ LABEL(nibble_ashr_7_use_sse4_2):
* n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
*/
.p2align 4
-LABEL(ashr_8):
+LABEL(ashr_8_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -811,7 +809,7 @@ LABEL(ashr_8):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -840,7 +838,7 @@ LABEL(loop_ashr_8_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -853,7 +851,7 @@ LABEL(loop_ashr_8_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_8_use_sse4_2)
@@ -879,7 +877,7 @@ LABEL(nibble_ashr_8_use_sse4_2):
* n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
*/
.p2align 4
-LABEL(ashr_9):
+LABEL(ashr_9_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -891,7 +889,7 @@ LABEL(ashr_9):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -921,7 +919,7 @@ LABEL(loop_ashr_9_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -934,7 +932,7 @@ LABEL(loop_ashr_9_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_9_use_sse4_2)
@@ -960,7 +958,7 @@ LABEL(nibble_ashr_9_use_sse4_2):
* n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
*/
.p2align 4
-LABEL(ashr_10):
+LABEL(ashr_10_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -972,7 +970,7 @@ LABEL(ashr_10):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -1001,7 +999,7 @@ LABEL(loop_ashr_10_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -1014,7 +1012,7 @@ LABEL(loop_ashr_10_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_10_use_sse4_2)
@@ -1040,7 +1038,7 @@ LABEL(nibble_ashr_10_use_sse4_2):
* n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
*/
.p2align 4
-LABEL(ashr_11):
+LABEL(ashr_11_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -1052,7 +1050,7 @@ LABEL(ashr_11):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -1081,7 +1079,7 @@ LABEL(loop_ashr_11_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -1094,7 +1092,7 @@ LABEL(loop_ashr_11_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_11_use_sse4_2)
@@ -1120,7 +1118,7 @@ LABEL(nibble_ashr_11_use_sse4_2):
* n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
*/
.p2align 4
-LABEL(ashr_12):
+LABEL(ashr_12_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -1132,7 +1130,7 @@ LABEL(ashr_12):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -1161,7 +1159,7 @@ LABEL(loop_ashr_12_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -1174,7 +1172,7 @@ LABEL(loop_ashr_12_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_12_use_sse4_2)
@@ -1200,7 +1198,7 @@ LABEL(nibble_ashr_12_use_sse4_2):
* n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
*/
.p2align 4
-LABEL(ashr_13):
+LABEL(ashr_13_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -1212,7 +1210,7 @@ LABEL(ashr_13):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -1242,7 +1240,7 @@ LABEL(loop_ashr_13_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -1255,7 +1253,7 @@ LABEL(loop_ashr_13_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_13_use_sse4_2)
@@ -1281,7 +1279,7 @@ LABEL(nibble_ashr_13_use_sse4_2):
* n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
*/
.p2align 4
-LABEL(ashr_14):
+LABEL(ashr_14_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -1293,7 +1291,7 @@ LABEL(ashr_14):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
@@ -1323,7 +1321,7 @@ LABEL(loop_ashr_14_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -1336,7 +1334,7 @@ LABEL(loop_ashr_14_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_14_use_sse4_2)
@@ -1362,7 +1360,7 @@ LABEL(nibble_ashr_14_use_sse4_2):
* n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
*/
.p2align 4
-LABEL(ashr_15):
+LABEL(ashr_15_sse4_2):
pxor %xmm0, %xmm0
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -1374,7 +1372,7 @@ LABEL(ashr_15):
shr %cl, %edx
shr %cl, %r9d
sub %r9d, %edx
- jnz LABEL(less32bytes)
+ jnz LABEL(less32bytes_sse4_2)
movdqa (%rdi), %xmm3
@@ -1406,7 +1404,7 @@ LABEL(loop_ashr_15_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
@@ -1419,7 +1417,7 @@ LABEL(loop_ashr_15_use_sse4_2):
jbe LABEL(use_sse4_2_exit)
#ifdef USE_AS_STRNCMP
sub $16, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add $16, %rdx
jmp LABEL(loop_ashr_15_use_sse4_2)
@@ -1441,219 +1439,78 @@ LABEL(nibble_ashr_use_sse4_2_exit):
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
.p2align 4
LABEL(use_sse4_2_exit):
- jnc LABEL(strcmp_exitz)
+ jnc LABEL(strcmp_exitz_sse4_2)
#ifdef USE_AS_STRNCMP
sub %rcx, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
add %rcx, %rdx
lea -16(%rdi, %r9), %rdi
movzbl (%rdi, %rdx), %eax
movzbl (%rsi, %rdx), %edx
test %r8d, %r8d
- jz LABEL(use_sse4_2_ret)
+ jz LABEL(use_sse4_2_ret_sse4_2)
xchg %eax, %edx
-LABEL(use_sse4_2_ret):
+LABEL(use_sse4_2_ret_sse4_2):
sub %edx, %eax
ret
- .p2align 4
-LABEL(aftertail):
- pcmpeqb %xmm3, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %edx
- not %edx
-
- .p2align 4
-LABEL(exit):
- lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
-LABEL(less32bytes):
+LABEL(less32bytes_sse4_2):
lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
test %r8d, %r8d
- jz LABEL(ret)
+ jz LABEL(ret_sse4_2)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
.p2align 4
-LABEL(ret):
-LABEL(less16bytes):
- /*
- * Check to see if BSF is fast on this processor. If not, use a different
- * exit tail.
- */
+LABEL(ret_sse4_2):
+LABEL(less16bytes_sse4_2):
bsf %rdx, %rdx /* find and store bit index in %rdx */
#ifdef USE_AS_STRNCMP
sub %rdx, %r11
- jbe LABEL(strcmp_exitz)
+ jbe LABEL(strcmp_exitz_sse4_2)
#endif
- xor %ecx, %ecx /* clear %ecx */
- xor %eax, %eax /* clear %eax */
-
- movb (%rsi, %rdx), %cl
- movb (%rdi, %rdx), %al
+ movzbl (%rsi, %rdx), %ecx
+ movzbl (%rdi, %rdx), %eax
sub %ecx, %eax
ret
-LABEL(strcmp_exitz):
+LABEL(strcmp_exitz_sse4_2):
xor %eax, %eax
ret
.p2align 4
-LABEL(Byte0):
- /*
- * never need to handle byte 0 for strncmpy
-#ifdef USE_AS_STRNCMP
- sub $0, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- */
+LABEL(Byte0_sse4_2):
movzx (%rsi), %ecx
movzx (%rdi), %eax
sub %ecx, %eax
ret
-
- .p2align 4
-LABEL(Byte1):
-
-#ifdef USE_AS_STRNCMP
- sub $1, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- movzx 1(%rsi), %ecx
- movzx 1(%rdi), %eax
-
- sub %ecx, %eax
- ret
-
- .p2align 4
-LABEL(Byte2):
-
-#ifdef USE_AS_STRNCMP
- sub $2, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- movzx 2(%rsi), %ecx
- movzx 2(%rdi), %eax
-
- sub %ecx, %eax
- ret
-
- .p2align 4
-LABEL(Byte3):
-
-#ifdef USE_AS_STRNCMP
- sub $3, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- movzx 3(%rsi), %ecx
- movzx 3(%rdi), %eax
-
- sub %ecx, %eax
- ret
-
- .p2align 4
-LABEL(Byte4):
-
-#ifdef USE_AS_STRNCMP
- sub $4, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- movzx 4(%rsi), %ecx
- movzx 4(%rdi), %eax
-
- sub %ecx, %eax
- ret
-
- .p2align 4
-LABEL(Byte5):
-
-#ifdef USE_AS_STRNCMP
- sub $5, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- movzx 5(%rsi), %ecx
- movzx 5(%rdi), %eax
-
- sub %ecx, %eax
- ret
-
- .p2align 4
-LABEL(Byte6):
-
-#ifdef USE_AS_STRNCMP
- sub $6, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- movzx 6(%rsi), %ecx
- movzx 6(%rdi), %eax
-
- sub %ecx, %eax
- ret
-
- .p2align 4
-LABEL(next_8_bytes):
- add $8, %rdi
- add $8, %rsi
-#ifdef USE_AS_STRNCMP
- sub $8, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- test $0x01, %dh
- jnz LABEL(Byte0)
-
- test $0x02, %dh
- jnz LABEL(Byte1)
-
- test $0x04, %dh
- jnz LABEL(Byte2)
-
- test $0x08, %dh
- jnz LABEL(Byte3)
-
- test $0x10, %dh
- jnz LABEL(Byte4)
-
- test $0x20, %dh
- jnz LABEL(Byte5)
-
- test $0x40, %dh
- jnz LABEL(Byte6)
-
-#ifdef USE_AS_STRNCMP
- sub $7, %r11
- jbe LABEL(strcmp_exitz)
-#endif
- movzx 7(%rsi), %ecx
- movzx 7(%rdi), %eax
-
- sub %ecx, %eax
- ret
cfi_endproc
.size STRCMP_SSE42, .-STRCMP_SSE42
/* Put all SSE 4.2 functions together. */
.section .rodata.sse4.2,"a",@progbits
- .p2align 4
-LABEL(unaligned_table):
- .int LABEL(ashr_1) - LABEL(unaligned_table)
- .int LABEL(ashr_2) - LABEL(unaligned_table)
- .int LABEL(ashr_3) - LABEL(unaligned_table)
- .int LABEL(ashr_4) - LABEL(unaligned_table)
- .int LABEL(ashr_5) - LABEL(unaligned_table)
- .int LABEL(ashr_6) - LABEL(unaligned_table)
- .int LABEL(ashr_7) - LABEL(unaligned_table)
- .int LABEL(ashr_8) - LABEL(unaligned_table)
- .int LABEL(ashr_9) - LABEL(unaligned_table)
- .int LABEL(ashr_10) - LABEL(unaligned_table)
- .int LABEL(ashr_11) - LABEL(unaligned_table)
- .int LABEL(ashr_12) - LABEL(unaligned_table)
- .int LABEL(ashr_13) - LABEL(unaligned_table)
- .int LABEL(ashr_14) - LABEL(unaligned_table)
- .int LABEL(ashr_15) - LABEL(unaligned_table)
- .int LABEL(ashr_0) - LABEL(unaligned_table)
+ .p2align 3
+LABEL(unaligned_table_sse4_2):
+ .int LABEL(ashr_1_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_2_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_3_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_4_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_5_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_6_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_7_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_8_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_9_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_10_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_11_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_12_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_13_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_14_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_15_sse4_2) - LABEL(unaligned_table_sse4_2)
+ .int LABEL(ashr_0_sse4_2) - LABEL(unaligned_table_sse4_2)
# undef ENTRY
@@ -1673,6 +1530,4 @@ LABEL(unaligned_table):
.globl __GI_STRCMP; __GI_STRCMP = STRCMP_SSE2
#endif
-#ifndef USE_AS_STRNCMP
#include "../strcmp.S"
-#endif
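
The head of the rewritten function above now does one unaligned 16-byte load per
string (movdqu) instead of the movlpd/movhpd pairs; the comparison step itself is
unchanged. Expressed with SSE2 intrinsics as a stand-alone sketch (illustrative
names, not glibc code):

    #include <emmintrin.h>

    /* Returns 0 if the first 16 bytes of A and B are equal and contain no
       NUL; otherwise the result has a bit set at every position where the
       bytes differ or where the strings end (NUL).  */
    static int
    first16_mask (const char *a, const char *b)
    {
      __m128i va   = _mm_loadu_si128 ((const __m128i *) a);
      __m128i vb   = _mm_loadu_si128 ((const __m128i *) b);
      __m128i zero = _mm_setzero_si128 ();

      __m128i nul = _mm_cmpeq_epi8 (va, zero);  /* 0xff where a[i] == 0 */
      __m128i eq  = _mm_cmpeq_epi8 (va, vb);    /* 0xff where a[i] == b[i] */
      __m128i ok  = _mm_sub_epi8 (eq, nul);     /* sign bit set only where the
                                                   bytes are equal and non-NUL */

      /* The assembly subtracts 0xffff and tests for zero; XOR gives the
         same zero test and marks the offending positions directly.  */
      return _mm_movemask_epi8 (ok) ^ 0xffff;
    }
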
diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S
index 25cd01307d..7e400a9140 100644
--- a/sysdeps/x86_64/multiarch/strcpy.S
+++ b/sysdeps/x86_64/multiarch/strcpy.S
@@ -64,16 +64,9 @@ ENTRY(STRCPY)
call __init_cpu_features
1: leaq STRCPY_SSE2(%rip), %rax
testl $(1<<9), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
- jz 3f
-/* Avoid SSSE3 strcpy on Atom since it is slow. */
- cmpl $1, __cpu_features+KIND_OFFSET(%rip)
- jne 2f
- cmpl $6, __cpu_features+FAMILY_OFFSET(%rip)
- jne 2f
- cmpl $28, __cpu_features+MODEL_OFFSET(%rip)
- jz 3f
-2: leaq STRCPY_SSSE3(%rip), %rax
-3: ret
+ jz 2f
+ leaq STRCPY_SSSE3(%rip), %rax
+2: ret
END(STRCPY)
.section .text.ssse3,"ax",@progbits
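
After this change the resolver only tests the SSSE3 CPUID bit; the Atom exception now
lives in __init_cpu_features, which simply clears that bit. The selection logic as a C
sketch, with made-up symbol names (the real resolver is the assembly above):

    typedef char *(*strcpy_fn) (char *, const char *);

    extern char *strcpy_sse2 (char *, const char *);   /* hypothetical */
    extern char *strcpy_ssse3 (char *, const char *);  /* hypothetical */

    static strcpy_fn
    resolve_strcpy (unsigned int cpuid_1_ecx)
    {
      /* Bit 9 of CPUID leaf 1 ECX signals SSSE3; on Atom it has already
         been masked out by __init_cpu_features.  */
      return (cpuid_1_ecx & (1u << 9)) ? strcpy_ssse3 : strcpy_sse2;
    }
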
diff --git a/sysdeps/x86_64/multiarch/strncmp-c.c b/sysdeps/x86_64/multiarch/strncmp-c.c
deleted file mode 100644
index d4f74a418d..0000000000
--- a/sysdeps/x86_64/multiarch/strncmp-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifdef SHARED
-#define STRNCMP __strncmp_sse2
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name) \
- __hidden_ver1 (__strncmp_sse2, __GI_strncmp, __strncmp_sse2);
-#endif
-
-#include "strncmp.c"
diff --git a/sysdeps/x86_64/rtld-memchr.c b/sysdeps/x86_64/rtld-memchr.c
new file mode 100644
index 0000000000..f63fefbcec
--- /dev/null
+++ b/sysdeps/x86_64/rtld-memchr.c
@@ -0,0 +1 @@
+#include <string/memchr.c>
diff --git a/sysdeps/x86_64/rtld-memcmp.c b/sysdeps/x86_64/rtld-memcmp.c
new file mode 100644
index 0000000000..2ee40328b8
--- /dev/null
+++ b/sysdeps/x86_64/rtld-memcmp.c
@@ -0,0 +1 @@
+#include <string/memcmp.c>
diff --git a/sysdeps/x86_64/rtld-rawmemchr.c b/sysdeps/x86_64/rtld-rawmemchr.c
new file mode 100644
index 0000000000..2b9189393c
--- /dev/null
+++ b/sysdeps/x86_64/rtld-rawmemchr.c
@@ -0,0 +1 @@
+#include <string/rawmemchr.c>
diff --git a/sysdeps/x86_64/rtld-strchr.S b/sysdeps/x86_64/rtld-strchr.S
new file mode 100644
index 0000000000..8934697972
--- /dev/null
+++ b/sysdeps/x86_64/rtld-strchr.S
@@ -0,0 +1,291 @@
+/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
+ For AMD x86-64.
+ Copyright (C) 2002, 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+
+ .text
+ENTRY (BP_SYM (strchr))
+
+ /* Before we start with the main loop we process single bytes
+	   until the source pointer is aligned.  There are two reasons for this:
+	   1. aligned 64-bit memory access is faster
+	   and (more importantly)
+	   2. in the main loop we process 64 bits in one step even though
+ we don't know the end of the string. But accessing at
+ 8-byte alignment guarantees that we never access illegal
+ memory if this would not also be done by the trivial
+ implementation (this is because all processor inherent
+ boundaries are multiples of 8). */
+
+ movq %rdi, %rdx
+ andl $7, %edx /* Mask alignment bits */
+ movq %rdi, %rax /* duplicate destination. */
+ jz 1f /* aligned => start loop */
+ neg %edx
+ addl $8, %edx /* Align to 8 bytes. */
+
+ /* Search the first bytes directly. */
+0: movb (%rax), %cl /* load byte */
+ cmpb %cl,%sil /* compare byte. */
+ je 6f /* target found */
+ testb %cl,%cl /* is byte NUL? */
+ je 7f /* yes => return NULL */
+ incq %rax /* increment pointer */
+ decl %edx
+ jnz 0b
+
+
+1:
+ /* At the moment %rsi contains C. What we need for the
+ algorithm is C in all bytes of the register. Avoid
+	   operations on 16-bit words because these require a
+ prefix byte (and one more cycle). */
+ /* Populate 8 bit data to full 64-bit. */
+ movabs $0x0101010101010101,%r9
+ movzbl %sil,%edx
+ imul %rdx,%r9
+
+ movq $0xfefefefefefefeff, %r8 /* Save magic. */
+
+ /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
+ change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+	      zero bytes?  Suppose every byte of QUADWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+	      24 etc.  If one of bits 54-63 is set, there will be a carry
+ into bit 64 (=carry flag), so all of the hole bits will
+ be changed.
+
+ 3) But wait! Aren't we looking for C, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is C. This turns each byte that is C
+ into a zero. */
+
+ .p2align 4
+4:
+ /* Main Loop is unrolled 4 times. */
+ /* First unroll. */
+ movq (%rax), %rcx /* get double word (= 8 bytes) in question */
+ addq $8,%rax /* adjust pointer for next word */
+ movq %r8, %rdx /* magic value */
+ xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
+ are now 0 */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 3f /* highest byte is NUL => return pointer */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 3f /* found c => return pointer */
+
+ /* The quadword we looked at does not contain the value we're looking
+ for. Let's search now whether we have reached the end of the
+ string. */
+ xorq %r9, %rcx /* restore original dword without reload */
+ movq %r8, %rdx /* magic value */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 7f /* highest byte is NUL => return NULL */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 7f /* found NUL => return NULL */
+
+ /* Second unroll. */
+ movq (%rax), %rcx /* get double word (= 8 bytes) in question */
+ addq $8,%rax /* adjust pointer for next word */
+ movq %r8, %rdx /* magic value */
+ xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
+ are now 0 */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 3f /* highest byte is NUL => return pointer */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 3f /* found c => return pointer */
+
+ /* The quadword we looked at does not contain the value we're looking
+ for. Let's search now whether we have reached the end of the
+ string. */
+ xorq %r9, %rcx /* restore original dword without reload */
+ movq %r8, %rdx /* magic value */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 7f /* highest byte is NUL => return NULL */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 7f /* found NUL => return NULL */
+ /* Third unroll. */
+ movq (%rax), %rcx /* get double word (= 8 bytes) in question */
+ addq $8,%rax /* adjust pointer for next word */
+ movq %r8, %rdx /* magic value */
+ xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
+ are now 0 */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 3f /* highest byte is NUL => return pointer */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 3f /* found c => return pointer */
+
+ /* The quadword we looked at does not contain the value we're looking
+ for. Let's search now whether we have reached the end of the
+ string. */
+ xorq %r9, %rcx /* restore original dword without reload */
+ movq %r8, %rdx /* magic value */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 7f /* highest byte is NUL => return NULL */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 7f /* found NUL => return NULL */
+ /* Fourth unroll. */
+ movq (%rax), %rcx /* get double word (= 8 bytes) in question */
+ addq $8,%rax /* adjust pointer for next word */
+ movq %r8, %rdx /* magic value */
+ xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c
+ are now 0 */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 3f /* highest byte is NUL => return pointer */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 3f /* found c => return pointer */
+
+ /* The quadword we looked at does not contain the value we're looking
+ for. Let's search now whether we have reached the end of the
+ string. */
+ xorq %r9, %rcx /* restore original dword without reload */
+ movq %r8, %rdx /* magic value */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 7f /* highest byte is NUL => return NULL */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jz 4b /* no NUL found => restart loop */
+
+
+7: /* Return NULL. */
+ xorl %eax, %eax
+ retq
+
+
+ /* We now scan for the byte in which the character was matched.
+ But we have to take care of the case that a NUL char is
+ found before this in the dword. Note that we XORed %rcx
+ with the byte we're looking for, therefore the tests below look
+ reversed. */
+
+
+ .p2align 4 /* Align, it's a jump target. */
+3: movq %r9,%rdx /* move to %rdx so that we can access bytes */
+ subq $8,%rax /* correct pointer increment. */
+ testb %cl, %cl /* is first byte C? */
+ jz 6f /* yes => return pointer */
+ cmpb %dl, %cl /* is first byte NUL? */
+ je 7b /* yes => return NULL */
+ incq %rax /* increment pointer */
+
+ testb %ch, %ch /* is second byte C? */
+ jz 6f /* yes => return pointer */
+ cmpb %dl, %ch /* is second byte NUL? */
+ je 7b /* yes => return NULL? */
+ incq %rax /* increment pointer */
+
+ shrq $16, %rcx /* make upper bytes accessible */
+ testb %cl, %cl /* is third byte C? */
+ jz 6f /* yes => return pointer */
+ cmpb %dl, %cl /* is third byte NUL? */
+ je 7b /* yes => return NULL */
+ incq %rax /* increment pointer */
+
+ testb %ch, %ch /* is fourth byte C? */
+ jz 6f /* yes => return pointer */
+ cmpb %dl, %ch /* is fourth byte NUL? */
+ je 7b /* yes => return NULL? */
+ incq %rax /* increment pointer */
+
+ shrq $16, %rcx /* make upper bytes accessible */
+ testb %cl, %cl /* is fifth byte C? */
+ jz 6f /* yes => return pointer */
+ cmpb %dl, %cl /* is fifth byte NUL? */
+ je 7b /* yes => return NULL */
+ incq %rax /* increment pointer */
+
+ testb %ch, %ch /* is sixth byte C? */
+ jz 6f /* yes => return pointer */
+ cmpb %dl, %ch /* is sixth byte NUL? */
+ je 7b /* yes => return NULL? */
+ incq %rax /* increment pointer */
+
+ shrq $16, %rcx /* make upper bytes accessible */
+ testb %cl, %cl /* is seventh byte C? */
+ jz 6f /* yes => return pointer */
+ cmpb %dl, %cl /* is seventh byte NUL? */
+ je 7b /* yes => return NULL */
+
+	/* It must be in the eighth byte and it cannot be NUL. */
+ incq %rax
+
+6:
+ nop
+ retq
+END (BP_SYM (strchr))
+
+weak_alias (BP_SYM (strchr), BP_SYM (index))
+libc_hidden_builtin_def (strchr)
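
The carry/magic-constant test used in the loop above has a better-known C
formulation. The sketch below is the equivalent per-word test, not a byte-exact
transcription of the 0xfefefefefefefeff sequence (which additionally relies on the
carry flag and a byte-scanning tail):

    #include <stdint.h>

    /* Nonzero iff some byte of W is zero.  */
    static int
    word_has_zero_byte (uint64_t w)
    {
      return ((w - 0x0101010101010101ULL) & ~w & 0x8080808080808080ULL) != 0;
    }

    /* For strchr, bytes equal to C are first turned into zero bytes by
       XORing with C replicated into every byte, just as the code above
       does with the imul-produced mask in %r9.  */
    static int
    word_has_byte (uint64_t w, unsigned char c)
    {
      return word_has_zero_byte (w ^ (0x0101010101010101ULL * c));
    }
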
diff --git a/sysdeps/x86_64/rtld-strcmp.S b/sysdeps/x86_64/rtld-strcmp.S
new file mode 100644
index 0000000000..a25535c161
--- /dev/null
+++ b/sysdeps/x86_64/rtld-strcmp.S
@@ -0,0 +1,28 @@
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+#ifndef LABEL
+#define LABEL(l) L(l)
+#endif
+
+ .text
+ENTRY (BP_SYM (STRCMP))
+/* Simple version since we can't use SSE registers in ld.so. */
+L(oop): movb (%rdi), %al
+ cmpb (%rsi), %al
+ jne L(neq)
+ incq %rdi
+ incq %rsi
+ testb %al, %al
+ jnz L(oop)
+
+ xorl %eax, %eax
+ ret
+
+L(neq): movl $1, %eax
+ movl $-1, %ecx
+ cmovbl %ecx, %eax
+ ret
+END (BP_SYM (STRCMP))
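
ld.so must not touch the SSE registers (that is the point of these rtld-* files and of
tst-xmmymm.sh), so this version is a plain byte loop. Its C equivalent, returning the
sign of the first differing pair compared as unsigned chars:

    static int
    rtld_strcmp_sketch (const char *s1, const char *s2)
    {
      unsigned char c1, c2;

      do
        {
          c1 = (unsigned char) *s1++;
          c2 = (unsigned char) *s2++;
        }
      while (c1 == c2 && c1 != '\0');

      return c1 == c2 ? 0 : (c1 < c2 ? -1 : 1);
    }
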
diff --git a/sysdeps/x86_64/rtld-strlen.S b/sysdeps/x86_64/rtld-strlen.S
new file mode 100644
index 0000000000..fd950edaaa
--- /dev/null
+++ b/sysdeps/x86_64/rtld-strlen.S
@@ -0,0 +1,139 @@
+/* strlen(str) -- determine the length of the string STR.
+ Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ Based on i486 version contributed by Ulrich Drepper <drepper@redhat.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+#include "bp-sym.h"
+#include "bp-asm.h"
+
+
+ .text
+ENTRY (strlen)
+ movq %rdi, %rcx /* Duplicate source pointer. */
+ andl $7, %ecx /* mask alignment bits */
+ movq %rdi, %rax /* duplicate destination. */
+ jz 1f /* aligned => start loop */
+
+ neg %ecx /* We need to align to 8 bytes. */
+ addl $8,%ecx
+ /* Search the first bytes directly. */
+0: cmpb $0x0,(%rax) /* is byte NUL? */
+ je 2f /* yes => return */
+ incq %rax /* increment pointer */
+ decl %ecx
+ jnz 0b
+
+1: movq $0xfefefefefefefeff,%r8 /* Save magic. */
+
+ .p2align 4 /* Align loop. */
+4: /* Main Loop is unrolled 4 times. */
+ /* First unroll. */
+ movq (%rax), %rcx /* get double word (= 8 bytes) in question */
+ addq $8,%rax /* adjust pointer for next word */
+ movq %r8, %rdx /* magic value */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 3f /* highest byte is NUL => return pointer */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 3f /* found NUL => return pointer */
+
+ /* Second unroll. */
+ movq (%rax), %rcx /* get double word (= 8 bytes) in question */
+ addq $8,%rax /* adjust pointer for next word */
+ movq %r8, %rdx /* magic value */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 3f /* highest byte is NUL => return pointer */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 3f /* found NUL => return pointer */
+
+ /* Third unroll. */
+ movq (%rax), %rcx /* get double word (= 8 bytes) in question */
+ addq $8,%rax /* adjust pointer for next word */
+ movq %r8, %rdx /* magic value */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 3f /* highest byte is NUL => return pointer */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jnz 3f /* found NUL => return pointer */
+
+ /* Fourth unroll. */
+ movq (%rax), %rcx /* get double word (= 8 bytes) in question */
+ addq $8,%rax /* adjust pointer for next word */
+ movq %r8, %rdx /* magic value */
+ addq %rcx, %rdx /* add the magic value to the word. We get
+ carry bits reported for each byte which
+ is *not* 0 */
+ jnc 3f /* highest byte is NUL => return pointer */
+ xorq %rcx, %rdx /* (word+magic)^word */
+ orq %r8, %rdx /* set all non-carry bits */
+ incq %rdx /* add 1: if one carry bit was *not* set
+ the addition will not result in 0. */
+ jz 4b /* no NUL found => continue loop */
+
+ .p2align 4 /* Align, it's a jump target. */
+3: subq $8,%rax /* correct pointer increment. */
+
+ testb %cl, %cl /* is first byte NUL? */
+ jz 2f /* yes => return */
+ incq %rax /* increment pointer */
+
+ testb %ch, %ch /* is second byte NUL? */
+ jz 2f /* yes => return */
+ incq %rax /* increment pointer */
+
+ testl $0x00ff0000, %ecx /* is third byte NUL? */
+ jz 2f /* yes => return pointer */
+ incq %rax /* increment pointer */
+
+ testl $0xff000000, %ecx /* is fourth byte NUL? */
+ jz 2f /* yes => return pointer */
+ incq %rax /* increment pointer */
+
+ shrq $32, %rcx /* look at other half. */
+
+ testb %cl, %cl /* is first byte NUL? */
+ jz 2f /* yes => return */
+ incq %rax /* increment pointer */
+
+ testb %ch, %ch /* is second byte NUL? */
+ jz 2f /* yes => return */
+ incq %rax /* increment pointer */
+
+ testl $0xff0000, %ecx /* is third byte NUL? */
+ jz 2f /* yes => return pointer */
+ incq %rax /* increment pointer */
+2:
+ subq %rdi, %rax /* compute difference to string start */
+ ret
+END (strlen)
+libc_hidden_builtin_def (strlen)
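
The magic constant 0xfefefefefefefeff above implements the usual
word-at-a-time NUL detection.  A minimal C sketch of that predicate, assuming
little-endian 8-byte words and an illustrative function name:

    #include <stdint.h>

    /* Non-zero if WORD contains at least one NUL byte.  */
    static int
    word_has_nul (uint64_t word)
    {
      const uint64_t magic = 0xfefefefefefefeffULL;
      uint64_t sum = word + magic;  /* carries propagate only across non-zero bytes */

      /* The jnc path: the addition did not carry out of the top bit, so a
         NUL byte stopped the carry chain.  */
      if (sum >= word)
        return 1;

      /* The xor/or/inc sequence: the result is zero only when every byte
         produced a carry, i.e. no byte of WORD was NUL.  */
      return (((sum ^ word) | magic) + 1) != 0;
    }

Once this predicate fires, the tail code above rewinds by 8 bytes and tests
the individual bytes to find the exact position of the NUL.
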
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index 119b88e40b..340a64ba35 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -1,8 +1,10 @@
/* Highly optimized version for x86-64.
- Copyright (C) 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2002, 2003, 2005, 2009
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Based on i686 version contributed by Ulrich Drepper
<drepper@cygnus.com>, 1999.
+ Updated with SSE2 support contributed by Intel Corporation.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -24,8 +26,35 @@
#include "bp-sym.h"
#include "bp-asm.h"
- .text
-ENTRY (BP_SYM (strcmp))
+#undef UPDATE_STRNCMP_COUNTER
+
+#ifndef LABEL
+#define LABEL(l) L(l)
+#endif
+
+#ifdef USE_AS_STRNCMP
+/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
+ if the new counter > the old one or is 0. */
+# define UPDATE_STRNCMP_COUNTER \
+ /* calculate left number to compare */ \
+ lea -16(%rcx, %r11), %r9; \
+ cmp %r9, %r11; \
+ jb LABEL(strcmp_exitz); \
+ test %r9, %r9; \
+ je LABEL(strcmp_exitz); \
+ mov %r9, %r11
+
+#else
+# define UPDATE_STRNCMP_COUNTER
+# ifndef STRCMP
+# define STRCMP strcmp
+# endif
+#endif
+
+ .text
+ENTRY (BP_SYM (STRCMP))
+#ifdef NOT_IN_libc
+/* Simple version since we can't use SSE registers in ld.so. */
L(oop): movb (%rdi), %al
cmpb (%rsi), %al
jne L(neq)
@@ -41,5 +70,1914 @@ L(neq): movl $1, %eax
movl $-1, %ecx
cmovbl %ecx, %eax
ret
-END (BP_SYM (strcmp))
-libc_hidden_builtin_def (strcmp)
+END (BP_SYM (STRCMP))
+#else /* NOT_IN_libc */
+/*
+ * This implementation uses SSE to compare up to 16 bytes at a time.
+ */
+#ifdef USE_AS_STRNCMP
+ test %rdx, %rdx
+ je LABEL(strcmp_exitz)
+ cmp $1, %rdx
+ je LABEL(Byte0)
+ mov %rdx, %r11
+#endif
+ mov %esi, %ecx
+ mov %edi, %eax
+/* Use 64bit AND here to avoid long NOP padding. */
+ and $0x3f, %rcx /* rsi alignment in cache line */
+ and $0x3f, %rax /* rdi alignment in cache line */
+ cmp $0x30, %ecx
+ ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
+ cmp $0x30, %eax
+ ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
+ movlpd (%rdi), %xmm1
+ movlpd (%rsi), %xmm2
+ movhpd 8(%rdi), %xmm1
+ movhpd 8(%rsi), %xmm2
+ pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
+ pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results */
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
+ jnz LABEL(less16bytes) /* If not, find different value or null char */
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+	jbe	LABEL(strcmp_exitz)	/* finish comparison */
+#endif
+ add $16, %rsi /* prepare to search next 16 bytes */
+ add $16, %rdi /* prepare to search next 16 bytes */
+
+ /*
+ * Determine source and destination string offsets from 16-byte alignment.
+ * Use relative offset difference between the two to determine which case
+ * below to use.
+ */
+ .p2align 4
+LABEL(crosscache):
+	and	$0xfffffffffffffff0, %rsi	/* force %rsi to be 16-byte aligned */
+	and	$0xfffffffffffffff0, %rdi	/* force %rdi to be 16-byte aligned */
+ mov $0xffff, %edx /* for equivalent offset */
+ xor %r8d, %r8d
+ and $0xf, %ecx /* offset of rsi */
+ and $0xf, %eax /* offset of rdi */
+ cmp %eax, %ecx
+ je LABEL(ashr_0) /* rsi and rdi relative offset same */
+ ja LABEL(bigger)
+ mov %edx, %r8d /* r8d is offset flag for exit tail */
+ xchg %ecx, %eax
+ xchg %rsi, %rdi
+LABEL(bigger):
+ lea 15(%rax), %r9
+ sub %rcx, %r9
+ lea LABEL(unaligned_table)(%rip), %r10
+ movslq (%r10, %r9,4), %r9
+ lea (%r10, %r9), %r10
+ jmp *%r10 /* jump to corresponding case */
+
+/*
+ * The following cases will be handled by ashr_0
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(0~15) n(0~15) 15(15+ n-n) ashr_0
+ */
+ .p2align 4
+LABEL(ashr_0):
+
+ movdqa (%rsi), %xmm1
+ pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
+ pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results */
+ pmovmskb %xmm1, %r9d
+ shr %cl, %edx /* adjust 0xffff for offset */
+ shr %cl, %r9d /* adjust for 16-byte offset */
+ sub %r9d, %edx
+	/*
+	 * edx must equal r9d if the (16 - rcx) remaining bytes, starting at
+	 * offset (16 - rax), compare equal and no null char was seen.
+	 */
+ jne LABEL(less32bytes) /* mismatch or null char */
+ UPDATE_STRNCMP_COUNTER
+ mov $16, %rcx
+ mov $16, %r9
+ pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
+
+ /*
+ * Now both strings are aligned at 16-byte boundary. Loop over strings
+ * checking 32-bytes per iteration.
+ */
+ .p2align 4
+LABEL(loop_ashr_0):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit) /* mismatch or null char seen */
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+ add $16, %rcx
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+ add $16, %rcx
+ jmp LABEL(loop_ashr_0)
+
+/*
+ * The following cases will be handled by ashr_1
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(15) n -15 0(15 +(n-15) - n) ashr_1
+ */
+ .p2align 4
+LABEL(ashr_1):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
+ pslldq $15, %xmm2 /* shift first string to align with second */
+ pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results */
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx /* adjust 0xffff for offset */
+ shr %cl, %r9d /* adjust for 16-byte offset */
+ sub %r9d, %edx
+ jnz LABEL(less32bytes) /* mismatch or null char seen */
+ movdqa (%rdi), %xmm3
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads*/
+ mov $1, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 1(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_1):
+ add $16, %r10
+ jg LABEL(nibble_ashr_1) /* cross page boundary */
+
+LABEL(gobble_ashr_1):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4 /* store for next cycle */
+
+ psrldq $1, %xmm3
+ pslldq $15, %xmm2
+	por	%xmm3, %xmm2		/* merge into one 16-byte value */
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_1) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4 /* store for next cycle */
+
+ psrldq $1, %xmm3
+ pslldq $15, %xmm2
+	por	%xmm3, %xmm2		/* merge into one 16-byte value */
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_1)
+
+ /*
+ * Nibble avoids loads across page boundary. This is to avoid a potential
+ * access into unmapped memory.
+ */
+ .p2align 4
+LABEL(nibble_ashr_1):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char*/
+ pmovmskb %xmm0, %edx
+ test $0xfffe, %edx
+ jnz LABEL(ashr_1_exittail) /* find null char*/
+
+#ifdef USE_AS_STRNCMP
+ cmp $14, %r11
+ jbe LABEL(ashr_1_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+	sub	$0x1000, %r10		/* subtract 4K from %r10 */
+ jmp LABEL(gobble_ashr_1)
+
+	/*
+	 * Once the null char is found, determine whether there is a string
+	 * mismatch before it.
+	 */
+ .p2align 4
+LABEL(ashr_1_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $1, %xmm0
+ psrldq $1, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_2
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
+ */
+ .p2align 4
+LABEL(ashr_2):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $14, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $2, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 2(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_2):
+ add $16, %r10
+ jg LABEL(nibble_ashr_2)
+
+LABEL(gobble_ashr_2):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $2, %xmm3
+ pslldq $14, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_2) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $2, %xmm3
+ pslldq $14, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_2)
+
+ .p2align 4
+LABEL(nibble_ashr_2):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xfffc, %edx
+ jnz LABEL(ashr_2_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $13, %r11
+ jbe LABEL(ashr_2_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_2)
+
+ .p2align 4
+LABEL(ashr_2_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $2, %xmm0
+ psrldq $2, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_3
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
+ */
+ .p2align 4
+LABEL(ashr_3):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $13, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $3, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 3(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_3):
+ add $16, %r10
+ jg LABEL(nibble_ashr_3)
+
+LABEL(gobble_ashr_3):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $3, %xmm3
+ pslldq $13, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_3) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $3, %xmm3
+ pslldq $13, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_3)
+
+ .p2align 4
+LABEL(nibble_ashr_3):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xfff8, %edx
+ jnz LABEL(ashr_3_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $12, %r11
+ jbe LABEL(ashr_3_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_3)
+
+ .p2align 4
+LABEL(ashr_3_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $3, %xmm0
+ psrldq $3, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_4
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
+ */
+ .p2align 4
+LABEL(ashr_4):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $12, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $4, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 4(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_4):
+ add $16, %r10
+ jg LABEL(nibble_ashr_4)
+
+LABEL(gobble_ashr_4):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $4, %xmm3
+ pslldq $12, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_4) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $4, %xmm3
+ pslldq $12, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_4)
+
+ .p2align 4
+LABEL(nibble_ashr_4):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xfff0, %edx
+ jnz LABEL(ashr_4_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $11, %r11
+ jbe LABEL(ashr_4_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_4)
+
+ .p2align 4
+LABEL(ashr_4_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $4, %xmm0
+ psrldq $4, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_5
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
+ */
+ .p2align 4
+LABEL(ashr_5):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $11, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $5, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 5(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_5):
+ add $16, %r10
+ jg LABEL(nibble_ashr_5)
+
+LABEL(gobble_ashr_5):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $5, %xmm3
+ pslldq $11, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_5) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $5, %xmm3
+ pslldq $11, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_5)
+
+ .p2align 4
+LABEL(nibble_ashr_5):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xffe0, %edx
+ jnz LABEL(ashr_5_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $10, %r11
+ jbe LABEL(ashr_5_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_5)
+
+ .p2align 4
+LABEL(ashr_5_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $5, %xmm0
+ psrldq $5, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_6
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
+ */
+ .p2align 4
+LABEL(ashr_6):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $10, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $6, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 6(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_6):
+ add $16, %r10
+ jg LABEL(nibble_ashr_6)
+
+LABEL(gobble_ashr_6):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $6, %xmm3
+ pslldq $10, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_6) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $6, %xmm3
+ pslldq $10, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_6)
+
+ .p2align 4
+LABEL(nibble_ashr_6):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xffc0, %edx
+ jnz LABEL(ashr_6_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $9, %r11
+ jbe LABEL(ashr_6_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_6)
+
+ .p2align 4
+LABEL(ashr_6_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $6, %xmm0
+ psrldq $6, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_7
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
+ */
+ .p2align 4
+LABEL(ashr_7):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $9, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $7, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 7(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_7):
+ add $16, %r10
+ jg LABEL(nibble_ashr_7)
+
+LABEL(gobble_ashr_7):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $7, %xmm3
+ pslldq $9, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_7) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $7, %xmm3
+ pslldq $9, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_7)
+
+ .p2align 4
+LABEL(nibble_ashr_7):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xff80, %edx
+ jnz LABEL(ashr_7_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $8, %r11
+ jbe LABEL(ashr_7_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_7)
+
+ .p2align 4
+LABEL(ashr_7_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $7, %xmm0
+ psrldq $7, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_8
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
+ */
+ .p2align 4
+LABEL(ashr_8):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $8, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $8, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 8(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_8):
+ add $16, %r10
+ jg LABEL(nibble_ashr_8)
+
+LABEL(gobble_ashr_8):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $8, %xmm3
+ pslldq $8, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_8) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $8, %xmm3
+ pslldq $8, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_8)
+
+ .p2align 4
+LABEL(nibble_ashr_8):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xff00, %edx
+ jnz LABEL(ashr_8_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %r11
+ jbe LABEL(ashr_8_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_8)
+
+ .p2align 4
+LABEL(ashr_8_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $8, %xmm0
+ psrldq $8, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_9
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
+ */
+ .p2align 4
+LABEL(ashr_9):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $7, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $9, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 9(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_9):
+ add $16, %r10
+ jg LABEL(nibble_ashr_9)
+
+LABEL(gobble_ashr_9):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $9, %xmm3
+ pslldq $7, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_9) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $9, %xmm3
+ pslldq $7, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3 /* store for next cycle */
+ jmp LABEL(loop_ashr_9)
+
+ .p2align 4
+LABEL(nibble_ashr_9):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xfe00, %edx
+ jnz LABEL(ashr_9_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $6, %r11
+ jbe LABEL(ashr_9_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_9)
+
+ .p2align 4
+LABEL(ashr_9_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $9, %xmm0
+ psrldq $9, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_10
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
+ */
+ .p2align 4
+LABEL(ashr_10):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $6, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $10, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 10(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_10):
+ add $16, %r10
+ jg LABEL(nibble_ashr_10)
+
+LABEL(gobble_ashr_10):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $10, %xmm3
+ pslldq $6, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_10) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $10, %xmm3
+ pslldq $6, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_10)
+
+ .p2align 4
+LABEL(nibble_ashr_10):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xfc00, %edx
+ jnz LABEL(ashr_10_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $5, %r11
+ jbe LABEL(ashr_10_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_10)
+
+ .p2align 4
+LABEL(ashr_10_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $10, %xmm0
+ psrldq $10, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_11
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
+ */
+ .p2align 4
+LABEL(ashr_11):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $5, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $11, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 11(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_11):
+ add $16, %r10
+ jg LABEL(nibble_ashr_11)
+
+LABEL(gobble_ashr_11):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $11, %xmm3
+ pslldq $5, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_11) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $11, %xmm3
+ pslldq $5, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_11)
+
+ .p2align 4
+LABEL(nibble_ashr_11):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xf800, %edx
+ jnz LABEL(ashr_11_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $4, %r11
+ jbe LABEL(ashr_11_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_11)
+
+ .p2align 4
+LABEL(ashr_11_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $11, %xmm0
+ psrldq $11, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_12
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
+ */
+ .p2align 4
+LABEL(ashr_12):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $4, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $12, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 12(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_12):
+ add $16, %r10
+ jg LABEL(nibble_ashr_12)
+
+LABEL(gobble_ashr_12):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $12, %xmm3
+ pslldq $4, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_12) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $12, %xmm3
+ pslldq $4, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_12)
+
+ .p2align 4
+LABEL(nibble_ashr_12):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xf000, %edx
+ jnz LABEL(ashr_12_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $3, %r11
+ jbe LABEL(ashr_12_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_12)
+
+ .p2align 4
+LABEL(ashr_12_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $12, %xmm0
+ psrldq $12, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_13
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
+ */
+ .p2align 4
+LABEL(ashr_13):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $3, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $13, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 13(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_13):
+ add $16, %r10
+ jg LABEL(nibble_ashr_13)
+
+LABEL(gobble_ashr_13):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $13, %xmm3
+ pslldq $3, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_13) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $13, %xmm3
+ pslldq $3, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_13)
+
+ .p2align 4
+LABEL(nibble_ashr_13):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xe000, %edx
+ jnz LABEL(ashr_13_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $2, %r11
+ jbe LABEL(ashr_13_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_13)
+
+ .p2align 4
+LABEL(ashr_13_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $13, %xmm0
+ psrldq $13, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_14
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
+ */
+ .p2align 4
+LABEL(ashr_14):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $2, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $14, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 14(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_14):
+ add $16, %r10
+ jg LABEL(nibble_ashr_14)
+
+LABEL(gobble_ashr_14):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $14, %xmm3
+ pslldq $2, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_14) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $14, %xmm3
+ pslldq $2, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_14)
+
+ .p2align 4
+LABEL(nibble_ashr_14):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0xc000, %edx
+ jnz LABEL(ashr_14_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $1, %r11
+ jbe LABEL(ashr_14_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_14)
+
+ .p2align 4
+LABEL(ashr_14_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $14, %xmm0
+ psrldq $14, %xmm3
+ jmp LABEL(aftertail)
+
+/*
+ * The following cases will be handled by ashr_15
+ * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
+ * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
+ */
+ .p2align 4
+LABEL(ashr_15):
+ pxor %xmm0, %xmm0
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $1, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+ sub %r9d, %edx
+ jnz LABEL(less32bytes)
+
+ movdqa (%rdi), %xmm3
+
+ UPDATE_STRNCMP_COUNTER
+
+ pxor %xmm0, %xmm0
+ mov $16, %rcx /* index for loads */
+ mov $15, %r9d /* byte position left over from less32bytes case */
+ /*
+ * Setup %r10 value allows us to detect crossing a page boundary.
+ * When %r10 goes positive we have crossed a page boundary and
+ * need to do a nibble.
+ */
+ lea 15(%rdi), %r10
+ and $0xfff, %r10 /* offset into 4K page */
+
+ sub $0x1000, %r10 /* subtract 4K pagesize */
+
+ .p2align 4
+LABEL(loop_ashr_15):
+ add $16, %r10
+ jg LABEL(nibble_ashr_15)
+
+LABEL(gobble_ashr_15):
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $15, %xmm3
+ pslldq $1, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+
+ add $16, %r10
+ jg LABEL(nibble_ashr_15) /* cross page boundary */
+
+ movdqa (%rsi, %rcx), %xmm1
+ movdqa (%rdi, %rcx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ psrldq $15, %xmm3
+ pslldq $1, %xmm2
+ por %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ sub $0xffff, %edx
+ jnz LABEL(exit)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+
+ add $16, %rcx
+ movdqa %xmm4, %xmm3
+ jmp LABEL(loop_ashr_15)
+
+ .p2align 4
+LABEL(nibble_ashr_15):
+ pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
+ pmovmskb %xmm0, %edx
+ test $0x8000, %edx
+ jnz LABEL(ashr_15_exittail)
+
+#ifdef USE_AS_STRNCMP
+ test %r11, %r11
+ je LABEL(ashr_15_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %r10
+ jmp LABEL(gobble_ashr_15)
+
+ .p2align 4
+LABEL(ashr_15_exittail):
+ movdqa (%rsi, %rcx), %xmm1
+ psrldq $15, %xmm3
+ psrldq $15, %xmm0
+
+ .p2align 4
+LABEL(aftertail):
+ pcmpeqb %xmm3, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edx
+ not %edx
+
+ .p2align 4
+LABEL(exit):
+ lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
+LABEL(less32bytes):
+	lea	(%rdi, %rax), %rdi	/* locate the exact address for first operand (rdi) */
+	lea	(%rsi, %rcx), %rsi	/* locate the exact address for second operand (rsi) */
+ test %r8d, %r8d
+ jz LABEL(ret)
+ xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
+
+ .p2align 4
+LABEL(ret):
+LABEL(less16bytes):
+ bsf %rdx, %rdx /* find and store bit index in %rdx */
+
+#ifdef USE_AS_STRNCMP
+ sub %rdx, %r11
+ jbe LABEL(strcmp_exitz)
+#endif
+ movzbl (%rsi, %rdx), %ecx
+ movzbl (%rdi, %rdx), %eax
+
+ sub %ecx, %eax
+ ret
+
+LABEL(strcmp_exitz):
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+LABEL(Byte0):
+ movzx (%rsi), %ecx
+ movzx (%rdi), %eax
+
+ sub %ecx, %eax
+ ret
+END (BP_SYM (STRCMP))
+
+ .section .rodata,"a",@progbits
+ .p2align 3
+LABEL(unaligned_table):
+ .int LABEL(ashr_1) - LABEL(unaligned_table)
+ .int LABEL(ashr_2) - LABEL(unaligned_table)
+ .int LABEL(ashr_3) - LABEL(unaligned_table)
+ .int LABEL(ashr_4) - LABEL(unaligned_table)
+ .int LABEL(ashr_5) - LABEL(unaligned_table)
+ .int LABEL(ashr_6) - LABEL(unaligned_table)
+ .int LABEL(ashr_7) - LABEL(unaligned_table)
+ .int LABEL(ashr_8) - LABEL(unaligned_table)
+ .int LABEL(ashr_9) - LABEL(unaligned_table)
+ .int LABEL(ashr_10) - LABEL(unaligned_table)
+ .int LABEL(ashr_11) - LABEL(unaligned_table)
+ .int LABEL(ashr_12) - LABEL(unaligned_table)
+ .int LABEL(ashr_13) - LABEL(unaligned_table)
+ .int LABEL(ashr_14) - LABEL(unaligned_table)
+ .int LABEL(ashr_15) - LABEL(unaligned_table)
+ .int LABEL(ashr_0) - LABEL(unaligned_table)
+#endif /* NOT_IN_libc */
+libc_hidden_builtin_def (STRCMP)
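
The SSE2 kernel repeated throughout the ashr cases above is the same
pcmpeqb/pcmpeqb/psubb sequence followed by pmovmskb.  As a sketch using
intrinsics (aligned loads only, illustrative names; the real code also
handles the unaligned and page-crossing cases shown above):

    #include <emmintrin.h>

    /* Compare 16 bytes of S1 and S2.  The returned mask has a bit set for
       every byte that is equal in both strings and not NUL in S1; the
       callers above keep looping while the mask is 0xffff.  */
    static unsigned int
    cmp16_mask (const char *s1, const char *s2)
    {
      __m128i a = _mm_load_si128 ((const __m128i *) s1);
      __m128i b = _mm_load_si128 ((const __m128i *) s2);

      __m128i is_nul = _mm_cmpeq_epi8 (a, _mm_setzero_si128 ()); /* pcmpeqb vs. 0 */
      __m128i is_eq  = _mm_cmpeq_epi8 (a, b);                    /* pcmpeqb a, b  */
      __m128i good   = _mm_sub_epi8 (is_eq, is_nul);             /* psubb         */

      return _mm_movemask_epi8 (good);                           /* pmovmskb      */
    }

A mask other than 0xffff means a mismatch or a NUL was seen; the less16bytes
path then uses bsf on the adjusted mask to locate the first offending byte.
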
diff --git a/sysdeps/x86_64/strncmp.S b/sysdeps/x86_64/strncmp.S
new file mode 100644
index 0000000000..0af34e7f15
--- /dev/null
+++ b/sysdeps/x86_64/strncmp.S
@@ -0,0 +1,3 @@
+#define STRCMP strncmp
+#define USE_AS_STRNCMP
+#include "strcmp.S"
diff --git a/sysdeps/x86_64/tst-xmmymm.sh b/sysdeps/x86_64/tst-xmmymm.sh
new file mode 100755
index 0000000000..0735276e6d
--- /dev/null
+++ b/sysdeps/x86_64/tst-xmmymm.sh
@@ -0,0 +1,17 @@
+#! /bin/sh
+objpfx="$1"
+
+tmp=$(mktemp ${objpfx}tst-xmmymm.XXXXXX)
+trap 'rm -f "$tmp"' 1 2 3 15
+
+objdump -d "${objpfx}ld.so" |
+awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
+tee "$tmp"
+
+echo "Functions which incorrectly modify xmm/ymm registers:"
+err=1
+egrep -vs '^_dl_runtime_profile$' "$tmp" || err=0
+if test $err -eq 0; then echo "None"; fi
+
+rm "$tmp"
+exit $err
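
As a rough usage note: the script takes the build output prefix as its only
argument and disassembles ${objpfx}ld.so, so a manual run would look like
"sh tst-xmmymm.sh "$objpfx"" with objpfx pointing at the directory containing
the freshly built ld.so (invocation illustrative).  It exits with status 1 if
any function other than _dl_runtime_profile modifies an xmm/ymm register, and
with status 0 otherwise.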