diff options
author | Jakub Jelinek <jakub@redhat.com> | 2008-03-14 17:22:27 +0000 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2008-03-14 17:22:27 +0000 |
commit | b87b7fc3e6e41cf8006fb2341c236a46f6d8bdd4 (patch) | |
tree | 8b042dd05d766dd46dfa953aec240207eae14208 /sysdeps | |
parent | 5c25449dd9fd706f79ee6d92019f28044d9270fa (diff) |
Updated to fedora-glibc-20080310T1651
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/generic/ldsodefs.h | 8 | ||||
-rw-r--r-- | sysdeps/ia64/ieee754.h | 8 | ||||
-rw-r--r-- | sysdeps/ieee754/ieee754.h | 8 | ||||
-rw-r--r-- | sysdeps/mach/hurd/i386/trampoline.c | 6 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/bits/posix_opt.h | 7 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/dl-osinfo.h | 97 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/dl-sysdep.c | 104 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/dl-sysdep.h | 11 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/fpathconf.c | 5 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/pathconf.c | 48 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/pathconf.h | 13 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/sysconf.c | 20 | ||||
-rw-r--r-- | sysdeps/x86_64/cacheinfo.c | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/memset.S | 1365 | ||||
-rw-r--r-- | sysdeps/x86_64/rtld-memset.c | 1 |
15 files changed, 1497 insertions, 214 deletions
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 958a099b82..9d1ebdf615 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -1,5 +1,5 @@ /* Run-time dynamic linker data structures for loaded ELF shared objects. - Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc. + Copyright (C) 1995-2006, 2007, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -655,6 +655,10 @@ struct rtld_global_ro void *(*_dl_open) (const char *file, int mode, const void *caller_dlopen, Lmid_t nsid, int argc, char *argv[], char *env[]); void (*_dl_close) (void *map); + void *(*_dl_tls_get_addr_soft) (struct link_map *); +#ifdef HAVE_DL_DISCOVER_OSVERSION + int (*_dl_discover_osversion) (void); +#endif /* List of auditing interfaces. */ struct audit_ifaces *_dl_audit; @@ -1069,7 +1073,7 @@ extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid); /* Look up the module's TLS block as for __tls_get_addr, but never touch anything. Return null if it's not allocated yet. */ -extern void *_dl_tls_get_addr_soft (struct link_map *l) internal_function; +extern void *_dl_tls_get_addr_soft (struct link_map *l) attribute_hidden; extern int _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr) internal_function attribute_hidden; diff --git a/sysdeps/ia64/ieee754.h b/sysdeps/ia64/ieee754.h index f5a71f5b92..44629430f8 100644 --- a/sysdeps/ia64/ieee754.h +++ b/sysdeps/ia64/ieee754.h @@ -80,7 +80,7 @@ union ieee754_double unsigned int mantissa1:32; #endif /* Big endian. */ #if __BYTE_ORDER == __LITTLE_ENDIAN -# if __FLOAT_WORD_ORDER == BIG_ENDIAN +# if __FLOAT_WORD_ORDER == __BIG_ENDIAN unsigned int mantissa0:20; unsigned int exponent:11; unsigned int negative:1; @@ -106,7 +106,7 @@ union ieee754_double unsigned int mantissa0:19; unsigned int mantissa1:32; #else -# if __FLOAT_WORD_ORDER == BIG_ENDIAN +# if __FLOAT_WORD_ORDER == __BIG_ENDIAN unsigned int mantissa0:19; unsigned int quiet_nan:1; unsigned int exponent:11; @@ -143,7 +143,7 @@ union ieee854_long_double unsigned int mantissa1:32; #endif #if __BYTE_ORDER == __LITTLE_ENDIAN -# if __FLOAT_WORD_ORDER == BIG_ENDIAN +# if __FLOAT_WORD_ORDER == __BIG_ENDIAN unsigned int empty0:32; unsigned int exponent:15; unsigned int negative:1; @@ -175,7 +175,7 @@ union ieee854_long_double unsigned int mantissa1:32; #endif #if __BYTE_ORDER == __LITTLE_ENDIAN -# if __FLOAT_WORD_ORDER == BIG_ENDIAN +# if __FLOAT_WORD_ORDER == __BIG_ENDIAN unsigned int empty0:32; unsigned int exponent:15; unsigned int negative:1; diff --git a/sysdeps/ieee754/ieee754.h b/sysdeps/ieee754/ieee754.h index 7131e5de6c..b17c29ab73 100644 --- a/sysdeps/ieee754/ieee754.h +++ b/sysdeps/ieee754/ieee754.h @@ -80,7 +80,7 @@ union ieee754_double unsigned int mantissa1:32; #endif /* Big endian. */ #if __BYTE_ORDER == __LITTLE_ENDIAN -# if __FLOAT_WORD_ORDER == BIG_ENDIAN +# if __FLOAT_WORD_ORDER == __BIG_ENDIAN unsigned int mantissa0:20; unsigned int exponent:11; unsigned int negative:1; @@ -106,7 +106,7 @@ union ieee754_double unsigned int mantissa0:19; unsigned int mantissa1:32; #else -# if __FLOAT_WORD_ORDER == BIG_ENDIAN +# if __FLOAT_WORD_ORDER == __BIG_ENDIAN unsigned int mantissa0:19; unsigned int quiet_nan:1; unsigned int exponent:11; @@ -142,7 +142,7 @@ union ieee854_long_double unsigned int mantissa1:32; #endif #if __BYTE_ORDER == __LITTLE_ENDIAN -# if __FLOAT_WORD_ORDER == BIG_ENDIAN +# if __FLOAT_WORD_ORDER == __BIG_ENDIAN unsigned int exponent:15; unsigned int negative:1; unsigned int empty:16; @@ -171,7 +171,7 @@ union ieee854_long_double unsigned int mantissa1:32; #endif #if __BYTE_ORDER == __LITTLE_ENDIAN -# if __FLOAT_WORD_ORDER == BIG_ENDIAN +# if __FLOAT_WORD_ORDER == __BIG_ENDIAN unsigned int exponent:15; unsigned int negative:1; unsigned int empty:16; diff --git a/sysdeps/mach/hurd/i386/trampoline.c b/sysdeps/mach/hurd/i386/trampoline.c index dddc6f3ef6..99d9308360 100644 --- a/sysdeps/mach/hurd/i386/trampoline.c +++ b/sysdeps/mach/hurd/i386/trampoline.c @@ -1,5 +1,5 @@ /* Set thread_state for sighandler, and sigcontext to recover. i386 version. - Copyright (C) 1994,1995,1996,1997,1998,1999,2005 + Copyright (C) 1994,1995,1996,1997,1998,1999,2005,2008 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -21,6 +21,7 @@ #include <hurd/signal.h> #include <hurd/userlink.h> #include <thread_state.h> +#include <mach/machine/eflags.h> #include <assert.h> #include <errno.h> #include "hurdfault.h" @@ -218,6 +219,9 @@ _hurd_setup_sighandler (struct hurd_sigstate *ss, __sighandler_t handler, /* We pass the handler function to the trampoline code in %edx. */ state->basic.edx = (int) handler; + /* The x86 ABI says the DF bit is clear on entry to any function. */ + state->basic.efl &= ~EFL_DF; + return scp; } diff --git a/sysdeps/unix/sysv/linux/bits/posix_opt.h b/sysdeps/unix/sysv/linux/bits/posix_opt.h index 1a96db2982..37612e0bd5 100644 --- a/sysdeps/unix/sysv/linux/bits/posix_opt.h +++ b/sysdeps/unix/sysv/linux/bits/posix_opt.h @@ -1,5 +1,6 @@ /* Define POSIX options for Linux. - Copyright (C) 1996,1997,1999,2000,2002,2003 Free Software Foundation, Inc. + Copyright (C) 1996,1997,1999,2000,2002,2003,2008 + Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -51,8 +52,8 @@ /* Setting of memory protections is supported. */ #define _POSIX_MEMORY_PROTECTION 200112L -/* Only root can change owner of file. */ -#define _POSIX_CHOWN_RESTRICTED 1 +/* Some filesystems allow all users to change file ownership. */ +#define _POSIX_CHOWN_RESTRICTED 0 /* `c_cc' member of 'struct termios' structure can be disabled by using the value _POSIX_VDISABLE. */ diff --git a/sysdeps/unix/sysv/linux/dl-osinfo.h b/sysdeps/unix/sysv/linux/dl-osinfo.h index f0600283b6..582412e300 100644 --- a/sysdeps/unix/sysv/linux/dl-osinfo.h +++ b/sysdeps/unix/sysv/linux/dl-osinfo.h @@ -1,6 +1,5 @@ /* Operating system specific code for generic dynamic loader functions. Linux. - Copyright (C) 2000,2001,2002,2004,2005,2006,2007 - Free Software Foundation, Inc. + Copyright (C) 2000-2002,2004-2007,2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,10 +17,7 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ -#include <string.h> #include <errno.h> -#include <fcntl.h> -#include <sys/utsname.h> #include <kernel-features.h> #include <dl-sysdep.h> #include <stdint.h> @@ -44,97 +40,6 @@ dl_fatal (const char *str) } #endif -static inline int __attribute__ ((always_inline)) -_dl_discover_osversion (void) -{ -#if (defined NEED_DL_SYSINFO || defined NEED_DL_SYSINFO_DSO) && defined SHARED - if (GLRO(dl_sysinfo_map) != NULL) - { - /* If the kernel-supplied DSO contains a note indicating the kernel's - version, we don't need to call uname or parse any strings. */ - - static const struct - { - ElfW(Nhdr) hdr; - char vendor[8]; - } expected_note = { { sizeof "Linux", sizeof (ElfW(Word)), 0 }, "Linux" }; - const ElfW(Phdr) *const phdr = GLRO(dl_sysinfo_map)->l_phdr; - const ElfW(Word) phnum = GLRO(dl_sysinfo_map)->l_phnum; - for (uint_fast16_t i = 0; i < phnum; ++i) - if (phdr[i].p_type == PT_NOTE) - { - const ElfW(Addr) start = (phdr[i].p_vaddr - + GLRO(dl_sysinfo_map)->l_addr); - const ElfW(Nhdr) *note = (const void *) start; - while ((ElfW(Addr)) (note + 1) - start < phdr[i].p_memsz) - { - if (!memcmp (note, &expected_note, sizeof expected_note)) - return *(const ElfW(Word) *) ((const void *) note - + sizeof expected_note); -#define ROUND(len) (((len) + sizeof note->n_type - 1) & -sizeof note->n_type) - note = ((const void *) (note + 1) - + ROUND (note->n_namesz) + ROUND (note->n_descsz)); -#undef ROUND - } - } - } -#endif - - char bufmem[64]; - char *buf = bufmem; - unsigned int version; - int parts; - char *cp; - struct utsname uts; - - /* Try the uname system call. */ - if (__uname (&uts)) - { - /* This was not successful. Now try reading the /proc filesystem. */ - int fd = __open ("/proc/sys/kernel/osrelease", O_RDONLY); - if (fd < 0) - return -1; - ssize_t reslen = __read (fd, bufmem, sizeof (bufmem)); - __close (fd); - if (reslen <= 0) - /* This also didn't work. We give up since we cannot - make sure the library can actually work. */ - return -1; - buf[MIN (reslen, (ssize_t) sizeof (bufmem) - 1)] = '\0'; - } - else - buf = uts.release; - - /* Now convert it into a number. The string consists of at most - three parts. */ - version = 0; - parts = 0; - cp = buf; - while ((*cp >= '0') && (*cp <= '9')) - { - unsigned int here = *cp++ - '0'; - - while ((*cp >= '0') && (*cp <= '9')) - { - here *= 10; - here += *cp++ - '0'; - } - - ++parts; - version <<= 8; - version |= here; - - if (*cp++ != '.' || parts == 3) - /* Another part following? */ - break; - } - - if (parts < 3) - version <<= 8 * (3 - parts); - - return version; -} - #define DL_SYSDEP_OSCHECK(FATAL) \ do { \ /* Test whether the kernel is new enough. This test is only performed \ diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c index 42aec77e82..08ae9aa86d 100644 --- a/sysdeps/unix/sysv/linux/dl-sysdep.c +++ b/sysdeps/unix/sysv/linux/dl-sysdep.c @@ -1,5 +1,5 @@ /* Dynamic linker system dependencies for Linux. - Copyright (C) 1995,1997,2001,2004,2005,2006 Free Software Foundation, Inc. + Copyright (C) 1995,1997,2001,2004,2005,2006, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,11 +20,15 @@ /* Linux needs some special initialization, but otherwise uses the generic dynamic linker system interface code. */ +#include <string.h> +#include <fcntl.h> #include <unistd.h> +#include <sys/utsname.h> #include <ldsodefs.h> #include <kernel-features.h> -#define DL_SYSDEP_INIT frob_brk () +#ifdef SHARED +# define DL_SYSDEP_INIT frob_brk () static inline void frob_brk (void) @@ -53,4 +57,98 @@ frob_brk (void) #endif } -#include <elf/dl-sysdep.c> +# include <elf/dl-sysdep.c> +#endif + + +int +attribute_hidden +_dl_discover_osversion (void) +{ +#if (defined NEED_DL_SYSINFO || defined NEED_DL_SYSINFO_DSO) && defined SHARED + if (GLRO(dl_sysinfo_map) != NULL) + { + /* If the kernel-supplied DSO contains a note indicating the kernel's + version, we don't need to call uname or parse any strings. */ + + static const struct + { + ElfW(Nhdr) hdr; + char vendor[8]; + } expected_note = { { sizeof "Linux", sizeof (ElfW(Word)), 0 }, "Linux" }; + const ElfW(Phdr) *const phdr = GLRO(dl_sysinfo_map)->l_phdr; + const ElfW(Word) phnum = GLRO(dl_sysinfo_map)->l_phnum; + for (uint_fast16_t i = 0; i < phnum; ++i) + if (phdr[i].p_type == PT_NOTE) + { + const ElfW(Addr) start = (phdr[i].p_vaddr + + GLRO(dl_sysinfo_map)->l_addr); + const ElfW(Nhdr) *note = (const void *) start; + while ((ElfW(Addr)) (note + 1) - start < phdr[i].p_memsz) + { + if (!memcmp (note, &expected_note, sizeof expected_note)) + return *(const ElfW(Word) *) ((const void *) note + + sizeof expected_note); +#define ROUND(len) (((len) + sizeof note->n_type - 1) & -sizeof note->n_type) + note = ((const void *) (note + 1) + + ROUND (note->n_namesz) + ROUND (note->n_descsz)); +#undef ROUND + } + } + } +#endif + + char bufmem[64]; + char *buf = bufmem; + unsigned int version; + int parts; + char *cp; + struct utsname uts; + + /* Try the uname system call. */ + if (__uname (&uts)) + { + /* This was not successful. Now try reading the /proc filesystem. */ + int fd = __open ("/proc/sys/kernel/osrelease", O_RDONLY); + if (fd < 0) + return -1; + ssize_t reslen = __read (fd, bufmem, sizeof (bufmem)); + __close (fd); + if (reslen <= 0) + /* This also didn't work. We give up since we cannot + make sure the library can actually work. */ + return -1; + buf[MIN (reslen, (ssize_t) sizeof (bufmem) - 1)] = '\0'; + } + else + buf = uts.release; + + /* Now convert it into a number. The string consists of at most + three parts. */ + version = 0; + parts = 0; + cp = buf; + while ((*cp >= '0') && (*cp <= '9')) + { + unsigned int here = *cp++ - '0'; + + while ((*cp >= '0') && (*cp <= '9')) + { + here *= 10; + here += *cp++ - '0'; + } + + ++parts; + version <<= 8; + version |= here; + + if (*cp++ != '.' || parts == 3) + /* Another part following? */ + break; + } + + if (parts < 3) + version <<= 8 * (3 - parts); + + return version; +} diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.h b/sysdeps/unix/sysv/linux/dl-sysdep.h index becfc8df3f..0371fe87a1 100644 --- a/sysdeps/unix/sysv/linux/dl-sysdep.h +++ b/sysdeps/unix/sysv/linux/dl-sysdep.h @@ -1,5 +1,5 @@ /* System-specific settings for dynamic linker code. Linux version. - Copyright (C) 2005 Free Software Foundation, Inc. + Copyright (C) 2005, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -25,3 +25,12 @@ we aren't making direct use of it. So enable this across the board. */ #define NEED_DL_SYSINFO_DSO 1 + + +/* The _dl_discover_osversion function is so far only needed in sysconf + to check for kernels later than 2.6.23. */ +#if !defined ASSEMBLER && __LINUX_KERNEL_VERSION < 0x020617 +/* Get version of the OS. */ +extern int _dl_discover_osversion (void) attribute_hidden; +# define HAVE_DL_DISCOVER_OSVERSION 1 +#endif diff --git a/sysdeps/unix/sysv/linux/fpathconf.c b/sysdeps/unix/sysv/linux/fpathconf.c index c1cdb1b899..2701c9ec99 100644 --- a/sysdeps/unix/sysv/linux/fpathconf.c +++ b/sysdeps/unix/sysv/linux/fpathconf.c @@ -1,5 +1,5 @@ /* Get file-specific information about descriptor FD. Linux version. - Copyright (C) 1991,1995,1996,1998-2002,2003 Free Software Foundation, Inc. + Copyright (C) 1991,1995,1996,1998-2003,2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -45,6 +45,9 @@ __fpathconf (fd, name) case _PC_2_SYMLINKS: return __statfs_symlinks (__fstatfs (fd, &fsbuf), &fsbuf); + case _PC_CHOWN_RESTRICTED: + return __statfs_chown_restricted (__fstatfs (fd, &fsbuf), &fsbuf); + default: return posix_fpathconf (fd, name); } diff --git a/sysdeps/unix/sysv/linux/pathconf.c b/sysdeps/unix/sysv/linux/pathconf.c index e12a08434a..db03529fe8 100644 --- a/sysdeps/unix/sysv/linux/pathconf.c +++ b/sysdeps/unix/sysv/linux/pathconf.c @@ -1,5 +1,5 @@ /* Get file-specific information about a file. Linux version. - Copyright (C) 1991,1995,1996,1998-2002,2003 Free Software Foundation, Inc. + Copyright (C) 1991,1995,1996,1998-2003,2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -19,8 +19,10 @@ #include <unistd.h> #include <errno.h> + #include "pathconf.h" #include "linux_fsinfo.h" +#include <not-cancel.h> static long int posix_pathconf (const char *file, int name); @@ -46,6 +48,9 @@ __pathconf (const char *file, int name) case _PC_2_SYMLINKS: return __statfs_symlinks (__statfs (file, &fsbuf), &fsbuf); + case _PC_CHOWN_RESTRICTED: + return __statfs_chown_restricted (__statfs (file, &fsbuf), &fsbuf); + default: return posix_pathconf (file, name); } @@ -179,3 +184,44 @@ __statfs_symlinks (int result, const struct statfs *fsbuf) return 1; } } + + +/* Used like: return __statfs_chown_restricted (__statfs (name, &buf), &buf);*/ +long int +__statfs_chown_restricted (int result, const struct statfs *fsbuf) +{ + if (result < 0) + { + if (errno == ENOSYS) + /* Not possible, return the default value. */ + return 1; + + /* Some error occured. */ + return -1; + } + + int fd; + long int retval = 1; + switch (fsbuf->f_type) + { + case XFS_SUPER_MAGIC: + /* Read the value from /proc/sys/fs/xfs/restrict_chown. If we cannot + read it default to assume the restriction is in place. */ + fd = open_not_cancel_2 ("/proc/sys/fs/xfs/restrict_chown", O_RDONLY); + if (fd != -1) + { + char buf[2]; + if (TEMP_FAILURE_RETRY (read_not_cancel (fd, buf, 2)) == 2 + && buf[0] >= '0' && buf[0] <= '1') + retval = buf[0] - '0'; + + close_not_cancel_no_status (fd); + } + break; + + default: + break; + } + + return retval; +} diff --git a/sysdeps/unix/sysv/linux/pathconf.h b/sysdeps/unix/sysv/linux/pathconf.h index 20e23685eb..806adcc5ea 100644 --- a/sysdeps/unix/sysv/linux/pathconf.h +++ b/sysdeps/unix/sysv/linux/pathconf.h @@ -1,5 +1,5 @@ /* Common parts of Linux implementation of pathconf and fpathconf. - Copyright (C) 1991,1995,1996,1998-2002,2003 Free Software Foundation, Inc. + Copyright (C) 1991,1995,1996,1998-2003,2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,13 +22,18 @@ #include <sys/statfs.h> -/* Used like: return statfs_link_max (__statfs (name, &buf), &buf); */ +/* Used like: return __statfs_link_max (__statfs (name, &buf), &buf); */ extern long int __statfs_link_max (int result, const struct statfs *fsbuf); -/* Used like: return statfs_filesize_max (__statfs (name, &buf), &buf); */ +/* Used like: return __statfs_filesize_max (__statfs (name, &buf), &buf); */ extern long int __statfs_filesize_max (int result, const struct statfs *fsbuf); -/* Used like: return statfs_link_max (__statfs (name, &buf), &buf); */ +/* Used like: return __statfs_link_max (__statfs (name, &buf), &buf); */ extern long int __statfs_symlinks (int result, const struct statfs *fsbuf); + + +/* Used like: return __statfs_chown_restricted (__statfs (name, &buf), &buf);*/ +extern long int __statfs_chown_restricted (int result, + const struct statfs *fsbuf); diff --git a/sysdeps/unix/sysv/linux/sysconf.c b/sysdeps/unix/sysv/linux/sysconf.c index f9f6f1bfa5..ab9cddc306 100644 --- a/sysdeps/unix/sysv/linux/sysconf.c +++ b/sysdeps/unix/sysv/linux/sysconf.c @@ -1,5 +1,5 @@ /* Get file-specific information about a file. Linux version. - Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2006, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -23,7 +23,9 @@ #include <sysdep.h> #include <time.h> #include <unistd.h> +#include <sys/resource.h> #include <not-cancel.h> +#include <ldsodefs.h> static long int posix_sysconf (int name); @@ -70,6 +72,22 @@ __sysconf (int name) } #endif + case _SC_ARG_MAX: +#if __LINUX_KERNEL_VERSION < 0x020617 + /* Determine whether this is a kernel 2.6.23 or later. Only + then do we have an argument limit determined by the stack + size. */ + if (GLRO(dl_discover_osversion) () >= 0x020617) +#endif + { + /* Use getrlimit to get the stack limit. */ + struct rlimit rlimit; + if (__getrlimit (RLIMIT_STACK, &rlimit) == 0) + return MAX (ARG_MAX, rlimit.rlim_cur / 4); + } + + return ARG_MAX; + case _SC_NGROUPS_MAX: /* Try to read the information from the /proc/sys/kernel/ngroups_max file. */ diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c index 6403081c90..6a3ea0f1cb 100644 --- a/sysdeps/x86_64/cacheinfo.c +++ b/sysdeps/x86_64/cacheinfo.c @@ -405,13 +405,10 @@ long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2; /* Shared cache size for use in memory and string routines, typically L2 or L3 size. */ long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; -#ifdef NOT_USED_RIGHT_NOW long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024; -#endif /* PREFETCHW support flag for use in memory and string routines. */ int __x86_64_prefetchw attribute_hidden; -#ifdef NOT_USED_RIGHT_NOW /* Instructions preferred for memory and string routines. 0: Regular instructions @@ -421,7 +418,6 @@ int __x86_64_prefetchw attribute_hidden; */ int __x86_64_preferred_memory_instruction attribute_hidden; -#endif static void @@ -464,14 +460,12 @@ init_cacheinfo (void) : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (1)); -#ifdef NOT_USED_RIGHT_NOW - /* Intel prefers SSSE3 instructions for memory/string rountines + /* Intel prefers SSSE3 instructions for memory/string routines if they are avaiable. */ if ((ecx & 0x200)) __x86_64_preferred_memory_instruction = 3; else __x86_64_preferred_memory_instruction = 2; -#endif /* Figure out the number of logical threads that share the highest cache level. */ @@ -577,8 +571,6 @@ init_cacheinfo (void) if (shared > 0) { __x86_64_shared_cache_size_half = shared / 2; -#ifdef NOT_USED_RIGHT_NOW __x86_64_shared_cache_size = shared; -#endif } } diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S index 939240600d..c7bf2318de 100644 --- a/sysdeps/x86_64/memset.S +++ b/sysdeps/x86_64/memset.S @@ -1,8 +1,7 @@ /* memset/bzero -- set memory area to CH/0 Optimized version for x86-64. - Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Andreas Jaeger <aj@suse.de>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,13 +19,9 @@ 02111-1307 USA. */ #include <sysdep.h> -#include "asm-syntax.h" -#include "bp-sym.h" -#include "bp-asm.h" -/* This is somehow experimental and could made dependend on the cache - size. */ -#define LARGE $120000 +#define __STOS_LOWER_BOUNDARY $8192 +#define __STOS_UPPER_BOUNDARY $65536 .text #ifndef NOT_IN_libc @@ -46,89 +41,1291 @@ END (__memset_chk) #endif ENTRY (memset) L(memset_entry): - cmp $0x7,%rdx /* Check for small length. */ - mov %rdi,%rcx /* Save ptr as return value. */ - jbe 7f + cmp $0x1,%rdx + mov %rdi,%rax /* memset returns the dest address. */ + jne L(ck2) + mov %sil,(%rdi) + retq +L(ck2): + mov $0x101010101010101,%r9 + mov %rdx,%r8 + movzbq %sil,%rdx + imul %r9,%rdx +L(now_dw_aligned): + cmp $0x90,%r8 + jg L(ck_mem_ops_method) +L(now_dw_aligned_small): + lea L(setPxQx)(%rip),%r11 + add %r8,%rdi +#ifndef PIC + jmpq *(%r11,%r8,8) +#else + movslq (%r11,%r8,4),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif - /* Populate 8 bit data to full 64-bit. */ - movabs $0x0101010101010101,%r8 - movzbl %sil,%eax - imul %rax,%r8 - test $0x7,%edi /* Check for alignment. */ - je 2f +L(Got0): + retq - .p2align 4 -1: /* Align ptr to 8 byte. */ - mov %sil,(%rcx) - dec %rdx - inc %rcx - test $0x7,%ecx - jne 1b - -2: /* Check for really large regions. */ - mov %rdx,%rax - shr $0x6,%rax - je 4f - cmp LARGE, %rdx - jae 11f + .pushsection .rodata + .balign 16 +#ifndef PIC +L(setPxQx): + .quad L(Got0), L(P1Q0), L(P2Q0), L(P3Q0) + .quad L(P4Q0), L(P5Q0), L(P6Q0), L(P7Q0) + .quad L(P0Q1), L(P1Q1), L(P2Q1), L(P3Q1) + .quad L(P4Q1), L(P5Q1), L(P6Q1), L(P7Q1) + .quad L(P0Q2), L(P1Q2), L(P2Q2), L(P3Q2) + .quad L(P4Q2), L(P5Q2), L(P6Q2), L(P7Q2) + .quad L(P0Q3), L(P1Q3), L(P2Q3), L(P3Q3) + .quad L(P4Q3), L(P5Q3), L(P6Q3), L(P7Q3) + .quad L(P0Q4), L(P1Q4), L(P2Q4), L(P3Q4) + .quad L(P4Q4), L(P5Q4), L(P6Q4), L(P7Q4) + .quad L(P0Q5), L(P1Q5), L(P2Q5), L(P3Q5) + .quad L(P4Q5), L(P5Q5), L(P6Q5), L(P7Q5) + .quad L(P0Q6), L(P1Q6), L(P2Q6), L(P3Q6) + .quad L(P4Q6), L(P5Q6), L(P6Q6), L(P7Q6) + .quad L(P0Q7), L(P1Q7), L(P2Q7), L(P3Q7) + .quad L(P4Q7), L(P5Q7), L(P6Q7), L(P7Q7) + .quad L(P0Q8), L(P1Q8), L(P2Q8), L(P3Q8) + .quad L(P4Q8), L(P5Q8), L(P6Q8), L(P7Q8) + .quad L(P0Q9), L(P1Q9), L(P2Q9), L(P3Q9) + .quad L(P4Q9), L(P5Q9), L(P6Q9), L(P7Q9) + .quad L(P0QA), L(P1QA), L(P2QA), L(P3QA) + .quad L(P4QA), L(P5QA), L(P6QA), L(P7QA) + .quad L(P0QB), L(P1QB), L(P2QB), L(P3QB) + .quad L(P4QB), L(P5QB), L(P6QB), L(P7QB) + .quad L(P0QC), L(P1QC), L(P2QC), L(P3QC) + .quad L(P4QC), L(P5QC), L(P6QC), L(P7QC) + .quad L(P0QD), L(P1QD), L(P2QD), L(P3QD) + .quad L(P4QD), L(P5QD), L(P6QD), L(P7QD) + .quad L(P0QE), L(P1QE), L(P2QE), L(P3QE) + .quad L(P4QE), L(P5QE), L(P6QE), L(P7QE) + .quad L(P0QF), L(P1QF), L(P2QF), L(P3QF) + .quad L(P4QF), L(P5QF), L(P6QF), L(P7QF) + .quad L(P0QG), L(P1QG), L(P2QG), L(P3QG) + .quad L(P4QG), L(P5QG), L(P6QG), L(P7QG) + .quad L(P0QH), L(P1QH), L(P2QH), L(P3QH) + .quad L(P4QH), L(P5QH), L(P6QH), L(P7QH) + .quad L(P0QI) +# ifdef USE_EXTRA_TABLE + .quad L(P1QI), L(P2QI), L(P3QI), L(P4QI) + .quad L(P5QI), L(P6QI), L(P7QI) +# endif +#else +L(setPxQx): + .int L(Got0)-L(setPxQx) + .int L(P1Q0)-L(setPxQx) + .int L(P2Q0)-L(setPxQx) + .int L(P3Q0)-L(setPxQx) + .int L(P4Q0)-L(setPxQx) + .int L(P5Q0)-L(setPxQx) + .int L(P6Q0)-L(setPxQx) + .int L(P7Q0)-L(setPxQx) + + .int L(P0Q1)-L(setPxQx) + .int L(P1Q1)-L(setPxQx) + .int L(P2Q1)-L(setPxQx) + .int L(P3Q1)-L(setPxQx) + .int L(P4Q1)-L(setPxQx) + .int L(P5Q1)-L(setPxQx) + .int L(P6Q1)-L(setPxQx) + .int L(P7Q1)-L(setPxQx) + + .int L(P0Q2)-L(setPxQx) + .int L(P1Q2)-L(setPxQx) + .int L(P2Q2)-L(setPxQx) + .int L(P3Q2)-L(setPxQx) + .int L(P4Q2)-L(setPxQx) + .int L(P5Q2)-L(setPxQx) + .int L(P6Q2)-L(setPxQx) + .int L(P7Q2)-L(setPxQx) + + .int L(P0Q3)-L(setPxQx) + .int L(P1Q3)-L(setPxQx) + .int L(P2Q3)-L(setPxQx) + .int L(P3Q3)-L(setPxQx) + .int L(P4Q3)-L(setPxQx) + .int L(P5Q3)-L(setPxQx) + .int L(P6Q3)-L(setPxQx) + .int L(P7Q3)-L(setPxQx) + + .int L(P0Q4)-L(setPxQx) + .int L(P1Q4)-L(setPxQx) + .int L(P2Q4)-L(setPxQx) + .int L(P3Q4)-L(setPxQx) + .int L(P4Q4)-L(setPxQx) + .int L(P5Q4)-L(setPxQx) + .int L(P6Q4)-L(setPxQx) + .int L(P7Q4)-L(setPxQx) + + .int L(P0Q5)-L(setPxQx) + .int L(P1Q5)-L(setPxQx) + .int L(P2Q5)-L(setPxQx) + .int L(P3Q5)-L(setPxQx) + .int L(P4Q5)-L(setPxQx) + .int L(P5Q5)-L(setPxQx) + .int L(P6Q5)-L(setPxQx) + .int L(P7Q5)-L(setPxQx) + + .int L(P0Q6)-L(setPxQx) + .int L(P1Q6)-L(setPxQx) + .int L(P2Q6)-L(setPxQx) + .int L(P3Q6)-L(setPxQx) + .int L(P4Q6)-L(setPxQx) + .int L(P5Q6)-L(setPxQx) + .int L(P6Q6)-L(setPxQx) + .int L(P7Q6)-L(setPxQx) + + .int L(P0Q7)-L(setPxQx) + .int L(P1Q7)-L(setPxQx) + .int L(P2Q7)-L(setPxQx) + .int L(P3Q7)-L(setPxQx) + .int L(P4Q7)-L(setPxQx) + .int L(P5Q7)-L(setPxQx) + .int L(P6Q7)-L(setPxQx) + .int L(P7Q7)-L(setPxQx) + + .int L(P0Q8)-L(setPxQx) + .int L(P1Q8)-L(setPxQx) + .int L(P2Q8)-L(setPxQx) + .int L(P3Q8)-L(setPxQx) + .int L(P4Q8)-L(setPxQx) + .int L(P5Q8)-L(setPxQx) + .int L(P6Q8)-L(setPxQx) + .int L(P7Q8)-L(setPxQx) + + .int L(P0Q9)-L(setPxQx) + .int L(P1Q9)-L(setPxQx) + .int L(P2Q9)-L(setPxQx) + .int L(P3Q9)-L(setPxQx) + .int L(P4Q9)-L(setPxQx) + .int L(P5Q9)-L(setPxQx) + .int L(P6Q9)-L(setPxQx) + .int L(P7Q9)-L(setPxQx) + + .int L(P0QA)-L(setPxQx) + .int L(P1QA)-L(setPxQx) + .int L(P2QA)-L(setPxQx) + .int L(P3QA)-L(setPxQx) + .int L(P4QA)-L(setPxQx) + .int L(P5QA)-L(setPxQx) + .int L(P6QA)-L(setPxQx) + .int L(P7QA)-L(setPxQx) + + .int L(P0QB)-L(setPxQx) + .int L(P1QB)-L(setPxQx) + .int L(P2QB)-L(setPxQx) + .int L(P3QB)-L(setPxQx) + .int L(P4QB)-L(setPxQx) + .int L(P5QB)-L(setPxQx) + .int L(P6QB)-L(setPxQx) + .int L(P7QB)-L(setPxQx) + + .int L(P0QC)-L(setPxQx) + .int L(P1QC)-L(setPxQx) + .int L(P2QC)-L(setPxQx) + .int L(P3QC)-L(setPxQx) + .int L(P4QC)-L(setPxQx) + .int L(P5QC)-L(setPxQx) + .int L(P6QC)-L(setPxQx) + .int L(P7QC)-L(setPxQx) + + .int L(P0QD)-L(setPxQx) + .int L(P1QD)-L(setPxQx) + .int L(P2QD)-L(setPxQx) + .int L(P3QD)-L(setPxQx) + .int L(P4QD)-L(setPxQx) + .int L(P5QD)-L(setPxQx) + .int L(P6QD)-L(setPxQx) + .int L(P7QD)-L(setPxQx) + + .int L(P0QE)-L(setPxQx) + .int L(P1QE)-L(setPxQx) + .int L(P2QE)-L(setPxQx) + .int L(P3QE)-L(setPxQx) + .int L(P4QE)-L(setPxQx) + .int L(P5QE)-L(setPxQx) + .int L(P6QE)-L(setPxQx) + .int L(P7QE)-L(setPxQx) + + .int L(P0QF)-L(setPxQx) + .int L(P1QF)-L(setPxQx) + .int L(P2QF)-L(setPxQx) + .int L(P3QF)-L(setPxQx) + .int L(P4QF)-L(setPxQx) + .int L(P5QF)-L(setPxQx) + .int L(P6QF)-L(setPxQx) + .int L(P7QF)-L(setPxQx) + + .int L(P0QG)-L(setPxQx) + .int L(P1QG)-L(setPxQx) + .int L(P2QG)-L(setPxQx) + .int L(P3QG)-L(setPxQx) + .int L(P4QG)-L(setPxQx) + .int L(P5QG)-L(setPxQx) + .int L(P6QG)-L(setPxQx) + .int L(P7QG)-L(setPxQx) + + .int L(P0QH)-L(setPxQx) + .int L(P1QH)-L(setPxQx) + .int L(P2QH)-L(setPxQx) + .int L(P3QH)-L(setPxQx) + .int L(P4QH)-L(setPxQx) + .int L(P5QH)-L(setPxQx) + .int L(P6QH)-L(setPxQx) + .int L(P7QH)-L(setPxQx) + + .int L(P0QI)-L(setPxQx) +# ifdef USE_EXTRA_TABLE + .int L(P1QI)-L(setPxQx) + .int L(P2QI)-L(setPxQx) + .int L(P3QI)-L(setPxQx) + .int L(P4QI)-L(setPxQx) + .int L(P5QI)-L(setPxQx) + .int L(P6QI)-L(setPxQx) + .int L(P7QI)-L(setPxQx) +# endif +#endif + .popsection + + .balign 16 +#ifdef USE_EXTRA_TABLE +L(P1QI): mov %rdx,-0x91(%rdi) +#endif +L(P1QH): mov %rdx,-0x89(%rdi) +L(P1QG): mov %rdx,-0x81(%rdi) +# .balign 16 +L(P1QF): mov %rdx,-0x79(%rdi) +L(P1QE): mov %rdx,-0x71(%rdi) +L(P1QD): mov %rdx,-0x69(%rdi) +L(P1QC): mov %rdx,-0x61(%rdi) +L(P1QB): mov %rdx,-0x59(%rdi) +L(P1QA): mov %rdx,-0x51(%rdi) +L(P1Q9): mov %rdx,-0x49(%rdi) +L(P1Q8): mov %rdx,-0x41(%rdi) +L(P1Q7): mov %rdx,-0x39(%rdi) +L(P1Q6): mov %rdx,-0x31(%rdi) +L(P1Q5): mov %rdx,-0x29(%rdi) +L(P1Q4): mov %rdx,-0x21(%rdi) +L(P1Q3): mov %rdx,-0x19(%rdi) +L(P1Q2): mov %rdx,-0x11(%rdi) +L(P1Q1): mov %rdx,-0x9(%rdi) +L(P1Q0): mov %dl,-0x1(%rdi) + retq + + .balign 16 +L(P0QI): mov %rdx,-0x90(%rdi) +L(P0QH): mov %rdx,-0x88(%rdi) +# .balign 16 +L(P0QG): mov %rdx,-0x80(%rdi) +L(P0QF): mov %rdx,-0x78(%rdi) +L(P0QE): mov %rdx,-0x70(%rdi) +L(P0QD): mov %rdx,-0x68(%rdi) +L(P0QC): mov %rdx,-0x60(%rdi) +L(P0QB): mov %rdx,-0x58(%rdi) +L(P0QA): mov %rdx,-0x50(%rdi) +L(P0Q9): mov %rdx,-0x48(%rdi) +L(P0Q8): mov %rdx,-0x40(%rdi) +L(P0Q7): mov %rdx,-0x38(%rdi) +L(P0Q6): mov %rdx,-0x30(%rdi) +L(P0Q5): mov %rdx,-0x28(%rdi) +L(P0Q4): mov %rdx,-0x20(%rdi) +L(P0Q3): mov %rdx,-0x18(%rdi) +L(P0Q2): mov %rdx,-0x10(%rdi) +L(P0Q1): mov %rdx,-0x8(%rdi) +L(P0Q0): retq + + + .balign 16 +#ifdef USE_EXTRA_TABLE +L(P2QI): mov %rdx,-0x92(%rdi) +#endif +L(P2QH): mov %rdx,-0x8a(%rdi) +L(P2QG): mov %rdx,-0x82(%rdi) +# .balign 16 +L(P2QF): mov %rdx,-0x7a(%rdi) +L(P2QE): mov %rdx,-0x72(%rdi) +L(P2QD): mov %rdx,-0x6a(%rdi) +L(P2QC): mov %rdx,-0x62(%rdi) +L(P2QB): mov %rdx,-0x5a(%rdi) +L(P2QA): mov %rdx,-0x52(%rdi) +L(P2Q9): mov %rdx,-0x4a(%rdi) +L(P2Q8): mov %rdx,-0x42(%rdi) +L(P2Q7): mov %rdx,-0x3a(%rdi) +L(P2Q6): mov %rdx,-0x32(%rdi) +L(P2Q5): mov %rdx,-0x2a(%rdi) +L(P2Q4): mov %rdx,-0x22(%rdi) +L(P2Q3): mov %rdx,-0x1a(%rdi) +L(P2Q2): mov %rdx,-0x12(%rdi) +L(P2Q1): mov %rdx,-0xa(%rdi) +L(P2Q0): mov %dx,-0x2(%rdi) + retq + + .balign 16 +#ifdef USE_EXTRA_TABLE +L(P3QI): mov %rdx,-0x93(%rdi) +#endif +L(P3QH): mov %rdx,-0x8b(%rdi) +L(P3QG): mov %rdx,-0x83(%rdi) +# .balign 16 +L(P3QF): mov %rdx,-0x7b(%rdi) +L(P3QE): mov %rdx,-0x73(%rdi) +L(P3QD): mov %rdx,-0x6b(%rdi) +L(P3QC): mov %rdx,-0x63(%rdi) +L(P3QB): mov %rdx,-0x5b(%rdi) +L(P3QA): mov %rdx,-0x53(%rdi) +L(P3Q9): mov %rdx,-0x4b(%rdi) +L(P3Q8): mov %rdx,-0x43(%rdi) +L(P3Q7): mov %rdx,-0x3b(%rdi) +L(P3Q6): mov %rdx,-0x33(%rdi) +L(P3Q5): mov %rdx,-0x2b(%rdi) +L(P3Q4): mov %rdx,-0x23(%rdi) +L(P3Q3): mov %rdx,-0x1b(%rdi) +L(P3Q2): mov %rdx,-0x13(%rdi) +L(P3Q1): mov %rdx,-0xb(%rdi) +L(P3Q0): mov %dx,-0x3(%rdi) + mov %dl,-0x1(%rdi) + retq + + .balign 16 +#ifdef USE_EXTRA_TABLE +L(P4QI): mov %rdx,-0x94(%rdi) +#endif +L(P4QH): mov %rdx,-0x8c(%rdi) +L(P4QG): mov %rdx,-0x84(%rdi) +# .balign 16 +L(P4QF): mov %rdx,-0x7c(%rdi) +L(P4QE): mov %rdx,-0x74(%rdi) +L(P4QD): mov %rdx,-0x6c(%rdi) +L(P4QC): mov %rdx,-0x64(%rdi) +L(P4QB): mov %rdx,-0x5c(%rdi) +L(P4QA): mov %rdx,-0x54(%rdi) +L(P4Q9): mov %rdx,-0x4c(%rdi) +L(P4Q8): mov %rdx,-0x44(%rdi) +L(P4Q7): mov %rdx,-0x3c(%rdi) +L(P4Q6): mov %rdx,-0x34(%rdi) +L(P4Q5): mov %rdx,-0x2c(%rdi) +L(P4Q4): mov %rdx,-0x24(%rdi) +L(P4Q3): mov %rdx,-0x1c(%rdi) +L(P4Q2): mov %rdx,-0x14(%rdi) +L(P4Q1): mov %rdx,-0xc(%rdi) +L(P4Q0): mov %edx,-0x4(%rdi) + retq + + .balign 16 +#if defined(USE_EXTRA_TABLE) +L(P5QI): mov %rdx,-0x95(%rdi) +#endif +L(P5QH): mov %rdx,-0x8d(%rdi) +L(P5QG): mov %rdx,-0x85(%rdi) +# .balign 16 +L(P5QF): mov %rdx,-0x7d(%rdi) +L(P5QE): mov %rdx,-0x75(%rdi) +L(P5QD): mov %rdx,-0x6d(%rdi) +L(P5QC): mov %rdx,-0x65(%rdi) +L(P5QB): mov %rdx,-0x5d(%rdi) +L(P5QA): mov %rdx,-0x55(%rdi) +L(P5Q9): mov %rdx,-0x4d(%rdi) +L(P5Q8): mov %rdx,-0x45(%rdi) +L(P5Q7): mov %rdx,-0x3d(%rdi) +L(P5Q6): mov %rdx,-0x35(%rdi) +L(P5Q5): mov %rdx,-0x2d(%rdi) +L(P5Q4): mov %rdx,-0x25(%rdi) +L(P5Q3): mov %rdx,-0x1d(%rdi) +L(P5Q2): mov %rdx,-0x15(%rdi) +L(P5Q1): mov %rdx,-0xd(%rdi) +L(P5Q0): mov %edx,-0x5(%rdi) + mov %dl,-0x1(%rdi) + retq + + .balign 16 +#ifdef USE_EXTRA_TABLE +L(P6QI): mov %rdx,-0x96(%rdi) +#endif +L(P6QH): mov %rdx,-0x8e(%rdi) +L(P6QG): mov %rdx,-0x86(%rdi) +# .balign 16 +L(P6QF): mov %rdx,-0x7e(%rdi) +L(P6QE): mov %rdx,-0x76(%rdi) +L(P6QD): mov %rdx,-0x6e(%rdi) +L(P6QC): mov %rdx,-0x66(%rdi) +L(P6QB): mov %rdx,-0x5e(%rdi) +L(P6QA): mov %rdx,-0x56(%rdi) +L(P6Q9): mov %rdx,-0x4e(%rdi) +L(P6Q8): mov %rdx,-0x46(%rdi) +L(P6Q7): mov %rdx,-0x3e(%rdi) +L(P6Q6): mov %rdx,-0x36(%rdi) +L(P6Q5): mov %rdx,-0x2e(%rdi) +L(P6Q4): mov %rdx,-0x26(%rdi) +L(P6Q3): mov %rdx,-0x1e(%rdi) +L(P6Q2): mov %rdx,-0x16(%rdi) +L(P6Q1): mov %rdx,-0xe(%rdi) +L(P6Q0): mov %edx,-0x6(%rdi) + mov %dx,-0x2(%rdi) + retq + + .balign 16 +#ifdef USE_EXTRA_TABLE +L(P7QI): mov %rdx,-0x97(%rdi) +#endif +L(P7QH): mov %rdx,-0x8f(%rdi) +L(P7QG): mov %rdx,-0x87(%rdi) +# .balign 16 +L(P7QF): mov %rdx,-0x7f(%rdi) +L(P7QE): mov %rdx,-0x77(%rdi) +L(P7QD): mov %rdx,-0x6f(%rdi) +L(P7QC): mov %rdx,-0x67(%rdi) +L(P7QB): mov %rdx,-0x5f(%rdi) +L(P7QA): mov %rdx,-0x57(%rdi) +L(P7Q9): mov %rdx,-0x4f(%rdi) +L(P7Q8): mov %rdx,-0x47(%rdi) +L(P7Q7): mov %rdx,-0x3f(%rdi) +L(P7Q6): mov %rdx,-0x37(%rdi) +L(P7Q5): mov %rdx,-0x2f(%rdi) +L(P7Q4): mov %rdx,-0x27(%rdi) +L(P7Q3): mov %rdx,-0x1f(%rdi) +L(P7Q2): mov %rdx,-0x17(%rdi) +L(P7Q1): mov %rdx,-0xf(%rdi) +L(P7Q0): mov %edx,-0x7(%rdi) + mov %dx,-0x3(%rdi) + mov %dl,-0x1(%rdi) + retq + + .balign 16 +L(ck_mem_ops_method): + +# align to 16 byte boundary first + #test $0xf,%rdi + #jz L(aligned_now) + lea L(AliPxQx)(%rip),%r11 + mov $0x10,%r10 + mov %rdi,%r9 + and $0xf,%r9 + sub %r9,%r10 + and $0xf,%r10 + add %r10,%rdi + sub %r10,%r8 +#ifndef PIC + jmpq *(%r11,%r10,8) +#else + movslq (%r11,%r10,4),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif + + .pushsection .rodata + .balign 16 +#ifndef PIC +L(AliPxQx): + .quad L(aligned_now), L(A1Q0), L(A2Q0), L(A3Q0) + .quad L(A4Q0), L(A5Q0), L(A6Q0), L(A7Q0) + .quad L(A0Q1), L(A1Q1), L(A2Q1), L(A3Q1) + .quad L(A4Q1), L(A5Q1), L(A6Q1), L(A7Q1) +#else +L(AliPxQx): + .int L(aligned_now)-L(AliPxQx) + .int L(A1Q0)-L(AliPxQx) + .int L(A2Q0)-L(AliPxQx) + .int L(A3Q0)-L(AliPxQx) + .int L(A4Q0)-L(AliPxQx) + .int L(A5Q0)-L(AliPxQx) + .int L(A6Q0)-L(AliPxQx) + .int L(A7Q0)-L(AliPxQx) + + .int L(A0Q1)-L(AliPxQx) + .int L(A1Q1)-L(AliPxQx) + .int L(A2Q1)-L(AliPxQx) + .int L(A3Q1)-L(AliPxQx) + .int L(A4Q1)-L(AliPxQx) + .int L(A5Q1)-L(AliPxQx) + .int L(A6Q1)-L(AliPxQx) + .int L(A7Q1)-L(AliPxQx) +#endif + .popsection + + .balign 16 +L(A5Q1): mov %dl,-0xd(%rdi) +L(A4Q1): mov %edx,-0xc(%rdi) +L(A0Q1): mov %rdx,-0x8(%rdi) +L(A0Q0): jmp L(aligned_now) + + .balign 16 +L(A1Q1): mov %dl,-0x9(%rdi) + mov %rdx,-0x8(%rdi) + jmp L(aligned_now) + + .balign 16 +L(A1Q0): mov %dl,-0x1(%rdi) + jmp L(aligned_now) + + .balign 16 +L(A3Q1): mov %dl,-0xb(%rdi) +L(A2Q1): mov %dx,-0xa(%rdi) + mov %rdx,-0x8(%rdi) + jmp L(aligned_now) + + .balign 16 +L(A3Q0): mov %dl,-0x3(%rdi) +L(A2Q0): mov %dx,-0x2(%rdi) + jmp L(aligned_now) + + .balign 16 +L(A5Q0): mov %dl,-0x5(%rdi) +L(A4Q0): mov %edx,-0x4(%rdi) + jmp L(aligned_now) + + .balign 16 +L(A7Q1): mov %dl,-0xf(%rdi) +L(A6Q1): mov %dx,-0xe(%rdi) + mov %edx,-0xc(%rdi) + mov %rdx,-0x8(%rdi) + jmp L(aligned_now) + + .balign 16 +L(A7Q0): mov %dl,-0x7(%rdi) +L(A6Q0): mov %dx,-0x6(%rdi) + mov %edx,-0x4(%rdi) + jmp L(aligned_now) + + .balign 16 +L(aligned_now): + + cmpl $0x1,__x86_64_preferred_memory_instruction(%rip) + jg L(SSE_pre) + +L(8byte_move_try): + cmpq __STOS_LOWER_BOUNDARY,%r8 + jae L(8byte_stos_try) + + .balign 16 +L(8byte_move): + movq %r8,%rcx + shrq $7,%rcx + jz L(8byte_move_skip) .p2align 4 -3: /* Copy 64 bytes. */ - mov %r8,(%rcx) - mov %r8,0x8(%rcx) - mov %r8,0x10(%rcx) - mov %r8,0x18(%rcx) - mov %r8,0x20(%rcx) - mov %r8,0x28(%rcx) - mov %r8,0x30(%rcx) - mov %r8,0x38(%rcx) - add $0x40,%rcx - dec %rax - jne 3b - -4: /* Copy final bytes. */ - and $0x3f,%edx - mov %rdx,%rax - shr $0x3,%rax - je 6f - -5: /* First in chunks of 8 bytes. */ - mov %r8,(%rcx) - add $0x8,%rcx - dec %rax - jne 5b -6: - and $0x7,%edx -7: - test %rdx,%rdx - je 9f -8: /* And finally as bytes (up to 7). */ - mov %sil,(%rcx) - inc %rcx - dec %rdx - jne 8b -9: - /* Load result (only if used as memset). */ - mov %rdi,%rax /* start address of destination is result */ + +L(8byte_move_loop): + decq %rcx + + movq %rdx, (%rdi) + movq %rdx, 8 (%rdi) + movq %rdx, 16 (%rdi) + movq %rdx, 24 (%rdi) + movq %rdx, 32 (%rdi) + movq %rdx, 40 (%rdi) + movq %rdx, 48 (%rdi) + movq %rdx, 56 (%rdi) + movq %rdx, 64 (%rdi) + movq %rdx, 72 (%rdi) + movq %rdx, 80 (%rdi) + movq %rdx, 88 (%rdi) + movq %rdx, 96 (%rdi) + movq %rdx, 104 (%rdi) + movq %rdx, 112 (%rdi) + movq %rdx, 120 (%rdi) + + leaq 128 (%rdi),%rdi + + jnz L(8byte_move_loop) + +L(8byte_move_skip): + andl $127,%r8d + lea (%rdi,%r8,1),%rdi + lea L(setPxQx)(%rip),%r11 + +#ifndef PIC + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC +#else + movslq (%r11,%r8,4),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif + + .balign 16 +L(8byte_stos_try): + mov __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size + cmpq %r8,%r9 // calculate the lesser of remaining + cmovaq %r8,%r9 // bytes and largest cache size + jbe L(8byte_stos) + +L(8byte_move_reuse_try): + cmp __STOS_UPPER_BOUNDARY,%r8 + jae L(8byte_move) + + .balign 16 +L(8byte_stos): + movq %r9,%rcx + andq $-8,%r9 + + shrq $3,%rcx + jz L(8byte_stos_skip) + + xchgq %rax,%rdx + + rep + stosq + + xchgq %rax,%rdx + +L(8byte_stos_skip): + subq %r9,%r8 + ja L(8byte_nt_move) + + andl $7,%r8d + lea (%rdi,%r8,1),%rdi + lea L(setPxQx)(%rip),%r11 +#ifndef PIC + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC +#else + movslq (%r11,%r8,4),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif + + .balign 16 +L(8byte_nt_move): + movq %r8,%rcx + shrq $7,%rcx + jz L(8byte_nt_move_skip) + + .balign 16 +L(8byte_nt_move_loop): + decq %rcx + + movntiq %rdx, (%rdi) + movntiq %rdx, 8 (%rdi) + movntiq %rdx, 16 (%rdi) + movntiq %rdx, 24 (%rdi) + movntiq %rdx, 32 (%rdi) + movntiq %rdx, 40 (%rdi) + movntiq %rdx, 48 (%rdi) + movntiq %rdx, 56 (%rdi) + movntiq %rdx, 64 (%rdi) + movntiq %rdx, 72 (%rdi) + movntiq %rdx, 80 (%rdi) + movntiq %rdx, 88 (%rdi) + movntiq %rdx, 96 (%rdi) + movntiq %rdx, 104 (%rdi) + movntiq %rdx, 112 (%rdi) + movntiq %rdx, 120 (%rdi) + + leaq 128 (%rdi),%rdi + + jnz L(8byte_nt_move_loop) + + sfence + +L(8byte_nt_move_skip): + andl $127,%r8d + + lea (%rdi,%r8,1),%rdi + lea L(setPxQx)(%rip),%r11 +#ifndef PIC + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC +#else + movslq (%r11,%r8,4),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif + +L(SSE_pre): + # fill RegXMM0 with the pattern + movd %rdx,%xmm0 + punpcklqdq %xmm0,%xmm0 + + lea L(SSExDx)(%rip),%r9 # for later after the alignment + cmp $0xb0,%r8 # 176 + jge L(byte32sse2_pre) + + add %r8,%rdi +#ifndef PIC + jmpq *(%r9,%r8,8) +#else + movslq (%r9,%r8,4),%rcx + lea (%rcx,%r9,1),%r9 + jmpq *%r9 +#endif + +L(SSE0QB): movdqa %xmm0,-0xb0(%rdi) +L(SSE0QA): movdqa %xmm0,-0xa0(%rdi) +L(SSE0Q9): movdqa %xmm0,-0x90(%rdi) +L(SSE0Q8): movdqa %xmm0,-0x80(%rdi) +L(SSE0Q7): movdqa %xmm0,-0x70(%rdi) +L(SSE0Q6): movdqa %xmm0,-0x60(%rdi) +L(SSE0Q5): movdqa %xmm0,-0x50(%rdi) +L(SSE0Q4): movdqa %xmm0,-0x40(%rdi) +L(SSE0Q3): movdqa %xmm0,-0x30(%rdi) +L(SSE0Q2): movdqa %xmm0,-0x20(%rdi) +L(SSE0Q1): movdqa %xmm0,-0x10(%rdi) +L(SSE0Q0): retq + +L(SSE1QB): movdqa %xmm0,-0xb1(%rdi) +L(SSE1QA): movdqa %xmm0,-0xa1(%rdi) +L(SSE1Q9): movdqa %xmm0,-0x91(%rdi) +L(SSE1Q8): movdqa %xmm0,-0x81(%rdi) +L(SSE1Q7): movdqa %xmm0,-0x71(%rdi) +L(SSE1Q6): movdqa %xmm0,-0x61(%rdi) +L(SSE1Q5): movdqa %xmm0,-0x51(%rdi) +L(SSE1Q4): movdqa %xmm0,-0x41(%rdi) +L(SSE1Q3): movdqa %xmm0,-0x31(%rdi) +L(SSE1Q2): movdqa %xmm0,-0x21(%rdi) +L(SSE1Q1): movdqa %xmm0,-0x11(%rdi) +L(SSE1Q0): mov %dl,-0x1(%rdi) retq - .p2align 4 -11: /* Copy 64 bytes without polluting the cache. */ - /* We could use movntdq %xmm0,(%rcx) here to further - speed up for large cases but let's not use XMM registers. */ - movnti %r8,(%rcx) - movnti %r8,0x8(%rcx) - movnti %r8,0x10(%rcx) - movnti %r8,0x18(%rcx) - movnti %r8,0x20(%rcx) - movnti %r8,0x28(%rcx) - movnti %r8,0x30(%rcx) - movnti %r8,0x38(%rcx) - add $0x40,%rcx - dec %rax - jne 11b +L(SSE2QB): movdqa %xmm0,-0xb2(%rdi) +L(SSE2QA): movdqa %xmm0,-0xa2(%rdi) +L(SSE2Q9): movdqa %xmm0,-0x92(%rdi) +L(SSE2Q8): movdqa %xmm0,-0x82(%rdi) +L(SSE2Q7): movdqa %xmm0,-0x72(%rdi) +L(SSE2Q6): movdqa %xmm0,-0x62(%rdi) +L(SSE2Q5): movdqa %xmm0,-0x52(%rdi) +L(SSE2Q4): movdqa %xmm0,-0x42(%rdi) +L(SSE2Q3): movdqa %xmm0,-0x32(%rdi) +L(SSE2Q2): movdqa %xmm0,-0x22(%rdi) +L(SSE2Q1): movdqa %xmm0,-0x12(%rdi) +L(SSE2Q0): mov %dx,-0x2(%rdi) + retq + +L(SSE3QB): movdqa %xmm0,-0xb3(%rdi) +L(SSE3QA): movdqa %xmm0,-0xa3(%rdi) +L(SSE3Q9): movdqa %xmm0,-0x93(%rdi) +L(SSE3Q8): movdqa %xmm0,-0x83(%rdi) +L(SSE3Q7): movdqa %xmm0,-0x73(%rdi) +L(SSE3Q6): movdqa %xmm0,-0x63(%rdi) +L(SSE3Q5): movdqa %xmm0,-0x53(%rdi) +L(SSE3Q4): movdqa %xmm0,-0x43(%rdi) +L(SSE3Q3): movdqa %xmm0,-0x33(%rdi) +L(SSE3Q2): movdqa %xmm0,-0x23(%rdi) +L(SSE3Q1): movdqa %xmm0,-0x13(%rdi) +L(SSE3Q0): mov %dx,-0x3(%rdi) + mov %dl,-0x1(%rdi) + retq + +L(SSE4QB): movdqa %xmm0,-0xb4(%rdi) +L(SSE4QA): movdqa %xmm0,-0xa4(%rdi) +L(SSE4Q9): movdqa %xmm0,-0x94(%rdi) +L(SSE4Q8): movdqa %xmm0,-0x84(%rdi) +L(SSE4Q7): movdqa %xmm0,-0x74(%rdi) +L(SSE4Q6): movdqa %xmm0,-0x64(%rdi) +L(SSE4Q5): movdqa %xmm0,-0x54(%rdi) +L(SSE4Q4): movdqa %xmm0,-0x44(%rdi) +L(SSE4Q3): movdqa %xmm0,-0x34(%rdi) +L(SSE4Q2): movdqa %xmm0,-0x24(%rdi) +L(SSE4Q1): movdqa %xmm0,-0x14(%rdi) +L(SSE4Q0): mov %edx,-0x4(%rdi) + retq + +L(SSE5QB): movdqa %xmm0,-0xb5(%rdi) +L(SSE5QA): movdqa %xmm0,-0xa5(%rdi) +L(SSE5Q9): movdqa %xmm0,-0x95(%rdi) +L(SSE5Q8): movdqa %xmm0,-0x85(%rdi) +L(SSE5Q7): movdqa %xmm0,-0x75(%rdi) +L(SSE5Q6): movdqa %xmm0,-0x65(%rdi) +L(SSE5Q5): movdqa %xmm0,-0x55(%rdi) +L(SSE5Q4): movdqa %xmm0,-0x45(%rdi) +L(SSE5Q3): movdqa %xmm0,-0x35(%rdi) +L(SSE5Q2): movdqa %xmm0,-0x25(%rdi) +L(SSE5Q1): movdqa %xmm0,-0x15(%rdi) +L(SSE5Q0): mov %edx,-0x5(%rdi) + mov %dl,-0x1(%rdi) + retq + + +L(SSE6QB): movdqa %xmm0,-0xb6(%rdi) +L(SSE6QA): movdqa %xmm0,-0xa6(%rdi) +L(SSE6Q9): movdqa %xmm0,-0x96(%rdi) +L(SSE6Q8): movdqa %xmm0,-0x86(%rdi) +L(SSE6Q7): movdqa %xmm0,-0x76(%rdi) +L(SSE6Q6): movdqa %xmm0,-0x66(%rdi) +L(SSE6Q5): movdqa %xmm0,-0x56(%rdi) +L(SSE6Q4): movdqa %xmm0,-0x46(%rdi) +L(SSE6Q3): movdqa %xmm0,-0x36(%rdi) +L(SSE6Q2): movdqa %xmm0,-0x26(%rdi) +L(SSE6Q1): movdqa %xmm0,-0x16(%rdi) +L(SSE6Q0): mov %edx,-0x6(%rdi) + mov %dx,-0x2(%rdi) + retq + +L(SSE7QB): movdqa %xmm0,-0xb7(%rdi) +L(SSE7QA): movdqa %xmm0,-0xa7(%rdi) +L(SSE7Q9): movdqa %xmm0,-0x97(%rdi) +L(SSE7Q8): movdqa %xmm0,-0x87(%rdi) +L(SSE7Q7): movdqa %xmm0,-0x77(%rdi) +L(SSE7Q6): movdqa %xmm0,-0x67(%rdi) +L(SSE7Q5): movdqa %xmm0,-0x57(%rdi) +L(SSE7Q4): movdqa %xmm0,-0x47(%rdi) +L(SSE7Q3): movdqa %xmm0,-0x37(%rdi) +L(SSE7Q2): movdqa %xmm0,-0x27(%rdi) +L(SSE7Q1): movdqa %xmm0,-0x17(%rdi) +L(SSE7Q0): mov %edx,-0x7(%rdi) + mov %dx,-0x3(%rdi) + mov %dl,-0x1(%rdi) + retq + +L(SSE8QB): movdqa %xmm0,-0xb8(%rdi) +L(SSE8QA): movdqa %xmm0,-0xa8(%rdi) +L(SSE8Q9): movdqa %xmm0,-0x98(%rdi) +L(SSE8Q8): movdqa %xmm0,-0x88(%rdi) +L(SSE8Q7): movdqa %xmm0,-0x78(%rdi) +L(SSE8Q6): movdqa %xmm0,-0x68(%rdi) +L(SSE8Q5): movdqa %xmm0,-0x58(%rdi) +L(SSE8Q4): movdqa %xmm0,-0x48(%rdi) +L(SSE8Q3): movdqa %xmm0,-0x38(%rdi) +L(SSE8Q2): movdqa %xmm0,-0x28(%rdi) +L(SSE8Q1): movdqa %xmm0,-0x18(%rdi) +L(SSE8Q0): mov %rdx,-0x8(%rdi) + retq + +L(SSE9QB): movdqa %xmm0,-0xb9(%rdi) +L(SSE9QA): movdqa %xmm0,-0xa9(%rdi) +L(SSE9Q9): movdqa %xmm0,-0x99(%rdi) +L(SSE9Q8): movdqa %xmm0,-0x89(%rdi) +L(SSE9Q7): movdqa %xmm0,-0x79(%rdi) +L(SSE9Q6): movdqa %xmm0,-0x69(%rdi) +L(SSE9Q5): movdqa %xmm0,-0x59(%rdi) +L(SSE9Q4): movdqa %xmm0,-0x49(%rdi) +L(SSE9Q3): movdqa %xmm0,-0x39(%rdi) +L(SSE9Q2): movdqa %xmm0,-0x29(%rdi) +L(SSE9Q1): movdqa %xmm0,-0x19(%rdi) +L(SSE9Q0): mov %rdx,-0x9(%rdi) + mov %dl,-0x1(%rdi) + retq + +L(SSE10QB): movdqa %xmm0,-0xba(%rdi) +L(SSE10QA): movdqa %xmm0,-0xaa(%rdi) +L(SSE10Q9): movdqa %xmm0,-0x9a(%rdi) +L(SSE10Q8): movdqa %xmm0,-0x8a(%rdi) +L(SSE10Q7): movdqa %xmm0,-0x7a(%rdi) +L(SSE10Q6): movdqa %xmm0,-0x6a(%rdi) +L(SSE10Q5): movdqa %xmm0,-0x5a(%rdi) +L(SSE10Q4): movdqa %xmm0,-0x4a(%rdi) +L(SSE10Q3): movdqa %xmm0,-0x3a(%rdi) +L(SSE10Q2): movdqa %xmm0,-0x2a(%rdi) +L(SSE10Q1): movdqa %xmm0,-0x1a(%rdi) +L(SSE10Q0): mov %rdx,-0xa(%rdi) + mov %dx,-0x2(%rdi) + retq + +L(SSE11QB): movdqa %xmm0,-0xbb(%rdi) +L(SSE11QA): movdqa %xmm0,-0xab(%rdi) +L(SSE11Q9): movdqa %xmm0,-0x9b(%rdi) +L(SSE11Q8): movdqa %xmm0,-0x8b(%rdi) +L(SSE11Q7): movdqa %xmm0,-0x7b(%rdi) +L(SSE11Q6): movdqa %xmm0,-0x6b(%rdi) +L(SSE11Q5): movdqa %xmm0,-0x5b(%rdi) +L(SSE11Q4): movdqa %xmm0,-0x4b(%rdi) +L(SSE11Q3): movdqa %xmm0,-0x3b(%rdi) +L(SSE11Q2): movdqa %xmm0,-0x2b(%rdi) +L(SSE11Q1): movdqa %xmm0,-0x1b(%rdi) +L(SSE11Q0): mov %rdx,-0xb(%rdi) + mov %dx,-0x3(%rdi) + mov %dl,-0x1(%rdi) + retq + +L(SSE12QB): movdqa %xmm0,-0xbc(%rdi) +L(SSE12QA): movdqa %xmm0,-0xac(%rdi) +L(SSE12Q9): movdqa %xmm0,-0x9c(%rdi) +L(SSE12Q8): movdqa %xmm0,-0x8c(%rdi) +L(SSE12Q7): movdqa %xmm0,-0x7c(%rdi) +L(SSE12Q6): movdqa %xmm0,-0x6c(%rdi) +L(SSE12Q5): movdqa %xmm0,-0x5c(%rdi) +L(SSE12Q4): movdqa %xmm0,-0x4c(%rdi) +L(SSE12Q3): movdqa %xmm0,-0x3c(%rdi) +L(SSE12Q2): movdqa %xmm0,-0x2c(%rdi) +L(SSE12Q1): movdqa %xmm0,-0x1c(%rdi) +L(SSE12Q0): mov %rdx,-0xc(%rdi) + mov %edx,-0x4(%rdi) + retq + +L(SSE13QB): movdqa %xmm0,-0xbd(%rdi) +L(SSE13QA): movdqa %xmm0,-0xad(%rdi) +L(SSE13Q9): movdqa %xmm0,-0x9d(%rdi) +L(SSE13Q8): movdqa %xmm0,-0x8d(%rdi) +L(SSE13Q7): movdqa %xmm0,-0x7d(%rdi) +L(SSE13Q6): movdqa %xmm0,-0x6d(%rdi) +L(SSE13Q5): movdqa %xmm0,-0x5d(%rdi) +L(SSE13Q4): movdqa %xmm0,-0x4d(%rdi) +L(SSE13Q3): movdqa %xmm0,-0x3d(%rdi) +L(SSE13Q2): movdqa %xmm0,-0x2d(%rdi) +L(SSE13Q1): movdqa %xmm0,-0x1d(%rdi) +L(SSE13Q0): mov %rdx,-0xd(%rdi) + mov %edx,-0x5(%rdi) + mov %dl,-0x1(%rdi) + retq + +L(SSE14QB): movdqa %xmm0,-0xbe(%rdi) +L(SSE14QA): movdqa %xmm0,-0xae(%rdi) +L(SSE14Q9): movdqa %xmm0,-0x9e(%rdi) +L(SSE14Q8): movdqa %xmm0,-0x8e(%rdi) +L(SSE14Q7): movdqa %xmm0,-0x7e(%rdi) +L(SSE14Q6): movdqa %xmm0,-0x6e(%rdi) +L(SSE14Q5): movdqa %xmm0,-0x5e(%rdi) +L(SSE14Q4): movdqa %xmm0,-0x4e(%rdi) +L(SSE14Q3): movdqa %xmm0,-0x3e(%rdi) +L(SSE14Q2): movdqa %xmm0,-0x2e(%rdi) +L(SSE14Q1): movdqa %xmm0,-0x1e(%rdi) +L(SSE14Q0): mov %rdx,-0xe(%rdi) + mov %edx,-0x6(%rdi) + mov %dx,-0x2(%rdi) + retq + +L(SSE15QB): movdqa %xmm0,-0xbf(%rdi) +L(SSE15QA): movdqa %xmm0,-0xaf(%rdi) +L(SSE15Q9): movdqa %xmm0,-0x9f(%rdi) +L(SSE15Q8): movdqa %xmm0,-0x8f(%rdi) +L(SSE15Q7): movdqa %xmm0,-0x7f(%rdi) +L(SSE15Q6): movdqa %xmm0,-0x6f(%rdi) +L(SSE15Q5): movdqa %xmm0,-0x5f(%rdi) +L(SSE15Q4): movdqa %xmm0,-0x4f(%rdi) +L(SSE15Q3): movdqa %xmm0,-0x3f(%rdi) +L(SSE15Q2): movdqa %xmm0,-0x2f(%rdi) +L(SSE15Q1): movdqa %xmm0,-0x1f(%rdi) +L(SSE15Q0): mov %rdx,-0xf(%rdi) + mov %edx,-0x7(%rdi) + mov %dx,-0x3(%rdi) + mov %dl,-0x1(%rdi) + retq + + .balign 16 +L(byte32sse2_pre): + + mov __x86_64_shared_cache_size(%rip),%r9d # The largest cache size + cmp %r9,%r8 + jg L(sse2_nt_move_pre) + #jmp L(byte32sse2) + .balign 16 +L(byte32sse2): + lea -0x80(%r8),%r8 # 128 + cmp $0x80,%r8 # 128 + movdqa %xmm0,(%rdi) + movdqa %xmm0,0x10(%rdi) + movdqa %xmm0,0x20(%rdi) + movdqa %xmm0,0x30(%rdi) + movdqa %xmm0,0x40(%rdi) + movdqa %xmm0,0x50(%rdi) + movdqa %xmm0,0x60(%rdi) + movdqa %xmm0,0x70(%rdi) + + lea 0x80(%rdi),%rdi + jge L(byte32sse2) + lea L(SSExDx)(%rip),%r11 + add %r8,%rdi +#ifndef PIC + jmpq *(%r11,%r8,8) +#else + movslq (%r11,%r8,4),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif + + .balign 16 +L(sse2_nt_move_pre): + cmp $0x0,%r9 + je L(byte32sse2) + jmp L(sse2_nt_move) + + .balign 16 +L(sse2_nt_move): + lea -0x80(%r8),%r8 + cmp $0x80,%r8 + + movntdq %xmm0,(%rdi) + movntdq %xmm0,0x10(%rdi) + movntdq %xmm0,0x20(%rdi) + movntdq %xmm0,0x30(%rdi) + movntdq %xmm0,0x40(%rdi) + movntdq %xmm0,0x50(%rdi) + movntdq %xmm0,0x60(%rdi) + movntdq %xmm0,0x70(%rdi) + + lea 0x80(%rdi),%rdi + jge L(sse2_nt_move) + lea L(SSExDx)(%rip),%r11 sfence - jmp 4b + add %r8,%rdi +#ifndef PIC + jmpq *(%r11,%r8,8) +#else + movslq (%r11,%r8,4),%rcx + lea (%rcx,%r11,1),%r11 + jmpq *%r11 +#endif + + .pushsection .rodata + .balign 16 +#ifndef PIC +L(SSExDx): + .quad L(SSE0Q0), L(SSE1Q0), L(SSE2Q0), L(SSE3Q0) + .quad L(SSE4Q0), L(SSE5Q0), L(SSE6Q0), L(SSE7Q0) + .quad L(SSE8Q0), L(SSE9Q0), L(SSE10Q0), L(SSE11Q0) + .quad L(SSE12Q0), L(SSE13Q0), L(SSE14Q0), L(SSE15Q0) + .quad L(SSE0Q1), L(SSE1Q1), L(SSE2Q1), L(SSE3Q1) + .quad L(SSE4Q1), L(SSE5Q1), L(SSE6Q1), L(SSE7Q1) + .quad L(SSE8Q1), L(SSE9Q1), L(SSE10Q1), L(SSE11Q1) + .quad L(SSE12Q1), L(SSE13Q1), L(SSE14Q1), L(SSE15Q1) + .quad L(SSE0Q2), L(SSE1Q2), L(SSE2Q2), L(SSE3Q2) + .quad L(SSE4Q2), L(SSE5Q2), L(SSE6Q2), L(SSE7Q2) + .quad L(SSE8Q2), L(SSE9Q2), L(SSE10Q2), L(SSE11Q2) + .quad L(SSE12Q2), L(SSE13Q2), L(SSE14Q2), L(SSE15Q2) + .quad L(SSE0Q3), L(SSE1Q3), L(SSE2Q3), L(SSE3Q3) + .quad L(SSE4Q3), L(SSE5Q3), L(SSE6Q3), L(SSE7Q3) + .quad L(SSE8Q3), L(SSE9Q3), L(SSE10Q3), L(SSE11Q3) + .quad L(SSE12Q3), L(SSE13Q3), L(SSE14Q3), L(SSE15Q3) + .quad L(SSE0Q4), L(SSE1Q4), L(SSE2Q4), L(SSE3Q4) + .quad L(SSE4Q4), L(SSE5Q4), L(SSE6Q4), L(SSE7Q4) + .quad L(SSE8Q4), L(SSE9Q4), L(SSE10Q4), L(SSE11Q4) + .quad L(SSE12Q4), L(SSE13Q4), L(SSE14Q4), L(SSE15Q4) + .quad L(SSE0Q5), L(SSE1Q5), L(SSE2Q5), L(SSE3Q5) + .quad L(SSE4Q5), L(SSE5Q5), L(SSE6Q5), L(SSE7Q5) + .quad L(SSE8Q5), L(SSE9Q5), L(SSE10Q5), L(SSE11Q5) + .quad L(SSE12Q5), L(SSE13Q5), L(SSE14Q5), L(SSE15Q5) + .quad L(SSE0Q6), L(SSE1Q6), L(SSE2Q6), L(SSE3Q6) + .quad L(SSE4Q6), L(SSE5Q6), L(SSE6Q6), L(SSE7Q6) + .quad L(SSE8Q6), L(SSE9Q6), L(SSE10Q6), L(SSE11Q6) + .quad L(SSE12Q6), L(SSE13Q6), L(SSE14Q6), L(SSE15Q6) + .quad L(SSE0Q7), L(SSE1Q7), L(SSE2Q7), L(SSE3Q7) + .quad L(SSE4Q7), L(SSE5Q7), L(SSE6Q7), L(SSE7Q7) + .quad L(SSE8Q7), L(SSE9Q7), L(SSE10Q7), L(SSE11Q7) + .quad L(SSE12Q7), L(SSE13Q7), L(SSE14Q7), L(SSE15Q7) + .quad L(SSE0Q8), L(SSE1Q8), L(SSE2Q8), L(SSE3Q8) + .quad L(SSE4Q8), L(SSE5Q8), L(SSE6Q8), L(SSE7Q8) + .quad L(SSE8Q8), L(SSE9Q8), L(SSE10Q8), L(SSE11Q8) + .quad L(SSE12Q8), L(SSE13Q8), L(SSE14Q8), L(SSE15Q8) + .quad L(SSE0Q9), L(SSE1Q9), L(SSE2Q9), L(SSE3Q9) + .quad L(SSE4Q9), L(SSE5Q9), L(SSE6Q9), L(SSE7Q9) + .quad L(SSE8Q9), L(SSE9Q9), L(SSE10Q9), L(SSE11Q9) + .quad L(SSE12Q9), L(SSE13Q9), L(SSE14Q9), L(SSE15Q9) + .quad L(SSE0QA), L(SSE1QA), L(SSE2QA), L(SSE3QA) + .quad L(SSE4QA), L(SSE5QA), L(SSE6QA), L(SSE7QA) + .quad L(SSE8QA), L(SSE9QA), L(SSE10QA), L(SSE11QA) + .quad L(SSE12QA), L(SSE13QA), L(SSE14QA), L(SSE15QA) + .quad L(SSE0QB), L(SSE1QB), L(SSE2QB), L(SSE3QB) + .quad L(SSE4QB), L(SSE5QB), L(SSE6QB), L(SSE7QB) + .quad L(SSE8QB), L(SSE9QB), L(SSE10QB), L(SSE11QB) + .quad L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB) +#else +L(SSExDx): + .int L(SSE0Q0) -L(SSExDx) + .int L(SSE1Q0) -L(SSExDx) + .int L(SSE2Q0) -L(SSExDx) + .int L(SSE3Q0) -L(SSExDx) + .int L(SSE4Q0) -L(SSExDx) + .int L(SSE5Q0) -L(SSExDx) + .int L(SSE6Q0) -L(SSExDx) + .int L(SSE7Q0) -L(SSExDx) + + .int L(SSE8Q0) -L(SSExDx) + .int L(SSE9Q0) -L(SSExDx) + .int L(SSE10Q0)-L(SSExDx) + .int L(SSE11Q0)-L(SSExDx) + .int L(SSE12Q0)-L(SSExDx) + .int L(SSE13Q0)-L(SSExDx) + .int L(SSE14Q0)-L(SSExDx) + .int L(SSE15Q0)-L(SSExDx) + + .int L(SSE0Q1) -L(SSExDx) + .int L(SSE1Q1) -L(SSExDx) + .int L(SSE2Q1) -L(SSExDx) + .int L(SSE3Q1) -L(SSExDx) + .int L(SSE4Q1) -L(SSExDx) + .int L(SSE5Q1) -L(SSExDx) + .int L(SSE6Q1) -L(SSExDx) + .int L(SSE7Q1) -L(SSExDx) + + .int L(SSE8Q1) -L(SSExDx) + .int L(SSE9Q1) -L(SSExDx) + .int L(SSE10Q1)-L(SSExDx) + .int L(SSE11Q1)-L(SSExDx) + .int L(SSE12Q1)-L(SSExDx) + .int L(SSE13Q1)-L(SSExDx) + .int L(SSE14Q1)-L(SSExDx) + .int L(SSE15Q1)-L(SSExDx) + + .int L(SSE0Q2) -L(SSExDx) + .int L(SSE1Q2) -L(SSExDx) + .int L(SSE2Q2) -L(SSExDx) + .int L(SSE3Q2) -L(SSExDx) + .int L(SSE4Q2) -L(SSExDx) + .int L(SSE5Q2) -L(SSExDx) + .int L(SSE6Q2) -L(SSExDx) + .int L(SSE7Q2) -L(SSExDx) + + .int L(SSE8Q2) -L(SSExDx) + .int L(SSE9Q2) -L(SSExDx) + .int L(SSE10Q2)-L(SSExDx) + .int L(SSE11Q2)-L(SSExDx) + .int L(SSE12Q2)-L(SSExDx) + .int L(SSE13Q2)-L(SSExDx) + .int L(SSE14Q2)-L(SSExDx) + .int L(SSE15Q2)-L(SSExDx) + + .int L(SSE0Q3) -L(SSExDx) + .int L(SSE1Q3) -L(SSExDx) + .int L(SSE2Q3) -L(SSExDx) + .int L(SSE3Q3) -L(SSExDx) + .int L(SSE4Q3) -L(SSExDx) + .int L(SSE5Q3) -L(SSExDx) + .int L(SSE6Q3) -L(SSExDx) + .int L(SSE7Q3) -L(SSExDx) + + .int L(SSE8Q3) -L(SSExDx) + .int L(SSE9Q3) -L(SSExDx) + .int L(SSE10Q3)-L(SSExDx) + .int L(SSE11Q3)-L(SSExDx) + .int L(SSE12Q3)-L(SSExDx) + .int L(SSE13Q3)-L(SSExDx) + .int L(SSE14Q3)-L(SSExDx) + .int L(SSE15Q3)-L(SSExDx) + + .int L(SSE0Q4) -L(SSExDx) + .int L(SSE1Q4) -L(SSExDx) + .int L(SSE2Q4) -L(SSExDx) + .int L(SSE3Q4) -L(SSExDx) + .int L(SSE4Q4) -L(SSExDx) + .int L(SSE5Q4) -L(SSExDx) + .int L(SSE6Q4) -L(SSExDx) + .int L(SSE7Q4) -L(SSExDx) + + .int L(SSE8Q4) -L(SSExDx) + .int L(SSE9Q4) -L(SSExDx) + .int L(SSE10Q4)-L(SSExDx) + .int L(SSE11Q4)-L(SSExDx) + .int L(SSE12Q4)-L(SSExDx) + .int L(SSE13Q4)-L(SSExDx) + .int L(SSE14Q4)-L(SSExDx) + .int L(SSE15Q4)-L(SSExDx) + + .int L(SSE0Q5) -L(SSExDx) + .int L(SSE1Q5) -L(SSExDx) + .int L(SSE2Q5) -L(SSExDx) + .int L(SSE3Q5) -L(SSExDx) + .int L(SSE4Q5) -L(SSExDx) + .int L(SSE5Q5) -L(SSExDx) + .int L(SSE6Q5) -L(SSExDx) + .int L(SSE7Q5) -L(SSExDx) + + .int L(SSE8Q5) -L(SSExDx) + .int L(SSE9Q5) -L(SSExDx) + .int L(SSE10Q5)-L(SSExDx) + .int L(SSE11Q5)-L(SSExDx) + .int L(SSE12Q5)-L(SSExDx) + .int L(SSE13Q5)-L(SSExDx) + .int L(SSE14Q5)-L(SSExDx) + .int L(SSE15Q5)-L(SSExDx) + + .int L(SSE0Q6) -L(SSExDx) + .int L(SSE1Q6) -L(SSExDx) + .int L(SSE2Q6) -L(SSExDx) + .int L(SSE3Q6) -L(SSExDx) + .int L(SSE4Q6) -L(SSExDx) + .int L(SSE5Q6) -L(SSExDx) + .int L(SSE6Q6) -L(SSExDx) + .int L(SSE7Q6) -L(SSExDx) + + .int L(SSE8Q6) -L(SSExDx) + .int L(SSE9Q6) -L(SSExDx) + .int L(SSE10Q6)-L(SSExDx) + .int L(SSE11Q6)-L(SSExDx) + .int L(SSE12Q6)-L(SSExDx) + .int L(SSE13Q6)-L(SSExDx) + .int L(SSE14Q6)-L(SSExDx) + .int L(SSE15Q6)-L(SSExDx) + + .int L(SSE0Q7) -L(SSExDx) + .int L(SSE1Q7) -L(SSExDx) + .int L(SSE2Q7) -L(SSExDx) + .int L(SSE3Q7) -L(SSExDx) + .int L(SSE4Q7) -L(SSExDx) + .int L(SSE5Q7) -L(SSExDx) + .int L(SSE6Q7) -L(SSExDx) + .int L(SSE7Q7) -L(SSExDx) + + .int L(SSE8Q7) -L(SSExDx) + .int L(SSE9Q7) -L(SSExDx) + .int L(SSE10Q7)-L(SSExDx) + .int L(SSE11Q7)-L(SSExDx) + .int L(SSE12Q7)-L(SSExDx) + .int L(SSE13Q7)-L(SSExDx) + .int L(SSE14Q7)-L(SSExDx) + .int L(SSE15Q7)-L(SSExDx) + + .int L(SSE0Q8) -L(SSExDx) + .int L(SSE1Q8) -L(SSExDx) + .int L(SSE2Q8) -L(SSExDx) + .int L(SSE3Q8) -L(SSExDx) + .int L(SSE4Q8) -L(SSExDx) + .int L(SSE5Q8) -L(SSExDx) + .int L(SSE6Q8) -L(SSExDx) + .int L(SSE7Q8) -L(SSExDx) + + .int L(SSE8Q8) -L(SSExDx) + .int L(SSE9Q8) -L(SSExDx) + .int L(SSE10Q8)-L(SSExDx) + .int L(SSE11Q8)-L(SSExDx) + .int L(SSE12Q8)-L(SSExDx) + .int L(SSE13Q8)-L(SSExDx) + .int L(SSE14Q8)-L(SSExDx) + .int L(SSE15Q8)-L(SSExDx) + + .int L(SSE0Q9) -L(SSExDx) + .int L(SSE1Q9) -L(SSExDx) + .int L(SSE2Q9) -L(SSExDx) + .int L(SSE3Q9) -L(SSExDx) + .int L(SSE4Q9) -L(SSExDx) + .int L(SSE5Q9) -L(SSExDx) + .int L(SSE6Q9) -L(SSExDx) + .int L(SSE7Q9) -L(SSExDx) + + .int L(SSE8Q9) -L(SSExDx) + .int L(SSE9Q9) -L(SSExDx) + .int L(SSE10Q9)-L(SSExDx) + .int L(SSE11Q9)-L(SSExDx) + .int L(SSE12Q9)-L(SSExDx) + .int L(SSE13Q9)-L(SSExDx) + .int L(SSE14Q9)-L(SSExDx) + .int L(SSE15Q9)-L(SSExDx) + + .int L(SSE0QA) -L(SSExDx) + .int L(SSE1QA) -L(SSExDx) + .int L(SSE2QA) -L(SSExDx) + .int L(SSE3QA) -L(SSExDx) + .int L(SSE4QA) -L(SSExDx) + .int L(SSE5QA) -L(SSExDx) + .int L(SSE6QA) -L(SSExDx) + .int L(SSE7QA) -L(SSExDx) + + .int L(SSE8QA) -L(SSExDx) + .int L(SSE9QA) -L(SSExDx) + .int L(SSE10QA)-L(SSExDx) + .int L(SSE11QA)-L(SSExDx) + .int L(SSE12QA)-L(SSExDx) + .int L(SSE13QA)-L(SSExDx) + .int L(SSE14QA)-L(SSExDx) + .int L(SSE15QA)-L(SSExDx) + + .int L(SSE0QB) -L(SSExDx) + .int L(SSE1QB) -L(SSExDx) + .int L(SSE2QB) -L(SSExDx) + .int L(SSE3QB) -L(SSExDx) + .int L(SSE4QB) -L(SSExDx) + .int L(SSE5QB) -L(SSExDx) + .int L(SSE6QB) -L(SSExDx) + .int L(SSE7QB) -L(SSExDx) + + .int L(SSE8QB) -L(SSExDx) + .int L(SSE9QB) -L(SSExDx) + .int L(SSE10QB)-L(SSExDx) + .int L(SSE11QB)-L(SSExDx) + .int L(SSE12QB)-L(SSExDx) + .int L(SSE13QB)-L(SSExDx) + .int L(SSE14QB)-L(SSExDx) + .int L(SSE15QB)-L(SSExDx) +#endif + .popsection END (memset) libc_hidden_builtin_def (memset) diff --git a/sysdeps/x86_64/rtld-memset.c b/sysdeps/x86_64/rtld-memset.c new file mode 100644 index 0000000000..55f3835790 --- /dev/null +++ b/sysdeps/x86_64/rtld-memset.c @@ -0,0 +1 @@ +#include <string/memset.c> |