diff options
author | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2016-10-09 19:34:06 +0200 |
---|---|---|
committer | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2016-10-09 19:34:06 +0200 |
commit | 6772d640a4f4874166a61f1859e1660a2913a89d (patch) | |
tree | 839fea4d5dcefab75577cecb563ccad4234eb953 /sysdeps/x86_64 | |
parent | f98906bbb57cb495b4501afc5f18604ef3a94e2a (diff) | |
parent | 7bb5f8a836b916d6ebf7b6921b136e99cea2442d (diff) |
Merge commit 'refs/top-bases/t/hurdsig-fixes' into t/hurdsig-fixes
Diffstat (limited to 'sysdeps/x86_64')
381 files changed, 2909 insertions, 3519 deletions
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index ef70a50c84..67ed5ba213 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -19,8 +19,17 @@ gen-as-const-headers += locale-defines.sym endif ifeq ($(subdir),elf) +# There is no good reason to use MMX in x86-64 ld.so with GCC. +CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ + -mno-mmx) + sysdep-dl-routines += tlsdesc dl-tlsdesc +tests += ifuncmain8 +modules-names += ifuncmod8 + +$(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so + tests += tst-quad1 tst-quad2 modules-names += tst-quadmod1 tst-quadmod2 @@ -34,10 +43,12 @@ tests-pie += $(quad-pie-test) $(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o $(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o -tests += tst-audit3 tst-audit4 tst-audit5 tst-audit10 -ifeq (yes,$(config-cflags-avx)) -tests += tst-audit6 tst-audit7 -endif +tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 tst-audit10 + +tests += tst-split-dynreloc +LDFLAGS-tst-split-dynreloc = -Wl,-T,$(..)sysdeps/x86_64/tst-split-dynreloc.lds +tst-split-dynreloc-ENV = LD_BIND_NOW=1 + modules-names += tst-auditmod3a tst-auditmod3b \ tst-auditmod4a tst-auditmod4b \ tst-auditmod5a tst-auditmod5b \ @@ -70,18 +81,13 @@ $(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so $(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so -ifeq (yes,$(config-cflags-avx)) -AVX-CFLAGS=-mavx -ifeq (yes,$(config-cflags-novzeroupper)) -AVX-CFLAGS+=-mno-vzeroupper -endif +AVX-CFLAGS=-mavx -mno-vzeroupper CFLAGS-tst-audit4.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod4a.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod4b.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS) CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS) -endif ifeq (yes,$(config-cflags-avx512)) AVX512-CFLAGS = -mavx512f CFLAGS-tst-audit10.c += $(AVX512-CFLAGS) diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S index a410efb08c..c164626577 100644 --- a/sysdeps/x86_64/__longjmp.S +++ b/sysdeps/x86_64/__longjmp.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2001-2015 Free Software Foundation, Inc. +/* Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/_mcount.S b/sysdeps/x86_64/_mcount.S index 01787f95f7..5d7edd2a29 100644 --- a/sysdeps/x86_64/_mcount.S +++ b/sysdeps/x86_64/_mcount.S @@ -1,5 +1,5 @@ /* Machine-specific calling sequence for `mcount' profiling function. x86-64 version. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. Contributed by Andreas Jaeger <aj@suse.de>. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/add_n.S b/sysdeps/x86_64/add_n.S index b8e7c3e067..fc99811476 100644 --- a/sysdeps/x86_64/add_n.S +++ b/sysdeps/x86_64/add_n.S @@ -1,6 +1,6 @@ /* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store sum in a third limb vector. - Copyright (C) 2006-2015 Free Software Foundation, Inc. + Copyright (C) 2006-2016 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify diff --git a/sysdeps/x86_64/addmul_1.S b/sysdeps/x86_64/addmul_1.S index 829e01eff9..ab7c2fa701 100644 --- a/sysdeps/x86_64/addmul_1.S +++ b/sysdeps/x86_64/addmul_1.S @@ -1,6 +1,6 @@ /* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add the result to a second limb vector. - Copyright (C) 2003-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2016 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify diff --git a/sysdeps/x86_64/bits/atomic.h b/sysdeps/x86_64/atomic-machine.h index 337b334db1..a5b86eb3ce 100644 --- a/sysdeps/x86_64/bits/atomic.h +++ b/sysdeps/x86_64/atomic-machine.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2015 Free Software Foundation, Inc. +/* Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. @@ -56,11 +56,7 @@ typedef uintmax_t uatomic_max_t; #endif #define __HAVE_64B_ATOMICS 1 -#if __GNUC_PREREQ (4, 7) #define USE_ATOMIC_COMPILER_BUILTINS 1 -#else -#define USE_ATOMIC_COMPILER_BUILTINS 0 -#endif #define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \ __sync_val_compare_and_swap (mem, oldval, newval) diff --git a/sysdeps/x86_64/backtrace.c b/sysdeps/x86_64/backtrace.c index 2a3848d20f..e04407c516 100644 --- a/sysdeps/x86_64/backtrace.c +++ b/sysdeps/x86_64/backtrace.c @@ -1,5 +1,5 @@ /* Return backtrace of current program state. - Copyright (C) 2003-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. @@ -17,7 +17,7 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <bits/libc-lock.h> +#include <libc-lock.h> #include <dlfcn.h> #include <execinfo.h> #include <stdlib.h> @@ -92,11 +92,13 @@ backtrace_helper (struct _Unwind_Context *ctx, void *a) } int -__backtrace (array, size) - void **array; - int size; +__backtrace (void **array, int size) { struct trace_arg arg = { .array = array, .cfa = 0, .size = size, .cnt = -1 }; + + if (size <= 0) + return 0; + #ifdef SHARED __libc_once_define (static, once); @@ -105,8 +107,7 @@ __backtrace (array, size) return 0; #endif - if (size >= 1) - unwind_backtrace (backtrace_helper, &arg); + unwind_backtrace (backtrace_helper, &arg); /* _Unwind_Backtrace seems to put NULL address above _start. Fix it up here. */ diff --git a/sysdeps/x86_64/bsd-_setjmp.S b/sysdeps/x86_64/bsd-_setjmp.S index fed6afd97b..1a2a94f1a6 100644 --- a/sysdeps/x86_64/bsd-_setjmp.S +++ b/sysdeps/x86_64/bsd-_setjmp.S @@ -1,5 +1,5 @@ /* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. x86-64 version. - Copyright (C) 1994-2015 Free Software Foundation, Inc. + Copyright (C) 1994-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/bsd-setjmp.S b/sysdeps/x86_64/bsd-setjmp.S index 6a078dd786..11d9d8daa0 100644 --- a/sysdeps/x86_64/bsd-setjmp.S +++ b/sysdeps/x86_64/bsd-setjmp.S @@ -1,5 +1,5 @@ /* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. x86-64 version. - Copyright (C) 1994-2015 Free Software Foundation, Inc. + Copyright (C) 1994-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c index b99fb9a762..96463df064 100644 --- a/sysdeps/x86_64/cacheinfo.c +++ b/sysdeps/x86_64/cacheinfo.c @@ -1,5 +1,5 @@ /* x86_64 cache info. - Copyright (C) 2003-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -21,40 +21,11 @@ #include <stdlib.h> #include <unistd.h> #include <cpuid.h> +#include <init-arch.h> -#ifndef __cpuid_count -/* FIXME: Provide __cpuid_count if it isn't defined. Copied from gcc - 4.4.0. Remove this if gcc 4.4 is the minimum requirement. */ -# if defined(__i386__) && defined(__PIC__) -/* %ebx may be the PIC register. */ -# define __cpuid_count(level, count, a, b, c, d) \ - __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ - "cpuid\n\t" \ - "xchg{l}\t{%%}ebx, %1\n\t" \ - : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ - : "0" (level), "2" (count)) -# else -# define __cpuid_count(level, count, a, b, c, d) \ - __asm__ ("cpuid\n\t" \ - : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ - : "0" (level), "2" (count)) -# endif -#endif - -#ifdef USE_MULTIARCH -# include "multiarch/init-arch.h" - -# define is_intel __cpu_features.kind == arch_kind_intel -# define is_amd __cpu_features.kind == arch_kind_amd -# define max_cpuid __cpu_features.max_cpuid -#else - /* This spells out "GenuineIntel". */ -# define is_intel \ - ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69 - /* This spells out "AuthenticAMD". */ -# define is_amd \ - ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65 -#endif +#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel +#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd +#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid static const struct intel_02_cache_info { @@ -235,21 +206,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2, /* Intel reused this value. For family 15, model 6 it specifies the 3rd level cache. Otherwise the 2nd level cache. */ - unsigned int family; - unsigned int model; -#ifdef USE_MULTIARCH - family = __cpu_features.family; - model = __cpu_features.model; -#else - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - __cpuid (1, eax, ebx, ecx, edx); - - family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf); - model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf); -#endif + unsigned int family = GLRO(dl_x86_cpu_features).family; + unsigned int model = GLRO(dl_x86_cpu_features).model; if (family == 15 && model == 6) { @@ -476,18 +434,6 @@ long int attribute_hidden __cache_sysconf (int name) { -#ifdef USE_MULTIARCH - if (__cpu_features.kind == arch_kind_unknown) - __init_cpu_features (); -#else - /* Find out what brand of processor. */ - unsigned int max_cpuid; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - __cpuid (0, max_cpuid, ebx, ecx, edx); -#endif - if (is_intel) return handle_intel (name, max_cpuid); @@ -523,18 +469,6 @@ long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024; int __x86_prefetchw attribute_hidden; #endif -#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION -/* Instructions preferred for memory and string routines. - - 0: Regular instructions - 1: MMX instructions - 2: SSE2 instructions - 3: SSSE3 instructions - - */ -int __x86_preferred_memory_instruction attribute_hidden; -#endif - static void __attribute__((constructor)) @@ -551,14 +485,6 @@ init_cacheinfo (void) unsigned int level; unsigned int threads = 0; -#ifdef USE_MULTIARCH - if (__cpu_features.kind == arch_kind_unknown) - __init_cpu_features (); -#else - int max_cpuid; - __cpuid (0, max_cpuid, ebx, ecx, edx); -#endif - if (is_intel) { data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid); @@ -574,34 +500,13 @@ init_cacheinfo (void) shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); } - unsigned int ebx_1; - -#ifdef USE_MULTIARCH - eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; -#else - __cpuid (1, eax, ebx_1, ecx, edx); -#endif - - unsigned int family = (eax >> 8) & 0x0f; - unsigned int model = (eax >> 4) & 0x0f; - unsigned int extended_model = (eax >> 12) & 0xf0; - -#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION - /* Intel prefers SSSE3 instructions for memory/string routines - if they are available. */ - if ((ecx & 0x200)) - __x86_preferred_memory_instruction = 3; - else - __x86_preferred_memory_instruction = 2; -#endif - /* Figure out the number of logical threads that share the highest cache level. */ if (max_cpuid >= 4) { + unsigned int family = GLRO(dl_x86_cpu_features).family; + unsigned int model = GLRO(dl_x86_cpu_features).model; + int i = 0; /* Query until desired cache level is enumerated. */ @@ -653,7 +558,6 @@ init_cacheinfo (void) threads += 1; if (threads > 2 && level == 2 && family == 6) { - model += extended_model; switch (model) { case 0x57: @@ -676,7 +580,9 @@ init_cacheinfo (void) intel_bug_no_cache_info: /* Assume that all logical threads share the highest cache level. */ - threads = (ebx_1 >> 16) & 0xff; + threads + = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx + >> 16) & 0xff); } /* Cap usage of highest cache level to the number of supported @@ -691,25 +597,6 @@ init_cacheinfo (void) long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); -#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION -# ifdef USE_MULTIARCH - eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; -# else - __cpuid (1, eax, ebx, ecx, edx); -# endif - - /* AMD prefers SSSE3 instructions for memory/string routines - if they are avaiable, otherwise it prefers integer - instructions. */ - if ((ecx & 0x200)) - __x86_preferred_memory_instruction = 3; - else - __x86_preferred_memory_instruction = 0; -#endif - /* Get maximum extended function. */ __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx); diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index 552f535ac6..c72b9d3184 100644 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -1,100 +1,6 @@ - -# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES -# ------------------------------------------------------- -# Tests whether HEADER exists and can be compiled using the include files in -# INCLUDES, setting the cache variable VAR accordingly. -ac_fn_c_check_header_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - eval "$3=yes" -else - eval "$3=no" -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_compile # This file is generated from configure.ac by Autoconf. DO NOT EDIT! # Local configure fragment for sysdeps/x86_64. - -ac_fn_c_check_header_compile "$LINENO" "cpuid.h" "ac_cv_header_cpuid_h" "/* No default includes. */ -" -if test "x$ac_cv_header_cpuid_h" = xyes; then : - -else - as_fn_error $? "gcc must provide the <cpuid.h> header" "$LINENO" 5 -fi - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SSE4 support" >&5 -$as_echo_n "checking for SSE4 support... " >&6; } -if ${libc_cv_cc_sse4+:} false; then : - $as_echo_n "(cached) " >&6 -else - if { ac_try='${CC-cc} -msse4 -xc /dev/null -S -o /dev/null' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : - libc_cv_cc_sse4=yes -else - libc_cv_cc_sse4=no -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse4" >&5 -$as_echo "$libc_cv_cc_sse4" >&6; } -if test $libc_cv_cc_sse4 = yes; then - $as_echo "#define HAVE_SSE4_SUPPORT 1" >>confdefs.h - -fi -config_vars="$config_vars -config-cflags-sse4 = $libc_cv_cc_sse4" - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX support" >&5 -$as_echo_n "checking for AVX support... " >&6; } -if ${libc_cv_cc_avx+:} false; then : - $as_echo_n "(cached) " >&6 -else - if { ac_try='${CC-cc} -mavx -xc /dev/null -S -o /dev/null' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : - libc_cv_cc_avx=yes -else - libc_cv_cc_avx=no -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx" >&5 -$as_echo "$libc_cv_cc_avx" >&6; } -if test $libc_cv_cc_avx = yes; then - $as_echo "#define HAVE_AVX_SUPPORT 1" >>confdefs.h - -fi -config_vars="$config_vars -config-cflags-avx = $libc_cv_cc_avx" - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5 $as_echo_n "checking for AVX512 support in assembler... " >&6; } if ${libc_cv_asm_avx512+:} false; then : @@ -149,80 +55,6 @@ fi config_vars="$config_vars config-cflags-avx512 = $libc_cv_cc_avx512" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5 -$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; } -if ${libc_cv_cc_sse2avx+:} false; then : - $as_echo_n "(cached) " >&6 -else - if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : - libc_cv_cc_sse2avx=yes -else - libc_cv_cc_sse2avx=no -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5 -$as_echo "$libc_cv_cc_sse2avx" >&6; } -if test $libc_cv_cc_sse2avx = yes; then - $as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h - -fi -config_vars="$config_vars -config-cflags-sse2avx = $libc_cv_cc_sse2avx" - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5 -$as_echo_n "checking for FMA4 support... " >&6; } -if ${libc_cv_cc_fma4+:} false; then : - $as_echo_n "(cached) " >&6 -else - if { ac_try='${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : - libc_cv_cc_fma4=yes -else - libc_cv_cc_fma4=no -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_fma4" >&5 -$as_echo "$libc_cv_cc_fma4" >&6; } -if test $libc_cv_cc_fma4 = yes; then - $as_echo "#define HAVE_FMA4_SUPPORT 1" >>confdefs.h - -fi -config_vars="$config_vars -have-mfma4 = $libc_cv_cc_fma4" - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -mno-vzeroupper support" >&5 -$as_echo_n "checking for -mno-vzeroupper support... " >&6; } -if ${libc_cv_cc_novzeroupper+:} false; then : - $as_echo_n "(cached) " >&6 -else - if { ac_try='${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : - libc_cv_cc_novzeroupper=yes -else - libc_cv_cc_novzeroupper=no -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_novzeroupper" >&5 -$as_echo "$libc_cv_cc_novzeroupper" >&6; } -config_vars="$config_vars -config-cflags-novzeroupper = $libc_cv_cc_novzeroupper" - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5 $as_echo_n "checking for Intel MPX support... " >&6; } if ${libc_cv_asm_mpx+:} false; then : @@ -250,32 +82,6 @@ if test $libc_cv_asm_mpx == yes; then fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5 -$as_echo_n "checking for AVX2 support... " >&6; } -if ${libc_cv_cc_avx2+:} false; then : - $as_echo_n "(cached) " >&6 -else - if { ac_try='${CC-cc} -mavx2 -xc /dev/null -S -o /dev/null' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : - libc_cv_cc_avx2=yes -else - libc_cv_cc_avx2=no -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx2" >&5 -$as_echo "$libc_cv_cc_avx2" >&6; } -if test $libc_cv_cc_avx2 = yes; then - $as_echo "#define HAVE_AVX2_SUPPORT 1" >>confdefs.h - -fi -config_vars="$config_vars -config-cflags-avx2 = $libc_cv_cc_avx2" - if test x"$build_mathvec" = xnotset; then build_mathvec=yes fi diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index e7208c9b30..37b1059af3 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -1,28 +1,6 @@ GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. # Local configure fragment for sysdeps/x86_64. -AC_CHECK_HEADER([cpuid.h], , - [AC_MSG_ERROR([gcc must provide the <cpuid.h> header])], - [/* No default includes. */]) - -dnl Check if -msse4 works. -AC_CACHE_CHECK(for SSE4 support, libc_cv_cc_sse4, [dnl -LIBC_TRY_CC_OPTION([-msse4], [libc_cv_cc_sse4=yes], [libc_cv_cc_sse4=no]) -]) -if test $libc_cv_cc_sse4 = yes; then - AC_DEFINE(HAVE_SSE4_SUPPORT) -fi -LIBC_CONFIG_VAR([config-cflags-sse4], [$libc_cv_cc_sse4]) - -dnl Check if -mavx works. -AC_CACHE_CHECK(for AVX support, libc_cv_cc_avx, [dnl -LIBC_TRY_CC_OPTION([-mavx], [libc_cv_cc_avx=yes], [libc_cv_cc_avx=no]) -]) -if test $libc_cv_cc_avx = yes; then - AC_DEFINE(HAVE_AVX_SUPPORT) -fi -LIBC_CONFIG_VAR([config-cflags-avx], [$libc_cv_cc_avx]) - dnl Check if asm supports AVX512. AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl cat > conftest.s <<\EOF @@ -48,34 +26,6 @@ if test $libc_cv_cc_avx512 = yes; then fi LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512]) -dnl Check if -msse2avx works. -AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl -LIBC_TRY_CC_OPTION([-msse2avx], - [libc_cv_cc_sse2avx=yes], - [libc_cv_cc_sse2avx=no]) -]) -if test $libc_cv_cc_sse2avx = yes; then - AC_DEFINE(HAVE_SSE2AVX_SUPPORT) -fi -LIBC_CONFIG_VAR([config-cflags-sse2avx], [$libc_cv_cc_sse2avx]) - -dnl Check if -mfma4 works. -AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl -LIBC_TRY_CC_OPTION([-mfma4], [libc_cv_cc_fma4=yes], [libc_cv_cc_fma4=no]) -]) -if test $libc_cv_cc_fma4 = yes; then - AC_DEFINE(HAVE_FMA4_SUPPORT) -fi -LIBC_CONFIG_VAR([have-mfma4], [$libc_cv_cc_fma4]) - -dnl Check if -mno-vzeroupper works. -AC_CACHE_CHECK(for -mno-vzeroupper support, libc_cv_cc_novzeroupper, [dnl -LIBC_TRY_CC_OPTION([-mno-vzeroupper], - [libc_cv_cc_novzeroupper=yes], - [libc_cv_cc_novzeroupper=no]) -]) -LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper]) - dnl Check whether asm supports Intel MPX AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl cat > conftest.s <<\EOF @@ -91,15 +41,6 @@ if test $libc_cv_asm_mpx == yes; then AC_DEFINE(HAVE_MPX_SUPPORT) fi -dnl Check if -mavx2 works. -AC_CACHE_CHECK(for AVX2 support, libc_cv_cc_avx2, [dnl -LIBC_TRY_CC_OPTION([-mavx2], [libc_cv_cc_avx2=yes], [libc_cv_cc_avx2=no]) -]) -if test $libc_cv_cc_avx2 = yes; then - AC_DEFINE(HAVE_AVX2_SUPPORT) -fi -LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2]) - if test x"$build_mathvec" = xnotset; then build_mathvec=yes fi diff --git a/sysdeps/x86_64/crti.S b/sysdeps/x86_64/crti.S index 595b0fe83b..a34525974a 100644 --- a/sysdeps/x86_64/crti.S +++ b/sysdeps/x86_64/crti.S @@ -1,5 +1,5 @@ /* Special .init and .fini section support for x86-64. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/crtn.S b/sysdeps/x86_64/crtn.S index e2d6de73e4..b2fa0c6765 100644 --- a/sysdeps/x86_64/crtn.S +++ b/sysdeps/x86_64/crtn.S @@ -1,5 +1,5 @@ /* Special .init and .fini section support for x86-64. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/dl-irel.h b/sysdeps/x86_64/dl-irel.h index d0fa4330cc..80d7d1dd78 100644 --- a/sysdeps/x86_64/dl-irel.h +++ b/sysdeps/x86_64/dl-irel.h @@ -1,6 +1,6 @@ /* Machine-dependent ELF indirect relocation inline functions. x86-64 version. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/dl-lookupcfg.h b/sysdeps/x86_64/dl-lookupcfg.h index 310f261fec..033b475889 100644 --- a/sysdeps/x86_64/dl-lookupcfg.h +++ b/sysdeps/x86_64/dl-lookupcfg.h @@ -1,5 +1,5 @@ /* Configuration of lookup functions. - Copyright (C) 2005-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -26,6 +26,7 @@ struct link_map; -extern void internal_function _dl_unmap (struct link_map *map); +extern void _dl_unmap (struct link_map *map) + internal_function attribute_hidden; #define DL_UNMAP(map) _dl_unmap (map) diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index cae6db3560..980ca73cf2 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -1,5 +1,5 @@ /* Machine-dependent ELF dynamic relocation inline functions. x86-64 version. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>. @@ -26,6 +26,7 @@ #include <sysdep.h> #include <tls.h> #include <dl-tlsdesc.h> +#include <cpu-features.c> /* Return nonzero iff ELF header is compatible with the running host. */ static inline int __attribute__ ((unused)) @@ -65,8 +66,12 @@ static inline int __attribute__ ((unused, always_inline)) elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) { Elf64_Addr *got; - extern void _dl_runtime_resolve (ElfW(Word)) attribute_hidden; - extern void _dl_runtime_profile (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden; if (l->l_info[DT_JMPREL] && lazy) { @@ -94,7 +99,12 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) end in this function. */ if (__glibc_unlikely (profile)) { - *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile; + if (HAS_ARCH_FEATURE (AVX512F_Usable)) + *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512; + else if (HAS_ARCH_FEATURE (AVX_Usable)) + *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx; + else + *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse; if (GLRO(dl_profile) != NULL && _dl_name_match_p (GLRO(dl_profile), l)) @@ -103,9 +113,17 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) GL(dl_profile_map) = l; } else - /* This function will get called to fix up the GOT entry indicated by - the offset on the stack, and then jump to the resolved address. */ - *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve; + { + /* This function will get called to fix up the GOT entry + indicated by the offset on the stack, and then jump to + the resolved address. */ + if (HAS_ARCH_FEATURE (AVX512F_Usable)) + *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_avx512; + else if (HAS_ARCH_FEATURE (AVX_Usable)) + *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_avx; + else + *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse; + } } if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy) @@ -205,6 +223,8 @@ dl_platform_init (void) if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') /* Avoid an empty string which would disturb us. */ GLRO(dl_platform) = NULL; + + init_cpu_features (&GLRO(dl_x86_cpu_features)); } static inline ElfW(Addr) diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c new file mode 100644 index 0000000000..4625695dfb --- /dev/null +++ b/sysdeps/x86_64/dl-procinfo.c @@ -0,0 +1,57 @@ +/* Data for x86-64 version of processor capability information. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* If anything should be added here check whether the size of each string + is still ok with the given array size. + + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. + */ + +#ifndef PROCINFO_CLASS +# define PROCINFO_CLASS +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_cpu_features +#else +PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features +#endif +#ifndef PROCINFO_DECL += { } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif + +#undef PROCINFO_DECL +#undef PROCINFO_CLASS diff --git a/sysdeps/x86_64/dl-tls.h b/sysdeps/x86_64/dl-tls.h index 285799b674..0f101e6ac6 100644 --- a/sysdeps/x86_64/dl-tls.h +++ b/sysdeps/x86_64/dl-tls.h @@ -1,5 +1,5 @@ /* Thread-local storage handling in the ELF dynamic linker. x86-64 version. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S index edb6328f8e..3cb7c3d031 100644 --- a/sysdeps/x86_64/dl-tlsdesc.S +++ b/sysdeps/x86_64/dl-tlsdesc.S @@ -1,5 +1,5 @@ /* Thread-local storage handling in the ELF dynamic linker. x86_64 version. - Copyright (C) 2004-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/dl-tlsdesc.h b/sysdeps/x86_64/dl-tlsdesc.h index cf32328264..11e1a50b8f 100644 --- a/sysdeps/x86_64/dl-tlsdesc.h +++ b/sysdeps/x86_64/dl-tlsdesc.h @@ -1,6 +1,6 @@ /* Thread-local storage descriptor handling in the ELF dynamic linker. x86_64 version. - Copyright (C) 2005-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -60,8 +60,9 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_resolve_hold(struct tlsdesc *on_rax); # ifdef SHARED -extern void *internal_function _dl_make_tlsdesc_dynamic (struct link_map *map, - size_t ti_offset); +extern void *_dl_make_tlsdesc_dynamic (struct link_map *map, + size_t ti_offset) + internal_function attribute_hidden; extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic(struct tlsdesc *); # endif diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index 678c57fc24..9fb6b13983 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -1,5 +1,5 @@ /* PLT trampolines. x86-64 version. - Copyright (C) 2004-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,23 +20,40 @@ #include <sysdep.h> #include <link-defines.h> -#if (RTLD_SAVESPACE_SSE % 32) != 0 -# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes +#ifndef DL_STACK_ALIGNMENT +/* Due to GCC bug: + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 + + __tls_get_addr may be called with 8-byte stack alignment. Although + this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume + that stack will be always aligned at 16 bytes. We use unaligned + 16-byte move to load and store SSE registers, which has no penalty + on modern processors if stack is 16-byte aligned. */ +# define DL_STACK_ALIGNMENT 8 +#endif + +#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE +/* The maximum size of unaligned vector load and store. */ +# define DL_RUNIME_UNALIGNED_VEC_SIZE 16 #endif +/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */ +#define DL_RUNIME_RESOLVE_REALIGN_STACK \ + (VEC_SIZE > DL_STACK_ALIGNMENT \ + && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE) + +/* Align vector register save area to 16 bytes. */ +#define REGISTER_SAVE_VEC_OFF 0 + /* Area on stack to save and restore registers used for parameter passing when calling _dl_fixup. */ #ifdef __ILP32__ -/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX. */ -# define REGISTER_SAVE_AREA (8 * 7) -# define REGISTER_SAVE_RAX 0 +# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) # define PRESERVE_BND_REGS_PREFIX #else -/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0, - BND1, BND2, BND3. */ -# define REGISTER_SAVE_AREA (8 * 7 + 16 * 4) /* Align bound register save area to 16 bytes. */ -# define REGISTER_SAVE_BND0 0 +# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8) # define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16) # define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16) # define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16) @@ -54,386 +71,61 @@ #define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8) #define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8) - .text - .globl _dl_runtime_resolve - .type _dl_runtime_resolve, @function - .align 16 - cfi_startproc -_dl_runtime_resolve: - cfi_adjust_cfa_offset(16) # Incorporate PLT - subq $REGISTER_SAVE_AREA,%rsp - cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) - # Preserve registers otherwise clobbered. - movq %rax, REGISTER_SAVE_RAX(%rsp) - movq %rcx, REGISTER_SAVE_RCX(%rsp) - movq %rdx, REGISTER_SAVE_RDX(%rsp) - movq %rsi, REGISTER_SAVE_RSI(%rsp) - movq %rdi, REGISTER_SAVE_RDI(%rsp) - movq %r8, REGISTER_SAVE_R8(%rsp) - movq %r9, REGISTER_SAVE_R9(%rsp) -#ifndef __ILP32__ - # We also have to preserve bound registers. These are nops if - # Intel MPX isn't available or disabled. -# ifdef HAVE_MPX_SUPPORT - bndmov %bnd0, REGISTER_SAVE_BND0(%rsp) - bndmov %bnd1, REGISTER_SAVE_BND1(%rsp) - bndmov %bnd2, REGISTER_SAVE_BND2(%rsp) - bndmov %bnd3, REGISTER_SAVE_BND3(%rsp) -# else -# if REGISTER_SAVE_BND0 == 0 - .byte 0x66,0x0f,0x1b,0x04,0x24 -# else - .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0 -# endif - .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1 - .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2 - .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3 -# endif -#endif - # Copy args pushed by PLT in register. - # %rdi: link_map, %rsi: reloc_index - movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi - movq REGISTER_SAVE_AREA(%rsp), %rdi - call _dl_fixup # Call resolver. - movq %rax, %r11 # Save return value -#ifndef __ILP32__ - # Restore bound registers. These are nops if Intel MPX isn't - # avaiable or disabled. -# ifdef HAVE_MPX_SUPPORT - bndmov REGISTER_SAVE_BND3(%rsp), %bnd3 - bndmov REGISTER_SAVE_BND2(%rsp), %bnd2 - bndmov REGISTER_SAVE_BND1(%rsp), %bnd1 - bndmov REGISTER_SAVE_BND0(%rsp), %bnd0 -# else - .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3 - .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2 - .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1 -# if REGISTER_SAVE_BND0 == 0 - .byte 0x66,0x0f,0x1a,0x04,0x24 -# else - .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0 -# endif -# endif -#endif - # Get register content back. - movq REGISTER_SAVE_R9(%rsp), %r9 - movq REGISTER_SAVE_R8(%rsp), %r8 - movq REGISTER_SAVE_RDI(%rsp), %rdi - movq REGISTER_SAVE_RSI(%rsp), %rsi - movq REGISTER_SAVE_RDX(%rsp), %rdx - movq REGISTER_SAVE_RCX(%rsp), %rcx - movq REGISTER_SAVE_RAX(%rsp), %rax - # Adjust stack(PLT did 2 pushes) - addq $(REGISTER_SAVE_AREA + 16), %rsp - cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16)) - # Preserve bound registers. - PRESERVE_BND_REGS_PREFIX - jmp *%r11 # Jump to function address. - cfi_endproc - .size _dl_runtime_resolve, .-_dl_runtime_resolve - - -#ifndef PROF - .globl _dl_runtime_profile - .type _dl_runtime_profile, @function - .align 16 - cfi_startproc - -_dl_runtime_profile: - cfi_adjust_cfa_offset(16) # Incorporate PLT - /* The La_x86_64_regs data structure pointed to by the - fourth paramater must be 16-byte aligned. This must - be explicitly enforced. We have the set up a dynamically - sized stack frame. %rbx points to the top half which - has a fixed size and preserves the original stack pointer. */ - - subq $32, %rsp # Allocate the local storage. - cfi_adjust_cfa_offset(32) - movq %rbx, (%rsp) - cfi_rel_offset(%rbx, 0) +#define RESTORE_AVX - /* On the stack: - 56(%rbx) parameter #1 - 48(%rbx) return address - - 40(%rbx) reloc index - 32(%rbx) link_map - - 24(%rbx) La_x86_64_regs pointer - 16(%rbx) framesize - 8(%rbx) rax - (%rbx) rbx - */ - - movq %rax, 8(%rsp) - movq %rsp, %rbx - cfi_def_cfa_register(%rbx) - - /* Actively align the La_x86_64_regs structure. */ - andq $0xfffffffffffffff0, %rsp -# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT - /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers - to detect if any xmm0-xmm7 registers are changed by audit - module. */ - subq $(LR_SIZE + XMM_SIZE*8), %rsp +#ifdef HAVE_AVX512_ASM_SUPPORT +# define VEC_SIZE 64 +# define VMOVA vmovdqa64 +# if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +# define VMOV vmovdqa64 # else - subq $LR_SIZE, %rsp # sizeof(La_x86_64_regs) -# endif - movq %rsp, 24(%rbx) - - /* Fill the La_x86_64_regs structure. */ - movq %rdx, LR_RDX_OFFSET(%rsp) - movq %r8, LR_R8_OFFSET(%rsp) - movq %r9, LR_R9_OFFSET(%rsp) - movq %rcx, LR_RCX_OFFSET(%rsp) - movq %rsi, LR_RSI_OFFSET(%rsp) - movq %rdi, LR_RDI_OFFSET(%rsp) - movq %rbp, LR_RBP_OFFSET(%rsp) - - leaq 48(%rbx), %rax - movq %rax, LR_RSP_OFFSET(%rsp) - - /* We always store the XMM registers even if AVX is available. - This is to provide backward binary compatibility for existing - audit modules. */ - movaps %xmm0, (LR_XMM_OFFSET)(%rsp) - movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) - movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) - movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) - movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) - movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) - movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) - movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) - -# ifndef __ILP32__ -# ifdef HAVE_MPX_SUPPORT - bndmov %bnd0, (LR_BND_OFFSET)(%rsp) # Preserve bound - bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp) # registers. Nops if - bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available - bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled. -# else - .byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET) - .byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE) - .byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2) - .byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3) -# endif -# endif - -# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT - .data -L(have_avx): - .zero 4 - .size L(have_avx), 4 - .previous - - cmpl $0, L(have_avx)(%rip) - jne L(defined) - movq %rbx, %r11 # Save rbx - movl $1, %eax - cpuid - movq %r11,%rbx # Restore rbx - xorl %eax, %eax - // AVX and XSAVE supported? - andl $((1 << 28) | (1 << 27)), %ecx - cmpl $((1 << 28) | (1 << 27)), %ecx - jne 10f -# ifdef HAVE_AVX512_ASM_SUPPORT - // AVX512 supported in processor? - movq %rbx, %r11 # Save rbx - xorl %ecx, %ecx - mov $0x7, %eax - cpuid - andl $(1 << 16), %ebx -# endif - xorl %ecx, %ecx - // Get XFEATURE_ENABLED_MASK - xgetbv -# ifdef HAVE_AVX512_ASM_SUPPORT - test %ebx, %ebx - movq %r11, %rbx # Restore rbx - je 20f - // Verify that XCR0[7:5] = '111b' and - // XCR0[2:1] = '11b' which means - // that zmm state is enabled - andl $0xe6, %eax - cmpl $0xe6, %eax - jne 20f - movl %eax, L(have_avx)(%rip) -L(avx512): -# define RESTORE_AVX -# define VMOV vmovdqu64 -# define VEC(i) zmm##i -# define MORE_CODE -# include "dl-trampoline.h" -# undef VMOV -# undef VEC -# undef RESTORE_AVX -# endif -20: andl $0x6, %eax -10: subl $0x5, %eax - movl %eax, L(have_avx)(%rip) - cmpl $0, %eax - -L(defined): - js L(no_avx) -# ifdef HAVE_AVX512_ASM_SUPPORT - cmpl $0xe6, L(have_avx)(%rip) - je L(avx512) -# endif - -# define RESTORE_AVX -# define VMOV vmovdqu -# define VEC(i) ymm##i -# define MORE_CODE -# include "dl-trampoline.h" - - .align 16 -L(no_avx): +# define VMOV vmovdqu64 # endif - -# undef RESTORE_AVX +# define VEC(i) zmm##i +# define _dl_runtime_resolve _dl_runtime_resolve_avx512 +# define _dl_runtime_profile _dl_runtime_profile_avx512 # include "dl-trampoline.h" - - cfi_endproc - .size _dl_runtime_profile, .-_dl_runtime_profile +# undef _dl_runtime_resolve +# undef _dl_runtime_profile +# undef VEC +# undef VMOV +# undef VMOVA +# undef VEC_SIZE +#else +strong_alias (_dl_runtime_resolve_avx, _dl_runtime_resolve_avx512) + .hidden _dl_runtime_resolve_avx512 +strong_alias (_dl_runtime_profile_avx, _dl_runtime_profile_avx512) + .hidden _dl_runtime_profile_avx512 #endif - -#ifdef SHARED - .globl _dl_x86_64_save_sse - .type _dl_x86_64_save_sse, @function - .align 16 - cfi_startproc -_dl_x86_64_save_sse: -# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT - cmpl $0, L(have_avx)(%rip) - jne L(defined_5) - movq %rbx, %r11 # Save rbx - movl $1, %eax - cpuid - movq %r11,%rbx # Restore rbx - xorl %eax, %eax - // AVX and XSAVE supported? - andl $((1 << 28) | (1 << 27)), %ecx - cmpl $((1 << 28) | (1 << 27)), %ecx - jne 1f -# ifdef HAVE_AVX512_ASM_SUPPORT - // AVX512 supported in a processor? - movq %rbx, %r11 # Save rbx - xorl %ecx,%ecx - mov $0x7,%eax - cpuid - andl $(1 << 16), %ebx -# endif - xorl %ecx, %ecx - // Get XFEATURE_ENABLED_MASK - xgetbv -# ifdef HAVE_AVX512_ASM_SUPPORT - test %ebx, %ebx - movq %r11, %rbx # Restore rbx - je 2f - // Verify that XCR0[7:5] = '111b' and - // XCR0[2:1] = '11b' which means - // that zmm state is enabled - andl $0xe6, %eax - movl %eax, L(have_avx)(%rip) - cmpl $0xe6, %eax - je L(avx512_5) -# endif - -2: andl $0x6, %eax -1: subl $0x5, %eax - movl %eax, L(have_avx)(%rip) - cmpl $0, %eax - -L(defined_5): - js L(no_avx5) -# ifdef HAVE_AVX512_ASM_SUPPORT - cmpl $0xe6, L(have_avx)(%rip) - je L(avx512_5) -# endif - - vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE - vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE - vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE - vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE - vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE - vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE - vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE - vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE - ret -# ifdef HAVE_AVX512_ASM_SUPPORT -L(avx512_5): - vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE - vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE - vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE - vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE - vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE - vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE - vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE - vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE - ret -# endif -L(no_avx5): -# endif - movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE - movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE - movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE - movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE - movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE - movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE - movdqa %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE - movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE - ret - cfi_endproc - .size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse - - - .globl _dl_x86_64_restore_sse - .type _dl_x86_64_restore_sse, @function - .align 16 - cfi_startproc -_dl_x86_64_restore_sse: -# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT - cmpl $0, L(have_avx)(%rip) - js L(no_avx6) -# ifdef HAVE_AVX512_ASM_SUPPORT - cmpl $0xe6, L(have_avx)(%rip) - je L(avx512_6) -# endif - - vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0 - vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1 - vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2 - vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3 - vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4 - vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5 - vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6 - vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7 - ret -# ifdef HAVE_AVX512_ASM_SUPPORT -L(avx512_6): - vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0 - vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1 - vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2 - vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3 - vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4 - vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5 - vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6 - vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7 - ret -# endif -L(no_avx6): -# endif - movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0 - movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1 - movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2 - movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3 - movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4 - movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5 - movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6 - movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7 - ret - cfi_endproc - .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse +#define VEC_SIZE 32 +#define VMOVA vmovdqa +#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +# define VMOV vmovdqa +#else +# define VMOV vmovdqu +#endif +#define VEC(i) ymm##i +#define _dl_runtime_resolve _dl_runtime_resolve_avx +#define _dl_runtime_profile _dl_runtime_profile_avx +#include "dl-trampoline.h" +#undef _dl_runtime_resolve +#undef _dl_runtime_profile +#undef VEC +#undef VMOV +#undef VMOVA +#undef VEC_SIZE + +/* movaps/movups is 1-byte shorter. */ +#define VEC_SIZE 16 +#define VMOVA movaps +#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT +# define VMOV movaps +#else +# define VMOV movups #endif +#define VEC(i) xmm##i +#define _dl_runtime_resolve _dl_runtime_resolve_sse +#define _dl_runtime_profile _dl_runtime_profile_sse +#undef RESTORE_AVX +#include "dl-trampoline.h" diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h index d542428ac2..f4191833ab 100644 --- a/sysdeps/x86_64/dl-trampoline.h +++ b/sysdeps/x86_64/dl-trampoline.h @@ -1,6 +1,5 @@ -/* Partial PLT profile trampoline to save and restore x86-64 vector - registers. - Copyright (C) 2009-2015 Free Software Foundation, Inc. +/* PLT trampolines. x86-64 version. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,16 +16,248 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#ifdef RESTORE_AVX +#undef REGISTER_SAVE_AREA_RAW +#ifdef __ILP32__ +/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to + VEC7. */ +# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8) +#else +/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as + BND0, BND1, BND2, BND3 and VEC0 to VEC7. */ +# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8) +#endif + +#undef REGISTER_SAVE_AREA +#undef LOCAL_STORAGE_AREA +#undef BASE +#if DL_RUNIME_RESOLVE_REALIGN_STACK +# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8) +/* Local stack area before jumping to function address: RBX. */ +# define LOCAL_STORAGE_AREA 8 +# define BASE rbx +# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0 +# error REGISTER_SAVE_AREA must be multples of VEC_SIZE +# endif +#else +# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW +/* Local stack area before jumping to function address: All saved + registers. */ +# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA +# define BASE rsp +# if (REGISTER_SAVE_AREA % 16) != 8 +# error REGISTER_SAVE_AREA must be odd multples of 8 +# endif +#endif + + .text + .globl _dl_runtime_resolve + .hidden _dl_runtime_resolve + .type _dl_runtime_resolve, @function + .align 16 + cfi_startproc +_dl_runtime_resolve: + cfi_adjust_cfa_offset(16) # Incorporate PLT +#if DL_RUNIME_RESOLVE_REALIGN_STACK +# if LOCAL_STORAGE_AREA != 8 +# error LOCAL_STORAGE_AREA must be 8 +# endif + pushq %rbx # push subtracts stack by 8. + cfi_adjust_cfa_offset(8) + cfi_rel_offset(%rbx, 0) + mov %RSP_LP, %RBX_LP + cfi_def_cfa_register(%rbx) + and $-VEC_SIZE, %RSP_LP +#endif + sub $REGISTER_SAVE_AREA, %RSP_LP + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) + # Preserve registers otherwise clobbered. + movq %rax, REGISTER_SAVE_RAX(%rsp) + movq %rcx, REGISTER_SAVE_RCX(%rsp) + movq %rdx, REGISTER_SAVE_RDX(%rsp) + movq %rsi, REGISTER_SAVE_RSI(%rsp) + movq %rdi, REGISTER_SAVE_RDI(%rsp) + movq %r8, REGISTER_SAVE_R8(%rsp) + movq %r9, REGISTER_SAVE_R9(%rsp) + VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp) + VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp) + VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp) + VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp) + VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp) + VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp) + VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp) + VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp) +#ifndef __ILP32__ + # We also have to preserve bound registers. These are nops if + # Intel MPX isn't available or disabled. +# ifdef HAVE_MPX_SUPPORT + bndmov %bnd0, REGISTER_SAVE_BND0(%rsp) + bndmov %bnd1, REGISTER_SAVE_BND1(%rsp) + bndmov %bnd2, REGISTER_SAVE_BND2(%rsp) + bndmov %bnd3, REGISTER_SAVE_BND3(%rsp) +# else +# if REGISTER_SAVE_BND0 == 0 + .byte 0x66,0x0f,0x1b,0x04,0x24 +# else + .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0 +# endif + .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1 + .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2 + .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3 +# endif +#endif + # Copy args pushed by PLT in register. + # %rdi: link_map, %rsi: reloc_index + mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP + mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP + call _dl_fixup # Call resolver. + mov %RAX_LP, %R11_LP # Save return value +#ifndef __ILP32__ + # Restore bound registers. These are nops if Intel MPX isn't + # avaiable or disabled. +# ifdef HAVE_MPX_SUPPORT + bndmov REGISTER_SAVE_BND3(%rsp), %bnd3 + bndmov REGISTER_SAVE_BND2(%rsp), %bnd2 + bndmov REGISTER_SAVE_BND1(%rsp), %bnd1 + bndmov REGISTER_SAVE_BND0(%rsp), %bnd0 +# else + .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3 + .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2 + .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1 +# if REGISTER_SAVE_BND0 == 0 + .byte 0x66,0x0f,0x1a,0x04,0x24 +# else + .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0 +# endif +# endif +#endif + # Get register content back. + movq REGISTER_SAVE_R9(%rsp), %r9 + movq REGISTER_SAVE_R8(%rsp), %r8 + movq REGISTER_SAVE_RDI(%rsp), %rdi + movq REGISTER_SAVE_RSI(%rsp), %rsi + movq REGISTER_SAVE_RDX(%rsp), %rdx + movq REGISTER_SAVE_RCX(%rsp), %rcx + movq REGISTER_SAVE_RAX(%rsp), %rax + VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0) + VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1) + VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2) + VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3) + VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4) + VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5) + VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6) + VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7) +#if DL_RUNIME_RESOLVE_REALIGN_STACK + mov %RBX_LP, %RSP_LP + cfi_def_cfa_register(%rsp) + movq (%rsp), %rbx + cfi_restore(%rbx) +#endif + # Adjust stack(PLT did 2 pushes) + add $(LOCAL_STORAGE_AREA + 16), %RSP_LP + cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16)) + # Preserve bound registers. + PRESERVE_BND_REGS_PREFIX + jmp *%r11 # Jump to function address. + cfi_endproc + .size _dl_runtime_resolve, .-_dl_runtime_resolve + + +#ifndef PROF +# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0 +# error LR_VECTOR_OFFSET must be multples of VEC_SIZE +# endif + + .globl _dl_runtime_profile + .hidden _dl_runtime_profile + .type _dl_runtime_profile, @function + .align 16 +_dl_runtime_profile: + cfi_startproc + cfi_adjust_cfa_offset(16) # Incorporate PLT + /* The La_x86_64_regs data structure pointed to by the + fourth paramater must be VEC_SIZE-byte aligned. This must + be explicitly enforced. We have the set up a dynamically + sized stack frame. %rbx points to the top half which + has a fixed size and preserves the original stack pointer. */ + + sub $32, %RSP_LP # Allocate the local storage. + cfi_adjust_cfa_offset(32) + movq %rbx, (%rsp) + cfi_rel_offset(%rbx, 0) + + /* On the stack: + 56(%rbx) parameter #1 + 48(%rbx) return address + + 40(%rbx) reloc index + 32(%rbx) link_map + + 24(%rbx) La_x86_64_regs pointer + 16(%rbx) framesize + 8(%rbx) rax + (%rbx) rbx + */ + + movq %rax, 8(%rsp) + mov %RSP_LP, %RBX_LP + cfi_def_cfa_register(%rbx) + + /* Actively align the La_x86_64_regs structure. */ + and $-VEC_SIZE, %RSP_LP + /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers + to detect if any xmm0-xmm7 registers are changed by audit + module. */ + sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP + movq %rsp, 24(%rbx) + + /* Fill the La_x86_64_regs structure. */ + movq %rdx, LR_RDX_OFFSET(%rsp) + movq %r8, LR_R8_OFFSET(%rsp) + movq %r9, LR_R9_OFFSET(%rsp) + movq %rcx, LR_RCX_OFFSET(%rsp) + movq %rsi, LR_RSI_OFFSET(%rsp) + movq %rdi, LR_RDI_OFFSET(%rsp) + movq %rbp, LR_RBP_OFFSET(%rsp) + + lea 48(%rbx), %RAX_LP + movq %rax, LR_RSP_OFFSET(%rsp) + + /* We always store the XMM registers even if AVX is available. + This is to provide backward binary compatibility for existing + audit modules. */ + movaps %xmm0, (LR_XMM_OFFSET)(%rsp) + movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) + movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) + movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) + movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) + movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) + movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) + movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) + +# ifndef __ILP32__ +# ifdef HAVE_MPX_SUPPORT + bndmov %bnd0, (LR_BND_OFFSET)(%rsp) # Preserve bound + bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp) # registers. Nops if + bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available + bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled. +# else + .byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET) + .byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE) + .byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2) + .byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3) +# endif +# endif + +# ifdef RESTORE_AVX /* This is to support AVX audit modules. */ - VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp) - VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) - VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) - VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) - VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) - VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) - VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) - VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) + VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp) + VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) + VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) + VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) + VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) + VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) + VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) + VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) /* Save xmm0-xmm7 registers to detect if any of them are changed by audit module. */ @@ -38,7 +269,7 @@ vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp) vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp) vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp) -#endif +# endif mov %RSP_LP, %RCX_LP # La_x86_64_regs pointer to %rcx. mov 48(%rbx), %RDX_LP # Load return address if needed. @@ -63,7 +294,7 @@ movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6 movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7 -#ifdef RESTORE_AVX +# ifdef RESTORE_AVX /* Check if any xmm0-xmm7 registers are changed by audit module. */ vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8 @@ -72,7 +303,7 @@ je 2f vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp) jmp 1f -2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0) +2: VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0) vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 @@ -81,7 +312,7 @@ je 2f vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) jmp 1f -2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1) +2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1) vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 @@ -90,7 +321,7 @@ je 2f vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) jmp 1f -2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2) +2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2) vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 @@ -99,7 +330,7 @@ je 2f vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) jmp 1f -2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3) +2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3) vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 @@ -108,7 +339,7 @@ je 2f vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) jmp 1f -2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4) +2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4) vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 @@ -117,7 +348,7 @@ je 2f vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) jmp 1f -2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5) +2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5) vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 @@ -126,7 +357,7 @@ je 2f vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) jmp 1f -2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6) +2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6) vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) 1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 @@ -135,25 +366,25 @@ je 2f vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) jmp 1f -2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7) +2: VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7) vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) 1: -#endif +# endif -#ifndef __ILP32__ -# ifdef HAVE_MPX_SUPPORT +# ifndef __ILP32__ +# ifdef HAVE_MPX_SUPPORT bndmov (LR_BND_OFFSET)(%rsp), %bnd0 # Restore bound bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1 # registers. bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2 bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3 -# else +# else .byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET) .byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE) .byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2) .byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3) +# endif # endif -#endif mov 16(%rbx), %R10_LP # Anything in framesize? test %R10_LP, %R10_LP @@ -168,12 +399,12 @@ movq LR_RSI_OFFSET(%rsp), %rsi movq LR_RDI_OFFSET(%rsp), %rdi - movq %rbx, %rsp + mov %RBX_LP, %RSP_LP movq (%rsp), %rbx - cfi_restore(rbx) + cfi_restore(%rbx) cfi_def_cfa_register(%rsp) - addq $48, %rsp # Adjust the stack to the return value + add $48, %RSP_LP # Adjust the stack to the return value # (eats the reloc index and link_map) cfi_adjust_cfa_offset(-48) PRESERVE_BND_REGS_PREFIX @@ -189,13 +420,13 @@ temporary buffer of the size specified by the 'framesize' returned from _dl_profile_fixup */ - leaq LR_RSP_OFFSET(%rbx), %rsi # stack - addq $8, %r10 - andq $0xfffffffffffffff0, %r10 - movq %r10, %rcx - subq %r10, %rsp - movq %rsp, %rdi - shrq $3, %rcx + lea LR_RSP_OFFSET(%rbx), %RSI_LP # stack + add $8, %R10_LP + and $-16, %R10_LP + mov %R10_LP, %RCX_LP + sub %R10_LP, %RSP_LP + mov %RSP_LP, %RDI_LP + shr $3, %RCX_LP rep movsq @@ -206,21 +437,21 @@ PRESERVE_BND_REGS_PREFIX call *%r11 - mov 24(%rbx), %rsp # Drop the copied stack content + mov 24(%rbx), %RSP_LP # Drop the copied stack content /* Now we have to prepare the La_x86_64_retval structure for the _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, so we just need to allocate the sizeof(La_x86_64_retval) space on the stack, since the alignment has already been taken care of. */ -#ifdef RESTORE_AVX +# ifdef RESTORE_AVX /* sizeof(La_x86_64_retval). Need extra space for 2 SSE registers to detect if xmm0/xmm1 registers are changed by audit module. */ - subq $(LRV_SIZE + XMM_SIZE*2), %rsp -#else - subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval) -#endif - movq %rsp, %rcx # La_x86_64_retval argument to %rcx. + sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP +# else + sub $LRV_SIZE, %RSP_LP # sizeof(La_x86_64_retval) +# endif + mov %RSP_LP, %RCX_LP # La_x86_64_retval argument to %rcx. /* Fill in the La_x86_64_retval structure. */ movq %rax, LRV_RAX_OFFSET(%rcx) @@ -229,26 +460,26 @@ movaps %xmm0, LRV_XMM0_OFFSET(%rcx) movaps %xmm1, LRV_XMM1_OFFSET(%rcx) -#ifdef RESTORE_AVX +# ifdef RESTORE_AVX /* This is to support AVX audit modules. */ - VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx) - VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx) + VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx) + VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx) /* Save xmm0/xmm1 registers to detect if they are changed by audit module. */ vmovdqa %xmm0, (LRV_SIZE)(%rcx) vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx) -#endif +# endif -#ifndef __ILP32__ -# ifdef HAVE_MPX_SUPPORT +# ifndef __ILP32__ +# ifdef HAVE_MPX_SUPPORT bndmov %bnd0, LRV_BND0_OFFSET(%rcx) # Preserve returned bounds. bndmov %bnd1, LRV_BND1_OFFSET(%rcx) -# else +# else .byte 0x66,0x0f,0x1b,0x81;.long (LRV_BND0_OFFSET) .byte 0x66,0x0f,0x1b,0x89;.long (LRV_BND1_OFFSET) +# endif # endif -#endif fstpt LRV_ST0_OFFSET(%rcx) fstpt LRV_ST1_OFFSET(%rcx) @@ -265,50 +496,47 @@ movaps LRV_XMM0_OFFSET(%rsp), %xmm0 movaps LRV_XMM1_OFFSET(%rsp), %xmm1 -#ifdef RESTORE_AVX +# ifdef RESTORE_AVX /* Check if xmm0/xmm1 registers are changed by audit module. */ vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2 vpmovmskb %xmm2, %esi cmpl $0xffff, %esi jne 1f - VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0) + VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0) 1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 vpmovmskb %xmm2, %esi cmpl $0xffff, %esi jne 1f - VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1) + VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1) 1: -#endif +# endif -#ifndef __ILP32__ -# ifdef HAVE_MPX_SUPPORT +# ifndef __ILP32__ +# ifdef HAVE_MPX_SUPPORT bndmov LRV_BND0_OFFSET(%rsp), %bnd0 # Restore bound registers. bndmov LRV_BND1_OFFSET(%rsp), %bnd1 -# else +# else .byte 0x66,0x0f,0x1a,0x84,0x24;.long (LRV_BND0_OFFSET) .byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LRV_BND1_OFFSET) +# endif # endif -#endif fldt LRV_ST1_OFFSET(%rsp) fldt LRV_ST0_OFFSET(%rsp) - movq %rbx, %rsp + mov %RBX_LP, %RSP_LP movq (%rsp), %rbx - cfi_restore(rbx) + cfi_restore(%rbx) cfi_def_cfa_register(%rsp) - addq $48, %rsp # Adjust the stack to the return value + add $48, %RSP_LP # Adjust the stack to the return value # (eats the reloc index and link_map) cfi_adjust_cfa_offset(-48) PRESERVE_BND_REGS_PREFIX retq -#ifdef MORE_CODE - cfi_adjust_cfa_offset(48) - cfi_rel_offset(%rbx, 0) - cfi_def_cfa_register(%rbx) -# undef MORE_CODE + cfi_endproc + .size _dl_runtime_profile, .-_dl_runtime_profile #endif diff --git a/sysdeps/x86_64/ffs.c b/sysdeps/x86_64/ffs.c index 48feb4aba2..be5b6c8589 100644 --- a/sysdeps/x86_64/ffs.c +++ b/sysdeps/x86_64/ffs.c @@ -1,7 +1,7 @@ /* ffs -- find first set bit in a word, counted from least significant end. For AMD x86-64. This file is part of the GNU C Library. - Copyright (C) 1991-2015 Free Software Foundation, Inc. + Copyright (C) 1991-2016 Free Software Foundation, Inc. Contributed by Ulrich Drepper <drepper@cygnus.com>. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/ffsll.c b/sysdeps/x86_64/ffsll.c index 1c26679da7..c0f5abc446 100644 --- a/sysdeps/x86_64/ffsll.c +++ b/sysdeps/x86_64/ffsll.c @@ -1,7 +1,7 @@ /* ffsll -- find first set bit in a word, counted from least significant end. For AMD x86-64. This file is part of the GNU C Library. - Copyright (C) 1991-2015 Free Software Foundation, Inc. + Copyright (C) 1991-2016 Free Software Foundation, Inc. Contributed by Ulrich Drepper <drepper@cygnus.com>. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile index 1ebe5118bf..88742faff1 100644 --- a/sysdeps/x86_64/fpu/Makefile +++ b/sysdeps/x86_64/fpu/Makefile @@ -20,7 +20,9 @@ libmvec-support += svml_d_cos2_core svml_d_cos4_core_avx \ svml_d_pow_data svml_s_powf4_core svml_s_powf8_core_avx \ svml_s_powf8_core svml_s_powf16_core svml_s_powf_data \ svml_s_sincosf4_core svml_s_sincosf8_core_avx \ - svml_s_sincosf8_core svml_s_sincosf16_core init-arch + svml_s_sincosf8_core svml_s_sincosf16_core svml_finite_alias + +libmvec-static-only-routines = svml_finite_alias endif # Variables for libmvec tests. diff --git a/sysdeps/x86_64/fpu/e_exp2l.S b/sysdeps/x86_64/fpu/e_exp2l.S index 7d42a932db..0e059b7565 100644 --- a/sysdeps/x86_64/fpu/e_exp2l.S +++ b/sysdeps/x86_64/fpu/e_exp2l.S @@ -6,7 +6,17 @@ */ #include <machine/asm.h> +#include <x86_64-math-asm.h> +DEFINE_LDBL_MIN + +#ifdef PIC +# define MO(op) op##(%rip) +#else +# define MO(op) op +#endif + + .text ENTRY(__ieee754_exp2l) fldt 8(%rsp) /* I added the following ugly construct because exp(+-Inf) resulted @@ -36,6 +46,7 @@ ENTRY(__ieee754_exp2l) faddp /* 2^(fract(x)) */ fscale /* e^x */ fstp %st(1) + LDBL_CHECK_FORCE_UFLOW_NONNEG_NAN ret 1: testl $0x200, %eax /* Test sign. */ diff --git a/sysdeps/x86_64/fpu/e_expf.S b/sysdeps/x86_64/fpu/e_expf.S index 34453ca409..d4b63a8d8e 100644 --- a/sysdeps/x86_64/fpu/e_expf.S +++ b/sysdeps/x86_64/fpu/e_expf.S @@ -1,5 +1,5 @@ /* Optimized __ieee754_expf function. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/fpu/e_expl.S b/sysdeps/x86_64/fpu/e_expl.S index 14dd29dcad..8b3ddaec59 100644 --- a/sysdeps/x86_64/fpu/e_expl.S +++ b/sysdeps/x86_64/fpu/e_expl.S @@ -23,6 +23,7 @@ */ #include <machine/asm.h> +#include <x86_64-math-asm.h> #ifdef USE_AS_EXP10L # define IEEE754_EXPL __ieee754_exp10l @@ -65,10 +66,7 @@ c1: .byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f csat: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40 .byte 0, 0, 0, 0, 0, 0 ASM_SIZE_DIRECTIVE(csat) - .type cmin,@object -cmin: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x1, 0 - .byte 0, 0, 0, 0, 0, 0 - ASM_SIZE_DIRECTIVE(cmin) +DEFINE_LDBL_MIN #endif #ifdef PIC @@ -192,17 +190,9 @@ ENTRY(IEEE754_EXPL) fstp %st(1) /* 2 */ fscale /* 2 scale factor is st(1); base^x */ fstp %st(1) /* 1 */ - /* Ensure underflow for tiny result. */ - fldt MO(cmin) /* 2 cmin */ - fld %st(1) /* 3 */ - fcomip %st(1), %st /* 2 */ - fstp %st /* 1 */ - jnc 6f - fld %st - fmul %st - fstp %st + LDBL_CHECK_FORCE_UFLOW_NONNEG #endif -6: fstp %st(1) /* 0 */ + fstp %st(1) /* 0 */ jmp 2f 1: #ifdef USE_AS_EXPM1L diff --git a/sysdeps/x86_64/fpu/e_log10l.S b/sysdeps/x86_64/fpu/e_log10l.S index 2607ad199b..8fa61644c1 100644 --- a/sysdeps/x86_64/fpu/e_log10l.S +++ b/sysdeps/x86_64/fpu/e_log10l.S @@ -79,7 +79,13 @@ ENTRY(__log10l_finite) fnstsw // x-1 : x : log10(2) andb $0x45, %ah jz 2b - fstp %st(1) // x-1 : log10(2) + fxam + fnstsw + andb $0x45, %ah + cmpb $0x40, %ah + jne 6f + fabs // log10(1) is +0 in all rounding modes. +6: fstp %st(1) // x-1 : log10(2) fyl2xp1 // log10(x) ret END(__log10l_finite) diff --git a/sysdeps/x86_64/fpu/e_log2l.S b/sysdeps/x86_64/fpu/e_log2l.S index c12906d456..a063255ddd 100644 --- a/sysdeps/x86_64/fpu/e_log2l.S +++ b/sysdeps/x86_64/fpu/e_log2l.S @@ -78,7 +78,13 @@ ENTRY(__log2l_finite) fnstsw // x-1 : x : 1 andb $0x45, %ah jz 2b - fstp %st(1) // x-1 : 1 + fxam + fnstsw + andb $0x45, %ah + cmpb $0x40, %ah + jne 6f + fabs // log2(1) is +0 in all rounding modes. +6: fstp %st(1) // x-1 : 1 fyl2xp1 // log(x) ret END (__log2l_finite) diff --git a/sysdeps/x86_64/fpu/e_logl.S b/sysdeps/x86_64/fpu/e_logl.S index 047b8db88a..dbe6fd59dc 100644 --- a/sysdeps/x86_64/fpu/e_logl.S +++ b/sysdeps/x86_64/fpu/e_logl.S @@ -81,7 +81,13 @@ ENTRY(__logl_finite) fnstsw // x-1 : x : log(2) andb $0x45, %ah jz 2b - fstp %st(1) // x-1 : log(2) + fxam + fnstsw + andb $0x45, %ah + cmpb $0x40, %ah + jne 7f + fabs // log(1) is +0 in all rounding modes. +7: fstp %st(1) // x-1 : log(2) fyl2xp1 // log(x) ret END (__logl_finite) diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S index 358abb8dcb..1f68cf0102 100644 --- a/sysdeps/x86_64/fpu/e_powl.S +++ b/sysdeps/x86_64/fpu/e_powl.S @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996-2015 Free Software Foundation, Inc. + Copyright (C) 1996-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -18,6 +18,7 @@ <http://www.gnu.org/licenses/>. */ #include <machine/asm.h> +#include <x86_64-math-asm.h> .section .rodata.cst8,"aM",@progbits,8 @@ -59,6 +60,7 @@ minfinity: mzero: .byte 0, 0, 0, 0, 0, 0, 0, 0x80 ASM_SIZE_DIRECTIVE(minf_mzero) +DEFINE_LDBL_MIN #ifdef PIC # define MO(op) op##(%rip) @@ -175,6 +177,7 @@ ENTRY(__ieee754_powl) orl %edx, %ecx jnz 6b fstp %st(0) // ST*x + LDBL_CHECK_FORCE_UFLOW_NONNAN ret /* y is ±NAN */ diff --git a/sysdeps/x86_64/fpu/e_sqrt.c b/sysdeps/x86_64/fpu/e_sqrt.c index b587f1cfb7..4b86434913 100644 --- a/sysdeps/x86_64/fpu/e_sqrt.c +++ b/sysdeps/x86_64/fpu/e_sqrt.c @@ -1,5 +1,5 @@ /* Square root of floating point number. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/e_sqrtf.c b/sysdeps/x86_64/fpu/e_sqrtf.c index 386ca1ce1d..639137b735 100644 --- a/sysdeps/x86_64/fpu/e_sqrtf.c +++ b/sysdeps/x86_64/fpu/e_sqrtf.c @@ -1,5 +1,5 @@ /* Square root of floating point number. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fclrexcpt.c b/sysdeps/x86_64/fpu/fclrexcpt.c index ec03e1e2c2..a8e00c0141 100644 --- a/sysdeps/x86_64/fpu/fclrexcpt.c +++ b/sysdeps/x86_64/fpu/fclrexcpt.c @@ -1,5 +1,5 @@ /* Clear given exceptions in current floating-point environment. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fedisblxcpt.c b/sysdeps/x86_64/fpu/fedisblxcpt.c index 95f585aefc..f1ea6cfa97 100644 --- a/sysdeps/x86_64/fpu/fedisblxcpt.c +++ b/sysdeps/x86_64/fpu/fedisblxcpt.c @@ -1,5 +1,5 @@ /* Disable floating-point exceptions. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2001. diff --git a/sysdeps/x86_64/fpu/feenablxcpt.c b/sysdeps/x86_64/fpu/feenablxcpt.c index e04875fe21..df4c628b8d 100644 --- a/sysdeps/x86_64/fpu/feenablxcpt.c +++ b/sysdeps/x86_64/fpu/feenablxcpt.c @@ -1,5 +1,5 @@ /* Enable floating-point exceptions. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2001. diff --git a/sysdeps/x86_64/fpu/fegetenv.c b/sysdeps/x86_64/fpu/fegetenv.c index 7314cee7e6..a28efb36f3 100644 --- a/sysdeps/x86_64/fpu/fegetenv.c +++ b/sysdeps/x86_64/fpu/fegetenv.c @@ -1,5 +1,5 @@ /* Store current floating-point environment. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fegetexcept.c b/sysdeps/x86_64/fpu/fegetexcept.c index 27a0803aa2..8acd0382a0 100644 --- a/sysdeps/x86_64/fpu/fegetexcept.c +++ b/sysdeps/x86_64/fpu/fegetexcept.c @@ -1,5 +1,5 @@ /* Get enabled floating-point exceptions. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2001. diff --git a/sysdeps/x86_64/fpu/fegetround.c b/sysdeps/x86_64/fpu/fegetround.c index b515d8afe7..296d366560 100644 --- a/sysdeps/x86_64/fpu/fegetround.c +++ b/sysdeps/x86_64/fpu/fegetround.c @@ -1,5 +1,5 @@ /* Return current rounding direction. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. diff --git a/sysdeps/x86_64/fpu/feholdexcpt.c b/sysdeps/x86_64/fpu/feholdexcpt.c index 615b702135..a040c3dea5 100644 --- a/sysdeps/x86_64/fpu/feholdexcpt.c +++ b/sysdeps/x86_64/fpu/feholdexcpt.c @@ -1,5 +1,5 @@ /* Store current floating-point environment and clear exceptions. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fesetenv.c b/sysdeps/x86_64/fpu/fesetenv.c index 3e3fd8492d..355d02aaa6 100644 --- a/sysdeps/x86_64/fpu/fesetenv.c +++ b/sysdeps/x86_64/fpu/fesetenv.c @@ -1,5 +1,5 @@ /* Install given floating-point environment. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,9 +17,15 @@ <http://www.gnu.org/licenses/>. */ #include <fenv.h> +#include <fpu_control.h> #include <assert.h> +/* All exceptions, including the x86-specific "denormal operand" + exception. */ +#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM) + + int __fesetenv (const fenv_t *envp) { @@ -34,43 +40,61 @@ __fesetenv (const fenv_t *envp) if (envp == FE_DFL_ENV) { - temp.__control_word |= FE_ALL_EXCEPT; + temp.__control_word |= FE_ALL_EXCEPT_X86; temp.__control_word &= ~FE_TOWARDZERO; - temp.__status_word &= ~FE_ALL_EXCEPT; + temp.__control_word |= _FPU_EXTENDED; + temp.__status_word &= ~FE_ALL_EXCEPT_X86; temp.__eip = 0; temp.__cs_selector = 0; temp.__opcode = 0; temp.__data_offset = 0; temp.__data_selector = 0; + /* Clear SSE exceptions. */ + temp.__mxcsr &= ~FE_ALL_EXCEPT_X86; /* Set mask for SSE MXCSR. */ - temp.__mxcsr |= (FE_ALL_EXCEPT << 7); + temp.__mxcsr |= (FE_ALL_EXCEPT_X86 << 7); /* Set rounding to FE_TONEAREST. */ temp.__mxcsr &= ~ 0x6000; temp.__mxcsr |= (FE_TONEAREST << 3); + /* Clear the FZ and DAZ bits. */ + temp.__mxcsr &= ~0x8040; } else if (envp == FE_NOMASK_ENV) { temp.__control_word &= ~(FE_ALL_EXCEPT | FE_TOWARDZERO); - temp.__status_word &= ~FE_ALL_EXCEPT; + /* Keep the "denormal operand" exception masked. */ + temp.__control_word |= __FE_DENORM; + temp.__control_word |= _FPU_EXTENDED; + temp.__status_word &= ~FE_ALL_EXCEPT_X86; temp.__eip = 0; temp.__cs_selector = 0; temp.__opcode = 0; temp.__data_offset = 0; temp.__data_selector = 0; + /* Clear SSE exceptions. */ + temp.__mxcsr &= ~FE_ALL_EXCEPT_X86; /* Set mask for SSE MXCSR. */ /* Set rounding to FE_TONEAREST. */ temp.__mxcsr &= ~ 0x6000; temp.__mxcsr |= (FE_TONEAREST << 3); /* Do not mask exceptions. */ temp.__mxcsr &= ~(FE_ALL_EXCEPT << 7); + /* Keep the "denormal operand" exception masked. */ + temp.__mxcsr |= (__FE_DENORM << 7); + /* Clear the FZ and DAZ bits. */ + temp.__mxcsr &= ~0x8040; } else { - temp.__control_word &= ~(FE_ALL_EXCEPT | FE_TOWARDZERO); + temp.__control_word &= ~(FE_ALL_EXCEPT_X86 + | FE_TOWARDZERO + | _FPU_EXTENDED); temp.__control_word |= (envp->__control_word - & (FE_ALL_EXCEPT | FE_TOWARDZERO)); - temp.__status_word &= ~FE_ALL_EXCEPT; - temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT; + & (FE_ALL_EXCEPT_X86 + | FE_TOWARDZERO + | _FPU_EXTENDED)); + temp.__status_word &= ~FE_ALL_EXCEPT_X86; + temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT_X86; temp.__eip = envp->__eip; temp.__cs_selector = envp->__cs_selector; temp.__opcode = envp->__opcode; diff --git a/sysdeps/x86_64/fpu/fesetround.c b/sysdeps/x86_64/fpu/fesetround.c index 2a9c351142..475d63f4db 100644 --- a/sysdeps/x86_64/fpu/fesetround.c +++ b/sysdeps/x86_64/fpu/fesetround.c @@ -1,5 +1,5 @@ /* Set current rounding direction. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/feupdateenv.c b/sysdeps/x86_64/fpu/feupdateenv.c index 99dfdd8f5c..f035d57ca8 100644 --- a/sysdeps/x86_64/fpu/feupdateenv.c +++ b/sysdeps/x86_64/fpu/feupdateenv.c @@ -1,5 +1,5 @@ /* Install given floating-point environment and raise exceptions. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. diff --git a/sysdeps/x86_64/fpu/fgetexcptflg.c b/sysdeps/x86_64/fpu/fgetexcptflg.c index e4f321e239..938cf3e62b 100644 --- a/sysdeps/x86_64/fpu/fgetexcptflg.c +++ b/sysdeps/x86_64/fpu/fgetexcptflg.c @@ -1,5 +1,5 @@ /* Store current representation for exceptions. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c index 3cd924647e..e2abbbec33 100644 --- a/sysdeps/x86_64/fpu/fraiseexcpt.c +++ b/sysdeps/x86_64/fpu/fraiseexcpt.c @@ -1,5 +1,5 @@ /* Raise given exceptions. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fsetexcptflg.c b/sysdeps/x86_64/fpu/fsetexcptflg.c index f7915e3fdd..76f7bad9a8 100644 --- a/sysdeps/x86_64/fpu/fsetexcptflg.c +++ b/sysdeps/x86_64/fpu/fsetexcptflg.c @@ -1,5 +1,5 @@ /* Set floating-point environment exception handling. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/ftestexcept.c b/sysdeps/x86_64/fpu/ftestexcept.c index 1e67c2fe21..c8f2c01c67 100644 --- a/sysdeps/x86_64/fpu/ftestexcept.c +++ b/sysdeps/x86_64/fpu/ftestexcept.c @@ -1,5 +1,5 @@ /* Test exception in current environment. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index de7d420aef..445b47527d 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -32,34 +32,34 @@ ildouble: 2 ldouble: 2 Function: "acosh": -double: 1 +double: 2 float: 2 -idouble: 1 +idouble: 2 ifloat: 2 ildouble: 2 ldouble: 2 Function: "acosh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 +double: 2 +float: 2 +idouble: 2 +ifloat: 2 ildouble: 4 ldouble: 4 Function: "acosh_towardzero": double: 2 -float: 1 +float: 2 idouble: 2 -ifloat: 1 +ifloat: 2 ildouble: 4 ldouble: 4 Function: "acosh_upward": double: 2 -float: 1 +float: 2 idouble: 2 -ifloat: 1 +ifloat: 2 ildouble: 3 ldouble: 3 @@ -98,8 +98,8 @@ double: 1 float: 1 idouble: 1 ifloat: 1 -ildouble: 2 -ldouble: 2 +ildouble: 3 +ldouble: 3 Function: "asinh_downward": double: 3 @@ -122,8 +122,8 @@ double: 3 float: 3 idouble: 3 ifloat: 3 -ildouble: 4 -ldouble: 4 +ildouble: 5 +ldouble: 5 Function: "atan": float: 1 @@ -186,18 +186,18 @@ ildouble: 1 ldouble: 1 Function: "atanh": -double: 1 +double: 2 float: 2 -idouble: 1 +idouble: 2 ifloat: 2 ildouble: 3 ldouble: 3 Function: "atanh_downward": double: 3 -float: 2 +float: 3 idouble: 3 -ifloat: 2 +ifloat: 3 ildouble: 5 ldouble: 5 @@ -210,9 +210,9 @@ ildouble: 4 ldouble: 4 Function: "atanh_upward": -double: 2 +double: 3 float: 3 -idouble: 2 +idouble: 3 ifloat: 3 ildouble: 5 ldouble: 5 @@ -668,9 +668,9 @@ ildouble: 1 ldouble: 1 Function: "cbrt_upward": -double: 4 +double: 5 float: 1 -idouble: 4 +idouble: 5 ifloat: 1 ildouble: 1 ldouble: 1 @@ -869,11 +869,11 @@ ldouble: 3 Function: Real part of "clog": double: 3 -float: 2 +float: 3 idouble: 3 -ifloat: 2 -ildouble: 4 -ldouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 Function: Imaginary part of "clog": float: 1 @@ -883,9 +883,9 @@ ldouble: 1 Function: Real part of "clog10": double: 3 -float: 3 +float: 4 idouble: 3 -ifloat: 3 +ifloat: 4 ildouble: 4 ldouble: 4 @@ -898,10 +898,10 @@ ildouble: 2 ldouble: 2 Function: Real part of "clog10_downward": -double: 6 -float: 6 -idouble: 6 -ifloat: 6 +double: 5 +float: 4 +idouble: 5 +ifloat: 4 ildouble: 8 ldouble: 8 @@ -910,14 +910,14 @@ double: 2 float: 4 idouble: 2 ifloat: 4 -ildouble: 2 -ldouble: 2 +ildouble: 3 +ldouble: 3 Function: Real part of "clog10_towardzero": double: 5 -float: 4 +float: 5 idouble: 5 -ifloat: 4 +ifloat: 5 ildouble: 8 ldouble: 8 @@ -930,28 +930,28 @@ ildouble: 3 ldouble: 3 Function: Real part of "clog10_upward": -double: 8 +double: 6 float: 5 -idouble: 8 +idouble: 6 ifloat: 5 -ildouble: 6 -ldouble: 6 +ildouble: 8 +ldouble: 8 Function: Imaginary part of "clog10_upward": double: 2 -float: 3 +float: 4 idouble: 2 -ifloat: 3 +ifloat: 4 ildouble: 3 ldouble: 3 Function: Real part of "clog_downward": -double: 7 -float: 5 -idouble: 7 -ifloat: 5 -ildouble: 7 -ldouble: 7 +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 Function: Imaginary part of "clog_downward": double: 1 @@ -962,28 +962,28 @@ ildouble: 1 ldouble: 1 Function: Real part of "clog_towardzero": -double: 7 -float: 5 -idouble: 7 -ifloat: 5 -ildouble: 8 -ldouble: 8 +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 Function: Imaginary part of "clog_towardzero": double: 1 -float: 2 +float: 3 idouble: 1 -ifloat: 2 +ifloat: 3 ildouble: 1 ldouble: 1 Function: Real part of "clog_upward": -double: 8 -float: 5 -idouble: 8 -ifloat: 5 -ildouble: 6 -ldouble: 6 +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 4 +ldouble: 4 Function: Imaginary part of "clog_upward": double: 1 @@ -1019,14 +1019,14 @@ Function: "cos_vlen16": float: 1 Function: "cos_vlen2": -double: 1 +double: 2 Function: "cos_vlen4": -double: 1 +double: 2 float: 1 Function: "cos_vlen4_avx2": -double: 1 +double: 2 Function: "cos_vlen8": double: 1 @@ -1040,7 +1040,7 @@ double: 1 float: 1 idouble: 1 ifloat: 1 -ildouble: 1 +ildouble: 2 ldouble: 2 Function: "cosh_downward": @@ -1264,25 +1264,25 @@ ildouble: 2 ldouble: 2 Function: Real part of "csqrt_downward": -double: 4 +double: 5 float: 4 -idouble: 4 +idouble: 5 ifloat: 4 -ildouble: 4 -ldouble: 4 +ildouble: 5 +ldouble: 5 Function: Imaginary part of "csqrt_downward": double: 4 float: 3 idouble: 4 ifloat: 3 -ildouble: 3 -ldouble: 3 +ildouble: 4 +ldouble: 4 Function: Real part of "csqrt_towardzero": -double: 3 +double: 4 float: 3 -idouble: 3 +idouble: 4 ifloat: 3 ildouble: 4 ldouble: 4 @@ -1292,8 +1292,8 @@ double: 4 float: 3 idouble: 4 ifloat: 3 -ildouble: 3 -ldouble: 3 +ildouble: 4 +ldouble: 4 Function: Real part of "csqrt_upward": double: 5 @@ -1308,8 +1308,8 @@ double: 3 float: 3 idouble: 3 ifloat: 3 -ildouble: 3 -ldouble: 3 +ildouble: 4 +ldouble: 4 Function: Real part of "ctan": double: 1 @@ -1472,17 +1472,17 @@ ildouble: 1 ldouble: 1 Function: "erfc": -double: 2 +double: 3 float: 2 -idouble: 2 +idouble: 3 ifloat: 2 -ildouble: 2 -ldouble: 2 +ildouble: 3 +ldouble: 3 Function: "erfc_downward": -double: 4 +double: 5 float: 6 -idouble: 4 +idouble: 5 ifloat: 6 ildouble: 4 ldouble: 4 @@ -1496,12 +1496,12 @@ ildouble: 4 ldouble: 4 Function: "erfc_upward": -double: 4 +double: 5 float: 6 -idouble: 4 +idouble: 5 ifloat: 6 -ildouble: 4 -ldouble: 4 +ildouble: 5 +ldouble: 5 Function: "exp": ildouble: 1 @@ -1578,12 +1578,14 @@ ldouble: 1 Function: "exp_towardzero": double: 1 idouble: 1 -ildouble: 1 -ldouble: 1 +ildouble: 2 +ldouble: 2 Function: "exp_upward": double: 1 +float: 1 idouble: 1 +ifloat: 1 ildouble: 1 ldouble: 1 @@ -1625,9 +1627,9 @@ ldouble: 4 Function: "expm1_towardzero": double: 1 -float: 1 +float: 2 idouble: 1 -ifloat: 1 +ifloat: 2 ildouble: 4 ldouble: 4 @@ -1640,36 +1642,36 @@ ildouble: 4 ldouble: 4 Function: "gamma": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "gamma_downward": double: 4 -float: 3 +float: 4 idouble: 4 -ifloat: 3 -ildouble: 6 -ldouble: 6 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "gamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 7 +ldouble: 7 Function: "gamma_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 6 -ldouble: 6 +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 7 +ldouble: 7 Function: "gamma_upward": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 4 -ldouble: 4 +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 6 +ldouble: 6 Function: "hypot": double: 1 @@ -1792,36 +1794,36 @@ ildouble: 5 ldouble: 5 Function: "lgamma": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "lgamma_downward": double: 4 -float: 3 +float: 4 idouble: 4 -ifloat: 3 -ildouble: 6 -ldouble: 6 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "lgamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 7 +ldouble: 7 Function: "lgamma_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 6 -ldouble: 6 +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 7 +ldouble: 7 Function: "lgamma_upward": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 4 -ldouble: 4 +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 6 +ldouble: 6 Function: "log": float: 1 @@ -1874,16 +1876,16 @@ double: 2 float: 2 idouble: 2 ifloat: 2 -ildouble: 3 -ldouble: 3 +ildouble: 4 +ldouble: 4 Function: "log1p_towardzero": double: 2 float: 2 idouble: 2 ifloat: 2 -ildouble: 3 -ldouble: 3 +ildouble: 4 +ldouble: 4 Function: "log1p_upward": double: 2 @@ -1938,7 +1940,9 @@ ildouble: 2 ldouble: 2 Function: "log_upward": +double: 1 float: 2 +idouble: 1 ifloat: 2 ildouble: 1 ldouble: 1 @@ -1964,8 +1968,8 @@ Function: "log_vlen8_avx2": float: 2 Function: "pow": -float: 3 -ifloat: 3 +float: 1 +ifloat: 1 ildouble: 1 ldouble: 1 @@ -2001,25 +2005,25 @@ ldouble: 2 Function: "pow_downward": double: 1 -float: 3 +float: 1 idouble: 1 -ifloat: 3 +ifloat: 1 ildouble: 4 ldouble: 4 Function: "pow_towardzero": double: 1 -float: 4 +float: 1 idouble: 1 -ifloat: 4 +ifloat: 1 ildouble: 1 ldouble: 1 Function: "pow_upward": double: 1 -float: 4 +float: 1 idouble: 1 -ifloat: 4 +ifloat: 1 ildouble: 2 ldouble: 2 @@ -2050,14 +2054,14 @@ ldouble: 1 Function: "sin_downward": double: 1 idouble: 1 -ildouble: 2 -ldouble: 2 +ildouble: 3 +ldouble: 3 Function: "sin_towardzero": double: 1 idouble: 1 -ildouble: 1 -ldouble: 1 +ildouble: 2 +ldouble: 2 Function: "sin_upward": double: 1 @@ -2111,14 +2115,14 @@ Function: "sincos_vlen16": float: 1 Function: "sincos_vlen2": -double: 1 +double: 2 Function: "sincos_vlen4": -double: 1 +double: 2 float: 1 Function: "sincos_vlen4_avx2": -double: 1 +double: 2 Function: "sincos_vlen8": double: 1 @@ -2162,24 +2166,24 @@ ldouble: 5 Function: "tan": float: 1 ifloat: 1 -ildouble: 1 -ldouble: 1 +ildouble: 2 +ldouble: 2 Function: "tan_downward": double: 1 float: 2 idouble: 1 ifloat: 2 -ildouble: 2 -ldouble: 2 +ildouble: 3 +ldouble: 3 Function: "tan_towardzero": double: 1 float: 1 idouble: 1 ifloat: 1 -ildouble: 2 -ldouble: 2 +ildouble: 3 +ldouble: 3 Function: "tan_upward": double: 1 @@ -2194,8 +2198,8 @@ double: 2 float: 2 idouble: 2 ifloat: 2 -ildouble: 2 -ldouble: 2 +ildouble: 3 +ldouble: 3 Function: "tanh_downward": double: 3 @@ -2222,36 +2226,36 @@ ildouble: 4 ldouble: 4 Function: "tgamma": -double: 4 +double: 5 float: 5 -idouble: 4 +idouble: 5 ifloat: 5 -ildouble: 3 -ldouble: 3 +ildouble: 5 +ldouble: 5 Function: "tgamma_downward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 3 -ldouble: 3 +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 5 +ldouble: 5 Function: "tgamma_towardzero": double: 5 float: 5 idouble: 5 ifloat: 5 -ildouble: 3 -ldouble: 3 +ildouble: 5 +ldouble: 5 Function: "tgamma_upward": double: 5 float: 5 idouble: 5 ifloat: 5 -ildouble: 3 -ldouble: 3 +ildouble: 5 +ldouble: 5 Function: "y0": double: 2 diff --git a/sysdeps/x86_64/fpu/math-tests-arch.h b/sysdeps/x86_64/fpu/math-tests-arch.h index e8833bfe0a..867152046e 100644 --- a/sysdeps/x86_64/fpu/math-tests-arch.h +++ b/sysdeps/x86_64/fpu/math-tests-arch.h @@ -1,5 +1,5 @@ /* Runtime architecture check for math tests. x86_64 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -19,66 +19,36 @@ #if defined REQUIRE_AVX # include <init-arch.h> -/* Set to 1 if AVX supported. */ -static int avx_usable; - -# define INIT_ARCH_EXT \ - do \ - { \ - __init_cpu_features (); \ - avx_usable = __cpu_features.feature[index_AVX_Usable] \ - & bit_AVX_Usable; \ - } \ - while (0) +# define INIT_ARCH_EXT # define CHECK_ARCH_EXT \ do \ { \ - if (!avx_usable) return; \ + if (!HAS_ARCH_FEATURE (AVX_Usable)) return; \ } \ while (0) #elif defined REQUIRE_AVX2 # include <init-arch.h> - /* Set to 1 if AVX2 supported. */ - static int avx2_usable; - -# define INIT_ARCH_EXT \ - do \ - { \ - __init_cpu_features (); \ - avx2_usable = __cpu_features.feature[index_AVX2_Usable] \ - & bit_AVX2_Usable; \ - } \ - while (0) +# define INIT_ARCH_EXT # define CHECK_ARCH_EXT \ do \ { \ - if (!avx2_usable) return; \ + if (!HAS_ARCH_FEATURE (AVX2_Usable)) return; \ } \ while (0) #elif defined REQUIRE_AVX512F # include <init-arch.h> - /* Set to 1 if supported. */ - static int avx512f_usable; - -# define INIT_ARCH_EXT \ - do \ - { \ - __init_cpu_features (); \ - avx512f_usable = __cpu_features.feature[index_AVX512F_Usable] \ - & bit_AVX512F_Usable; \ - } \ - while (0) +# define INIT_ARCH_EXT # define CHECK_ARCH_EXT \ do \ { \ - if (!avx512f_usable) return; \ + if (!HAS_ARCH_FEATURE (AVX512F_Usable)) return; \ } \ while (0) diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 86ea473b4f..34542155aa 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -2,7 +2,6 @@ ifeq ($(subdir),math) libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \ s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c -ifeq ($(have-mfma4),yes) libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \ e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \ mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \ @@ -16,7 +15,7 @@ CFLAGS-e_asin-fma4.c = -mfma4 CFLAGS-e_atan2-fma4.c = -mfma4 CFLAGS-e_exp-fma4.c = -mfma4 CFLAGS-e_log-fma4.c = -mfma4 -CFLAGS-e_pow-fma4.c = -mfma4 +CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma) CFLAGS-halfulp-fma4.c = -mfma4 CFLAGS-mpa-fma4.c = -mfma4 CFLAGS-mpatan-fma4.c = -mfma4 @@ -31,9 +30,7 @@ CFLAGS-slowexp-fma4.c = -mfma4 CFLAGS-slowpow-fma4.c = -mfma4 CFLAGS-s_sin-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4 -endif -ifeq ($(config-cflags-sse2avx),yes) libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \ e_atan2-avx s_sin-avx s_tan-avx \ mplog-avx mpa-avx slowexp-avx \ @@ -50,7 +47,6 @@ CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX endif -endif ifeq ($(subdir),mathvec) libmvec-sysdep_routines += svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 \ diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c index 55865c02f3..111a5b99bd 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_asin.c +++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c @@ -1,7 +1,6 @@ -#ifdef HAVE_FMA4_SUPPORT -# include <init-arch.h> -# include <math.h> -# include <math_private.h> +#include <init-arch.h> +#include <math.h> +#include <math_private.h> extern double __ieee754_acos_sse2 (double); extern double __ieee754_asin_sse2 (double); @@ -9,16 +8,19 @@ extern double __ieee754_acos_fma4 (double); extern double __ieee754_asin_fma4 (double); libm_ifunc (__ieee754_acos, - HAS_FMA4 ? __ieee754_acos_fma4 : __ieee754_acos_sse2); + HAS_ARCH_FEATURE (FMA4_Usable) + ? __ieee754_acos_fma4 + : __ieee754_acos_sse2); strong_alias (__ieee754_acos, __acos_finite) libm_ifunc (__ieee754_asin, - HAS_FMA4 ? __ieee754_asin_fma4 : __ieee754_asin_sse2); + HAS_ARCH_FEATURE (FMA4_Usable) + ? __ieee754_asin_fma4 + : __ieee754_asin_sse2); strong_alias (__ieee754_asin, __asin_finite) -# define __ieee754_acos __ieee754_acos_sse2 -# define __ieee754_asin __ieee754_asin_sse2 -#endif +#define __ieee754_acos __ieee754_acos_sse2 +#define __ieee754_asin __ieee754_asin_sse2 #include <sysdeps/ieee754/dbl-64/e_asin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c index 547681cb59..9ca3c02a44 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c @@ -1,25 +1,18 @@ -#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT -# include <init-arch.h> -# include <math.h> -# include <math_private.h> +#include <init-arch.h> +#include <math.h> +#include <math_private.h> extern double __ieee754_atan2_sse2 (double, double); extern double __ieee754_atan2_avx (double, double); -# ifdef HAVE_FMA4_SUPPORT extern double __ieee754_atan2_fma4 (double, double); -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __ieee754_atan2_fma4 ((void *) 0) -# endif libm_ifunc (__ieee754_atan2, - HAS_FMA4 ? __ieee754_atan2_fma4 - : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); + HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_atan2_fma4 + : (HAS_ARCH_FEATURE (AVX_Usable) + ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); strong_alias (__ieee754_atan2, __atan2_finite) -# define __ieee754_atan2 __ieee754_atan2_sse2 -#endif +#define __ieee754_atan2 __ieee754_atan2_sse2 #include <sysdeps/ieee754/dbl-64/e_atan2.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c index d244954056..b7d7b5ff27 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c @@ -1,25 +1,18 @@ -#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT -# include <init-arch.h> -# include <math.h> -# include <math_private.h> +#include <init-arch.h> +#include <math.h> +#include <math_private.h> extern double __ieee754_exp_sse2 (double); extern double __ieee754_exp_avx (double); -# ifdef HAVE_FMA4_SUPPORT extern double __ieee754_exp_fma4 (double); -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __ieee754_exp_fma4 ((void *) 0) -# endif libm_ifunc (__ieee754_exp, - HAS_FMA4 ? __ieee754_exp_fma4 - : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2)); + HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_exp_fma4 + : (HAS_ARCH_FEATURE (AVX_Usable) + ? __ieee754_exp_avx : __ieee754_exp_sse2)); strong_alias (__ieee754_exp, __exp_finite) -# define __ieee754_exp __ieee754_exp_sse2 -#endif +#define __ieee754_exp __ieee754_exp_sse2 #include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c index 98054737bd..cf9533d6c0 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c @@ -1,25 +1,18 @@ -#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT -# include <init-arch.h> -# include <math.h> -# include <math_private.h> +#include <init-arch.h> +#include <math.h> +#include <math_private.h> extern double __ieee754_log_sse2 (double); extern double __ieee754_log_avx (double); -# ifdef HAVE_FMA4_SUPPORT extern double __ieee754_log_fma4 (double); -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __ieee754_log_fma4 ((void *) 0) -# endif libm_ifunc (__ieee754_log, - HAS_FMA4 ? __ieee754_log_fma4 - : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2)); + HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_log_fma4 + : (HAS_ARCH_FEATURE (AVX_Usable) + ? __ieee754_log_avx : __ieee754_log_sse2)); strong_alias (__ieee754_log, __log_finite) -# define __ieee754_log __ieee754_log_sse2 -#endif +#define __ieee754_log __ieee754_log_sse2 #include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c index 433cce0de6..a5c5d89c3e 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c @@ -1,16 +1,17 @@ -#ifdef HAVE_FMA4_SUPPORT -# include <init-arch.h> -# include <math.h> -# include <math_private.h> +#include <init-arch.h> +#include <math.h> +#include <math_private.h> extern double __ieee754_pow_sse2 (double, double); extern double __ieee754_pow_fma4 (double, double); -libm_ifunc (__ieee754_pow, HAS_FMA4 ? __ieee754_pow_fma4 : __ieee754_pow_sse2); +libm_ifunc (__ieee754_pow, + HAS_ARCH_FEATURE (FMA4_Usable) + ? __ieee754_pow_fma4 + : __ieee754_pow_sse2); strong_alias (__ieee754_pow, __pow_finite) -# define __ieee754_pow __ieee754_pow_sse2 -#endif +#define __ieee754_pow __ieee754_pow_sse2 #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c index ae16d7c9bb..742e95cb96 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c @@ -1,22 +1,15 @@ -#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT -# include <init-arch.h> -# include <math.h> +#include <init-arch.h> +#include <math.h> extern double __atan_sse2 (double); extern double __atan_avx (double); -# ifdef HAVE_FMA4_SUPPORT extern double __atan_fma4 (double); -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __atan_fma4 ((void *) 0) -# endif -libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : - HAS_AVX ? __atan_avx : __atan_sse2)); +libm_ifunc (atan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __atan_fma4 : + HAS_ARCH_FEATURE (AVX_Usable) + ? __atan_avx : __atan_sse2)); -# define atan __atan_sse2 -#endif +#define atan __atan_sse2 #include <sysdeps/ieee754/dbl-64/s_atan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/s_ceil.S index 00ecede74d..40fa729955 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceil.S +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -22,10 +22,9 @@ ENTRY(__ceil) .type __ceil, @gnu_indirect_function - call __get_cpu_features@plt - movq %rax, %rdx + LOAD_RTLD_GLOBAL_RO_RDX leaq __ceil_sse41(%rip), %rax - testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + HAS_CPU_FEATURE (SSE4_1) jnz 2f leaq __ceil_c(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf.S index c8ed70553e..9a06a5c174 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -22,10 +22,9 @@ ENTRY(__ceilf) .type __ceilf, @gnu_indirect_function - call __get_cpu_features@plt - movq %rax, %rdx + LOAD_RTLD_GLOBAL_RO_RDX leaq __ceilf_sse41(%rip), %rax - testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + HAS_CPU_FEATURE (SSE4_1) jnz 2f leaq __ceilf_c(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/fpu/multiarch/s_floor.S index 952ffaa314..57a0eee5ba 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floor.S +++ b/sysdeps/x86_64/fpu/multiarch/s_floor.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -22,10 +22,9 @@ ENTRY(__floor) .type __floor, @gnu_indirect_function - call __get_cpu_features@plt - movq %rax, %rdx + LOAD_RTLD_GLOBAL_RO_RDX leaq __floor_sse41(%rip), %rax - testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + HAS_CPU_FEATURE (SSE4_1) jnz 2f leaq __floor_c(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.S b/sysdeps/x86_64/fpu/multiarch/s_floorf.S index c8231e86b3..74a149a950 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floorf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -22,10 +22,9 @@ ENTRY(__floorf) .type __floorf, @gnu_indirect_function - call __get_cpu_features@plt - movq %rax, %rdx + LOAD_RTLD_GLOBAL_RO_RDX leaq __floorf_sse41(%rip), %rax - testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + HAS_CPU_FEATURE (SSE4_1) jnz 2f leaq __floorf_c(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c index 0963a0b36a..1de1a84cbe 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fma.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c @@ -1,5 +1,5 @@ /* FMA version of fma. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -21,8 +21,6 @@ #include <math.h> #include <init-arch.h> -#ifdef HAVE_AVX_SUPPORT - extern double __fma_sse2 (double x, double y, double z) attribute_hidden; @@ -34,25 +32,19 @@ __fma_fma3 (double x, double y, double z) } -# ifdef HAVE_FMA4_SUPPORT static double __fma_fma4 (double x, double y, double z) { asm ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z)); return x; } -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __fma_fma4 ((void *) 0) -# endif -libm_ifunc (__fma, HAS_FMA - ? __fma_fma3 : (HAS_FMA4 ? __fma_fma4 : __fma_sse2)); +libm_ifunc (__fma, HAS_ARCH_FEATURE (FMA_Usable) + ? __fma_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable) + ? __fma_fma4 : __fma_sse2)); weak_alias (__fma, fma) -# define __fma __fma_sse2 -#endif +#define __fma __fma_sse2 #include <sysdeps/ieee754/dbl-64/s_fma.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c index 6046961f86..8905e4b54f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c @@ -1,5 +1,5 @@ /* FMA version of fmaf. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,8 +20,6 @@ #include <math.h> #include <init-arch.h> -#ifdef HAVE_AVX_SUPPORT - extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden; @@ -33,25 +31,19 @@ __fmaf_fma3 (float x, float y, float z) } -# ifdef HAVE_FMA4_SUPPORT static float __fmaf_fma4 (float x, float y, float z) { asm ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z)); return x; } -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __fmaf_fma4 ((void *) 0) -# endif -libm_ifunc (__fmaf, HAS_FMA - ? __fmaf_fma3 : (HAS_FMA4 ? __fmaf_fma4 : __fmaf_sse2)); +libm_ifunc (__fmaf, HAS_ARCH_FEATURE (FMA_Usable) + ? __fmaf_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable) + ? __fmaf_fma4 : __fmaf_sse2)); weak_alias (__fmaf, fmaf) -# define __fmaf __fmaf_sse2 -#endif +#define __fmaf __fmaf_sse2 #include <sysdeps/ieee754/dbl-64/s_fmaf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S index b5d32b5873..5091cf5813 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -22,10 +22,9 @@ ENTRY(__nearbyint) .type __nearbyint, @gnu_indirect_function - call __get_cpu_features@plt - movq %rax, %rdx + LOAD_RTLD_GLOBAL_RO_RDX leaq __nearbyint_sse41(%rip), %rax - testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + HAS_CPU_FEATURE (SSE4_1) jnz 2f leaq __nearbyint_c(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S index cd7e177a55..4a13700001 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -22,10 +22,9 @@ ENTRY(__nearbyintf) .type __nearbyintf, @gnu_indirect_function - call __get_cpu_features@plt - movq %rax, %rdx + LOAD_RTLD_GLOBAL_RO_RDX leaq __nearbyintf_sse41(%rip), %rax - testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + HAS_CPU_FEATURE (SSE4_1) jnz 2f leaq __nearbyintf_c(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.S b/sysdeps/x86_64/fpu/multiarch/s_rint.S index f52cef65db..1c0d1e14b7 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rint.S +++ b/sysdeps/x86_64/fpu/multiarch/s_rint.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -22,10 +22,9 @@ ENTRY(__rint) .type __rint, @gnu_indirect_function - call __get_cpu_features@plt - movq %rax, %rdx + LOAD_RTLD_GLOBAL_RO_RDX leaq __rint_sse41(%rip), %rax - testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + HAS_CPU_FEATURE (SSE4_1) jnz 2f leaq __rint_c(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.S b/sysdeps/x86_64/fpu/multiarch/s_rintf.S index e2608d4c4e..8e42fa561f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rintf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -22,10 +22,9 @@ ENTRY(__rintf) .type __rintf, @gnu_indirect_function - call __get_cpu_features@plt - movq %rax, %rdx + LOAD_RTLD_GLOBAL_RO_RDX leaq __rintf_sse41(%rip), %rax - testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + HAS_CPU_FEATURE (SSE4_1) jnz 2f leaq __rintf_c(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c index a0c2521c98..8ffd3e7125 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c @@ -1,33 +1,26 @@ -#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT -# include <init-arch.h> -# include <math.h> -# undef NAN +#include <init-arch.h> +#include <math.h> +#undef NAN extern double __cos_sse2 (double); extern double __sin_sse2 (double); extern double __cos_avx (double); extern double __sin_avx (double); -# ifdef HAVE_FMA4_SUPPORT extern double __cos_fma4 (double); extern double __sin_fma4 (double); -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __cos_fma4 ((void *) 0) -# define __sin_fma4 ((void *) 0) -# endif -libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : - HAS_AVX ? __cos_avx : __cos_sse2)); +libm_ifunc (__cos, (HAS_ARCH_FEATURE (FMA4_Usable) ? __cos_fma4 : + HAS_ARCH_FEATURE (AVX_Usable) + ? __cos_avx : __cos_sse2)); weak_alias (__cos, cos) -libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : - HAS_AVX ? __sin_avx : __sin_sse2)); +libm_ifunc (__sin, (HAS_ARCH_FEATURE (FMA4_Usable) ? __sin_fma4 : + HAS_ARCH_FEATURE (AVX_Usable) + ? __sin_avx : __sin_sse2)); weak_alias (__sin, sin) -# define __cos __cos_sse2 -# define __sin __sin_sse2 -#endif +#define __cos __cos_sse2 +#define __sin __sin_sse2 #include <sysdeps/ieee754/dbl-64/s_sin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c index 904308fada..25f3bca07e 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c @@ -1,22 +1,15 @@ -#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT -# include <init-arch.h> -# include <math.h> +#include <init-arch.h> +#include <math.h> extern double __tan_sse2 (double); extern double __tan_avx (double); -# ifdef HAVE_FMA4_SUPPORT extern double __tan_fma4 (double); -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __tan_fma4 ((void *) 0) -# endif -libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : - HAS_AVX ? __tan_avx : __tan_sse2)); +libm_ifunc (tan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __tan_fma4 : + HAS_ARCH_FEATURE (AVX_Usable) + ? __tan_avx : __tan_sse2)); -# define tan __tan_sse2 -#endif +#define tan __tan_sse2 #include <sysdeps/ieee754/dbl-64/s_tan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S index 5f67d83bd4..7d720e2fcb 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized cos, vector length is 2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN2v_cos) .type _ZGVbN2v_cos, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN2v_cos_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN2v_cos_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN2v_cos_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S index 4420edcae0..088fcae067 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S @@ -1,5 +1,5 @@ /* Function cos vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S index 5babb834ad..65a3570d2e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized cos, vector length is 4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN4v_cos) .type _ZGVdN4v_cos, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN4v_cos_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN4v_cos_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN4v_cos_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S index 9a776e7df7..4e653216d9 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S index d0f4f27f46..3e7f16d44e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized cos, vector length is 8. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN8v_cos) .type _ZGVeN8v_cos, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features + LOAD_RTLD_GLOBAL_RO_RDX 1: leaq _ZGVeN8v_cos_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN8v_cos_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN8v_cos_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S index b376155210..1cac1d827a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX-512, KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S index ef3dc49a1c..136c67a550 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized exp. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN2v_exp) .type _ZGVbN2v_exp, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN2v_exp_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN2v_exp_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN2v_exp_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S index 1f5445924a..445b230152 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S @@ -1,5 +1,5 @@ /* Function exp vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S index 7f2ebdef67..9d6a47be0a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized exp. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN4v_exp) .type _ZGVdN4v_exp, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN4v_exp_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN4v_exp_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN4v_exp_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S index a34e267433..25f9e28941 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S index 7b7c07d926..317ee36e61 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized exp. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN8v_exp) .type _ZGVeN8v_exp, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN8v_exp_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN8v_exp_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN8v_exp_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN8v_exp_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S index 049a7e49cd..74f1d2ce7b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S index 38d369fc3c..03d86a3e63 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized log. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN2v_log) .type _ZGVbN2v_log, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN2v_log_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN2v_log_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN2v_log_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S index 82f3d8215d..5d254288f6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S @@ -1,5 +1,5 @@ /* Function log vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S index ddb6105405..9f6ddbef15 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized log. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN4v_log) .type _ZGVdN4v_log, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN4v_log_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN4v_log_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN4v_log_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S index 816aede395..5da298747d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S index 76375fdae0..2e1a1da1a5 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized log. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN8v_log) .type _ZGVeN8v_log, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN8v_log_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN8v_log_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN8v_log_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN8v_log_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S index b0f3dd580c..dca8e61f34 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S index f111388922..4a50246889 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized pow. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN2vv_pow) .type _ZGVbN2vv_pow, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN2vv_pow_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN2vv_pow_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN2vv_pow_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S index 9f6ec29ac5..064d170878 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S @@ -1,5 +1,5 @@ /* Function pow vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S index 21e3070a42..fb9f989adc 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized pow. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN4vv_pow) .type _ZGVdN4vv_pow, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN4vv_pow_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN4vv_pow_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN4vv_pow_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S index f1f1f35ca2..f2a73ffe1e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S index c1e5e76f92..30bc53f2f7 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized pow. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN8vv_pow) .type _ZGVeN8vv_pow, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN8vv_pow_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN8vv_pow_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN8vv_pow_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN8vv_pow_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S index 8dd89c8ebb..4a515233fc 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S index 29bd0a7b4d..112bec2224 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sin. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN2v_sin) .type _ZGVbN2v_sin, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN2v_sin_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN2v_sin_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN2v_sin_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S index 3a1ccbf139..5755ce6f74 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S @@ -1,5 +1,5 @@ /* Function sin vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S index c3a453a477..700a1c629d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sin, vector length is 4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN4v_sin) .type _ZGVdN4v_sin, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN4v_sin_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN4v_sin_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN4v_sin_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S index 6bf8b32b4f..46b557158a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S index 131f2f47c5..5afce0ed88 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sin. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN8v_sin) .type _ZGVeN8v_sin, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN8v_sin_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN8v_sin_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN8v_sin_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN8v_sin_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S index 422f6e8b0f..6c565f3861 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX-512, KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S index e8e5771808..883d7d33a4 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sincos. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN2vvv_sincos) .type _ZGVbN2vvv_sincos, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN2vvv_sincos_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S index b504d1d732..65ad540122 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S index 64744ffa62..69a3f74650 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sincos. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN4vvv_sincos) .type _ZGVdN4vvv_sincos, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN4vvv_sincos_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S index dca5604111..60d03e9f8b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S index e33109099e..64cb08c5d1 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sincos. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN8vvv_sincos) .type _ZGVeN8vvv_sincos, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN8vvv_sincos_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN8vvv_sincos_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN8vvv_sincos_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN8vvv_sincos_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S index e8388325f7..44700f90b8 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S index 0654d3c19b..755254a280 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized cosf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN16v_cosf) .type _ZGVeN16v_cosf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN16v_cosf_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN16v_cosf_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN16v_cosf_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S index e777476d73..5004cd4758 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S index fa2363bb1f..ad7de18851 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized cosf, vector length is 4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN4v_cosf) .type _ZGVbN4v_cosf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN4v_cosf_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN4v_cosf_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN4v_cosf_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S index bdb6591905..d23ff72a30 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S index e14bba4a76..602c70e324 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized cosf, vector length is 8. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN8v_cosf) .type _ZGVdN8v_cosf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN8v_cosf_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN8v_cosf_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN8v_cosf_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S index 1efc943295..513f3c0a29 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S index 62858eb39e..f990d36483 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized expf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN16v_expf) .type _ZGVeN16v_expf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN16v_expf_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN16v_expf_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN16v_expf_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN16v_expf_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S index ec69055351..7eb7a1b775 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S index 37d38bc6f8..2fbe6d475e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized expf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN4v_expf) .type _ZGVbN4v_expf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN4v_expf_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN4v_expf_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN4v_expf_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S index fcc1859c3a..c6f91e8dc1 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function expf vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S index e3dc1b1038..7d19bb423d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized expf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN8v_expf) .type _ZGVdN8v_expf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN8v_expf_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN8v_expf_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN8v_expf_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S index c876ecc03e..c6be6954f7 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S index 68c57e4386..9efb2fb7df 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized logf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN16v_logf) .type _ZGVeN16v_logf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN16v_logf_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN16v_logf_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN16v_logf_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN16v_logf_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S index 86fcab6e63..6209058381 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S index 153ed8ebc2..c85615ac25 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized logf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN4v_logf) .type _ZGVbN4v_logf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN4v_logf_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN4v_logf_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN4v_logf_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S index 68f11033d9..1ce9838513 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function logf vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S index 6f50bf6bdb..8f6d83dd56 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized logf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN8v_logf) .type _ZGVdN8v_logf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN8v_logf_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN8v_logf_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN8v_logf_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S index 1f08b4218a..91fb549ce6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S index 3aa9f952ce..80048ce977 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized powf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN16vv_powf) .type _ZGVeN16vv_powf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN16vv_powf_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN16vv_powf_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN16vv_powf_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN16vv_powf_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S index 4b61974cb6..45d48723af 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S index f88b9ca6d4..b46821189b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized powf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN4vv_powf) .type _ZGVbN4vv_powf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN4vv_powf_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN4vv_powf_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN4vv_powf_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S index 6068f51c46..420f98c6a6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function powf vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S index 4552e573a9..945908a2ff 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized powf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN8vv_powf) .type _ZGVdN8vv_powf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN8vv_powf_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN8vv_powf_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN8vv_powf_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S index cfb86c7851..4446859130 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S index bdcabab6e2..16cee0c676 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sincosf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN16vvv_sincosf) .type _ZGVeN16vvv_sincosf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN16vvv_sincosf_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN16vvv_sincosf_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S index efff91bb0d..758aeeaeed 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S index 610046b587..d72b4049e2 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sincosf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN4vvv_sincosf) .type _ZGVbN4vvv_sincosf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN4vvv_sincosf_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S index 4d846b5d7e..643fc0ca3b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S index 9e5be67fc9..0123b8024e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sincosf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN8vvv_sincosf) .type _ZGVdN8vvv_sincosf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN8vvv_sincosf_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S index 0108fd5126..f2a0ba7116 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S index 3ec78a0b5e..2212cdd94d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sinf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,14 +22,12 @@ .text ENTRY (_ZGVeN16v_sinf) .type _ZGVeN16v_sinf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVeN16v_sinf_skx(%rip), %rax - testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVeN16v_sinf_skx(%rip), %rax + HAS_ARCH_FEATURE (AVX512DQ_Usable) jnz 2f leaq _ZGVeN16v_sinf_knl(%rip), %rax - testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip) + HAS_ARCH_FEATURE (AVX512F_Usable) jnz 2f leaq _ZGVeN16v_sinf_avx2_wrapper(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S index f13ed96af8..61d8d3793a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S index cf1e4df406..b31554730d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sinf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVbN4v_sinf) .type _ZGVbN4v_sinf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features -1: leaq _ZGVbN4v_sinf_sse4(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq _ZGVbN4v_sinf_sse4(%rip), %rax + HAS_CPU_FEATURE (SSE4_1) jz 2f ret 2: leaq _ZGVbN4v_sinf_sse2(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S index b8b852bcae..5268ab1f09 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with SSE4. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S index b28bf3cabc..47fe0a4adc 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S @@ -1,5 +1,5 @@ /* Multiple versions of vectorized sinf, vector length is 8. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,11 +22,9 @@ .text ENTRY (_ZGVdN8v_sinf) .type _ZGVdN8v_sinf, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features + LOAD_RTLD_GLOBAL_RO_RDX 1: leaq _ZGVdN8v_sinf_avx2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f ret 2: leaq _ZGVdN8v_sinf_sse_wrapper(%rip), %rax diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S index a130d25fce..9fdaadb2e8 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/printf_fphex.c b/sysdeps/x86_64/fpu/printf_fphex.c index 7b900caa88..0fbaa3748e 100644 --- a/sysdeps/x86_64/fpu/printf_fphex.c +++ b/sysdeps/x86_64/fpu/printf_fphex.c @@ -1,5 +1,5 @@ /* Print floating point number in hexadecimal notation according to ISO C99. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/s_copysign.S b/sysdeps/x86_64/fpu/s_copysign.S index 0576343595..18f568f46f 100644 --- a/sysdeps/x86_64/fpu/s_copysign.S +++ b/sysdeps/x86_64/fpu/s_copysign.S @@ -1,5 +1,5 @@ /* copy sign, double version. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/fpu/s_copysignf.S b/sysdeps/x86_64/fpu/s_copysignf.S index 4961afca46..00a1fabaee 100644 --- a/sysdeps/x86_64/fpu/s_copysignf.S +++ b/sysdeps/x86_64/fpu/s_copysignf.S @@ -1,5 +1,5 @@ /* copy sign, double version. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S index b7868ceb20..31968e498f 100644 --- a/sysdeps/x86_64/fpu/s_cosf.S +++ b/sysdeps/x86_64/fpu/s_cosf.S @@ -1,5 +1,5 @@ /* Optimized cosf function. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -310,8 +310,14 @@ L(arg_inf_or_nan): /* Here if |x| is Inf or NAN */ jne L(skip_errno_setting) /* in case of x is NaN */ + /* Align stack to 16 bytes. */ + subq $8, %rsp + cfi_adjust_cfa_offset (8) /* Here if x is Inf. Set errno to EDOM. */ call JUMPTARGET(__errno_location) + addq $8, %rsp + cfi_adjust_cfa_offset (-8) + movl $EDOM, (%rax) .p2align 4 diff --git a/sysdeps/x86_64/fpu/s_fabs.c b/sysdeps/x86_64/fpu/s_fabs.c index 5e4f1b390f..d3a313fdf5 100644 --- a/sysdeps/x86_64/fpu/s_fabs.c +++ b/sysdeps/x86_64/fpu/s_fabs.c @@ -1,5 +1,5 @@ /* Absolute value of floating point number. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/s_fabsf.c b/sysdeps/x86_64/fpu/s_fabsf.c index a80c2589fa..e6dcda9433 100644 --- a/sysdeps/x86_64/fpu/s_fabsf.c +++ b/sysdeps/x86_64/fpu/s_fabsf.c @@ -1,5 +1,5 @@ /* Absolute value of floating point number. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/s_fabsl.S b/sysdeps/x86_64/fpu/s_fabsl.S index 8d4694b978..6881ff11c7 100644 --- a/sysdeps/x86_64/fpu/s_fabsl.S +++ b/sysdeps/x86_64/fpu/s_fabsl.S @@ -1,5 +1,5 @@ /* Absolute value of floating point number. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/s_fdiml.S b/sysdeps/x86_64/fpu/s_fdiml.S index ae7490a2a9..f9f1e20259 100644 --- a/sysdeps/x86_64/fpu/s_fdiml.S +++ b/sysdeps/x86_64/fpu/s_fdiml.S @@ -1,5 +1,5 @@ /* Compute positive difference. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. diff --git a/sysdeps/x86_64/fpu/s_fmax.S b/sysdeps/x86_64/fpu/s_fmax.S index 9857ab0b30..02096c0aea 100644 --- a/sysdeps/x86_64/fpu/s_fmax.S +++ b/sysdeps/x86_64/fpu/s_fmax.S @@ -1,5 +1,5 @@ /* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/fpu/s_fmaxf.S b/sysdeps/x86_64/fpu/s_fmaxf.S index 0aa9d20cd2..28e129701e 100644 --- a/sysdeps/x86_64/fpu/s_fmaxf.S +++ b/sysdeps/x86_64/fpu/s_fmaxf.S @@ -1,5 +1,5 @@ /* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/fpu/s_fmaxl.S b/sysdeps/x86_64/fpu/s_fmaxl.S index 11827134c0..f0c2bc0d56 100644 --- a/sysdeps/x86_64/fpu/s_fmaxl.S +++ b/sysdeps/x86_64/fpu/s_fmaxl.S @@ -1,5 +1,5 @@ /* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. diff --git a/sysdeps/x86_64/fpu/s_fmin.S b/sysdeps/x86_64/fpu/s_fmin.S index 9bd00a70b2..fb14e2f3ed 100644 --- a/sysdeps/x86_64/fpu/s_fmin.S +++ b/sysdeps/x86_64/fpu/s_fmin.S @@ -1,5 +1,5 @@ /* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/fpu/s_fminf.S b/sysdeps/x86_64/fpu/s_fminf.S index 996c34b1a0..c8d6d0fd33 100644 --- a/sysdeps/x86_64/fpu/s_fminf.S +++ b/sysdeps/x86_64/fpu/s_fminf.S @@ -1,5 +1,5 @@ /* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/fpu/s_fminl.S b/sysdeps/x86_64/fpu/s_fminl.S index be9571b4f1..f1a06d29d7 100644 --- a/sysdeps/x86_64/fpu/s_fminl.S +++ b/sysdeps/x86_64/fpu/s_fminl.S @@ -1,5 +1,5 @@ /* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. diff --git a/sysdeps/x86_64/fpu/s_llrint.S b/sysdeps/x86_64/fpu/s_llrint.S index e822c06070..6634c653ea 100644 --- a/sysdeps/x86_64/fpu/s_llrint.S +++ b/sysdeps/x86_64/fpu/s_llrint.S @@ -1,6 +1,6 @@ /* Round argument to nearest integral value according to current rounding direction. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.d>, 2002. @@ -26,5 +26,7 @@ ENTRY(__llrint) ret END(__llrint) weak_alias (__llrint, llrint) +#ifndef __ILP32__ strong_alias (__llrint, __lrint) weak_alias (__llrint, lrint) +#endif diff --git a/sysdeps/x86_64/fpu/s_llrintf.S b/sysdeps/x86_64/fpu/s_llrintf.S index 6825511a57..5ac03dffd9 100644 --- a/sysdeps/x86_64/fpu/s_llrintf.S +++ b/sysdeps/x86_64/fpu/s_llrintf.S @@ -1,6 +1,6 @@ /* Round argument to nearest integral value according to current rounding direction. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.d>, 2002. @@ -26,5 +26,7 @@ ENTRY(__llrintf) ret END(__llrintf) weak_alias (__llrintf, llrintf) +#ifndef __ILP32__ strong_alias (__llrintf, __lrintf) weak_alias (__llrintf, lrintf) +#endif diff --git a/sysdeps/x86_64/fpu/s_llrintl.S b/sysdeps/x86_64/fpu/s_llrintl.S index abe3a5bc0b..5f4d827dff 100644 --- a/sysdeps/x86_64/fpu/s_llrintl.S +++ b/sysdeps/x86_64/fpu/s_llrintl.S @@ -1,6 +1,6 @@ /* Round argument to nearest integral value according to current rounding direction. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -28,6 +28,7 @@ ENTRY(__llrintl) ret END(__llrintl) weak_alias (__llrintl, llrintl) +#ifndef __ILP32__ strong_alias (__llrintl, __lrintl) weak_alias (__llrintl, lrintl) - +#endif diff --git a/sysdeps/x86_64/fpu/s_nearbyintl.S b/sysdeps/x86_64/fpu/s_nearbyintl.S index dab2750a23..76d41bdd52 100644 --- a/sysdeps/x86_64/fpu/s_nearbyintl.S +++ b/sysdeps/x86_64/fpu/s_nearbyintl.S @@ -8,14 +8,16 @@ ENTRY(__nearbyintl) fldt 8(%rsp) - fnstcw -4(%rsp) - movl -4(%rsp), %eax + fnstenv -28(%rsp) + movl -28(%rsp), %eax orl $0x20, %eax - movl %eax, -8(%rsp) - fldcw -8(%rsp) + movl %eax, -32(%rsp) + fldcw -32(%rsp) frndint - fclex - fldcw -4(%rsp) + fnstsw + andl $0x1, %eax + orl %eax, -24(%rsp) + fldenv -28(%rsp) ret END (__nearbyintl) weak_alias (__nearbyintl, nearbyintl) diff --git a/sysdeps/x86_64/fpu/s_scalbnl.S b/sysdeps/x86_64/fpu/s_scalbnl.S index d0e9301eed..6c7683c32b 100644 --- a/sysdeps/x86_64/fpu/s_scalbnl.S +++ b/sysdeps/x86_64/fpu/s_scalbnl.S @@ -15,4 +15,3 @@ ENTRY(__scalbnl) fstp %st(1) ret END (__scalbnl) -weak_alias (__scalbnl, scalbnl) diff --git a/sysdeps/x86_64/fpu/s_signbit.S b/sysdeps/x86_64/fpu/s_signbit.S index a327c45330..92a79d3123 100644 --- a/sysdeps/x86_64/fpu/s_signbit.S +++ b/sysdeps/x86_64/fpu/s_signbit.S @@ -1,5 +1,5 @@ /* Return nonzero value if number is negative. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redha.com>, 2009. diff --git a/sysdeps/x86_64/fpu/s_signbitf.S b/sysdeps/x86_64/fpu/s_signbitf.S index 90994705c7..885645372e 100644 --- a/sysdeps/x86_64/fpu/s_signbitf.S +++ b/sysdeps/x86_64/fpu/s_signbitf.S @@ -1,5 +1,5 @@ /* Return nonzero value if number is negative. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redha.com>, 2009. diff --git a/sysdeps/x86_64/fpu/s_sincosf.S b/sysdeps/x86_64/fpu/s_sincosf.S index 21db70a88b..5e7cbe57e3 100644 --- a/sysdeps/x86_64/fpu/s_sincosf.S +++ b/sysdeps/x86_64/fpu/s_sincosf.S @@ -1,5 +1,5 @@ /* Optimized sincosf function. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -354,8 +354,14 @@ L(arg_inf_or_nan): /* Here if |x| is Inf or NAN */ jne L(skip_errno_setting) /* in case of x is NaN */ + /* Align stack to 16 bytes. */ + subq $8, %rsp + cfi_adjust_cfa_offset (8) /* Here if x is Inf. Set errno to EDOM. */ call JUMPTARGET(__errno_location) + addq $8, %rsp + cfi_adjust_cfa_offset (-8) + movl $EDOM, (%rax) .p2align 4 diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S index dc921641de..c980c6e207 100644 --- a/sysdeps/x86_64/fpu/s_sinf.S +++ b/sysdeps/x86_64/fpu/s_sinf.S @@ -1,5 +1,5 @@ /* Optimized sinf function. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -336,8 +336,14 @@ L(arg_inf_or_nan): /* Here if |x| is Inf or NAN */ jne L(skip_errno_setting) /* in case of x is NaN */ + /* Align stack to 16 bytes. */ + subq $8, %rsp + cfi_adjust_cfa_offset (8) /* Here if x is Inf. Set errno to EDOM. */ call JUMPTARGET(__errno_location) + addq $8, %rsp + cfi_adjust_cfa_offset (-8) + movl $EDOM, (%rax) .p2align 4 diff --git a/sysdeps/x86_64/fpu/s_truncl.S b/sysdeps/x86_64/fpu/s_truncl.S index 6ba4a27cad..c37cf00241 100644 --- a/sysdeps/x86_64/fpu/s_truncl.S +++ b/sysdeps/x86_64/fpu/s_truncl.S @@ -1,5 +1,5 @@ /* Truncate long double value. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. diff --git a/sysdeps/x86_64/fpu/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/svml_d_cos2_core.S index a26beca4a1..7f62d29917 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos2_core.S @@ -1,5 +1,5 @@ /* Function cos vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/svml_d_cos4_core.S index 35996b7318..b92ff13b86 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos4_core.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S index bf10b01cc5..a3da721e35 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S @@ -1,5 +1,5 @@ /* Function cos vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/svml_d_cos8_core.S index 1ba10e8c9b..e5d986d11a 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos8_core.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX-512, wrapper to AVX2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/svml_d_exp2_core.S index ca3dd76364..9e511037a1 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_exp2_core.S @@ -1,5 +1,5 @@ /* Function exp vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/svml_d_exp4_core.S index d497811980..8cac8adbc7 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_exp4_core.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S index 5dd2f6cd17..1a0fbf574a 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S @@ -1,5 +1,5 @@ /* Function exp vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/svml_d_exp8_core.S index 3e273a3e71..2486e888a4 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_exp8_core.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.S b/sysdeps/x86_64/fpu/svml_d_exp_data.S index 66fa3b88d7..6d1acbdd21 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp_data.S +++ b/sysdeps/x86_64/fpu/svml_d_exp_data.S @@ -1,5 +1,5 @@ /* Data for vector function exp. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.h b/sysdeps/x86_64/fpu/svml_d_exp_data.h index 71ebdb799e..f993403d47 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp_data.h +++ b/sysdeps/x86_64/fpu/svml_d_exp_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for function exp. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log2_core.S b/sysdeps/x86_64/fpu/svml_d_log2_core.S index daa63b583f..8ea40fee56 100644 --- a/sysdeps/x86_64/fpu/svml_d_log2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_log2_core.S @@ -1,5 +1,5 @@ /* Function log vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core.S b/sysdeps/x86_64/fpu/svml_d_log4_core.S index 009c93c837..72813d8921 100644 --- a/sysdeps/x86_64/fpu/svml_d_log4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_log4_core.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S index 554fc45712..6ca1139931 100644 --- a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S @@ -1,5 +1,5 @@ /* Function log vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log8_core.S b/sysdeps/x86_64/fpu/svml_d_log8_core.S index 9728305f17..6850fd9a44 100644 --- a/sysdeps/x86_64/fpu/svml_d_log8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_log8_core.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.S b/sysdeps/x86_64/fpu/svml_d_log_data.S index 1ce78e2c8b..9ab541b23f 100644 --- a/sysdeps/x86_64/fpu/svml_d_log_data.S +++ b/sysdeps/x86_64/fpu/svml_d_log_data.S @@ -1,5 +1,5 @@ /* Data for function log. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.h b/sysdeps/x86_64/fpu/svml_d_log_data.h index 8ca55a8010..30c2b54a4b 100644 --- a/sysdeps/x86_64/fpu/svml_d_log_data.h +++ b/sysdeps/x86_64/fpu/svml_d_log_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for function log. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/svml_d_pow2_core.S index 0b726a1eaf..b25515c825 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_pow2_core.S @@ -1,5 +1,5 @@ /* Function pow vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/svml_d_pow4_core.S index 9eb47ab8c9..547993799e 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_pow4_core.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S index 6c7b59995d..4e4e9867b4 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S @@ -1,5 +1,5 @@ /* Function pow vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/svml_d_pow8_core.S index cd99457843..372e5a9c83 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_pow8_core.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.S b/sysdeps/x86_64/fpu/svml_d_pow_data.S index 1e0733e0a6..8481f95455 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow_data.S +++ b/sysdeps/x86_64/fpu/svml_d_pow_data.S @@ -1,5 +1,5 @@ /* Data for function pow. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.h b/sysdeps/x86_64/fpu/svml_d_pow_data.h index a1b9f9bc46..239ba96984 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow_data.h +++ b/sysdeps/x86_64/fpu/svml_d_pow_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for function pow. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/svml_d_sin2_core.S index c619dab966..f6ec13104b 100644 --- a/sysdeps/x86_64/fpu/svml_d_sin2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sin2_core.S @@ -1,5 +1,5 @@ /* Function sin vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/svml_d_sin4_core.S index f650d461a5..95a1dec6f6 100644 --- a/sysdeps/x86_64/fpu/svml_d_sin4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sin4_core.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S index a21ffafa32..29d1526a12 100644 --- a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S @@ -1,5 +1,5 @@ /* Function sin vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/svml_d_sin8_core.S index 2e78b5e35a..abd86b3d98 100644 --- a/sysdeps/x86_64/fpu/svml_d_sin8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sin8_core.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX-512, wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S index bd089e1ed0..74afa0a677 100644 --- a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S index d67cd30132..2c0b011fb3 100644 --- a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S index 4f3f15aea6..e4320a97c7 100644 --- a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S @@ -1,5 +1,5 @@ /* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S index e7f7121fa0..68d490e5bc 100644 --- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.S b/sysdeps/x86_64/fpu/svml_d_trig_data.S index d3b30598cc..887dacee91 100644 --- a/sysdeps/x86_64/fpu/svml_d_trig_data.S +++ b/sysdeps/x86_64/fpu/svml_d_trig_data.S @@ -1,5 +1,5 @@ /* Data for vectorized sin, cos, sincos. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.h b/sysdeps/x86_64/fpu/svml_d_trig_data.h index 1395337c7e..4617b5e0c3 100644 --- a/sysdeps/x86_64/fpu/svml_d_trig_data.h +++ b/sysdeps/x86_64/fpu/svml_d_trig_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for vectorized sin, cos, sincos. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h index 5c0ff897c0..54f4f58371 100644 --- a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h +++ b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h @@ -1,5 +1,5 @@ /* Wrapper implementations of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_finite_alias.S b/sysdeps/x86_64/fpu/svml_finite_alias.S new file mode 100644 index 0000000000..2dcfc37590 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_finite_alias.S @@ -0,0 +1,58 @@ +/* These aliases added as workaround to exclude unnecessary symbol + aliases in libmvec.so while compiler creates the vector names + based on scalar asm name. Corresponding discussion is at + <https://gcc.gnu.org/ml/gcc/2015-06/msg00173.html>. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define ALIAS_IMPL(alias, target) \ +ENTRY (alias); \ + jmp *target@GOTPCREL(%rip); \ +END (alias) + + .text +ALIAS_IMPL (_ZGVbN2v___log_finite, _ZGVbN2v_log) +ALIAS_IMPL (_ZGVcN4v___log_finite, _ZGVcN4v_log) +ALIAS_IMPL (_ZGVdN4v___log_finite, _ZGVdN4v_log) +ALIAS_IMPL (_ZGVeN8v___log_finite, _ZGVeN8v_log) + +ALIAS_IMPL (_ZGVbN4v___logf_finite, _ZGVbN4v_logf) +ALIAS_IMPL (_ZGVcN8v___logf_finite, _ZGVcN8v_logf) +ALIAS_IMPL (_ZGVdN8v___logf_finite, _ZGVdN8v_logf) +ALIAS_IMPL (_ZGVeN16v___logf_finite, _ZGVeN16v_logf) + +ALIAS_IMPL (_ZGVbN2v___exp_finite, _ZGVbN2v_exp) +ALIAS_IMPL (_ZGVcN4v___exp_finite, _ZGVcN4v_exp) +ALIAS_IMPL (_ZGVdN4v___exp_finite, _ZGVdN4v_exp) +ALIAS_IMPL (_ZGVeN8v___exp_finite, _ZGVeN8v_exp) + +ALIAS_IMPL (_ZGVbN4v___expf_finite, _ZGVbN4v_expf) +ALIAS_IMPL (_ZGVcN8v___expf_finite, _ZGVcN8v_expf) +ALIAS_IMPL (_ZGVdN8v___expf_finite, _ZGVdN8v_expf) +ALIAS_IMPL (_ZGVeN16v___expf_finite, _ZGVeN16v_expf) + +ALIAS_IMPL (_ZGVbN2vv___pow_finite, _ZGVbN2vv_pow) +ALIAS_IMPL (_ZGVcN4vv___pow_finite, _ZGVcN4vv_pow) +ALIAS_IMPL (_ZGVdN4vv___pow_finite, _ZGVdN4vv_pow) +ALIAS_IMPL (_ZGVeN8vv___pow_finite, _ZGVeN8vv_pow) + +ALIAS_IMPL (_ZGVbN4vv___powf_finite, _ZGVbN4vv_powf) +ALIAS_IMPL (_ZGVcN8vv___powf_finite, _ZGVcN8vv_powf) +ALIAS_IMPL (_ZGVdN8vv___powf_finite, _ZGVdN8vv_powf) +ALIAS_IMPL (_ZGVeN16vv___powf_finite, _ZGVeN16vv_powf) diff --git a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S index e623df5dc3..9ca4fbfaa8 100644 --- a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S index 9875cd7f71..363090c54a 100644 --- a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with SSE2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S index 376ee358ae..26a6a4e4d6 100644 --- a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S index a443fd28ad..6c210d98ce 100644 --- a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S @@ -1,5 +1,5 @@ /* Function cosf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/svml_s_expf16_core.S index d9d355c372..d8eecac674 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_expf16_core.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/svml_s_expf4_core.S index 71c5da4657..65b5d1a3ce 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_expf4_core.S @@ -1,5 +1,5 @@ /* Function expf vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/svml_s_expf8_core.S index d254a992a4..e3cf975bf6 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_expf8_core.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S index ece40ba972..90469d7dcf 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S @@ -1,5 +1,5 @@ /* Function expf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.S b/sysdeps/x86_64/fpu/svml_s_expf_data.S index eee9d69e31..4b644082b6 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf_data.S +++ b/sysdeps/x86_64/fpu/svml_s_expf_data.S @@ -1,5 +1,5 @@ /* Data for function expf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.h b/sysdeps/x86_64/fpu/svml_s_expf_data.h index beaa290540..3610633c96 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf_data.h +++ b/sysdeps/x86_64/fpu/svml_s_expf_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for vector function expf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/svml_s_logf16_core.S index 47ae7855a3..cc2e97df78 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_logf16_core.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/svml_s_logf4_core.S index 09be406d3c..195f328d92 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_logf4_core.S @@ -1,5 +1,5 @@ /* Function logf vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/svml_s_logf8_core.S index cf4e9be537..8bb6926667 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_logf8_core.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S index 7ab572bb30..c2efba23f2 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S @@ -1,5 +1,5 @@ /* Function logf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.S b/sysdeps/x86_64/fpu/svml_s_logf_data.S index 1e7f7015d3..a5675f5c7a 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf_data.S +++ b/sysdeps/x86_64/fpu/svml_s_logf_data.S @@ -1,5 +1,5 @@ /* Data for vector function logf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.h b/sysdeps/x86_64/fpu/svml_s_logf_data.h index d42411a849..619d5c4bd1 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf_data.h +++ b/sysdeps/x86_64/fpu/svml_s_logf_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for vectorized function logf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/svml_s_powf16_core.S index efd84c2fff..cb52af0c6b 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_powf16_core.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/svml_s_powf4_core.S index 81f0f530de..88fae60892 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_powf4_core.S @@ -1,5 +1,5 @@ /* Function powf vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/svml_s_powf8_core.S index 8fed6c7c86..8ea44897c1 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_powf8_core.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S index eeeb66d46e..b5e4e5e6ef 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S @@ -1,5 +1,5 @@ /* Function powf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.S b/sysdeps/x86_64/fpu/svml_s_powf_data.S index 4a4799ae4f..fc1a3d9390 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf_data.S +++ b/sysdeps/x86_64/fpu/svml_s_powf_data.S @@ -1,5 +1,5 @@ /* Data for function powf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.h b/sysdeps/x86_64/fpu/svml_s_powf_data.h index d847368e4b..514004238a 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf_data.h +++ b/sysdeps/x86_64/fpu/svml_s_powf_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for function powf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S index 992f9a91cc..5cbf10b8da 100644 --- a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S index d402ffba15..1a7d2733af 100644 --- a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S index eec7de87d5..74d1dfd1a8 100644 --- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S index c247444dfc..55b8b2d768 100644 --- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S index add6e0fd43..d7a31e1ea6 100644 --- a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S index 2349c7b788..6f10137134 100644 --- a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with SSE2. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S index fe31e3793e..c459658688 100644 --- a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S index f54be48ee3..5e95aa2e02 100644 --- a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S @@ -1,5 +1,5 @@ /* Function sinf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.S b/sysdeps/x86_64/fpu/svml_s_trig_data.S index 07fc7d272d..b61aa6abb9 100644 --- a/sysdeps/x86_64/fpu/svml_s_trig_data.S +++ b/sysdeps/x86_64/fpu/svml_s_trig_data.S @@ -1,5 +1,5 @@ /* Data for function cosf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.h b/sysdeps/x86_64/fpu/svml_s_trig_data.h index 5a91dad41c..2e469a918a 100644 --- a/sysdeps/x86_64/fpu/svml_s_trig_data.h +++ b/sysdeps/x86_64/fpu/svml_s_trig_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for vectorized sinf, cosf, sincosf. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h index d255d195ee..b1a03be3d9 100644 --- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h +++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h @@ -1,5 +1,5 @@ /* Wrapper implementations of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c index 4e764f2475..a9d15979aa 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for SSE ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen2.c b/sysdeps/x86_64/fpu/test-double-vlen2.c index 2b6896425e..c7a3dff747 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen2.c +++ b/sysdeps/x86_64/fpu/test-double-vlen2.c @@ -1,5 +1,5 @@ /* Tests for SSE ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c index bc2fd16c5a..eb6a531502 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX2 ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c index 56723ab4d7..0cadef03d6 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c @@ -1,5 +1,5 @@ /* Tests for AVX2 ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c index a711c9e1c3..52b81da3ee 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.c index f0813437b4..9ae97f1388 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4.c @@ -1,5 +1,5 @@ /* Tests for AVX ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c index 942c42b83b..c10bb9cb4a 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX-512 versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen8.c b/sysdeps/x86_64/fpu/test-double-vlen8.c index 1e23b83418..4fb6c8d196 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen8.c +++ b/sysdeps/x86_64/fpu/test-double-vlen8.c @@ -1,5 +1,5 @@ /* Tests for AVX-512 versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c index bc98e78ff0..dc09e4a338 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX-512 ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-float-vlen16.c b/sysdeps/x86_64/fpu/test-float-vlen16.c index d7f683f09c..882bfc840d 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen16.c +++ b/sysdeps/x86_64/fpu/test-float-vlen16.c @@ -1,5 +1,5 @@ /* Tests for AVX-512 ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c index 39254efed4..0bb9818146 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for SSE ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-float-vlen4.c b/sysdeps/x86_64/fpu/test-float-vlen4.c index e56d64260e..f6a4cf5c1e 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen4.c +++ b/sysdeps/x86_64/fpu/test-float-vlen4.c @@ -1,5 +1,5 @@ /* Tests for SSE ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c index 6bd0d50779..4985ac2379 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX2 ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c index 0012082b8e..7a416385b6 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c @@ -1,5 +1,5 @@ /* Tests for AVX2 ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c index 2fec906de0..9cc2883399 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-float-vlen8.c b/sysdeps/x86_64/fpu/test-float-vlen8.c index 891e58ff88..c92a50ae7e 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8.c @@ -1,5 +1,5 @@ /* Tests for AVX ISA versions of vector math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/x86_64-math-asm.h b/sysdeps/x86_64/fpu/x86_64-math-asm.h new file mode 100644 index 0000000000..db3f9f78b0 --- /dev/null +++ b/sysdeps/x86_64/fpu/x86_64-math-asm.h @@ -0,0 +1,74 @@ +/* Helper macros for x86_64 libm functions. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86_64_MATH_ASM_H +#define _X86_64_MATH_ASM_H 1 + +/* Define constants for the minimum value of a floating-point + type. */ +#define DEFINE_LDBL_MIN \ + .section .rodata.cst16,"aM",@progbits,16; \ + .p2align 4; \ + .type ldbl_min,@object; \ +ldbl_min: \ + .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x1, 0; \ + .byte 0, 0, 0, 0, 0, 0; \ + .size ldbl_min, .-ldbl_min; + +/* Force an underflow exception if the given value (nonnegative or + NaN) is subnormal. The relevant constant for the minimum of the + type must have been defined, the MO macro must have been defined + for access to memory operands, and, if PIC, the PIC register must + have been loaded. */ +#define LDBL_CHECK_FORCE_UFLOW_NONNEG_NAN \ + fldt MO(ldbl_min); \ + fld %st(1); \ + fucomip %st(1), %st(0); \ + fstp %st(0); \ + jnc 6464f; \ + fld %st(0); \ + fmul %st(0); \ + fstp %st(0); \ +6464: + +/* Likewise, but the argument is not a NaN. */ +#define LDBL_CHECK_FORCE_UFLOW_NONNAN \ + fldt MO(ldbl_min); \ + fld %st(1); \ + fabs; \ + fcomip %st(1), %st(0); \ + fstp %st(0); \ + jnc 6464f; \ + fld %st(0); \ + fmul %st(0); \ + fstp %st(0); \ +6464: + +/* Likewise, but the argument is nonnegative and not a NaN. */ +#define LDBL_CHECK_FORCE_UFLOW_NONNEG \ + fldt MO(ldbl_min); \ + fld %st(1); \ + fcomip %st(1), %st(0); \ + fstp %st(0); \ + jnc 6464f; \ + fld %st(0); \ + fmul %st(0); \ + fstp %st(0); \ +6464: + +#endif /* x86_64-math-asm.h. */ diff --git a/sysdeps/x86_64/hp-timing.h b/sysdeps/x86_64/hp-timing.h index 493f9735bd..65381b314d 100644 --- a/sysdeps/x86_64/hp-timing.h +++ b/sysdeps/x86_64/hp-timing.h @@ -1,5 +1,5 @@ /* High precision, low overhead timing functions. x86-64 version. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/htonl.S b/sysdeps/x86_64/htonl.S index 85a690f5b1..c92fae8791 100644 --- a/sysdeps/x86_64/htonl.S +++ b/sysdeps/x86_64/htonl.S @@ -1,5 +1,5 @@ /* Change byte order in word. For AMD x86-64. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/ifuncmain8.c b/sysdeps/x86_64/ifuncmain8.c new file mode 100644 index 0000000000..448ab96bfa --- /dev/null +++ b/sysdeps/x86_64/ifuncmain8.c @@ -0,0 +1,32 @@ +/* Test IFUNC selector with floating-point parameters. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> + +extern float foo (float); + +static int +do_test (void) +{ + if (foo (2) != 3) + abort (); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/sysdeps/x86_64/ifuncmod8.c b/sysdeps/x86_64/ifuncmod8.c new file mode 100644 index 0000000000..c00436799c --- /dev/null +++ b/sysdeps/x86_64/ifuncmod8.c @@ -0,0 +1,36 @@ +/* Test IFUNC selector with floating-point parameters. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <emmintrin.h> + +void * foo_ifunc (void) __asm__ ("foo"); +__asm__(".type foo, %gnu_indirect_function"); + +static float +foo_impl (float x) +{ + return x + 1; +} + +void * +foo_ifunc (void) +{ + __m128i xmm = _mm_set1_epi32 (-1); + asm volatile ("movdqa %0, %%xmm0" : : "x" (xmm) : "xmm0" ); + return foo_impl; +} diff --git a/sysdeps/x86_64/jmpbuf-offsets.h b/sysdeps/x86_64/jmpbuf-offsets.h index 03176a91f0..da71e555f7 100644 --- a/sysdeps/x86_64/jmpbuf-offsets.h +++ b/sysdeps/x86_64/jmpbuf-offsets.h @@ -1,5 +1,5 @@ /* Private macros for accessing __jmp_buf contents. x86-64 version. - Copyright (C) 2006-2015 Free Software Foundation, Inc. + Copyright (C) 2006-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/jmpbuf-unwind.h b/sysdeps/x86_64/jmpbuf-unwind.h index 3d9b2b589f..aa0642b54a 100644 --- a/sysdeps/x86_64/jmpbuf-unwind.h +++ b/sysdeps/x86_64/jmpbuf-unwind.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2015 Free Software Foundation, Inc. +/* Copyright (C) 2003-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. diff --git a/sysdeps/x86_64/ldsodefs.h b/sysdeps/x86_64/ldsodefs.h index 84d36e82be..6a96c53721 100644 --- a/sysdeps/x86_64/ldsodefs.h +++ b/sysdeps/x86_64/ldsodefs.h @@ -1,5 +1,5 @@ /* Run-time dynamic linker data structures for loaded ELF shared objects. - Copyright (C) 1995-2015 Free Software Foundation, Inc. + Copyright (C) 1995-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,6 +20,7 @@ #define _X86_64_LDSODEFS_H 1 #include <elf.h> +#include <cpu-features.h> struct La_x86_64_regs; struct La_x86_64_retval; diff --git a/sysdeps/x86_64/localplt.data b/sysdeps/x86_64/localplt.data index d140476dfe..f168b143ff 100644 --- a/sysdeps/x86_64/localplt.data +++ b/sysdeps/x86_64/localplt.data @@ -3,17 +3,17 @@ # users can define their own functions and have library internals call them. # Linker in binutils 2.26 and newer consolidates R_X86_64_JUMP_SLOT # relocation with R_X86_64_GLOB_DAT relocation against the same symbol. -libc.so: calloc +libc.so: calloc + RELA R_X86_64_GLOB_DAT libc.so: free + RELA R_X86_64_GLOB_DAT libc.so: malloc + RELA R_X86_64_GLOB_DAT -libc.so: memalign -libc.so: realloc +libc.so: memalign + RELA R_X86_64_GLOB_DAT +libc.so: realloc + RELA R_X86_64_GLOB_DAT libm.so: matherr # The dynamic loader uses __libc_memalign internally to allocate aligned # TLS storage. The other malloc family of functions are expected to allow # user symbol interposition. -ld.so: __libc_memalign -ld.so: malloc -ld.so: calloc -ld.so: realloc +ld.so: __libc_memalign + RELA R_X86_64_GLOB_DAT +ld.so: malloc + RELA R_X86_64_GLOB_DAT +ld.so: calloc + RELA R_X86_64_GLOB_DAT +ld.so: realloc + RELA R_X86_64_GLOB_DAT ld.so: free + RELA R_X86_64_GLOB_DAT diff --git a/sysdeps/x86_64/lshift.S b/sysdeps/x86_64/lshift.S index 03fb631207..49cbfbaf3d 100644 --- a/sysdeps/x86_64/lshift.S +++ b/sysdeps/x86_64/lshift.S @@ -1,5 +1,5 @@ /* x86-64 __mpn_lshift -- - Copyright (C) 2007-2015 Free Software Foundation, Inc. + Copyright (C) 2007-2016 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify diff --git a/sysdeps/x86_64/machine-gmon.h b/sysdeps/x86_64/machine-gmon.h index 51cf7793b4..3d9ce5c44e 100644 --- a/sysdeps/x86_64/machine-gmon.h +++ b/sysdeps/x86_64/machine-gmon.h @@ -1,5 +1,5 @@ /* x86-64-specific implementation of profiling support. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S index fae85caae1..132eacba8f 100644 --- a/sysdeps/x86_64/memchr.S +++ b/sysdeps/x86_64/memchr.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S index f636716b64..3fb018a772 100644 --- a/sysdeps/x86_64/memcmp.S +++ b/sysdeps/x86_64/memcmp.S @@ -1,5 +1,5 @@ /* memcmp with SSE2 - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/memcpy.S b/sysdeps/x86_64/memcpy.S index eea8c2a5af..f6e3d9396c 100644 --- a/sysdeps/x86_64/memcpy.S +++ b/sysdeps/x86_64/memcpy.S @@ -1,7 +1,7 @@ /* Optimized memcpy for x86-64. - Copyright (C) 2007-2015 Free Software Foundation, Inc. + Copyright (C) 2007-2016 Free Software Foundation, Inc. Contributed by Evandro Menezes <evandro.menezes@amd.com>, 2007. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/memcpy_chk.S b/sysdeps/x86_64/memcpy_chk.S index f1a5ac4b23..2296b55119 100644 --- a/sysdeps/x86_64/memcpy_chk.S +++ b/sysdeps/x86_64/memcpy_chk.S @@ -1,5 +1,5 @@ /* Checking memcpy for x86-64. - Copyright (C) 2004-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/memmove.c b/sysdeps/x86_64/memmove.c index e0694a859f..07f81852d6 100644 --- a/sysdeps/x86_64/memmove.c +++ b/sysdeps/x86_64/memmove.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2015 Free Software Foundation, Inc. +/* Copyright (C) 2011-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/mempcpy_chk.S b/sysdeps/x86_64/mempcpy_chk.S index 968e7edf3f..390abc68dd 100644 --- a/sysdeps/x86_64/mempcpy_chk.S +++ b/sysdeps/x86_64/mempcpy_chk.S @@ -1,5 +1,5 @@ /* Checking mempcpy for x86-64. - Copyright (C) 2004-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S index 707b8390db..840de30cd7 100644 --- a/sysdeps/x86_64/memrchr.S +++ b/sysdeps/x86_64/memrchr.S @@ -1,6 +1,6 @@ /* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S index e4962546c4..4cf0da0fb8 100644 --- a/sysdeps/x86_64/memset.S +++ b/sysdeps/x86_64/memset.S @@ -1,6 +1,6 @@ /* memset/bzero -- set memory area to CH/0 Optimized version for x86-64. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -24,7 +24,7 @@ ENTRY(__bzero) movq %rdi, %rax /* Set return value. */ movq %rsi, %rdx /* Set n. */ - pxor %xmm8, %xmm8 + pxor %xmm0, %xmm0 jmp L(entry_from_bzero) END(__bzero) weak_alias (__bzero, bzero) @@ -33,10 +33,10 @@ weak_alias (__bzero, bzero) ENTRY(__memset_tail) movq %rcx, %rax /* Set return value. */ - movd %esi, %xmm8 - punpcklbw %xmm8, %xmm8 - punpcklwd %xmm8, %xmm8 - pshufd $0, %xmm8, %xmm8 + movd %esi, %xmm0 + punpcklbw %xmm0, %xmm0 + punpcklwd %xmm0, %xmm0 + pshufd $0, %xmm0, %xmm0 jmp L(entry_from_bzero) END(__memset_tail) @@ -50,57 +50,57 @@ END_CHK (__memset_chk) #endif ENTRY (memset) - movd %esi, %xmm8 + movd %esi, %xmm0 movq %rdi, %rax - punpcklbw %xmm8, %xmm8 - punpcklwd %xmm8, %xmm8 - pshufd $0, %xmm8, %xmm8 + punpcklbw %xmm0, %xmm0 + punpcklwd %xmm0, %xmm0 + pshufd $0, %xmm0, %xmm0 L(entry_from_bzero): cmpq $64, %rdx ja L(loop_start) cmpq $16, %rdx jbe L(less_16_bytes) cmpq $32, %rdx - movdqu %xmm8, (%rdi) - movdqu %xmm8, -16(%rdi,%rdx) + movdqu %xmm0, (%rdi) + movdqu %xmm0, -16(%rdi,%rdx) ja L(between_32_64_bytes) L(return): rep ret .p2align 4 L(between_32_64_bytes): - movdqu %xmm8, 16(%rdi) - movdqu %xmm8, -32(%rdi,%rdx) + movdqu %xmm0, 16(%rdi) + movdqu %xmm0, -32(%rdi,%rdx) ret .p2align 4 L(loop_start): leaq 64(%rdi), %rcx - movdqu %xmm8, (%rdi) + movdqu %xmm0, (%rdi) andq $-64, %rcx - movdqu %xmm8, -16(%rdi,%rdx) - movdqu %xmm8, 16(%rdi) - movdqu %xmm8, -32(%rdi,%rdx) - movdqu %xmm8, 32(%rdi) - movdqu %xmm8, -48(%rdi,%rdx) - movdqu %xmm8, 48(%rdi) - movdqu %xmm8, -64(%rdi,%rdx) + movdqu %xmm0, -16(%rdi,%rdx) + movdqu %xmm0, 16(%rdi) + movdqu %xmm0, -32(%rdi,%rdx) + movdqu %xmm0, 32(%rdi) + movdqu %xmm0, -48(%rdi,%rdx) + movdqu %xmm0, 48(%rdi) + movdqu %xmm0, -64(%rdi,%rdx) addq %rdi, %rdx andq $-64, %rdx cmpq %rdx, %rcx je L(return) .p2align 4 L(loop): - movdqa %xmm8, (%rcx) - movdqa %xmm8, 16(%rcx) - movdqa %xmm8, 32(%rcx) - movdqa %xmm8, 48(%rcx) + movdqa %xmm0, (%rcx) + movdqa %xmm0, 16(%rcx) + movdqa %xmm0, 32(%rcx) + movdqa %xmm0, 48(%rcx) addq $64, %rcx cmpq %rcx, %rdx jne L(loop) rep ret L(less_16_bytes): - movq %xmm8, %rcx + movq %xmm0, %rcx testb $24, %dl jne L(between8_16bytes) testb $4, %dl diff --git a/sysdeps/x86_64/memset_chk.S b/sysdeps/x86_64/memset_chk.S index 70204267ca..95bb5d0e94 100644 --- a/sysdeps/x86_64/memset_chk.S +++ b/sysdeps/x86_64/memset_chk.S @@ -1,5 +1,5 @@ /* Checking memset for x86-64. - Copyright (C) 2004-2015 Free Software Foundation, Inc. + Copyright (C) 2004-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/memusage.h b/sysdeps/x86_64/memusage.h index e915c1a672..fc102c4252 100644 --- a/sysdeps/x86_64/memusage.h +++ b/sysdeps/x86_64/memusage.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2001-2015 Free Software Foundation, Inc. +/* Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/mul_1.S b/sysdeps/x86_64/mul_1.S index 2fb8fad2bd..88b8f920a1 100644 --- a/sysdeps/x86_64/mul_1.S +++ b/sysdeps/x86_64/mul_1.S @@ -1,6 +1,6 @@ /* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store the result in a second limb vector. - Copyright (C) 2003-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2016 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index d7002a9df3..d234f4ab66 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -1,5 +1,4 @@ ifeq ($(subdir),csu) -aux += init-arch tests += test-multiarch gen-as-const-headers += ifunc-defines.sym endif @@ -8,31 +7,26 @@ ifeq ($(subdir),string) sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ strcmp-sse2-unaligned strncmp-ssse3 \ - memcmp-sse4 memcpy-ssse3 \ - memcpy-sse2-unaligned mempcpy-ssse3 \ - memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \ - memmove-avx-unaligned memcpy-avx-unaligned mempcpy-avx-unaligned \ - memmove-ssse3-back strcasecmp_l-ssse3 \ + memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned \ + memcpy-avx512-no-vzeroupper mempcpy-ssse3 memmove-ssse3 \ + memcpy-ssse3-back mempcpy-ssse3-back memmove-avx-unaligned \ + memcpy-avx-unaligned mempcpy-avx-unaligned \ + mempcpy-avx512-no-vzeroupper memmove-ssse3-back \ + memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \ strncase_l-ssse3 strcat-ssse3 strncat-ssse3\ strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \ strcpy-sse2-unaligned strncpy-sse2-unaligned \ stpcpy-sse2-unaligned stpncpy-sse2-unaligned \ strcat-sse2-unaligned strncat-sse2-unaligned \ - strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned - -ifeq (yes,$(config-cflags-sse4)) -sysdep_routines += strcspn-c strpbrk-c strspn-c varshift + strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \ + strcspn-c strpbrk-c strspn-c varshift memset-avx2 \ + memset-avx512-no-vzeroupper CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 CFLAGS-strspn-c.c += -msse4 endif -ifeq (yes,$(config-cflags-avx2)) -sysdep_routines += memset-avx2 -endif -endif - ifeq ($(subdir),wcsmbs) sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c endif diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions deleted file mode 100644 index 59b185ac8d..0000000000 --- a/sysdeps/x86_64/multiarch/Versions +++ /dev/null @@ -1,5 +0,0 @@ -libc { - GLIBC_PRIVATE { - __get_cpu_features; - } -} diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c deleted file mode 100644 index f87b8dce6b..0000000000 --- a/sysdeps/x86_64/multiarch/cacheinfo.c +++ /dev/null @@ -1,2 +0,0 @@ -#define DISABLE_PREFERRED_MEMORY_INSTRUCTION -#include "../cacheinfo.c" diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym index a410d8808f..3df946f343 100644 --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym @@ -4,7 +4,6 @@ -- CPU_FEATURES_SIZE sizeof (struct cpu_features) -KIND_OFFSET offsetof (struct cpu_features, kind) CPUID_OFFSET offsetof (struct cpu_features, cpuid) CPUID_SIZE sizeof (struct cpuid_registers) CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index b64e4f1532..188b6d36c6 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -1,5 +1,5 @@ /* Enumerate available IFUNC implementations of a function. x86-64 version. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,10 +20,11 @@ #include <string.h> #include <wchar.h> #include <ifunc-impl-list.h> +#include <sysdep.h> #include "init-arch.h" /* Maximum number of IFUNC implementations. */ -#define MAX_IFUNC 4 +#define MAX_IFUNC 5 /* Fill ARRAY of MAX elements with IFUNC implementations for function NAME supported on target machine and return the number of valid @@ -39,48 +40,77 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/memcmp.S. */ IFUNC_IMPL (i, name, memcmp, - IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSE4_1, + IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1), __memcmp_sse4_1) - IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3) + IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3), + __memcmp_ssse3) IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2)) - /* Support sysdeps/x86_64/multiarch/memmove_chk.S. */ + /* Support sysdeps/x86_64/multiarch/memmove_chk.c. */ IFUNC_IMPL (i, name, __memmove_chk, - IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_AVX, +#ifdef HAVE_AVX512_ASM_SUPPORT + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper) +#endif + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX_Usable), __memmove_chk_avx_unaligned) - IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_CPU_FEATURE (SSSE3), __memmove_chk_ssse3_back) - IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_CPU_FEATURE (SSSE3), __memmove_chk_ssse3) IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, __memmove_chk_sse2)) /* Support sysdeps/x86_64/multiarch/memmove.S. */ IFUNC_IMPL (i, name, memmove, - IFUNC_IMPL_ADD (array, i, memmove, HAS_AVX, + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX_Usable), __memmove_avx_unaligned) - IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, +#ifdef HAVE_AVX512_ASM_SUPPORT + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper) +#endif + IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), __memmove_ssse3_back) - IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), __memmove_ssse3) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2)) -#ifdef HAVE_AVX2_SUPPORT /* Support sysdeps/x86_64/multiarch/memset_chk.S. */ IFUNC_IMPL (i, name, __memset_chk, - IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2) - IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_AVX2, - __memset_chk_avx2)) + IFUNC_IMPL_ADD (array, i, __memset_chk, 1, + __memset_chk_sse2) + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX2_Usable), + __memset_chk_avx2) +#ifdef HAVE_AVX512_ASM_SUPPORT + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_chk_avx512_no_vzeroupper) +#endif + ) /* Support sysdeps/x86_64/multiarch/memset.S. */ IFUNC_IMPL (i, name, memset, IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2) - IFUNC_IMPL_ADD (array, i, memset, HAS_AVX2, __memset_avx2)) + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX2_Usable), + __memset_avx2) +#ifdef HAVE_AVX512_ASM_SUPPORT + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_avx512_no_vzeroupper) #endif + ) /* Support sysdeps/x86_64/multiarch/stpncpy.S. */ IFUNC_IMPL (i, name, stpncpy, - IFUNC_IMPL_ADD (array, i, stpncpy, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3), __stpncpy_ssse3) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2_unaligned) @@ -88,38 +118,42 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/stpcpy.S. */ IFUNC_IMPL (i, name, stpcpy, - IFUNC_IMPL_ADD (array, i, stpcpy, HAS_SSSE3, __stpcpy_ssse3) + IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3), + __stpcpy_ssse3) IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2)) /* Support sysdeps/x86_64/multiarch/strcasecmp_l.S. */ IFUNC_IMPL (i, name, strcasecmp, - IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_AVX, + IFUNC_IMPL_ADD (array, i, strcasecmp, + HAS_ARCH_FEATURE (AVX_Usable), __strcasecmp_avx) - IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_SSE4_2, + IFUNC_IMPL_ADD (array, i, strcasecmp, + HAS_CPU_FEATURE (SSE4_2), __strcasecmp_sse42) - IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, strcasecmp, + HAS_CPU_FEATURE (SSSE3), __strcasecmp_ssse3) IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2)) /* Support sysdeps/x86_64/multiarch/strcasecmp_l.S. */ IFUNC_IMPL (i, name, strcasecmp_l, - IFUNC_IMPL_ADD (array, i, strcasecmp_l, HAS_AVX, + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + HAS_ARCH_FEATURE (AVX_Usable), __strcasecmp_l_avx) - IFUNC_IMPL_ADD (array, i, strcasecmp_l, HAS_SSE4_2, + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + HAS_CPU_FEATURE (SSE4_2), __strcasecmp_l_sse42) - IFUNC_IMPL_ADD (array, i, strcasecmp_l, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + HAS_CPU_FEATURE (SSSE3), __strcasecmp_l_ssse3) IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, __strcasecmp_l_sse2)) - /* Support sysdeps/x86_64/multiarch/strcasestr.c. */ - IFUNC_IMPL (i, name, strcasestr, - IFUNC_IMPL_ADD (array, i, strcasestr, 1, __strcasestr_sse2)) - /* Support sysdeps/x86_64/multiarch/strcat.S. */ IFUNC_IMPL (i, name, strcat, - IFUNC_IMPL_ADD (array, i, strcat, HAS_SSSE3, __strcat_ssse3) + IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3), + __strcat_ssse3) IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned) IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2)) @@ -130,48 +164,57 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strcmp.S. */ IFUNC_IMPL (i, name, strcmp, - IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSE4_2, __strcmp_sse42) - IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSSE3, __strcmp_ssse3) + IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), + __strcmp_sse42) + IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3), + __strcmp_ssse3) IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned) IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2)) /* Support sysdeps/x86_64/multiarch/strcpy.S. */ IFUNC_IMPL (i, name, strcpy, - IFUNC_IMPL_ADD (array, i, strcpy, HAS_SSSE3, __strcpy_ssse3) + IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3), + __strcpy_ssse3) IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2)) /* Support sysdeps/x86_64/multiarch/strcspn.S. */ IFUNC_IMPL (i, name, strcspn, - IFUNC_IMPL_ADD (array, i, strcspn, HAS_SSE4_2, + IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2), __strcspn_sse42) IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) /* Support sysdeps/x86_64/multiarch/strncase_l.S. */ IFUNC_IMPL (i, name, strncasecmp, - IFUNC_IMPL_ADD (array, i, strncasecmp, HAS_AVX, + IFUNC_IMPL_ADD (array, i, strncasecmp, + HAS_ARCH_FEATURE (AVX_Usable), __strncasecmp_avx) - IFUNC_IMPL_ADD (array, i, strncasecmp, HAS_SSE4_2, + IFUNC_IMPL_ADD (array, i, strncasecmp, + HAS_CPU_FEATURE (SSE4_2), __strncasecmp_sse42) - IFUNC_IMPL_ADD (array, i, strncasecmp, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, strncasecmp, + HAS_CPU_FEATURE (SSSE3), __strncasecmp_ssse3) IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_sse2)) /* Support sysdeps/x86_64/multiarch/strncase_l.S. */ IFUNC_IMPL (i, name, strncasecmp_l, - IFUNC_IMPL_ADD (array, i, strncasecmp_l, HAS_AVX, + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + HAS_ARCH_FEATURE (AVX_Usable), __strncasecmp_l_avx) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, HAS_SSE4_2, + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + HAS_CPU_FEATURE (SSE4_2), __strncasecmp_l_sse42) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + HAS_CPU_FEATURE (SSSE3), __strncasecmp_l_ssse3) IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, __strncasecmp_l_sse2)) /* Support sysdeps/x86_64/multiarch/strncat.S. */ IFUNC_IMPL (i, name, strncat, - IFUNC_IMPL_ADD (array, i, strncat, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3), __strncat_ssse3) IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2_unaligned) @@ -179,7 +222,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strncpy.S. */ IFUNC_IMPL (i, name, strncpy, - IFUNC_IMPL_ADD (array, i, strncpy, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3), __strncpy_ssse3) IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2_unaligned) @@ -187,14 +230,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strpbrk.S. */ IFUNC_IMPL (i, name, strpbrk, - IFUNC_IMPL_ADD (array, i, strpbrk, HAS_SSE4_2, + IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2), __strpbrk_sse42) IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) /* Support sysdeps/x86_64/multiarch/strspn.S. */ IFUNC_IMPL (i, name, strspn, - IFUNC_IMPL_ADD (array, i, strspn, HAS_SSE4_2, __strspn_sse42) + IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2), + __strspn_sse42) IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) /* Support sysdeps/x86_64/multiarch/strstr.c. */ @@ -204,65 +248,95 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wcscpy.S. */ IFUNC_IMPL (i, name, wcscpy, - IFUNC_IMPL_ADD (array, i, wcscpy, HAS_SSSE3, __wcscpy_ssse3) + IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3), + __wcscpy_ssse3) IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2)) /* Support sysdeps/x86_64/multiarch/wmemcmp.S. */ IFUNC_IMPL (i, name, wmemcmp, - IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_SSE4_1, + IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1), __wmemcmp_sse4_1) - IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3), __wmemcmp_ssse3) IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2)) #ifdef SHARED /* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */ IFUNC_IMPL (i, name, __memcpy_chk, - IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_AVX, +#ifdef HAVE_AVX512_ASM_SUPPORT + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper) +#endif + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX_Usable), __memcpy_chk_avx_unaligned) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_CPU_FEATURE (SSSE3), __memcpy_chk_ssse3_back) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_CPU_FEATURE (SSSE3), __memcpy_chk_ssse3) IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, __memcpy_chk_sse2)) /* Support sysdeps/x86_64/multiarch/memcpy.S. */ IFUNC_IMPL (i, name, memcpy, - IFUNC_IMPL_ADD (array, i, memcpy, HAS_AVX, + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX_Usable), __memcpy_avx_unaligned) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), __memcpy_ssse3_back) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3) + IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), + __memcpy_ssse3) +#ifdef HAVE_AVX512_ASM_SUPPORT + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_avx512_no_vzeroupper) +#endif IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2)) /* Support sysdeps/x86_64/multiarch/mempcpy_chk.S. */ IFUNC_IMPL (i, name, __mempcpy_chk, - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_AVX, +#ifdef HAVE_AVX512_ASM_SUPPORT + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper) +#endif + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX_Usable), __mempcpy_chk_avx_unaligned) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_CPU_FEATURE (SSSE3), __mempcpy_chk_ssse3_back) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_CPU_FEATURE (SSSE3), __mempcpy_chk_ssse3) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, __mempcpy_chk_sse2)) /* Support sysdeps/x86_64/multiarch/mempcpy.S. */ IFUNC_IMPL (i, name, mempcpy, - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_AVX, +#ifdef HAVE_AVX512_ASM_SUPPORT + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper) +#endif + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX_Usable), __mempcpy_avx_unaligned) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), __mempcpy_ssse3_back) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), __mempcpy_ssse3) IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2)) /* Support sysdeps/x86_64/multiarch/strncmp.S. */ IFUNC_IMPL (i, name, strncmp, - IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSE4_2, + IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2), __strncmp_sse42) - IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3), __strncmp_ssse3) IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2)) #endif diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c deleted file mode 100644 index aaad5fa841..0000000000 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ /dev/null @@ -1,223 +0,0 @@ -/* Initialize CPU feature data. - This file is part of the GNU C Library. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@redhat.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <atomic.h> -#include <cpuid.h> -#include "init-arch.h" - - -struct cpu_features __cpu_features attribute_hidden; - - -static void -get_common_indeces (unsigned int *family, unsigned int *model) -{ - __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - *family = (eax >> 8) & 0x0f; - *model = (eax >> 4) & 0x0f; -} - - -void -__init_cpu_features (void) -{ - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - unsigned int family = 0; - unsigned int model = 0; - enum cpu_features_kind kind; - - __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx); - - /* This spells out "GenuineIntel". */ - if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) - { - kind = arch_kind_intel; - - get_common_indeces (&family, &model); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - unsigned int extended_family = (eax >> 20) & 0xff; - unsigned int extended_model = (eax >> 12) & 0xf0; - if (family == 0x0f) - { - family += extended_family; - model += extended_model; - } - else if (family == 0x06) - { - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - model += extended_model; - switch (model) - { - case 0x1c: - case 0x26: - /* BSF is slow on Atom. */ - __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF; - break; - - case 0x37: - case 0x4a: - case 0x4d: - case 0x5a: - case 0x5d: - /* Unaligned load versions are faster than SSSE3 - on Silvermont. */ -#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -#endif -#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 -# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 -#endif - __cpu_features.feature[index_Fast_Unaligned_Load] - |= (bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop - | bit_Slow_SSE4_2); - break; - - default: - /* Unknown family 0x06 processors. Assuming this is one - of Core i3/i5/i7 processors if AVX is available. */ - if ((ecx & bit_AVX) == 0) - break; - - case 0x1a: - case 0x1e: - case 0x1f: - case 0x25: - case 0x2c: - case 0x2e: - case 0x2f: - /* Rep string instructions, copy backward, unaligned loads - and pminub are fast on Intel Core i3, i5 and i7. */ -#if index_Fast_Rep_String != index_Fast_Copy_Backward -# error index_Fast_Rep_String != index_Fast_Copy_Backward -#endif -#if index_Fast_Rep_String != index_Fast_Unaligned_Load -# error index_Fast_Rep_String != index_Fast_Unaligned_Load -#endif -#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -#endif - __cpu_features.feature[index_Fast_Rep_String] - |= (bit_Fast_Rep_String - | bit_Fast_Copy_Backward - | bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop); - break; - } - } - } - /* This spells out "AuthenticAMD". */ - else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) - { - kind = arch_kind_amd; - - get_common_indeces (&family, &model); - - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - - unsigned int eax; - __cpuid (0x80000000, eax, ebx, ecx, edx); - if (eax >= 0x80000001) - __cpuid (0x80000001, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx); - } - else - kind = arch_kind_other; - - if (__cpu_features.max_cpuid >= 7) - __cpuid_count (7, 0, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx); - - /* Can we call xgetbv? */ - if (CPUID_OSXSAVE) - { - unsigned int xcrlow; - unsigned int xcrhigh; - asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); - /* Is YMM and XMM state usable? */ - if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == - (bit_YMM_state | bit_XMM_state)) - { - /* Determine if AVX is usable. */ - if (CPUID_AVX) - __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; -#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -#endif - /* Determine if AVX2 is usable. Unaligned load with 256-bit - AVX registers are faster on processors with AVX2. */ - if (CPUID_AVX2) - __cpu_features.feature[index_AVX2_Usable] - |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; - /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and - ZMM16-ZMM31 state are enabled. */ - if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state - | bit_ZMM16_31_state)) == - (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) - { - /* Determine if AVX512F is usable. */ - if (CPUID_AVX512F) - { - __cpu_features.feature[index_AVX512F_Usable] - |= bit_AVX512F_Usable; - /* Determine if AVX512DQ is usable. */ - if (CPUID_AVX512DQ) - __cpu_features.feature[index_AVX512DQ_Usable] - |= bit_AVX512DQ_Usable; - } - } - /* Determine if FMA is usable. */ - if (CPUID_FMA) - __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; - /* Determine if FMA4 is usable. */ - if (CPUID_FMA4) - __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable; - } - } - - __cpu_features.family = family; - __cpu_features.model = model; - atomic_write_barrier (); - __cpu_features.kind = kind; -} - -#undef __get_cpu_features - -const struct cpu_features * -__get_cpu_features (void) -{ - if (__cpu_features.kind == arch_kind_unknown) - __init_cpu_features (); - - return &__cpu_features; -} diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h deleted file mode 100644 index cfc6e7049e..0000000000 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ /dev/null @@ -1,206 +0,0 @@ -/* This file is part of the GNU C Library. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define bit_Fast_Rep_String (1 << 0) -#define bit_Fast_Copy_Backward (1 << 1) -#define bit_Slow_BSF (1 << 2) -#define bit_Fast_Unaligned_Load (1 << 4) -#define bit_Prefer_PMINUB_for_stringop (1 << 5) -#define bit_AVX_Usable (1 << 6) -#define bit_FMA_Usable (1 << 7) -#define bit_FMA4_Usable (1 << 8) -#define bit_Slow_SSE4_2 (1 << 9) -#define bit_AVX2_Usable (1 << 10) -#define bit_AVX_Fast_Unaligned_Load (1 << 11) -#define bit_AVX512F_Usable (1 << 12) -#define bit_AVX512DQ_Usable (1 << 13) - -/* CPUID Feature flags. */ - -/* COMMON_CPUID_INDEX_1. */ -#define bit_SSE2 (1 << 26) -#define bit_SSSE3 (1 << 9) -#define bit_SSE4_1 (1 << 19) -#define bit_SSE4_2 (1 << 20) -#define bit_OSXSAVE (1 << 27) -#define bit_AVX (1 << 28) -#define bit_POPCOUNT (1 << 23) -#define bit_FMA (1 << 12) -#define bit_FMA4 (1 << 16) - -/* COMMON_CPUID_INDEX_7. */ -#define bit_RTM (1 << 11) -#define bit_AVX2 (1 << 5) -#define bit_AVX512F (1 << 16) -#define bit_AVX512DQ (1 << 17) - -/* XCR0 Feature flags. */ -#define bit_XMM_state (1 << 1) -#define bit_YMM_state (2 << 1) -#define bit_Opmask_state (1 << 5) -#define bit_ZMM0_15_state (1 << 6) -#define bit_ZMM16_31_state (1 << 7) - -/* The integer bit array index for the first set of internal feature bits. */ -# define FEATURE_INDEX_1 0 - -/* The current maximum size of the feature integer bit array. */ -# define FEATURE_INDEX_MAX 1 - -#ifdef __ASSEMBLER__ - -# include <ifunc-defines.h> - -# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET -# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET - -# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE -# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE -# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE -# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE -# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE - -#else /* __ASSEMBLER__ */ - -# include <sys/param.h> - -enum - { - COMMON_CPUID_INDEX_1 = 0, - COMMON_CPUID_INDEX_7, - COMMON_CPUID_INDEX_80000001, /* for AMD */ - /* Keep the following line at the end. */ - COMMON_CPUID_INDEX_MAX - }; - -extern struct cpu_features -{ - enum cpu_features_kind - { - arch_kind_unknown = 0, - arch_kind_intel, - arch_kind_amd, - arch_kind_other - } kind; - int max_cpuid; - struct cpuid_registers - { - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - } cpuid[COMMON_CPUID_INDEX_MAX]; - unsigned int family; - unsigned int model; - unsigned int feature[FEATURE_INDEX_MAX]; -} __cpu_features attribute_hidden; - - -extern void __init_cpu_features (void) attribute_hidden; -# define INIT_ARCH() \ - do \ - if (__cpu_features.kind == arch_kind_unknown) \ - __init_cpu_features (); \ - while (0) - -/* Used from outside libc.so to get access to the CPU features structure. */ -extern const struct cpu_features *__get_cpu_features (void) - __attribute__ ((const)); - -# if IS_IN (libc) -# define __get_cpu_features() (&__cpu_features) -# endif - -# define HAS_CPU_FEATURE(idx, reg, bit) \ - ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0) - -/* Following are the feature tests used throughout libc. */ - -/* CPUID_* evaluates to true if the feature flag is enabled. - We always use &__cpu_features because the HAS_CPUID_* macros - are called only within __init_cpu_features, where we can't - call __get_cpu_features without infinite recursion. */ -# define HAS_CPUID_FLAG(idx, reg, bit) \ - (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0) - -# define CPUID_OSXSAVE \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE) -# define CPUID_AVX \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX) -# define CPUID_FMA \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA) -# define CPUID_FMA4 \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) -# define CPUID_RTM \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM) -# define CPUID_AVX2 \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) -# define CPUID_AVX512F \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512F) -# define CPUID_AVX512DQ \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512DQ) - -/* HAS_* evaluates to true if we may use the feature at runtime. */ -# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) -# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT) -# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3) -# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) -# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) -# define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM) - -# define index_Fast_Rep_String FEATURE_INDEX_1 -# define index_Fast_Copy_Backward FEATURE_INDEX_1 -# define index_Slow_BSF FEATURE_INDEX_1 -# define index_Fast_Unaligned_Load FEATURE_INDEX_1 -# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 -# define index_AVX_Usable FEATURE_INDEX_1 -# define index_FMA_Usable FEATURE_INDEX_1 -# define index_FMA4_Usable FEATURE_INDEX_1 -# define index_Slow_SSE4_2 FEATURE_INDEX_1 -# define index_AVX2_Usable FEATURE_INDEX_1 -# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 -# define index_AVX512F_Usable FEATURE_INDEX_1 -# define index_AVX512DQ_Usable FEATURE_INDEX_1 - -# define HAS_ARCH_FEATURE(name) \ - ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) - -# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) -# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) -# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) -# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) -# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable) -# define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable) -# define HAS_AVX512F HAS_ARCH_FEATURE (AVX512F_Usable) -# define HAS_AVX512DQ HAS_ARCH_FEATURE (AVX512DQ_Usable) -# define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable) -# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable) -# define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - -#endif /* __ASSEMBLER__ */ diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S index 533fece51a..786f87282c 100644 --- a/sysdeps/x86_64/multiarch/memcmp-sse4.S +++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S @@ -1,5 +1,5 @@ /* memcmp with SSE4.1, wmemcmp with SSE4.1 - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S index 948148b1cd..a22f399e02 100644 --- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S +++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S @@ -1,5 +1,5 @@ /* memcmp with SSSE3, wmemcmp with SSSE3 - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S index f8b46363d0..b5a1cc202e 100644 --- a/sysdeps/x86_64/multiarch/memcmp.S +++ b/sysdeps/x86_64/multiarch/memcmp.S @@ -1,6 +1,6 @@ /* Multiple versions of memcmp All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -26,16 +26,13 @@ .text ENTRY(memcmp) .type memcmp, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features - -1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + HAS_CPU_FEATURE (SSSE3) jnz 2f leaq __memcmp_sse2(%rip), %rax ret -2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) +2: HAS_CPU_FEATURE (SSE4_1) jz 3f leaq __memcmp_sse4_1(%rip), %rax ret diff --git a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S index 9f033f5456..74fed186e9 100644 --- a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S +++ b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S @@ -1,5 +1,5 @@ /* memcpy with AVX - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S new file mode 100644 index 0000000000..1bb12e81b0 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S @@ -0,0 +1,408 @@ +/* memcpy optimized with AVX512 for KNL hardware. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc) \ + && (defined SHARED \ + || defined USE_AS_MEMMOVE \ + || !defined USE_MULTIARCH) + +#include "asm-syntax.h" +#ifndef MEMCPY +# define MEMCPY __memcpy_avx512_no_vzeroupper +# define MEMCPY_CHK __memcpy_chk_avx512_no_vzeroupper +#endif + + .section .text,"ax",@progbits +#if !defined USE_AS_BCOPY +ENTRY (MEMCPY_CHK) + cmpq %rdx, %rcx + jb HIDDEN_JUMPTARGET (__chk_fail) +END (MEMCPY_CHK) +#endif + +ENTRY (MEMCPY) + mov %rdi, %rax +#ifdef USE_AS_MEMPCPY + add %rdx, %rax +#endif + lea (%rsi, %rdx), %rcx + lea (%rdi, %rdx), %r9 + cmp $512, %rdx + ja L(512bytesormore) + +L(check): + cmp $16, %rdx + jbe L(less_16bytes) + cmp $256, %rdx + jb L(less_256bytes) + vmovups (%rsi), %zmm0 + vmovups 0x40(%rsi), %zmm1 + vmovups 0x80(%rsi), %zmm2 + vmovups 0xC0(%rsi), %zmm3 + vmovups -0x100(%rcx), %zmm4 + vmovups -0xC0(%rcx), %zmm5 + vmovups -0x80(%rcx), %zmm6 + vmovups -0x40(%rcx), %zmm7 + vmovups %zmm0, (%rdi) + vmovups %zmm1, 0x40(%rdi) + vmovups %zmm2, 0x80(%rdi) + vmovups %zmm3, 0xC0(%rdi) + vmovups %zmm4, -0x100(%r9) + vmovups %zmm5, -0xC0(%r9) + vmovups %zmm6, -0x80(%r9) + vmovups %zmm7, -0x40(%r9) + ret + +L(less_256bytes): + cmp $128, %dl + jb L(less_128bytes) + vmovups (%rsi), %zmm0 + vmovups 0x40(%rsi), %zmm1 + vmovups -0x80(%rcx), %zmm2 + vmovups -0x40(%rcx), %zmm3 + vmovups %zmm0, (%rdi) + vmovups %zmm1, 0x40(%rdi) + vmovups %zmm2, -0x80(%r9) + vmovups %zmm3, -0x40(%r9) + ret + +L(less_128bytes): + cmp $64, %dl + jb L(less_64bytes) + vmovdqu (%rsi), %ymm0 + vmovdqu 0x20(%rsi), %ymm1 + vmovdqu -0x40(%rcx), %ymm2 + vmovdqu -0x20(%rcx), %ymm3 + vmovdqu %ymm0, (%rdi) + vmovdqu %ymm1, 0x20(%rdi) + vmovdqu %ymm2, -0x40(%r9) + vmovdqu %ymm3, -0x20(%r9) + ret + +L(less_64bytes): + cmp $32, %dl + jb L(less_32bytes) + vmovdqu (%rsi), %ymm0 + vmovdqu -0x20(%rcx), %ymm1 + vmovdqu %ymm0, (%rdi) + vmovdqu %ymm1, -0x20(%r9) + ret + +L(less_32bytes): + vmovdqu (%rsi), %xmm0 + vmovdqu -0x10(%rcx), %xmm1 + vmovdqu %xmm0, (%rdi) + vmovdqu %xmm1, -0x10(%r9) + ret + +L(less_16bytes): + cmp $8, %dl + jb L(less_8bytes) + movq (%rsi), %rsi + movq -0x8(%rcx), %rcx + movq %rsi, (%rdi) + movq %rcx, -0x8(%r9) + ret + +L(less_8bytes): + cmp $4, %dl + jb L(less_4bytes) + mov (%rsi), %esi + mov -0x4(%rcx), %ecx + mov %esi, (%rdi) + mov %ecx, -0x4(%r9) + ret + +L(less_4bytes): + cmp $2, %dl + jb L(less_2bytes) + mov (%rsi), %si + mov -0x2(%rcx), %cx + mov %si, (%rdi) + mov %cx, -0x2(%r9) + ret + +L(less_2bytes): + cmp $1, %dl + jb L(less_1bytes) + mov (%rsi), %cl + mov %cl, (%rdi) +L(less_1bytes): + ret + +L(512bytesormore): +#ifdef SHARED_CACHE_SIZE_HALF + mov $SHARED_CACHE_SIZE_HALF, %r8 +#else + mov __x86_shared_cache_size_half(%rip), %r8 +#endif + cmp %r8, %rdx + jae L(preloop_large) + cmp $1024, %rdx + ja L(1024bytesormore) + prefetcht1 (%rsi) + prefetcht1 0x40(%rsi) + prefetcht1 0x80(%rsi) + prefetcht1 0xC0(%rsi) + prefetcht1 0x100(%rsi) + prefetcht1 0x140(%rsi) + prefetcht1 0x180(%rsi) + prefetcht1 0x1C0(%rsi) + prefetcht1 -0x200(%rcx) + prefetcht1 -0x1C0(%rcx) + prefetcht1 -0x180(%rcx) + prefetcht1 -0x140(%rcx) + prefetcht1 -0x100(%rcx) + prefetcht1 -0xC0(%rcx) + prefetcht1 -0x80(%rcx) + prefetcht1 -0x40(%rcx) + vmovups (%rsi), %zmm0 + vmovups 0x40(%rsi), %zmm1 + vmovups 0x80(%rsi), %zmm2 + vmovups 0xC0(%rsi), %zmm3 + vmovups 0x100(%rsi), %zmm4 + vmovups 0x140(%rsi), %zmm5 + vmovups 0x180(%rsi), %zmm6 + vmovups 0x1C0(%rsi), %zmm7 + vmovups -0x200(%rcx), %zmm8 + vmovups -0x1C0(%rcx), %zmm9 + vmovups -0x180(%rcx), %zmm10 + vmovups -0x140(%rcx), %zmm11 + vmovups -0x100(%rcx), %zmm12 + vmovups -0xC0(%rcx), %zmm13 + vmovups -0x80(%rcx), %zmm14 + vmovups -0x40(%rcx), %zmm15 + vmovups %zmm0, (%rdi) + vmovups %zmm1, 0x40(%rdi) + vmovups %zmm2, 0x80(%rdi) + vmovups %zmm3, 0xC0(%rdi) + vmovups %zmm4, 0x100(%rdi) + vmovups %zmm5, 0x140(%rdi) + vmovups %zmm6, 0x180(%rdi) + vmovups %zmm7, 0x1C0(%rdi) + vmovups %zmm8, -0x200(%r9) + vmovups %zmm9, -0x1C0(%r9) + vmovups %zmm10, -0x180(%r9) + vmovups %zmm11, -0x140(%r9) + vmovups %zmm12, -0x100(%r9) + vmovups %zmm13, -0xC0(%r9) + vmovups %zmm14, -0x80(%r9) + vmovups %zmm15, -0x40(%r9) + ret + +L(1024bytesormore): + cmp %rsi, %rdi + ja L(1024bytesormore_bkw) + sub $512, %r9 + vmovups -0x200(%rcx), %zmm8 + vmovups -0x1C0(%rcx), %zmm9 + vmovups -0x180(%rcx), %zmm10 + vmovups -0x140(%rcx), %zmm11 + vmovups -0x100(%rcx), %zmm12 + vmovups -0xC0(%rcx), %zmm13 + vmovups -0x80(%rcx), %zmm14 + vmovups -0x40(%rcx), %zmm15 + prefetcht1 (%rsi) + prefetcht1 0x40(%rsi) + prefetcht1 0x80(%rsi) + prefetcht1 0xC0(%rsi) + prefetcht1 0x100(%rsi) + prefetcht1 0x140(%rsi) + prefetcht1 0x180(%rsi) + prefetcht1 0x1C0(%rsi) + +/* Loop with unaligned memory access. */ +L(gobble_512bytes_loop): + vmovups (%rsi), %zmm0 + vmovups 0x40(%rsi), %zmm1 + vmovups 0x80(%rsi), %zmm2 + vmovups 0xC0(%rsi), %zmm3 + vmovups 0x100(%rsi), %zmm4 + vmovups 0x140(%rsi), %zmm5 + vmovups 0x180(%rsi), %zmm6 + vmovups 0x1C0(%rsi), %zmm7 + add $512, %rsi + prefetcht1 (%rsi) + prefetcht1 0x40(%rsi) + prefetcht1 0x80(%rsi) + prefetcht1 0xC0(%rsi) + prefetcht1 0x100(%rsi) + prefetcht1 0x140(%rsi) + prefetcht1 0x180(%rsi) + prefetcht1 0x1C0(%rsi) + vmovups %zmm0, (%rdi) + vmovups %zmm1, 0x40(%rdi) + vmovups %zmm2, 0x80(%rdi) + vmovups %zmm3, 0xC0(%rdi) + vmovups %zmm4, 0x100(%rdi) + vmovups %zmm5, 0x140(%rdi) + vmovups %zmm6, 0x180(%rdi) + vmovups %zmm7, 0x1C0(%rdi) + add $512, %rdi + cmp %r9, %rdi + jb L(gobble_512bytes_loop) + vmovups %zmm8, (%r9) + vmovups %zmm9, 0x40(%r9) + vmovups %zmm10, 0x80(%r9) + vmovups %zmm11, 0xC0(%r9) + vmovups %zmm12, 0x100(%r9) + vmovups %zmm13, 0x140(%r9) + vmovups %zmm14, 0x180(%r9) + vmovups %zmm15, 0x1C0(%r9) + ret + +L(1024bytesormore_bkw): + add $512, %rdi + vmovups 0x1C0(%rsi), %zmm8 + vmovups 0x180(%rsi), %zmm9 + vmovups 0x140(%rsi), %zmm10 + vmovups 0x100(%rsi), %zmm11 + vmovups 0xC0(%rsi), %zmm12 + vmovups 0x80(%rsi), %zmm13 + vmovups 0x40(%rsi), %zmm14 + vmovups (%rsi), %zmm15 + prefetcht1 -0x40(%rcx) + prefetcht1 -0x80(%rcx) + prefetcht1 -0xC0(%rcx) + prefetcht1 -0x100(%rcx) + prefetcht1 -0x140(%rcx) + prefetcht1 -0x180(%rcx) + prefetcht1 -0x1C0(%rcx) + prefetcht1 -0x200(%rcx) + +/* Backward loop with unaligned memory access. */ +L(gobble_512bytes_loop_bkw): + vmovups -0x40(%rcx), %zmm0 + vmovups -0x80(%rcx), %zmm1 + vmovups -0xC0(%rcx), %zmm2 + vmovups -0x100(%rcx), %zmm3 + vmovups -0x140(%rcx), %zmm4 + vmovups -0x180(%rcx), %zmm5 + vmovups -0x1C0(%rcx), %zmm6 + vmovups -0x200(%rcx), %zmm7 + sub $512, %rcx + prefetcht1 -0x40(%rcx) + prefetcht1 -0x80(%rcx) + prefetcht1 -0xC0(%rcx) + prefetcht1 -0x100(%rcx) + prefetcht1 -0x140(%rcx) + prefetcht1 -0x180(%rcx) + prefetcht1 -0x1C0(%rcx) + prefetcht1 -0x200(%rcx) + vmovups %zmm0, -0x40(%r9) + vmovups %zmm1, -0x80(%r9) + vmovups %zmm2, -0xC0(%r9) + vmovups %zmm3, -0x100(%r9) + vmovups %zmm4, -0x140(%r9) + vmovups %zmm5, -0x180(%r9) + vmovups %zmm6, -0x1C0(%r9) + vmovups %zmm7, -0x200(%r9) + sub $512, %r9 + cmp %rdi, %r9 + ja L(gobble_512bytes_loop_bkw) + vmovups %zmm8, -0x40(%rdi) + vmovups %zmm9, -0x80(%rdi) + vmovups %zmm10, -0xC0(%rdi) + vmovups %zmm11, -0x100(%rdi) + vmovups %zmm12, -0x140(%rdi) + vmovups %zmm13, -0x180(%rdi) + vmovups %zmm14, -0x1C0(%rdi) + vmovups %zmm15, -0x200(%rdi) + ret + +L(preloop_large): + cmp %rsi, %rdi + ja L(preloop_large_bkw) + vmovups (%rsi), %zmm4 + vmovups 0x40(%rsi), %zmm5 + +/* Align destination for access with non-temporal stores in the loop. */ + mov %rdi, %r8 + and $-0x80, %rdi + add $0x80, %rdi + sub %rdi, %r8 + sub %r8, %rsi + add %r8, %rdx +L(gobble_256bytes_nt_loop): + prefetcht1 0x200(%rsi) + prefetcht1 0x240(%rsi) + prefetcht1 0x280(%rsi) + prefetcht1 0x2C0(%rsi) + prefetcht1 0x300(%rsi) + prefetcht1 0x340(%rsi) + prefetcht1 0x380(%rsi) + prefetcht1 0x3C0(%rsi) + vmovdqu64 (%rsi), %zmm0 + vmovdqu64 0x40(%rsi), %zmm1 + vmovdqu64 0x80(%rsi), %zmm2 + vmovdqu64 0xC0(%rsi), %zmm3 + vmovntdq %zmm0, (%rdi) + vmovntdq %zmm1, 0x40(%rdi) + vmovntdq %zmm2, 0x80(%rdi) + vmovntdq %zmm3, 0xC0(%rdi) + sub $256, %rdx + add $256, %rsi + add $256, %rdi + cmp $256, %rdx + ja L(gobble_256bytes_nt_loop) + sfence + vmovups %zmm4, (%rax) + vmovups %zmm5, 0x40(%rax) + jmp L(check) + +L(preloop_large_bkw): + vmovups -0x80(%rcx), %zmm4 + vmovups -0x40(%rcx), %zmm5 + +/* Align end of destination for access with non-temporal stores. */ + mov %r9, %r8 + and $-0x80, %r9 + sub %r9, %r8 + sub %r8, %rcx + sub %r8, %rdx + add %r9, %r8 +L(gobble_256bytes_nt_loop_bkw): + prefetcht1 -0x400(%rcx) + prefetcht1 -0x3C0(%rcx) + prefetcht1 -0x380(%rcx) + prefetcht1 -0x340(%rcx) + prefetcht1 -0x300(%rcx) + prefetcht1 -0x2C0(%rcx) + prefetcht1 -0x280(%rcx) + prefetcht1 -0x240(%rcx) + vmovdqu64 -0x100(%rcx), %zmm0 + vmovdqu64 -0xC0(%rcx), %zmm1 + vmovdqu64 -0x80(%rcx), %zmm2 + vmovdqu64 -0x40(%rcx), %zmm3 + vmovntdq %zmm0, -0x100(%r9) + vmovntdq %zmm1, -0xC0(%r9) + vmovntdq %zmm2, -0x80(%r9) + vmovntdq %zmm3, -0x40(%r9) + sub $256, %rdx + sub $256, %rcx + sub $256, %r9 + cmp $256, %rdx + ja L(gobble_256bytes_nt_loop_bkw) + sfence + vmovups %zmm4, -0x80(%r8) + vmovups %zmm5, -0x40(%r8) + jmp L(check) +END (MEMCPY) +#endif diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S index c5450af25a..c4509831fa 100644 --- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S @@ -1,5 +1,5 @@ /* memcpy with unaliged loads - Copyright (C) 2013-2015 Free Software Foundation, Inc. + Copyright (C) 2013-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,6 +16,8 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#if IS_IN (libc) + #include <sysdep.h> #include "asm-syntax.h" @@ -169,3 +171,5 @@ L(between_5_8): movl %eax, -4(%rdi,%rdx) jmp L(return) END(__memcpy_sse2_unaligned) + +#endif diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S index 30e0d1c575..08b41e9e5a 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S @@ -1,5 +1,5 @@ /* memcpy with SSSE3 and REP string - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S index 33cc493dd4..95de9695f9 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S @@ -1,5 +1,5 @@ /* memcpy with SSSE3 - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index 4e18cd3070..64a1bcd137 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -1,6 +1,6 @@ /* Multiple versions of memcpy All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -29,22 +29,28 @@ .text ENTRY(__new_memcpy) .type __new_memcpy, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features + LOAD_RTLD_GLOBAL_RO_RDX +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + jz 1f + HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + jz 1f + leaq __memcpy_avx512_no_vzeroupper(%rip), %rax + ret +#endif 1: leaq __memcpy_avx_unaligned(%rip), %rax - testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip) - jz 1f + HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) + jz 2f ret -1: leaq __memcpy_sse2(%rip), %rax - testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) - jnz 2f +2: leaq __memcpy_sse2(%rip), %rax + HAS_ARCH_FEATURE (Slow_BSF) + jnz 3f leaq __memcpy_sse2_unaligned(%rip), %rax ret -2: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) - jz 3f +3: HAS_CPU_FEATURE (SSSE3) + jz 4f leaq __memcpy_ssse3(%rip), %rax -3: ret +4: ret END(__new_memcpy) # undef ENTRY diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S index 1e756ea0c2..648217e971 100644 --- a/sysdeps/x86_64/multiarch/memcpy_chk.S +++ b/sysdeps/x86_64/multiarch/memcpy_chk.S @@ -1,6 +1,6 @@ /* Multiple versions of __memcpy_chk All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -29,17 +29,23 @@ .text ENTRY(__memcpy_chk) .type __memcpy_chk, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features + LOAD_RTLD_GLOBAL_RO_RDX +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + jz 1f + HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + jz 1f + leaq __memcpy_chk_avx512_no_vzeroupper(%rip), %rax + ret +#endif 1: leaq __memcpy_chk_sse2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + HAS_CPU_FEATURE (SSSE3) jz 2f leaq __memcpy_chk_ssse3(%rip), %rax - testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) + HAS_ARCH_FEATURE (Fast_Copy_Backward) jz 2f leaq __memcpy_chk_ssse3_back(%rip), %rax - testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip) + HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) jz 2f leaq __memcpy_chk_avx_unaligned(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S index 01eac94889..75e35f2957 100644 --- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S +++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S @@ -1,5 +1,5 @@ /* memmove with AVX - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S new file mode 100644 index 0000000000..518d1fec35 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S @@ -0,0 +1,22 @@ +/* memmove optimized with AVX512 for KNL hardware. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_MEMMOVE +#define MEMCPY __memmove_avx512_no_vzeroupper +#define MEMCPY_CHK __memmove_chk_avx512_no_vzeroupper +#include "memcpy-avx512-no-vzeroupper.S" diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c index dd153a3eaa..8da5640bb0 100644 --- a/sysdeps/x86_64/multiarch/memmove.c +++ b/sysdeps/x86_64/multiarch/memmove.c @@ -1,6 +1,6 @@ /* Multiple versions of memmove. All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -36,6 +36,9 @@ extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden; extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden; +# ifdef HAVE_AVX512_ASM_SUPPORT + extern __typeof (__redirect_memmove) __memmove_avx512_no_vzeroupper attribute_hidden; +# endif #endif @@ -49,12 +52,18 @@ extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden; ifunc symbol properly. */ extern __typeof (__redirect_memmove) __libc_memmove; libc_ifunc (__libc_memmove, - HAS_AVX_FAST_UNALIGNED_LOAD +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + ? __memmove_avx512_no_vzeroupper + : +#endif + (HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) ? __memmove_avx_unaligned - : (HAS_SSSE3 - ? (HAS_FAST_COPY_BACKWARD + : (HAS_CPU_FEATURE (SSSE3) + ? (HAS_ARCH_FEATURE (Fast_Copy_Backward) ? __memmove_ssse3_back : __memmove_ssse3) - : __memmove_sse2)); + : __memmove_sse2))); strong_alias (__libc_memmove, memmove) diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c index 8b12d002dc..f64da63180 100644 --- a/sysdeps/x86_64/multiarch/memmove_chk.c +++ b/sysdeps/x86_64/multiarch/memmove_chk.c @@ -1,6 +1,6 @@ /* Multiple versions of __memmove_chk. All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -26,12 +26,21 @@ extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden; extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden; +# ifdef HAVE_AVX512_ASM_SUPPORT + extern __typeof (__memmove_chk) __memmove_chk_avx512_no_vzeroupper attribute_hidden; +# endif #include "debug/memmove_chk.c" libc_ifunc (__memmove_chk, - HAS_AVX_FAST_UNALIGNED_LOAD ? __memmove_chk_avx_unaligned : - (HAS_SSSE3 - ? (HAS_FAST_COPY_BACKWARD +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + ? __memmove_chk_avx512_no_vzeroupper + : +#endif + HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) ? __memmove_chk_avx_unaligned : + (HAS_CPU_FEATURE (SSSE3) + ? (HAS_ARCH_FEATURE (Fast_Copy_Backward) ? __memmove_chk_ssse3_back : __memmove_chk_ssse3) : __memmove_chk_sse2)); diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S index 128ff832fb..241378e770 100644 --- a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S +++ b/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S @@ -1,5 +1,5 @@ /* mempcpy with AVX - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S new file mode 100644 index 0000000000..fcc0945ea7 --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S @@ -0,0 +1,22 @@ +/* mempcpy optimized with AVX512 for KNL hardware. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_MEMPCPY +#define MEMCPY __mempcpy_avx512_no_vzeroupper +#define MEMCPY_CHK __mempcpy_chk_avx512_no_vzeroupper +#include "memcpy-avx512-no-vzeroupper.S" diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S index 2eaacdf049..ed78623565 100644 --- a/sysdeps/x86_64/multiarch/mempcpy.S +++ b/sysdeps/x86_64/multiarch/mempcpy.S @@ -1,6 +1,6 @@ /* Multiple versions of mempcpy All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -27,17 +27,23 @@ #if defined SHARED && IS_IN (libc) ENTRY(__mempcpy) .type __mempcpy, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features + LOAD_RTLD_GLOBAL_RO_RDX +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + jz 1f + HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + jz 1f + leaq __mempcpy_avx512_no_vzeroupper(%rip), %rax + ret +#endif 1: leaq __mempcpy_sse2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + HAS_CPU_FEATURE (SSSE3) jz 2f leaq __mempcpy_ssse3(%rip), %rax - testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) + HAS_ARCH_FEATURE (Fast_Copy_Backward) jz 2f leaq __mempcpy_ssse3_back(%rip), %rax - testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip) + HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) jz 2f leaq __mempcpy_avx_unaligned(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S index 17b84701b0..6e8a89d38c 100644 --- a/sysdeps/x86_64/multiarch/mempcpy_chk.S +++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S @@ -1,6 +1,6 @@ /* Multiple versions of __mempcpy_chk All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -29,17 +29,23 @@ .text ENTRY(__mempcpy_chk) .type __mempcpy_chk, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features + LOAD_RTLD_GLOBAL_RO_RDX +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + jz 1f + HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + jz 1f + leaq __mempcpy_chk_avx512_no_vzeroupper(%rip), %rax + ret +#endif 1: leaq __mempcpy_chk_sse2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + HAS_CPU_FEATURE (SSSE3) jz 2f leaq __mempcpy_chk_ssse3(%rip), %rax - testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) + HAS_ARCH_FEATURE (Fast_Copy_Backward) jz 2f leaq __mempcpy_chk_ssse3_back(%rip), %rax - testl $bit_AVX_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_AVX_Fast_Unaligned_Load(%rip) + HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) jz 2f leaq __mempcpy_chk_avx_unaligned(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/memset-avx2.S b/sysdeps/x86_64/multiarch/memset-avx2.S index 28eabade35..df634728d4 100644 --- a/sysdeps/x86_64/multiarch/memset-avx2.S +++ b/sysdeps/x86_64/multiarch/memset-avx2.S @@ -1,5 +1,5 @@ /* memset with AVX2 - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S new file mode 100644 index 0000000000..1e638d7ac2 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S @@ -0,0 +1,194 @@ +/* memset optimized with AVX512 for KNL hardware. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc) + +#include "asm-syntax.h" +#ifndef MEMSET +# define MEMSET __memset_avx512_no_vzeroupper +# define MEMSET_CHK __memset_chk_avx512_no_vzeroupper +#endif + + .section .text,"ax",@progbits +#if defined PIC +ENTRY (MEMSET_CHK) + cmpq %rdx, %rcx + jb HIDDEN_JUMPTARGET (__chk_fail) +END (MEMSET_CHK) +#endif + +ENTRY (MEMSET) + vpxor %xmm0, %xmm0, %xmm0 + vmovd %esi, %xmm1 + lea (%rdi, %rdx), %rsi + mov %rdi, %rax + vpshufb %xmm0, %xmm1, %xmm0 + cmp $16, %rdx + jb L(less_16bytes) + cmp $512, %rdx + vbroadcastss %xmm0, %zmm2 + ja L(512bytesormore) + cmp $256, %rdx + jb L(less_256bytes) + vmovups %zmm2, (%rdi) + vmovups %zmm2, 0x40(%rdi) + vmovups %zmm2, 0x80(%rdi) + vmovups %zmm2, 0xC0(%rdi) + vmovups %zmm2, -0x100(%rsi) + vmovups %zmm2, -0xC0(%rsi) + vmovups %zmm2, -0x80(%rsi) + vmovups %zmm2, -0x40(%rsi) + ret + +L(less_256bytes): + cmp $128, %dl + jb L(less_128bytes) + vmovups %zmm2, (%rdi) + vmovups %zmm2, 0x40(%rdi) + vmovups %zmm2, -0x80(%rsi) + vmovups %zmm2, -0x40(%rsi) + ret + +L(less_128bytes): + cmp $64, %dl + jb L(less_64bytes) + vmovups %zmm2, (%rdi) + vmovups %zmm2, -0x40(%rsi) + ret + +L(less_64bytes): + cmp $32, %dl + jb L(less_32bytes) + vmovdqu %ymm2, (%rdi) + vmovdqu %ymm2, -0x20(%rsi) + ret + +L(less_32bytes): + vmovdqu %xmm0, (%rdi) + vmovdqu %xmm0, -0x10(%rsi) + ret + +L(less_16bytes): + cmp $8, %dl + jb L(less_8bytes) + vmovq %xmm0, (%rdi) + vmovq %xmm0, -0x08(%rsi) + ret + +L(less_8bytes): + vmovd %xmm0, %ecx + cmp $4, %dl + jb L(less_4bytes) + mov %ecx, (%rdi) + mov %ecx, -0x04(%rsi) + ret + +L(less_4bytes): + cmp $2, %dl + jb L(less_2bytes) + mov %cx, (%rdi) + mov %cx, -0x02(%rsi) + ret + +L(less_2bytes): + cmp $1, %dl + jb L(less_1bytes) + mov %cl, (%rdi) +L(less_1bytes): + ret + +L(512bytesormore): + mov __x86_shared_cache_size_half(%rip), %rcx + cmp %rcx, %rdx + ja L(preloop_large) + cmp $1024, %rdx + ja L(1024bytesormore) + + vmovups %zmm2, (%rdi) + vmovups %zmm2, 0x40(%rdi) + vmovups %zmm2, 0x80(%rdi) + vmovups %zmm2, 0xC0(%rdi) + vmovups %zmm2, 0x100(%rdi) + vmovups %zmm2, 0x140(%rdi) + vmovups %zmm2, 0x180(%rdi) + vmovups %zmm2, 0x1C0(%rdi) + vmovups %zmm2, -0x200(%rsi) + vmovups %zmm2, -0x1C0(%rsi) + vmovups %zmm2, -0x180(%rsi) + vmovups %zmm2, -0x140(%rsi) + vmovups %zmm2, -0x100(%rsi) + vmovups %zmm2, -0xC0(%rsi) + vmovups %zmm2, -0x80(%rsi) + vmovups %zmm2, -0x40(%rsi) + ret + +/* Align on 64 and loop with aligned stores. */ +L(1024bytesormore): + sub $0x100, %rsi + vmovups %zmm2, (%rax) + and $-0x40, %rdi + add $0x40, %rdi + +L(gobble_256bytes_loop): + vmovaps %zmm2, (%rdi) + vmovaps %zmm2, 0x40(%rdi) + vmovaps %zmm2, 0x80(%rdi) + vmovaps %zmm2, 0xC0(%rdi) + add $0x100, %rdi + cmp %rsi, %rdi + jb L(gobble_256bytes_loop) + vmovups %zmm2, (%rsi) + vmovups %zmm2, 0x40(%rsi) + vmovups %zmm2, 0x80(%rsi) + vmovups %zmm2, 0xC0(%rsi) + ret + +/* Align on 128 and loop with non-temporal stores. */ +L(preloop_large): + and $-0x80, %rdi + add $0x80, %rdi + vmovups %zmm2, (%rax) + vmovups %zmm2, 0x40(%rax) + sub $0x200, %rsi + +L(gobble_512bytes_nt_loop): + vmovntdq %zmm2, (%rdi) + vmovntdq %zmm2, 0x40(%rdi) + vmovntdq %zmm2, 0x80(%rdi) + vmovntdq %zmm2, 0xC0(%rdi) + vmovntdq %zmm2, 0x100(%rdi) + vmovntdq %zmm2, 0x140(%rdi) + vmovntdq %zmm2, 0x180(%rdi) + vmovntdq %zmm2, 0x1C0(%rdi) + add $0x200, %rdi + cmp %rsi, %rdi + jb L(gobble_512bytes_nt_loop) + sfence + vmovups %zmm2, (%rsi) + vmovups %zmm2, 0x40(%rsi) + vmovups %zmm2, 0x80(%rsi) + vmovups %zmm2, 0xC0(%rsi) + vmovups %zmm2, 0x100(%rsi) + vmovups %zmm2, 0x140(%rsi) + vmovups %zmm2, 0x180(%rsi) + vmovups %zmm2, 0x1C0(%rsi) + ret +END (MEMSET) +#endif diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S index c5f1fb340e..8e3b9b9764 100644 --- a/sysdeps/x86_64/multiarch/memset.S +++ b/sysdeps/x86_64/multiarch/memset.S @@ -1,6 +1,6 @@ /* Multiple versions of memset All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,45 +17,48 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#ifdef HAVE_AVX2_SUPPORT #include <sysdep.h> #include <shlib-compat.h> #include <init-arch.h> /* Define multiple versions only for the definition in lib. */ -# if IS_IN (libc) +#if IS_IN (libc) ENTRY(memset) .type memset, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __memset_sse2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq __memset_sse2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f leaq __memset_avx2(%rip), %rax +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + jz 2f + HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + jz 2f + leaq __memset_avx512_no_vzeroupper(%rip), %rax +#endif 2: ret END(memset) -# endif +#endif -# if IS_IN (libc) -# undef memset -# define memset __memset_sse2 +#if IS_IN (libc) +# undef memset +# define memset __memset_sse2 -# undef __memset_chk -# define __memset_chk __memset_chk_sse2 +# undef __memset_chk +# define __memset_chk __memset_chk_sse2 -# ifdef SHARED -# undef libc_hidden_builtin_def +# ifdef SHARED +# undef libc_hidden_builtin_def /* It doesn't make sense to send libc-internal memset calls through a PLT. The speedup we get from using GPR instruction is likely eaten away by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ +# define libc_hidden_builtin_def(name) \ .globl __GI_memset; __GI_memset = __memset_sse2 -# endif - -# undef strong_alias -# define strong_alias(original, alias) # endif + +# undef strong_alias +# define strong_alias(original, alias) #endif #include "../memset.S" diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S index 64fed3118a..9a7b270274 100644 --- a/sysdeps/x86_64/multiarch/memset_chk.S +++ b/sysdeps/x86_64/multiarch/memset_chk.S @@ -1,6 +1,6 @@ /* Multiple versions of memset_chk All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,16 +22,21 @@ /* Define multiple versions only for the definition in lib. */ #if IS_IN (libc) -# if defined SHARED && defined HAVE_AVX2_SUPPORT +# ifdef SHARED ENTRY(__memset_chk) .type __memset_chk, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __memset_chk_sse2(%rip), %rax - testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq __memset_chk_sse2(%rip), %rax + HAS_ARCH_FEATURE (AVX2_Usable) jz 2f leaq __memset_chk_avx2(%rip), %rax +#ifdef HAVE_AVX512_ASM_SUPPORT + HAS_ARCH_FEATURE (AVX512F_Usable) + jz 2f + HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) + jz 2f + leaq __memset_chk_avx512_no_vzeroupper(%rip), %rax +#endif 2: ret END(__memset_chk) diff --git a/sysdeps/x86_64/multiarch/rtld-memcmp.c b/sysdeps/x86_64/multiarch/rtld-memcmp.c deleted file mode 100644 index 0f271356c2..0000000000 --- a/sysdeps/x86_64/multiarch/rtld-memcmp.c +++ /dev/null @@ -1 +0,0 @@ -#include "../rtld-memcmp.c" diff --git a/sysdeps/x86_64/multiarch/rtld-memset.S b/sysdeps/x86_64/multiarch/rtld-memset.S deleted file mode 100644 index 8092aa07da..0000000000 --- a/sysdeps/x86_64/multiarch/rtld-memset.S +++ /dev/null @@ -1 +0,0 @@ -#include "../rtld-memset.S" diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c index 72ad7b01a8..b75aeb79b2 100644 --- a/sysdeps/x86_64/multiarch/sched_cpucount.c +++ b/sysdeps/x86_64/multiarch/sched_cpucount.c @@ -1,6 +1,6 @@ /* Count bits in CPU set. x86-64 multi-arch version. This file is part of the GNU C Library. - Copyright (C) 2008-2015 Free Software Foundation, Inc. + Copyright (C) 2008-2016 Free Software Foundation, Inc. Contributed by Ulrich Drepper <drepper@redhat.com>. The GNU C Library is free software; you can redistribute it and/or @@ -33,4 +33,4 @@ #undef __sched_cpucount libc_ifunc (__sched_cpucount, - HAS_POPCOUNT ? popcount_cpucount : generic_cpucount); + HAS_CPU_FEATURE (POPCOUNT) ? popcount_cpucount : generic_cpucount); diff --git a/sysdeps/x86_64/multiarch/strcasestr.c b/sysdeps/x86_64/multiarch/strcasestr.c deleted file mode 100644 index 834e656a2c..0000000000 --- a/sysdeps/x86_64/multiarch/strcasestr.c +++ /dev/null @@ -1,13 +0,0 @@ -/* Multiple versions of strcasestr - All versions must be listed in ifunc-impl-list.c. */ - -#include "init-arch.h" - -#define STRCASESTR __strcasestr_sse2 - -#include "string/strcasestr.c" - -extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden; - -libc_ifunc (__strcasestr, - __strcasestr_sse2); diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S index 81f1b40ef6..3a694d45c2 100644 --- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S @@ -1,5 +1,5 @@ /* strcat with SSE2 - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S index d7b990725a..96184d0f0f 100644 --- a/sysdeps/x86_64/multiarch/strcat-ssse3.S +++ b/sysdeps/x86_64/multiarch/strcat-ssse3.S @@ -1,5 +1,5 @@ /* strcat with SSSE3 - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S index 44993fade5..7bb38e68ad 100644 --- a/sysdeps/x86_64/multiarch/strcat.S +++ b/sysdeps/x86_64/multiarch/strcat.S @@ -1,6 +1,6 @@ /* Multiple versions of strcat All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -47,14 +47,12 @@ .text ENTRY(STRCAT) .type STRCAT, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq STRCAT_SSE2_UNALIGNED(%rip), %rax - testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq STRCAT_SSE2_UNALIGNED(%rip), %rax + HAS_ARCH_FEATURE (Fast_Unaligned_Load) jnz 2f leaq STRCAT_SSE2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + HAS_CPU_FEATURE (SSSE3) jz 2f leaq STRCAT_SSSE3(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S index 0398650a01..979d112b28 100644 --- a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S +++ b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S @@ -1,5 +1,5 @@ /* strchr with SSE2 without bsf - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S index af55fac398..40683ad32b 100644 --- a/sysdeps/x86_64/multiarch/strchr.S +++ b/sysdeps/x86_64/multiarch/strchr.S @@ -1,5 +1,5 @@ /* Multiple versions of strchr - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -25,11 +25,9 @@ .text ENTRY(strchr) .type strchr, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __strchr_sse2(%rip), %rax -2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq __strchr_sse2(%rip), %rax +2: HAS_ARCH_FEATURE (Slow_BSF) jz 3f leaq __strchr_sse2_no_bsf(%rip), %rax 3: ret diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S index 20b65fa775..bf555b4066 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S @@ -1,5 +1,5 @@ /* strcmp with unaligned loads - Copyright (C) 2013-2015 Free Software Foundation, Inc. + Copyright (C) 2013-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,6 +16,8 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#if IS_IN (libc) + #include "sysdep.h" ENTRY ( __strcmp_sse2_unaligned) @@ -207,3 +209,5 @@ L(different): subl %ecx, %eax ret END (__strcmp_sse2_unaligned) + +#endif diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S index 4dff0a564b..70df84ae32 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse42.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S @@ -1,5 +1,5 @@ /* strcmp with SSE4.2 - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S index f50f26c393..0e4a113f61 100644 --- a/sysdeps/x86_64/multiarch/strcmp.S +++ b/sysdeps/x86_64/multiarch/strcmp.S @@ -1,5 +1,5 @@ /* Multiple versions of strcmp - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -84,24 +84,20 @@ .text ENTRY(STRCMP) .type STRCMP, @gnu_indirect_function - /* Manually inlined call to __get_cpu_features. */ - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: + LOAD_RTLD_GLOBAL_RO_RDX #ifdef USE_AS_STRCMP leaq __strcmp_sse2_unaligned(%rip), %rax - testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip) + HAS_ARCH_FEATURE (Fast_Unaligned_Load) jnz 3f #else - testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip) + HAS_ARCH_FEATURE (Slow_SSE4_2) jnz 2f leaq STRCMP_SSE42(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + HAS_CPU_FEATURE (SSE4_2) jnz 3f #endif 2: leaq STRCMP_SSSE3(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + HAS_CPU_FEATURE (SSSE3) jnz 3f leaq STRCMP_SSE2(%rip), %rax 3: ret @@ -110,23 +106,17 @@ END(STRCMP) # ifdef USE_AS_STRCASECMP_L ENTRY(__strcasecmp) .type __strcasecmp, @gnu_indirect_function - /* Manually inlined call to __get_cpu_features. */ - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: -# ifdef HAVE_AVX_SUPPORT + LOAD_RTLD_GLOBAL_RO_RDX leaq __strcasecmp_avx(%rip), %rax - testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) + HAS_ARCH_FEATURE (AVX_Usable) jnz 3f -# endif - testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip) + HAS_ARCH_FEATURE (Slow_SSE4_2) jnz 2f leaq __strcasecmp_sse42(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + HAS_CPU_FEATURE (SSE4_2) jnz 3f 2: leaq __strcasecmp_ssse3(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + HAS_CPU_FEATURE (SSSE3) jnz 3f leaq __strcasecmp_sse2(%rip), %rax 3: ret @@ -136,23 +126,17 @@ weak_alias (__strcasecmp, strcasecmp) # ifdef USE_AS_STRNCASECMP_L ENTRY(__strncasecmp) .type __strncasecmp, @gnu_indirect_function - /* Manually inlined call to __get_cpu_features. */ - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: -# ifdef HAVE_AVX_SUPPORT + LOAD_RTLD_GLOBAL_RO_RDX leaq __strncasecmp_avx(%rip), %rax - testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) + HAS_ARCH_FEATURE (AVX_Usable) jnz 3f -# endif - testl $bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip) + HAS_ARCH_FEATURE (Slow_SSE4_2) jnz 2f leaq __strncasecmp_sse42(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + HAS_CPU_FEATURE (SSE4_2) jnz 3f 2: leaq __strncasecmp_ssse3(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + HAS_CPU_FEATURE (SSSE3) jnz 3f leaq __strncasecmp_sse2(%rip), %rax 3: ret @@ -167,16 +151,14 @@ weak_alias (__strncasecmp, strncasecmp) # include "strcmp-sse42.S" -# ifdef HAVE_AVX_SUPPORT -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define LABEL(l) .L##l##_avx -# define GLABEL(l) l##_avx -# define USE_AVX 1 -# undef STRCMP_SSE42 -# define STRCMP_SSE42 STRCMP_AVX -# define SECTION avx -# include "strcmp-sse42.S" -# endif +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# define LABEL(l) .L##l##_avx +# define GLABEL(l) l##_avx +# define USE_AVX 1 +# undef STRCMP_SSE42 +# define STRCMP_SSE42 STRCMP_AVX +# define SECTION avx +# include "strcmp-sse42.S" # endif diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S index 8f03d1db24..caa74be2c2 100644 --- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S @@ -1,5 +1,5 @@ /* strcpy with SSE2 and unaligned load - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/sysdeps/x86_64/multiarch/strcpy-ssse3.S index 1f22c9a918..5bdb7671cf 100644 --- a/sysdeps/x86_64/multiarch/strcpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/strcpy-ssse3.S @@ -1,5 +1,5 @@ /* strcpy with SSSE3 - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S index 9464ee8b63..024f6ef899 100644 --- a/sysdeps/x86_64/multiarch/strcpy.S +++ b/sysdeps/x86_64/multiarch/strcpy.S @@ -1,6 +1,6 @@ /* Multiple versions of strcpy All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -61,14 +61,12 @@ .text ENTRY(STRCPY) .type STRCPY, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq STRCPY_SSE2_UNALIGNED(%rip), %rax - testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq STRCPY_SSE2_UNALIGNED(%rip), %rax + HAS_ARCH_FEATURE (Fast_Unaligned_Load) jnz 2f leaq STRCPY_SSE2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + HAS_CPU_FEATURE (SSSE3) jz 2f leaq STRCPY_SSSE3(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c index 60b2ed7a3f..91b804ddd6 100644 --- a/sysdeps/x86_64/multiarch/strcspn-c.c +++ b/sysdeps/x86_64/multiarch/strcspn-c.c @@ -1,5 +1,5 @@ /* strcspn with SSE4.2 intrinsics - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strcspn.S b/sysdeps/x86_64/multiarch/strcspn.S index 95e882c443..8e7ff1c663 100644 --- a/sysdeps/x86_64/multiarch/strcspn.S +++ b/sysdeps/x86_64/multiarch/strcspn.S @@ -1,6 +1,6 @@ /* Multiple versions of strcspn All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -19,9 +19,6 @@ <http://www.gnu.org/licenses/>. */ #include <config.h> - -#ifdef HAVE_SSE4_SUPPORT - #include <sysdep.h> #include <init-arch.h> @@ -45,11 +42,9 @@ .text ENTRY(STRCSPN) .type STRCSPN, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq STRCSPN_SSE2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq STRCSPN_SSE2(%rip), %rax + HAS_CPU_FEATURE (SSE4_2) jz 2f leaq STRCSPN_SSE42(%rip), %rax 2: ret @@ -66,7 +61,6 @@ END(STRCSPN) # define END(name) \ cfi_endproc; .size STRCSPN_SSE2, .-STRCSPN_SSE2 #endif -#endif /* HAVE_SSE4_SUPPORT */ #ifdef USE_AS_STRPBRK #include "../strpbrk.S" diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c index 6b0c80aa43..9675f9360e 100644 --- a/sysdeps/x86_64/multiarch/strspn-c.c +++ b/sysdeps/x86_64/multiarch/strspn-c.c @@ -1,5 +1,5 @@ /* strspn with SSE4.2 intrinsics - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strspn.S b/sysdeps/x86_64/multiarch/strspn.S index b734c1729a..4942826b24 100644 --- a/sysdeps/x86_64/multiarch/strspn.S +++ b/sysdeps/x86_64/multiarch/strspn.S @@ -1,6 +1,6 @@ /* Multiple versions of strspn All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -19,9 +19,6 @@ <http://www.gnu.org/licenses/>. */ #include <config.h> - -#ifdef HAVE_SSE4_SUPPORT - #include <sysdep.h> #include <init-arch.h> @@ -30,11 +27,9 @@ .text ENTRY(strspn) .type strspn, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __strspn_sse2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + leaq __strspn_sse2(%rip), %rax + HAS_CPU_FEATURE (SSE4_2) jz 2f leaq __strspn_sse42(%rip), %rax 2: ret @@ -52,6 +47,4 @@ END(strspn) cfi_endproc; .size __strspn_sse2, .-__strspn_sse2 #endif -#endif /* HAVE_SSE4_SUPPORT */ - #include "../strspn.S" diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S index 4f0e2ebdab..4ead1dfaf5 100644 --- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S @@ -1,5 +1,5 @@ /* strstr with unaligned loads - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c index 507994bd38..eecba2243e 100644 --- a/sysdeps/x86_64/multiarch/strstr.c +++ b/sysdeps/x86_64/multiarch/strstr.c @@ -1,6 +1,6 @@ /* Multiple versions of strstr. All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -41,7 +41,10 @@ extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ extern __typeof (__redirect_strstr) __libc_strstr; -libc_ifunc (__libc_strstr, HAS_FAST_UNALIGNED_LOAD ? __strstr_sse2_unaligned : __strstr_sse2) +libc_ifunc (__libc_strstr, + HAS_ARCH_FEATURE (Fast_Unaligned_Load) + ? __strstr_sse2_unaligned + : __strstr_sse2) #undef strstr strong_alias (__libc_strstr, strstr) diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c index 949d26e550..4eb0c16cd8 100644 --- a/sysdeps/x86_64/multiarch/test-multiarch.c +++ b/sysdeps/x86_64/multiarch/test-multiarch.c @@ -1,6 +1,6 @@ /* Test CPU feature data. This file is part of the GNU C Library. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -75,12 +75,18 @@ do_test (int argc, char **argv) int fails; get_cpuinfo (); - fails = check_proc ("avx", HAS_AVX, "HAS_AVX"); - fails += check_proc ("fma4", HAS_FMA4, "HAS_FMA4"); - fails += check_proc ("sse4_2", HAS_SSE4_2, "HAS_SSE4_2"); - fails += check_proc ("sse4_1", HAS_SSE4_1, "HAS_SSE4_1"); - fails += check_proc ("ssse3", HAS_SSSE3, "HAS_SSSE3"); - fails += check_proc ("popcnt", HAS_POPCOUNT, "HAS_POPCOUNT"); + fails = check_proc ("avx", HAS_ARCH_FEATURE (AVX_Usable), + "HAS_ARCH_FEATURE (AVX_Usable)"); + fails += check_proc ("fma4", HAS_ARCH_FEATURE (FMA4_Usable), + "HAS_ARCH_FEATURE (FMA4_Usable)"); + fails += check_proc ("sse4_2", HAS_CPU_FEATURE (SSE4_2), + "HAS_CPU_FEATURE (SSE4_2)"); + fails += check_proc ("sse4_1", HAS_CPU_FEATURE (SSE4_1) + , "HAS_CPU_FEATURE (SSE4_1)"); + fails += check_proc ("ssse3", HAS_CPU_FEATURE (SSSE3), + "HAS_CPU_FEATURE (SSSE3)"); + fails += check_proc ("popcnt", HAS_CPU_FEATURE (POPCOUNT), + "HAS_CPU_FEATURE (POPCOUNT)"); printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails); diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c index 0007ef79e5..7921be5b57 100644 --- a/sysdeps/x86_64/multiarch/varshift.c +++ b/sysdeps/x86_64/multiarch/varshift.c @@ -1,5 +1,5 @@ /* Helper for variable shifts of SSE registers. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h index 30ace3d914..7b27d0e9dd 100644 --- a/sysdeps/x86_64/multiarch/varshift.h +++ b/sysdeps/x86_64/multiarch/varshift.h @@ -1,5 +1,5 @@ /* Helper for variable shifts of SSE registers. - Copyright (C) 2010-2015 Free Software Foundation, Inc. + Copyright (C) 2010-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S index 8097862574..341e57a5ca 100644 --- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S @@ -1,5 +1,5 @@ /* wcscpy with SSSE3 - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/wcscpy.S b/sysdeps/x86_64/multiarch/wcscpy.S index ff2f5a73d1..8e7270b9c7 100644 --- a/sysdeps/x86_64/multiarch/wcscpy.S +++ b/sysdeps/x86_64/multiarch/wcscpy.S @@ -1,6 +1,6 @@ /* Multiple versions of wcscpy All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -27,11 +27,8 @@ .text ENTRY(wcscpy) .type wcscpy, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features - -1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + HAS_CPU_FEATURE (SSSE3) jnz 2f leaq __wcscpy_sse2(%rip), %rax ret diff --git a/sysdeps/x86_64/multiarch/wmemcmp.S b/sysdeps/x86_64/multiarch/wmemcmp.S index 109e2457fe..b510f756e2 100644 --- a/sysdeps/x86_64/multiarch/wmemcmp.S +++ b/sysdeps/x86_64/multiarch/wmemcmp.S @@ -1,6 +1,6 @@ /* Multiple versions of wmemcmp All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -26,16 +26,13 @@ .text ENTRY(wmemcmp) .type wmemcmp, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) - jne 1f - call __init_cpu_features - -1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + LOAD_RTLD_GLOBAL_RO_RDX + HAS_CPU_FEATURE (SSSE3) jnz 2f leaq __wmemcmp_sse2(%rip), %rax ret -2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) +2: HAS_CPU_FEATURE (SSE4_1) jz 3f leaq __wmemcmp_sse4_1(%rip), %rax ret diff --git a/sysdeps/x86_64/nptl/Makefile b/sysdeps/x86_64/nptl/Makefile index 14fb69a94d..9b64b533ee 100644 --- a/sysdeps/x86_64/nptl/Makefile +++ b/sysdeps/x86_64/nptl/Makefile @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2015 Free Software Foundation, Inc. +# Copyright (C) 2002-2016 Free Software Foundation, Inc. # This file is part of the GNU C Library. # The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S index d1a9b68028..b871241617 100644 --- a/sysdeps/x86_64/nptl/pthread_spin_lock.S +++ b/sysdeps/x86_64/nptl/pthread_spin_lock.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 Free Software Foundation, Inc. +/* Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,11 +16,9 @@ <http://www.gnu.org/licenses/>. */ #include <lowlevellock.h> +#include <sysdep.h> - .globl pthread_spin_lock - .type pthread_spin_lock,@function - .align 16 -pthread_spin_lock: +ENTRY(pthread_spin_lock) 1: LOCK decl 0(%rdi) jne 2f @@ -33,4 +31,4 @@ pthread_spin_lock: cmpl $0, 0(%rdi) jg 1b jmp 2b - .size pthread_spin_lock,.-pthread_spin_lock +END(pthread_spin_lock) diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S index 6b58929ef4..c9c53171fe 100644 --- a/sysdeps/x86_64/nptl/pthread_spin_trylock.S +++ b/sysdeps/x86_64/nptl/pthread_spin_trylock.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2015 Free Software Foundation, Inc. +/* Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. @@ -17,6 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <pthread-errnos.h> +#include <sysdep.h> #ifdef UP @@ -25,10 +26,7 @@ # define LOCK lock #endif - .globl pthread_spin_trylock - .type pthread_spin_trylock,@function - .align 16 -pthread_spin_trylock: +ENTRY(pthread_spin_trylock) movl $1, %eax xorl %ecx, %ecx LOCK @@ -36,4 +34,4 @@ pthread_spin_trylock: movl $EBUSY, %eax cmovel %ecx, %eax retq - .size pthread_spin_trylock,.-pthread_spin_trylock +END(pthread_spin_trylock) diff --git a/sysdeps/x86_64/nptl/pthread_spin_unlock.S b/sysdeps/x86_64/nptl/pthread_spin_unlock.S index 74d7dd6430..188de2e8cb 100644 --- a/sysdeps/x86_64/nptl/pthread_spin_unlock.S +++ b/sysdeps/x86_64/nptl/pthread_spin_unlock.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2015 Free Software Foundation, Inc. +/* Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. @@ -16,14 +16,13 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ - .globl pthread_spin_unlock - .type pthread_spin_unlock,@function - .align 16 -pthread_spin_unlock: +#include <sysdep.h> + +ENTRY(pthread_spin_unlock) movl $1, (%rdi) xorl %eax, %eax retq - .size pthread_spin_unlock,.-pthread_spin_unlock +END(pthread_spin_unlock) /* The implementation of pthread_spin_init is identical. */ .globl pthread_spin_init diff --git a/sysdeps/x86_64/nptl/pthreaddef.h b/sysdeps/x86_64/nptl/pthreaddef.h index 9c7130dae2..9397efc631 100644 --- a/sysdeps/x86_64/nptl/pthreaddef.h +++ b/sysdeps/x86_64/nptl/pthreaddef.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2015 Free Software Foundation, Inc. +/* Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym index 729d1da38f..aeb752673a 100644 --- a/sysdeps/x86_64/nptl/tcb-offsets.sym +++ b/sysdeps/x86_64/nptl/tcb-offsets.sym @@ -16,7 +16,6 @@ VGETCPU_CACHE_OFFSET offsetof (tcbhead_t, vgetcpu_cache) #ifndef __ASSUME_PRIVATE_FUTEX PRIVATE_FUTEX offsetof (tcbhead_t, private_futex) #endif -RTLD_SAVESPACE_SSE offsetof (tcbhead_t, rtld_savespace_sse) -- Not strictly offsets, but these values are also used in the TCB. TCB_CANCELSTATE_BITMASK CANCELSTATE_BITMASK diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h index d7543c651f..2b061a07c6 100644 --- a/sysdeps/x86_64/nptl/tls.h +++ b/sysdeps/x86_64/nptl/tls.h @@ -1,5 +1,5 @@ /* Definition for thread-local data handling. nptl/x86_64 version. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -67,14 +67,15 @@ typedef struct # else int __glibc_reserved1; # endif - int rtld_must_xmm_save; + int __glibc_unused1; /* Reservation of some values for the TM ABI. */ void *__private_tm[4]; /* GCC split stack support. */ void *__private_ss; long int __glibc_reserved2; - /* Have space for the post-AVX register size. */ - __128bits rtld_savespace_sse[8][4] __attribute__ ((aligned (32))); + /* Must be kept even if it is no longer used by glibc since programs, + like AddressSanitizer, depend on the size of tcbhead_t. */ + __128bits __glibc_unused2[8][4] __attribute__ ((aligned (32))); void *__padding[8]; } tcbhead_t; @@ -384,41 +385,6 @@ typedef struct # define THREAD_GSCOPE_WAIT() \ GL(dl_wait_lookup_done) () - -# ifdef SHARED -/* Defined in dl-trampoline.S. */ -extern void _dl_x86_64_save_sse (void); -extern void _dl_x86_64_restore_sse (void); - -# define RTLD_CHECK_FOREIGN_CALL \ - (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0) - -/* NB: Don't use the xchg operation because that would imply a lock - prefix which is expensive and unnecessary. The cache line is also - not contested at all. */ -# define RTLD_ENABLE_FOREIGN_CALL \ - int old_rtld_must_xmm_save = THREAD_GETMEM (THREAD_SELF, \ - header.rtld_must_xmm_save); \ - THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1) - -# define RTLD_PREPARE_FOREIGN_CALL \ - do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save)) \ - { \ - _dl_x86_64_save_sse (); \ - THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \ - } \ - while (0) - -# define RTLD_FINALIZE_FOREIGN_CALL \ - do { \ - if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0) \ - _dl_x86_64_restore_sse (); \ - THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, \ - old_rtld_must_xmm_save); \ - } while (0) -# endif - - #endif /* __ASSEMBLER__ */ #endif /* tls.h */ diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S index ec2cb9c76c..f90b7921a1 100644 --- a/sysdeps/x86_64/rawmemchr.S +++ b/sysdeps/x86_64/rawmemchr.S @@ -1,6 +1,6 @@ /* fast SSE2 memchr with 64 byte loop and pmaxub instruction using - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/rshift.S b/sysdeps/x86_64/rshift.S index 4166e612d5..c88c6d82bb 100644 --- a/sysdeps/x86_64/rshift.S +++ b/sysdeps/x86_64/rshift.S @@ -1,5 +1,5 @@ /* x86-64 __mpn_rshift -- - Copyright (C) 2007-2015 Free Software Foundation, Inc. + Copyright (C) 2007-2016 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify diff --git a/sysdeps/x86_64/rtld-memcmp.c b/sysdeps/x86_64/rtld-memcmp.c deleted file mode 100644 index 2ee40328b8..0000000000 --- a/sysdeps/x86_64/rtld-memcmp.c +++ /dev/null @@ -1 +0,0 @@ -#include <string/memcmp.c> diff --git a/sysdeps/x86_64/rtld-strchr.S b/sysdeps/x86_64/rtld-strchr.S deleted file mode 100644 index cc694d71b6..0000000000 --- a/sysdeps/x86_64/rtld-strchr.S +++ /dev/null @@ -1,288 +0,0 @@ -/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. - For AMD x86-64. - Copyright (C) 2002-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - - - .text -ENTRY (strchr) - - /* Before we start with the main loop we process single bytes - until the source pointer is aligned. This has two reasons: - 1. aligned 64-bit memory access is faster - and (more important) - 2. we process in the main loop 64 bit in one step although - we don't know the end of the string. But accessing at - 8-byte alignment guarantees that we never access illegal - memory if this would not also be done by the trivial - implementation (this is because all processor inherent - boundaries are multiples of 8). */ - - movq %rdi, %rdx - andl $7, %edx /* Mask alignment bits */ - movq %rdi, %rax /* duplicate destination. */ - jz 1f /* aligned => start loop */ - neg %edx - addl $8, %edx /* Align to 8 bytes. */ - - /* Search the first bytes directly. */ -0: movb (%rax), %cl /* load byte */ - cmpb %cl,%sil /* compare byte. */ - je 6f /* target found */ - testb %cl,%cl /* is byte NUL? */ - je 7f /* yes => return NULL */ - incq %rax /* increment pointer */ - decl %edx - jnz 0b - - -1: - /* At the moment %rsi contains C. What we need for the - algorithm is C in all bytes of the register. Avoid - operations on 16 bit words because these require an - prefix byte (and one more cycle). */ - /* Populate 8 bit data to full 64-bit. */ - movabs $0x0101010101010101,%r9 - movzbl %sil,%edx - imul %rdx,%r9 - - movq $0xfefefefefefefeff, %r8 /* Save magic. */ - - /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to - change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of QUARDWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24 tec.. If one of bits 54-63 is set, there will be a carry - into bit 64 (=carry flag), so all of the hole bits will - be changed. - - 3) But wait! Aren't we looking for C, not zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is C. This turns each byte that is C - into a zero. */ - - .p2align 4 -4: - /* Main Loop is unrolled 4 times. */ - /* First unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c - are now 0 */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found c => return pointer */ - - /* The quadword we looked at does not contain the value we're looking - for. Let's search now whether we have reached the end of the - string. */ - xorq %r9, %rcx /* restore original dword without reload */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 7f /* highest byte is NUL => return NULL */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 7f /* found NUL => return NULL */ - - /* Second unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c - are now 0 */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found c => return pointer */ - - /* The quadword we looked at does not contain the value we're looking - for. Let's search now whether we have reached the end of the - string. */ - xorq %r9, %rcx /* restore original dword without reload */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 7f /* highest byte is NUL => return NULL */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 7f /* found NUL => return NULL */ - /* Third unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c - are now 0 */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found c => return pointer */ - - /* The quadword we looked at does not contain the value we're looking - for. Let's search now whether we have reached the end of the - string. */ - xorq %r9, %rcx /* restore original dword without reload */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 7f /* highest byte is NUL => return NULL */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 7f /* found NUL => return NULL */ - /* Fourth unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - xorq %r9, %rcx /* XOR with qword c|...|c => bytes of str == c - are now 0 */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found c => return pointer */ - - /* The quadword we looked at does not contain the value we're looking - for. Let's search now whether we have reached the end of the - string. */ - xorq %r9, %rcx /* restore original dword without reload */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 7f /* highest byte is NUL => return NULL */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jz 4b /* no NUL found => restart loop */ - - -7: /* Return NULL. */ - xorl %eax, %eax - retq - - - /* We now scan for the byte in which the character was matched. - But we have to take care of the case that a NUL char is - found before this in the dword. Note that we XORed %rcx - with the byte we're looking for, therefore the tests below look - reversed. */ - - - .p2align 4 /* Align, it's a jump target. */ -3: movq %r9,%rdx /* move to %rdx so that we can access bytes */ - subq $8,%rax /* correct pointer increment. */ - testb %cl, %cl /* is first byte C? */ - jz 6f /* yes => return pointer */ - cmpb %dl, %cl /* is first byte NUL? */ - je 7b /* yes => return NULL */ - incq %rax /* increment pointer */ - - testb %ch, %ch /* is second byte C? */ - jz 6f /* yes => return pointer */ - cmpb %dl, %ch /* is second byte NUL? */ - je 7b /* yes => return NULL? */ - incq %rax /* increment pointer */ - - shrq $16, %rcx /* make upper bytes accessible */ - testb %cl, %cl /* is third byte C? */ - jz 6f /* yes => return pointer */ - cmpb %dl, %cl /* is third byte NUL? */ - je 7b /* yes => return NULL */ - incq %rax /* increment pointer */ - - testb %ch, %ch /* is fourth byte C? */ - jz 6f /* yes => return pointer */ - cmpb %dl, %ch /* is fourth byte NUL? */ - je 7b /* yes => return NULL? */ - incq %rax /* increment pointer */ - - shrq $16, %rcx /* make upper bytes accessible */ - testb %cl, %cl /* is fifth byte C? */ - jz 6f /* yes => return pointer */ - cmpb %dl, %cl /* is fifth byte NUL? */ - je 7b /* yes => return NULL */ - incq %rax /* increment pointer */ - - testb %ch, %ch /* is sixth byte C? */ - jz 6f /* yes => return pointer */ - cmpb %dl, %ch /* is sixth byte NUL? */ - je 7b /* yes => return NULL? */ - incq %rax /* increment pointer */ - - shrq $16, %rcx /* make upper bytes accessible */ - testb %cl, %cl /* is seventh byte C? */ - jz 6f /* yes => return pointer */ - cmpb %dl, %cl /* is seventh byte NUL? */ - je 7b /* yes => return NULL */ - - /* It must be in the eigth byte and it cannot be NUL. */ - incq %rax - -6: - nop - retq -END (strchr) - -weak_alias (strchr, index) -libc_hidden_builtin_def (strchr) diff --git a/sysdeps/x86_64/rtld-strlen.S b/sysdeps/x86_64/rtld-strlen.S deleted file mode 100644 index 1328652154..0000000000 --- a/sysdeps/x86_64/rtld-strlen.S +++ /dev/null @@ -1,136 +0,0 @@ -/* strlen(str) -- determine the length of the string STR. - Copyright (C) 2002-2015 Free Software Foundation, Inc. - Based on i486 version contributed by Ulrich Drepper <drepper@redhat.com>. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - - - .text -ENTRY (strlen) - movq %rdi, %rcx /* Duplicate source pointer. */ - andl $7, %ecx /* mask alignment bits */ - movq %rdi, %rax /* duplicate destination. */ - jz 1f /* aligned => start loop */ - - neg %ecx /* We need to align to 8 bytes. */ - addl $8,%ecx - /* Search the first bytes directly. */ -0: cmpb $0x0,(%rax) /* is byte NUL? */ - je 2f /* yes => return */ - incq %rax /* increment pointer */ - decl %ecx - jnz 0b - -1: movq $0xfefefefefefefeff,%r8 /* Save magic. */ - - .p2align 4 /* Align loop. */ -4: /* Main Loop is unrolled 4 times. */ - /* First unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found NUL => return pointer */ - - /* Second unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found NUL => return pointer */ - - /* Third unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found NUL => return pointer */ - - /* Fourth unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jz 4b /* no NUL found => continue loop */ - - .p2align 4 /* Align, it's a jump target. */ -3: subq $8,%rax /* correct pointer increment. */ - - testb %cl, %cl /* is first byte NUL? */ - jz 2f /* yes => return */ - incq %rax /* increment pointer */ - - testb %ch, %ch /* is second byte NUL? */ - jz 2f /* yes => return */ - incq %rax /* increment pointer */ - - testl $0x00ff0000, %ecx /* is third byte NUL? */ - jz 2f /* yes => return pointer */ - incq %rax /* increment pointer */ - - testl $0xff000000, %ecx /* is fourth byte NUL? */ - jz 2f /* yes => return pointer */ - incq %rax /* increment pointer */ - - shrq $32, %rcx /* look at other half. */ - - testb %cl, %cl /* is first byte NUL? */ - jz 2f /* yes => return */ - incq %rax /* increment pointer */ - - testb %ch, %ch /* is second byte NUL? */ - jz 2f /* yes => return */ - incq %rax /* increment pointer */ - - testl $0xff0000, %ecx /* is third byte NUL? */ - jz 2f /* yes => return pointer */ - incq %rax /* increment pointer */ -2: - subq %rdi, %rax /* compute difference to string start */ - ret -END (strlen) -libc_hidden_builtin_def (strlen) diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c index 72e67aa999..0834e711b3 100644 --- a/sysdeps/x86_64/sched_cpucount.c +++ b/sysdeps/x86_64/sched_cpucount.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2015 Free Software Foundation, Inc. +/* Copyright (C) 2007-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/setjmp.S b/sysdeps/x86_64/setjmp.S index 774aaf1e8d..3e93967c2f 100644 --- a/sysdeps/x86_64/setjmp.S +++ b/sysdeps/x86_64/setjmp.S @@ -1,5 +1,5 @@ /* setjmp for x86-64. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/stackinfo.h b/sysdeps/x86_64/stackinfo.h index 5b81d808d0..848aa7754c 100644 --- a/sysdeps/x86_64/stackinfo.h +++ b/sysdeps/x86_64/stackinfo.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2001-2015 Free Software Foundation, Inc. +/* Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/start.S b/sysdeps/x86_64/start.S index 0d27a38e9c..1374974307 100644 --- a/sysdeps/x86_64/start.S +++ b/sysdeps/x86_64/start.S @@ -1,5 +1,5 @@ /* Startup code compliant to the ELF x86-64 ABI. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2001. diff --git a/sysdeps/x86_64/stpcpy_chk.S b/sysdeps/x86_64/stpcpy_chk.S deleted file mode 100644 index 905e8d7ee3..0000000000 --- a/sysdeps/x86_64/stpcpy_chk.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STPCPY_CHK -#define STRCPY_CHK __stpcpy_chk -#include <sysdeps/x86_64/strcpy_chk.S> diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S index affb15a32e..dadf4c76b2 100644 --- a/sysdeps/x86_64/strcat.S +++ b/sysdeps/x86_64/strcat.S @@ -1,6 +1,6 @@ /* strcat(dest, src) -- Append SRC on the end of DEST. Optimized for x86-64. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S index d7955af8d2..4431fee648 100644 --- a/sysdeps/x86_64/strchr.S +++ b/sysdeps/x86_64/strchr.S @@ -1,6 +1,6 @@ /* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. For AMD x86-64. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S index e8cab0dd16..7b52d699ee 100644 --- a/sysdeps/x86_64/strchrnul.S +++ b/sysdeps/x86_64/strchrnul.S @@ -1,7 +1,7 @@ /* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR or terminating NUL byte. For AMD x86-64. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S index 1329649d3a..c5c44d4e27 100644 --- a/sysdeps/x86_64/strcmp.S +++ b/sysdeps/x86_64/strcmp.S @@ -1,5 +1,5 @@ /* Highly optimized version for x86-64. - Copyright (C) 1999-2015 Free Software Foundation, Inc. + Copyright (C) 1999-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Based on i686 version contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. @@ -29,13 +29,6 @@ #endif #ifdef USE_AS_STRNCMP -/* The simplified code below is not set up to handle strncmp() so far. - Should this become necessary it has to be implemented. For now - just report the problem. */ -# if !IS_IN (libc) -# error "strncmp not implemented so far" -# endif - /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz if the new counter > the old one or is 0. */ # define UPDATE_STRNCMP_COUNTER \ @@ -50,20 +43,10 @@ #elif defined USE_AS_STRCASECMP_L # include "locale-defines.h" -/* No support for strcasecmp outside libc so far since it is not needed. */ -# if !IS_IN (libc) -# error "strcasecmp_l not implemented so far" -# endif - # define UPDATE_STRNCMP_COUNTER #elif defined USE_AS_STRNCASECMP_L # include "locale-defines.h" -/* No support for strncasecmp outside libc so far since it is not needed. */ -# if !IS_IN (libc) -# error "strncasecmp_l not implemented so far" -# endif - # define UPDATE_STRNCMP_COUNTER \ /* calculate left number to compare */ \ lea -16(%rcx, %r11), %r9; \ @@ -126,63 +109,44 @@ libc_hidden_def (__strncasecmp) #endif ENTRY (STRCMP) -#if !IS_IN (libc) -/* Simple version since we can't use SSE registers in ld.so. */ -L(oop): movb (%rdi), %al - cmpb (%rsi), %al - jne L(neq) - incq %rdi - incq %rsi - testb %al, %al - jnz L(oop) - - xorl %eax, %eax - ret - -L(neq): movl $1, %eax - movl $-1, %ecx - cmovbl %ecx, %eax - ret -END (STRCMP) -#else /* !IS_IN (libc) */ -# ifdef USE_AS_STRCASECMP_L +#ifdef USE_AS_STRCASECMP_L /* We have to fall back on the C implementation for locales with encodings not matching ASCII for single bytes. */ -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP -# else +# else mov (%rdx), %RAX_LP -# endif +# endif testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strcasecmp_l_nonascii -# elif defined USE_AS_STRNCASECMP_L +#elif defined USE_AS_STRNCASECMP_L /* We have to fall back on the C implementation for locales with encodings not matching ASCII for single bytes. */ -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP -# else +# else mov (%rcx), %RAX_LP -# endif +# endif testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strncasecmp_l_nonascii -# endif +#endif /* * This implementation uses SSE to compare up to 16 bytes at a time. */ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L test %rdx, %rdx je LABEL(strcmp_exitz) cmp $1, %rdx je LABEL(Byte0) mov %rdx, %r11 -# endif +#endif mov %esi, %ecx mov %edi, %eax /* Use 64bit AND here to avoid long NOP padding. */ and $0x3f, %rcx /* rsi alignment in cache line */ and $0x3f, %rax /* rdi alignment in cache line */ -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L .section .rodata.cst16,"aM",@progbits,16 .align 16 .Lbelowupper: @@ -196,12 +160,12 @@ END (STRCMP) .quad 0x2020202020202020 .previous movdqa .Lbelowupper(%rip), %xmm5 -# define UCLOW_reg %xmm5 +# define UCLOW_reg %xmm5 movdqa .Ltopupper(%rip), %xmm6 -# define UCHIGH_reg %xmm6 +# define UCHIGH_reg %xmm6 movdqa .Ltouppermask(%rip), %xmm7 -# define LCQWORD_reg %xmm7 -# endif +# define LCQWORD_reg %xmm7 +#endif cmp $0x30, %ecx ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */ cmp $0x30, %eax @@ -210,8 +174,8 @@ END (STRCMP) movlpd (%rsi), %xmm2 movhpd 8(%rdi), %xmm1 movhpd 8(%rsi), %xmm2 -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# define TOLOWER(reg1, reg2) \ movdqa reg1, %xmm8; \ movdqa UCHIGH_reg, %xmm9; \ movdqa reg2, %xmm10; \ @@ -227,9 +191,9 @@ END (STRCMP) por %xmm8, reg1; \ por %xmm10, reg2 TOLOWER (%xmm1, %xmm2) -# else -# define TOLOWER(reg1, reg2) -# endif +#else +# define TOLOWER(reg1, reg2) +#endif pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ pcmpeqb %xmm1, %xmm0 /* Any null chars? */ pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ @@ -237,10 +201,10 @@ END (STRCMP) pmovmskb %xmm1, %edx sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ jnz LABEL(less16bytes) /* If not, find different value or null char */ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) /* finish comparision */ -# endif +#endif add $16, %rsi /* prepare to search next 16 bytes */ add $16, %rdi /* prepare to search next 16 bytes */ @@ -282,13 +246,13 @@ LABEL(ashr_0): movdqa (%rsi), %xmm1 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ pcmpeqb %xmm1, %xmm0 /* Any null chars? */ -# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ -# else +#else movdqa (%rdi), %xmm2 TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ -# endif +#endif psubb %xmm0, %xmm1 /* packed sub of comparison results*/ pmovmskb %xmm1, %r9d shr %cl, %edx /* adjust 0xffff for offset */ @@ -321,10 +285,10 @@ LABEL(loop_ashr_0): sub $0xffff, %edx jnz LABEL(exit) /* mismatch or null char seen */ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa (%rsi, %rcx), %xmm1 movdqa (%rdi, %rcx), %xmm2 @@ -336,10 +300,10 @@ LABEL(loop_ashr_0): pmovmskb %xmm1, %edx sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx jmp LABEL(loop_ashr_0) @@ -388,13 +352,13 @@ LABEL(gobble_ashr_1): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 /* store for next cycle */ -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $1, %xmm3 pslldq $15, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -404,10 +368,10 @@ LABEL(gobble_ashr_1): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -418,13 +382,13 @@ LABEL(gobble_ashr_1): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 /* store for next cycle */ -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $1, %xmm3 pslldq $15, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -434,10 +398,10 @@ LABEL(gobble_ashr_1): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 jmp LABEL(loop_ashr_1) @@ -453,10 +417,10 @@ LABEL(nibble_ashr_1): test $0xfffe, %edx jnz LABEL(ashr_1_exittail) /* find null char*/ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $15, %r11 jbe LABEL(ashr_1_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 /* substract 4K from %r10 */ @@ -518,13 +482,13 @@ LABEL(gobble_ashr_2): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $2, %xmm3 pslldq $14, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -534,10 +498,10 @@ LABEL(gobble_ashr_2): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -549,13 +513,13 @@ LABEL(gobble_ashr_2): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $2, %xmm3 pslldq $14, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -565,10 +529,10 @@ LABEL(gobble_ashr_2): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -581,10 +545,10 @@ LABEL(nibble_ashr_2): test $0xfffc, %edx jnz LABEL(ashr_2_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $14, %r11 jbe LABEL(ashr_2_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -643,13 +607,13 @@ LABEL(gobble_ashr_3): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $3, %xmm3 pslldq $13, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -659,10 +623,10 @@ LABEL(gobble_ashr_3): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -674,13 +638,13 @@ LABEL(gobble_ashr_3): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $3, %xmm3 pslldq $13, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -690,10 +654,10 @@ LABEL(gobble_ashr_3): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -706,10 +670,10 @@ LABEL(nibble_ashr_3): test $0xfff8, %edx jnz LABEL(ashr_3_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $13, %r11 jbe LABEL(ashr_3_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -768,13 +732,13 @@ LABEL(gobble_ashr_4): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $4, %xmm3 pslldq $12, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -784,10 +748,10 @@ LABEL(gobble_ashr_4): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -799,13 +763,13 @@ LABEL(gobble_ashr_4): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $4, %xmm3 pslldq $12, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -815,10 +779,10 @@ LABEL(gobble_ashr_4): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -831,10 +795,10 @@ LABEL(nibble_ashr_4): test $0xfff0, %edx jnz LABEL(ashr_4_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $12, %r11 jbe LABEL(ashr_4_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -893,13 +857,13 @@ LABEL(gobble_ashr_5): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $5, %xmm3 pslldq $11, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -909,10 +873,10 @@ LABEL(gobble_ashr_5): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -924,13 +888,13 @@ LABEL(gobble_ashr_5): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $5, %xmm3 pslldq $11, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -940,10 +904,10 @@ LABEL(gobble_ashr_5): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -956,10 +920,10 @@ LABEL(nibble_ashr_5): test $0xffe0, %edx jnz LABEL(ashr_5_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $11, %r11 jbe LABEL(ashr_5_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1018,13 +982,13 @@ LABEL(gobble_ashr_6): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $6, %xmm3 pslldq $10, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1034,10 +998,10 @@ LABEL(gobble_ashr_6): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1049,13 +1013,13 @@ LABEL(gobble_ashr_6): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $6, %xmm3 pslldq $10, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1065,10 +1029,10 @@ LABEL(gobble_ashr_6): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1081,10 +1045,10 @@ LABEL(nibble_ashr_6): test $0xffc0, %edx jnz LABEL(ashr_6_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $10, %r11 jbe LABEL(ashr_6_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1143,13 +1107,13 @@ LABEL(gobble_ashr_7): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $7, %xmm3 pslldq $9, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1159,10 +1123,10 @@ LABEL(gobble_ashr_7): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1174,13 +1138,13 @@ LABEL(gobble_ashr_7): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $7, %xmm3 pslldq $9, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1190,10 +1154,10 @@ LABEL(gobble_ashr_7): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1206,10 +1170,10 @@ LABEL(nibble_ashr_7): test $0xff80, %edx jnz LABEL(ashr_7_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $9, %r11 jbe LABEL(ashr_7_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1268,13 +1232,13 @@ LABEL(gobble_ashr_8): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $8, %xmm3 pslldq $8, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1284,10 +1248,10 @@ LABEL(gobble_ashr_8): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1299,13 +1263,13 @@ LABEL(gobble_ashr_8): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $8, %xmm3 pslldq $8, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1315,10 +1279,10 @@ LABEL(gobble_ashr_8): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1331,10 +1295,10 @@ LABEL(nibble_ashr_8): test $0xff00, %edx jnz LABEL(ashr_8_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $8, %r11 jbe LABEL(ashr_8_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1393,13 +1357,13 @@ LABEL(gobble_ashr_9): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $9, %xmm3 pslldq $7, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1409,10 +1373,10 @@ LABEL(gobble_ashr_9): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1424,13 +1388,13 @@ LABEL(gobble_ashr_9): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $9, %xmm3 pslldq $7, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1440,10 +1404,10 @@ LABEL(gobble_ashr_9): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 /* store for next cycle */ @@ -1456,10 +1420,10 @@ LABEL(nibble_ashr_9): test $0xfe00, %edx jnz LABEL(ashr_9_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $7, %r11 jbe LABEL(ashr_9_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1518,13 +1482,13 @@ LABEL(gobble_ashr_10): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $10, %xmm3 pslldq $6, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1534,10 +1498,10 @@ LABEL(gobble_ashr_10): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1549,13 +1513,13 @@ LABEL(gobble_ashr_10): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $10, %xmm3 pslldq $6, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1565,10 +1529,10 @@ LABEL(gobble_ashr_10): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1581,10 +1545,10 @@ LABEL(nibble_ashr_10): test $0xfc00, %edx jnz LABEL(ashr_10_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $6, %r11 jbe LABEL(ashr_10_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1643,13 +1607,13 @@ LABEL(gobble_ashr_11): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $11, %xmm3 pslldq $5, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1659,10 +1623,10 @@ LABEL(gobble_ashr_11): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1674,13 +1638,13 @@ LABEL(gobble_ashr_11): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $11, %xmm3 pslldq $5, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1690,10 +1654,10 @@ LABEL(gobble_ashr_11): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1706,10 +1670,10 @@ LABEL(nibble_ashr_11): test $0xf800, %edx jnz LABEL(ashr_11_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $5, %r11 jbe LABEL(ashr_11_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1768,13 +1732,13 @@ LABEL(gobble_ashr_12): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $12, %xmm3 pslldq $4, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1784,10 +1748,10 @@ LABEL(gobble_ashr_12): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1799,13 +1763,13 @@ LABEL(gobble_ashr_12): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $12, %xmm3 pslldq $4, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1815,10 +1779,10 @@ LABEL(gobble_ashr_12): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1831,10 +1795,10 @@ LABEL(nibble_ashr_12): test $0xf000, %edx jnz LABEL(ashr_12_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $4, %r11 jbe LABEL(ashr_12_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1893,13 +1857,13 @@ LABEL(gobble_ashr_13): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $13, %xmm3 pslldq $3, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1909,10 +1873,10 @@ LABEL(gobble_ashr_13): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1924,13 +1888,13 @@ LABEL(gobble_ashr_13): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $13, %xmm3 pslldq $3, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1940,10 +1904,10 @@ LABEL(gobble_ashr_13): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1956,10 +1920,10 @@ LABEL(nibble_ashr_13): test $0xe000, %edx jnz LABEL(ashr_13_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $3, %r11 jbe LABEL(ashr_13_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -2018,13 +1982,13 @@ LABEL(gobble_ashr_14): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $14, %xmm3 pslldq $2, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -2034,10 +1998,10 @@ LABEL(gobble_ashr_14): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -2049,13 +2013,13 @@ LABEL(gobble_ashr_14): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $14, %xmm3 pslldq $2, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -2065,10 +2029,10 @@ LABEL(gobble_ashr_14): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP | defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP | defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -2081,10 +2045,10 @@ LABEL(nibble_ashr_14): test $0xc000, %edx jnz LABEL(ashr_14_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $2, %r11 jbe LABEL(ashr_14_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -2145,13 +2109,13 @@ LABEL(gobble_ashr_15): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $15, %xmm3 pslldq $1, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -2161,10 +2125,10 @@ LABEL(gobble_ashr_15): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -2176,13 +2140,13 @@ LABEL(gobble_ashr_15): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $15, %xmm3 pslldq $1, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -2192,10 +2156,10 @@ LABEL(gobble_ashr_15): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -2208,10 +2172,10 @@ LABEL(nibble_ashr_15): test $0x8000, %edx jnz LABEL(ashr_15_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmpq $1, %r11 jbe LABEL(ashr_15_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -2246,18 +2210,18 @@ LABEL(ret): LABEL(less16bytes): bsf %rdx, %rdx /* find and store bit index in %rdx */ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub %rdx, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif movzbl (%rsi, %rdx), %ecx movzbl (%rdi, %rdx), %eax -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx movl (%rdx,%rcx,4), %ecx movl (%rdx,%rax,4), %eax -# endif +#endif sub %ecx, %eax ret @@ -2271,11 +2235,11 @@ LABEL(Byte0): movzx (%rsi), %ecx movzx (%rdi), %eax -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx movl (%rdx,%rcx,4), %ecx movl (%rdx,%rax,4), %eax -# endif +#endif sub %ecx, %eax ret @@ -2300,5 +2264,4 @@ LABEL(unaligned_table): .int LABEL(ashr_14) - LABEL(unaligned_table) .int LABEL(ashr_15) - LABEL(unaligned_table) .int LABEL(ashr_0) - LABEL(unaligned_table) -#endif /* !IS_IN (libc) */ libc_hidden_builtin_def (STRCMP) diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S index 23231088fd..3f90c0020a 100644 --- a/sysdeps/x86_64/strcpy.S +++ b/sysdeps/x86_64/strcpy.S @@ -1,5 +1,5 @@ /* strcpy/stpcpy implementation for x86-64. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S deleted file mode 100644 index 24e51c66f1..0000000000 --- a/sysdeps/x86_64/strcpy_chk.S +++ /dev/null @@ -1,208 +0,0 @@ -/* strcpy/stpcpy checking implementation for x86-64. - Copyright (C) 2002-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Andreas Jaeger <aj@suse.de>, 2002. - Adopted into checking version by Jakub Jelinek <jakub@redhat.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#ifndef USE_AS_STPCPY_CHK -# define STRCPY_CHK __strcpy_chk -#endif - - .text -ENTRY (STRCPY_CHK) - movq %rsi, %rcx /* Source register. */ - andl $7, %ecx /* mask alignment bits */ -#ifndef USE_AS_STPCPY_CHK - movq %rdi, %r10 /* Duplicate destination pointer. */ -#endif - jz 5f /* aligned => start loop */ - - cmpq $8, %rdx /* Check if only few bytes left in - destination. */ - jb 50f - - subq $8, %rcx /* We need to align to 8 bytes. */ - addq %rcx, %rdx /* Subtract count of stored bytes - in the cycle below from destlen. */ - - /* Search the first bytes directly. */ -0: - movb (%rsi), %al /* Fetch a byte */ - testb %al, %al /* Is it NUL? */ - movb %al, (%rdi) /* Store it */ - jz 4f /* If it was NUL, done! */ - incq %rsi - incq %rdi - incl %ecx - jnz 0b - -5: - movq $0xfefefefefefefeff,%r8 - cmpq $32, %rdx /* Are there enough bytes in destination - for the next unrolled round? */ - jb 60f /* If not, avoid the unrolled loop. */ - - /* Now the sources is aligned. Unfortunatly we cannot force - to have both source and destination aligned, so ignore the - alignment of the destination. */ - .p2align 4 -1: - /* 1st unroll. */ - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 3f /* found NUL => return pointer */ - - movq %rax, (%rdi) /* Write value to destination. */ - addq $8, %rdi /* Adjust pointer. */ - - /* 2nd unroll. */ - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 3f /* found NUL => return pointer */ - - movq %rax, (%rdi) /* Write value to destination. */ - addq $8, %rdi /* Adjust pointer. */ - - /* 3rd unroll. */ - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 3f /* found NUL => return pointer */ - - movq %rax, (%rdi) /* Write value to destination. */ - addq $8, %rdi /* Adjust pointer. */ - - /* 4th unroll. */ - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 3f /* found NUL => return pointer */ - - subq $32, %rdx /* Adjust destlen. */ - movq %rax, (%rdi) /* Write value to destination. */ - addq $8, %rdi /* Adjust pointer. */ - cmpq $32, %rdx /* Are there enough bytes in destination - for the next unrolled round? */ - jae 1b /* Next iteration. */ - -60: - cmpq $8, %rdx /* Are there enough bytes in destination - for the next unrolled round? */ - jb 50f /* Now, copy and check byte by byte. */ - - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 3f /* found NUL => return pointer */ - - subq $8, %rdx /* Adjust destlen. */ - movq %rax, (%rdi) /* Write value to destination. */ - addq $8, %rdi /* Adjust pointer. */ - jmp 60b /* Next iteration. */ - - /* Do the last few bytes. %rax contains the value to write. - The loop is unrolled twice. */ - .p2align 4 -3: - /* Note that stpcpy needs to return with the value of the NUL - byte. */ - movb %al, (%rdi) /* 1st byte. */ - testb %al, %al /* Is it NUL. */ - jz 4f /* yes, finish. */ - incq %rdi /* Increment destination. */ - movb %ah, (%rdi) /* 2nd byte. */ - testb %ah, %ah /* Is it NUL?. */ - jz 4f /* yes, finish. */ - incq %rdi /* Increment destination. */ - shrq $16, %rax /* Shift... */ - jmp 3b /* and look at next two bytes in %rax. */ - -51: - /* Search the bytes directly, checking for overflows. */ - incq %rsi - incq %rdi - decq %rdx - jz HIDDEN_JUMPTARGET (__chk_fail) -52: - movb (%rsi), %al /* Fetch a byte */ - testb %al, %al /* Is it NUL? */ - movb %al, (%rdi) /* Store it */ - jnz 51b /* If it was NUL, done! */ -4: -#ifdef USE_AS_STPCPY_CHK - movq %rdi, %rax /* Destination is return value. */ -#else - movq %r10, %rax /* Source is return value. */ -#endif - retq - -50: - testq %rdx, %rdx - jnz 52b - jmp HIDDEN_JUMPTARGET (__chk_fail) - -END (STRCPY_CHK) diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S index c6c20f001c..de526c8fdd 100644 --- a/sysdeps/x86_64/strcspn.S +++ b/sysdeps/x86_64/strcspn.S @@ -1,7 +1,7 @@ /* strcspn (str, ss) -- Return the length of the initial segment of STR which contains no characters from SS. For AMD x86-64. - Copyright (C) 1994-2015 Free Software Foundation, Inc. + Copyright (C) 1994-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>. Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>. diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S index c382c8d23e..12f63ad1bb 100644 --- a/sysdeps/x86_64/strlen.S +++ b/sysdeps/x86_64/strlen.S @@ -1,5 +1,5 @@ /* SSE2 version of strlen. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ /* Long lived register in strlen(s), strnlen(s, n) are: - %xmm11 - zero + %xmm3 - zero %rdi - s %r10 (s+n) & (~(64-1)) %r11 s+n @@ -32,14 +32,14 @@ ENTRY(strlen) /* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */ #define FIND_ZERO \ - pcmpeqb (%rax), %xmm8; \ - pcmpeqb 16(%rax), %xmm9; \ - pcmpeqb 32(%rax), %xmm10; \ - pcmpeqb 48(%rax), %xmm11; \ - pmovmskb %xmm8, %esi; \ - pmovmskb %xmm9, %edx; \ - pmovmskb %xmm10, %r8d; \ - pmovmskb %xmm11, %ecx; \ + pcmpeqb (%rax), %xmm0; \ + pcmpeqb 16(%rax), %xmm1; \ + pcmpeqb 32(%rax), %xmm2; \ + pcmpeqb 48(%rax), %xmm3; \ + pmovmskb %xmm0, %esi; \ + pmovmskb %xmm1, %edx; \ + pmovmskb %xmm2, %r8d; \ + pmovmskb %xmm3, %ecx; \ salq $16, %rdx; \ salq $16, %rcx; \ orq %rsi, %rdx; \ @@ -63,10 +63,10 @@ L(n_nonzero): mov %rsi, %r11 #endif - pxor %xmm8, %xmm8 - pxor %xmm9, %xmm9 - pxor %xmm10, %xmm10 - pxor %xmm11, %xmm11 + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 movq %rdi, %rax movq %rdi, %rcx andq $4095, %rcx @@ -103,9 +103,9 @@ L(n_nonzero): FIND_ZERO #else /* Test first 16 bytes unaligned. */ - movdqu (%rax), %xmm12 - pcmpeqb %xmm8, %xmm12 - pmovmskb %xmm12, %edx + movdqu (%rax), %xmm4 + pcmpeqb %xmm0, %xmm4 + pmovmskb %xmm4, %edx test %edx, %edx je L(next48_bytes) bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */ @@ -114,12 +114,12 @@ L(n_nonzero): L(next48_bytes): /* Same as FIND_ZERO except we do not check first 16 bytes. */ andq $-16, %rax - pcmpeqb 16(%rax), %xmm9 - pcmpeqb 32(%rax), %xmm10 - pcmpeqb 48(%rax), %xmm11 - pmovmskb %xmm9, %edx - pmovmskb %xmm10, %r8d - pmovmskb %xmm11, %ecx + pcmpeqb 16(%rax), %xmm1 + pcmpeqb 32(%rax), %xmm2 + pcmpeqb 48(%rax), %xmm3 + pmovmskb %xmm1, %edx + pmovmskb %xmm2, %r8d + pmovmskb %xmm3, %ecx salq $16, %rdx salq $16, %rcx orq %r8, %rcx @@ -127,7 +127,7 @@ L(next48_bytes): orq %rcx, %rdx #endif - /* When no zero byte is found xmm9-11 are zero so we do not have to + /* When no zero byte is found xmm1-3 are zero so we do not have to zero them. */ PROLOG(loop) @@ -149,9 +149,9 @@ L(strnlen_ret): #endif .p2align 4 L(loop_init): - pxor %xmm9, %xmm9 - pxor %xmm10, %xmm10 - pxor %xmm11, %xmm11 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 #ifdef AS_STRNLEN .p2align 4 L(loop): @@ -160,12 +160,12 @@ L(loop): cmpq %rax, %r10 je L(exit_end) - movdqa (%rax), %xmm8 - pminub 16(%rax), %xmm8 - pminub 32(%rax), %xmm8 - pminub 48(%rax), %xmm8 - pcmpeqb %xmm11, %xmm8 - pmovmskb %xmm8, %edx + movdqa (%rax), %xmm0 + pminub 16(%rax), %xmm0 + pminub 32(%rax), %xmm0 + pminub 48(%rax), %xmm0 + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %edx testl %edx, %edx jne L(exit) jmp L(loop) @@ -174,7 +174,7 @@ L(loop): L(exit_end): cmp %rax, %r11 je L(first) /* Do not read when end is at page boundary. */ - pxor %xmm8, %xmm8 + pxor %xmm0, %xmm0 FIND_ZERO L(first): @@ -186,7 +186,7 @@ L(first): .p2align 4 L(exit): - pxor %xmm8, %xmm8 + pxor %xmm0, %xmm0 FIND_ZERO bsfq %rdx, %rdx @@ -200,23 +200,23 @@ L(exit): .p2align 4 L(loop): - movdqa 64(%rax), %xmm8 - pminub 80(%rax), %xmm8 - pminub 96(%rax), %xmm8 - pminub 112(%rax), %xmm8 - pcmpeqb %xmm11, %xmm8 - pmovmskb %xmm8, %edx + movdqa 64(%rax), %xmm0 + pminub 80(%rax), %xmm0 + pminub 96(%rax), %xmm0 + pminub 112(%rax), %xmm0 + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %edx testl %edx, %edx jne L(exit64) subq $-128, %rax - movdqa (%rax), %xmm8 - pminub 16(%rax), %xmm8 - pminub 32(%rax), %xmm8 - pminub 48(%rax), %xmm8 - pcmpeqb %xmm11, %xmm8 - pmovmskb %xmm8, %edx + movdqa (%rax), %xmm0 + pminub 16(%rax), %xmm0 + pminub 32(%rax), %xmm0 + pminub 48(%rax), %xmm0 + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %edx testl %edx, %edx jne L(exit0) jmp L(loop) @@ -225,7 +225,7 @@ L(loop): L(exit64): addq $64, %rax L(exit0): - pxor %xmm8, %xmm8 + pxor %xmm0, %xmm0 FIND_ZERO bsfq %rdx, %rdx diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S index 14a3abafb3..de0be762ed 100644 --- a/sysdeps/x86_64/strrchr.S +++ b/sysdeps/x86_64/strrchr.S @@ -1,5 +1,5 @@ /* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR. - Copyright (C) 2013-2015 Free Software Foundation, Inc. + Copyright (C) 2013-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S index 62f061bc8f..49dd4ba9f5 100644 --- a/sysdeps/x86_64/strspn.S +++ b/sysdeps/x86_64/strspn.S @@ -1,7 +1,7 @@ /* strspn (str, ss) -- Return the length of the initial segment of STR which contains only characters from SS. For AMD x86-64. - Copyright (C) 1994-2015 Free Software Foundation, Inc. + Copyright (C) 1994-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>. Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>. diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S index f24be7aae7..bd5b103d50 100644 --- a/sysdeps/x86_64/strtok.S +++ b/sysdeps/x86_64/strtok.S @@ -1,6 +1,6 @@ /* strtok (str, delim) -- Return next DELIM separated token from STR. For AMD x86-64. - Copyright (C) 1998-2015 Free Software Foundation, Inc. + Copyright (C) 1998-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Based on i686 version contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. diff --git a/sysdeps/x86_64/sub_n.S b/sysdeps/x86_64/sub_n.S index 4879ace6a3..cc9bc48b01 100644 --- a/sysdeps/x86_64/sub_n.S +++ b/sysdeps/x86_64/sub_n.S @@ -1,6 +1,6 @@ /* x86-64 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store sum in a third limb vector. - Copyright (C) 2006-2015 Free Software Foundation, Inc. + Copyright (C) 2006-2016 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify diff --git a/sysdeps/x86_64/submul_1.S b/sysdeps/x86_64/submul_1.S index f5468b97e3..3037cb9c45 100644 --- a/sysdeps/x86_64/submul_1.S +++ b/sysdeps/x86_64/submul_1.S @@ -1,6 +1,6 @@ /* x86-64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract the result from a second limb vector. - Copyright (C) 2003-2015 Free Software Foundation, Inc. + Copyright (C) 2003-2016 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h index e79a3974fd..fbe3560588 100644 --- a/sysdeps/x86_64/sysdep.h +++ b/sysdeps/x86_64/sysdep.h @@ -1,5 +1,5 @@ /* Assembler macros for x86-64. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tlsdesc.c b/sysdeps/x86_64/tlsdesc.c index 6807fa26ec..aff8b67941 100644 --- a/sysdeps/x86_64/tlsdesc.c +++ b/sysdeps/x86_64/tlsdesc.c @@ -1,5 +1,5 @@ /* Manage TLS descriptors. x86_64 version. - Copyright (C) 2005-2015 Free Software Foundation, Inc. + Copyright (C) 2005-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tst-audit.h b/sysdeps/x86_64/tst-audit.h index f2bf3aa008..94e9dd5282 100644 --- a/sysdeps/x86_64/tst-audit.h +++ b/sysdeps/x86_64/tst-audit.h @@ -1,6 +1,6 @@ /* Definitions for testing PLT entry/exit auditing. x86_64 version. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/tst-audit10.c b/sysdeps/x86_64/tst-audit10.c index 6919871564..d104341be8 100644 --- a/sysdeps/x86_64/tst-audit10.c +++ b/sysdeps/x86_64/tst-audit10.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 Free Software Foundation, Inc. +/* Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tst-auditmod10a.c b/sysdeps/x86_64/tst-auditmod10a.c index dc0d276c54..e94dbaf7fe 100644 --- a/sysdeps/x86_64/tst-auditmod10a.c +++ b/sysdeps/x86_64/tst-auditmod10a.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 Free Software Foundation, Inc. +/* Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tst-auditmod10b.c b/sysdeps/x86_64/tst-auditmod10b.c index 0eb36747d2..ad6fcafdda 100644 --- a/sysdeps/x86_64/tst-auditmod10b.c +++ b/sysdeps/x86_64/tst-auditmod10b.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 Free Software Foundation, Inc. +/* Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tst-mallocalign1.c b/sysdeps/x86_64/tst-mallocalign1.c index 89f4bed0be..3897af86c1 100644 --- a/sysdeps/x86_64/tst-mallocalign1.c +++ b/sysdeps/x86_64/tst-mallocalign1.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 Free Software Foundation, Inc. +/* Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tst-quad1.c b/sysdeps/x86_64/tst-quad1.c index c24182c6b6..1cb63a748f 100644 --- a/sysdeps/x86_64/tst-quad1.c +++ b/sysdeps/x86_64/tst-quad1.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 Free Software Foundation, Inc. +/* Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tst-quadmod1.S b/sysdeps/x86_64/tst-quadmod1.S index 3902850654..588c5016b6 100644 --- a/sysdeps/x86_64/tst-quadmod1.S +++ b/sysdeps/x86_64/tst-quadmod1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 Free Software Foundation, Inc. +/* Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tst-quadmod2.S b/sysdeps/x86_64/tst-quadmod2.S index 1d515a8530..7409a9eaa3 100644 --- a/sysdeps/x86_64/tst-quadmod2.S +++ b/sysdeps/x86_64/tst-quadmod2.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2015 Free Software Foundation, Inc. +/* Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/tst-split-dynreloc.c b/sysdeps/x86_64/tst-split-dynreloc.c new file mode 100644 index 0000000000..2f9e9b9477 --- /dev/null +++ b/sysdeps/x86_64/tst-split-dynreloc.c @@ -0,0 +1,28 @@ +/* This test will be used to create an executable with a specific + section layout in which .rela.dyn and .rela.plt are not contiguous. + For x86 case, readelf will report something like: + + ... + [10] .rela.dyn RELA + [11] .bar PROGBITS + [12] .rela.plt RELA + ... + + This is important as this case was not correctly handled by dynamic + linker in the bind-now case, and the second section was never + processed. */ + +#include <stdio.h> + +const int __attribute__ ((section(".bar"))) bar = 0x12345678; +static const char foo[] = "foo"; + +static int +do_test (void) +{ + printf ("%s %d\n", foo, bar); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/sysdeps/x86_64/tst-split-dynreloc.lds b/sysdeps/x86_64/tst-split-dynreloc.lds new file mode 100644 index 0000000000..2229e698c9 --- /dev/null +++ b/sysdeps/x86_64/tst-split-dynreloc.lds @@ -0,0 +1,5 @@ +SECTIONS +{ + .bar : { *(.bar) } +} +INSERT AFTER .rela.dyn; diff --git a/sysdeps/x86_64/tst-stack-align.h b/sysdeps/x86_64/tst-stack-align.h index 8d91a4c81e..24e8e61c35 100644 --- a/sysdeps/x86_64/tst-stack-align.h +++ b/sysdeps/x86_64/tst-stack-align.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2015 Free Software Foundation, Inc. +/* Copyright (C) 2003-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S index dad111ff38..8604289e46 100644 --- a/sysdeps/x86_64/wcschr.S +++ b/sysdeps/x86_64/wcschr.S @@ -1,5 +1,5 @@ /* wcschr with SSSE3 - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S index bd36d9f5d9..705a73b10e 100644 --- a/sysdeps/x86_64/wcscmp.S +++ b/sysdeps/x86_64/wcscmp.S @@ -1,5 +1,5 @@ /* Optimized wcscmp for x86-64 with SSE2. - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S index 8c1210ed8d..7a9175eefe 100644 --- a/sysdeps/x86_64/wcslen.S +++ b/sysdeps/x86_64/wcslen.S @@ -1,5 +1,5 @@ /* Optimized wcslen for x86-64 with SSE2. - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S index 9e28aac7f0..fb192f3ecf 100644 --- a/sysdeps/x86_64/wcsrchr.S +++ b/sysdeps/x86_64/wcsrchr.S @@ -1,5 +1,5 @@ /* wcsrchr with SSSE3 - Copyright (C) 2011-2015 Free Software Foundation, Inc. + Copyright (C) 2011-2016 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/x32/dl-machine.h b/sysdeps/x86_64/x32/dl-machine.h index f5efaa5060..47132fcd96 100644 --- a/sysdeps/x86_64/x32/dl-machine.h +++ b/sysdeps/x86_64/x32/dl-machine.h @@ -1,5 +1,5 @@ /* Machine-dependent ELF dynamic relocation inline functions. x32 version. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/rtld-memset.S b/sysdeps/x86_64/x32/fpu/s_lrint.S index f8df3334bc..aa68863553 100644 --- a/sysdeps/x86_64/rtld-memset.S +++ b/sysdeps/x86_64/x32/fpu/s_lrint.S @@ -1,6 +1,6 @@ -/* memset implementation for the dynamic linker. This is separate from the - libc implementation to avoid writing to SSE registers. - Copyright (C) 2013-2015 Free Software Foundation, Inc. +/* Round argument to nearest integral value according to current rounding + direction. + Copyright (C) 2015-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,20 +18,10 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "asm-syntax.h" - .text -/* void *memset (void *dest, char c, size_t count) - dest => %rdi - c => %rsi - count => %rdx */ -ENTRY (memset) - mov %rdx, %rcx - movzbl %sil, %eax - mov %rdi, %rdx - rep stosb - mov %rdx, %rax +ENTRY(__lrint) + cvtsd2si %xmm0,%eax ret -END (memset) -libc_hidden_builtin_def (memset) +END(__lrint) +weak_alias (__lrint, lrint) diff --git a/sysdeps/x86_64/x32/fpu/s_lrintf.S b/sysdeps/x86_64/x32/fpu/s_lrintf.S new file mode 100644 index 0000000000..bb5b1665bd --- /dev/null +++ b/sysdeps/x86_64/x32/fpu/s_lrintf.S @@ -0,0 +1,27 @@ +/* Round argument to nearest integral value according to current rounding + direction. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .text +ENTRY(__lrintf) + cvtss2si %xmm0,%eax + ret +END(__lrintf) +weak_alias (__lrintf, lrintf) diff --git a/sysdeps/x86_64/x32/fpu/s_lrintl.S b/sysdeps/x86_64/x32/fpu/s_lrintl.S new file mode 100644 index 0000000000..6bc8f6fdb9 --- /dev/null +++ b/sysdeps/x86_64/x32/fpu/s_lrintl.S @@ -0,0 +1,30 @@ +/* Round argument to nearest integral value according to current rounding + direction. + Copyright (C) 1997-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .text +ENTRY(__lrintl) + fldt 8(%rsp) + fistpl -4(%rsp) + fwait + movl -4(%rsp),%eax + ret +END(__lrintl) +weak_alias (__lrintl, lrintl) diff --git a/sysdeps/x86_64/x32/gmp-mparam.h b/sysdeps/x86_64/x32/gmp-mparam.h index 2125a70a85..df37442bfb 100644 --- a/sysdeps/x86_64/x32/gmp-mparam.h +++ b/sysdeps/x86_64/x32/gmp-mparam.h @@ -1,6 +1,6 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright (C) 2012-2015 Free Software Foundation, Inc. +Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU MP Library. diff --git a/sysdeps/x86_64/x32/nptl/tls.h b/sysdeps/x86_64/x32/nptl/tls.h index bdc74a10a4..245623494b 100644 --- a/sysdeps/x86_64/x32/nptl/tls.h +++ b/sysdeps/x86_64/x32/nptl/tls.h @@ -1,5 +1,5 @@ /* Definition for thread-local data handling. nptl/x32 version. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/x32/sysdep.h b/sysdeps/x86_64/x32/sysdep.h index 0cbc1a083f..17a1446796 100644 --- a/sysdeps/x86_64/x32/sysdep.h +++ b/sysdeps/x86_64/x32/sysdep.h @@ -1,5 +1,5 @@ /* Assembler macros for x32. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -90,7 +90,3 @@ # define R15_LP "r15d" #endif /* __ASSEMBLER__ */ - -/* On x32, it is not required to normalize a 64-bit value before using - it as a 32-bit value. */ -#define REGISTER_CAST_INT32_TO_INT64 0 |