diff options
Diffstat (limited to 'sysdeps/x86')
37 files changed, 1132 insertions, 162 deletions
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 19f5eca741..0de4f42168 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -1,10 +1,10 @@ +ifeq ($(subdir),csu) +gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym +endif + ifeq ($(subdir),elf) -CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ - -mno-sse -mno-mmx) +sysdep-dl-routines += dl-get-cpu-features -tests-special += $(objpfx)tst-ld-sse-use.out -$(objpfx)tst-ld-sse-use.out: ../sysdeps/x86/tst-ld-sse-use.sh $(objpfx)ld.so - @echo "Checking ld.so for SSE register use. This will take a few seconds..." - $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ - $(evaluate-test) +tests += tst-get-cpu-features +tests-static += tst-get-cpu-features-static endif diff --git a/sysdeps/x86/Versions b/sysdeps/x86/Versions new file mode 100644 index 0000000000..e02923708e --- /dev/null +++ b/sysdeps/x86/Versions @@ -0,0 +1,5 @@ +ld { + GLIBC_PRIVATE { + __get_cpu_features; + } +} diff --git a/sysdeps/x86/bits/byteswap-16.h b/sysdeps/x86/bits/byteswap-16.h index a12b4d7711..e922e20b49 100644 --- a/sysdeps/x86/bits/byteswap-16.h +++ b/sysdeps/x86/bits/byteswap-16.h @@ -1,5 +1,5 @@ /* Macros to swap the order of bytes in 16-bit integer values. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/byteswap.h b/sysdeps/x86/bits/byteswap.h index 8eeea6219c..f783aeab1d 100644 --- a/sysdeps/x86/bits/byteswap.h +++ b/sysdeps/x86/bits/byteswap.h @@ -1,5 +1,5 @@ /* Macros to swap the order of bytes in integer values. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/huge_vall.h b/sysdeps/x86/bits/huge_vall.h index 570c76ed3f..b19e0b4ec2 100644 --- a/sysdeps/x86/bits/huge_vall.h +++ b/sysdeps/x86/bits/huge_vall.h @@ -1,6 +1,6 @@ /* `HUGE_VALL' constant for ix86 (where it is infinity). Used by <stdlib.h> and <math.h> functions for overflow. - Copyright (C) 1992-2015 Free Software Foundation, Inc. + Copyright (C) 1992-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/link.h b/sysdeps/x86/bits/link.h index 0bf9b9a29b..8dd11c2d24 100644 --- a/sysdeps/x86/bits/link.h +++ b/sysdeps/x86/bits/link.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2015 Free Software Foundation, Inc. +/* Copyright (C) 2004-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/mathdef.h b/sysdeps/x86/bits/mathdef.h index 4bb042786a..e61c28aea3 100644 --- a/sysdeps/x86/bits/mathdef.h +++ b/sysdeps/x86/bits/mathdef.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2001-2015 Free Software Foundation, Inc. +/* Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/pthreadtypes.h b/sysdeps/x86/bits/pthreadtypes.h index 44606159d0..16b8f4fbb1 100644 --- a/sysdeps/x86/bits/pthreadtypes.h +++ b/sysdeps/x86/bits/pthreadtypes.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2015 Free Software Foundation, Inc. +/* Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/select.h b/sysdeps/x86/bits/select.h index b0bb240e3a..6f090f14df 100644 --- a/sysdeps/x86/bits/select.h +++ b/sysdeps/x86/bits/select.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1997-2015 Free Software Foundation, Inc. +/* Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/semaphore.h b/sysdeps/x86/bits/semaphore.h index 18b2b3cb02..c86dc6366c 100644 --- a/sysdeps/x86/bits/semaphore.h +++ b/sysdeps/x86/bits/semaphore.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2015 Free Software Foundation, Inc. +/* Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. diff --git a/sysdeps/x86/bits/setjmp.h b/sysdeps/x86/bits/setjmp.h index 084f34e815..bc72814f44 100644 --- a/sysdeps/x86/bits/setjmp.h +++ b/sysdeps/x86/bits/setjmp.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2001-2015 Free Software Foundation, Inc. +/* Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/string.h b/sysdeps/x86/bits/string.h index 4973620b83..e4e019f1fc 100644 --- a/sysdeps/x86/bits/string.h +++ b/sysdeps/x86/bits/string.h @@ -1,5 +1,5 @@ /* Optimized, inlined string functions. i486/x86-64 version. - Copyright (C) 2001-2015 Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,8 +20,8 @@ # error "Never use <bits/string.h> directly; include <string.h> instead." #endif -/* The ix86 processors can access unaligned multi-byte variables. */ -#define _STRING_ARCH_unaligned 1 +/* Use the unaligned string inline ABI. */ +#define _STRING_INLINE_unaligned 1 /* Enable inline functions only for i486 or better when compiling for ia32. */ diff --git a/sysdeps/x86/bits/xtitypes.h b/sysdeps/x86/bits/xtitypes.h index 95197e1522..31c1210edd 100644 --- a/sysdeps/x86/bits/xtitypes.h +++ b/sysdeps/x86/bits/xtitypes.h @@ -1,5 +1,5 @@ /* bits/xtitypes.h -- Define some types used by <bits/stropts.h>. x86-64. - Copyright (C) 2002-2015 Free Software Foundation, Inc. + Copyright (C) 2002-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym new file mode 100644 index 0000000000..a9d53d195f --- /dev/null +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c new file mode 100644 index 0000000000..218ff2bd86 --- /dev/null +++ b/sysdeps/x86/cpu-features.c @@ -0,0 +1,238 @@ +/* Initialize CPU feature data. + This file is part of the GNU C Library. + Copyright (C) 2008-2016 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <cpu-features.h> + +static inline void +get_common_indeces (struct cpu_features *cpu_features, + unsigned int *family, unsigned int *model, + unsigned int *extended_model) +{ + unsigned int eax; + __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); + GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax; + *family = (eax >> 8) & 0x0f; + *model = (eax >> 4) & 0x0f; + *extended_model = (eax >> 12) & 0xf0; + if (*family == 0x0f) + { + *family += (eax >> 20) & 0xff; + *model += *extended_model; + } +} + +static inline void +init_cpu_features (struct cpu_features *cpu_features) +{ + unsigned int ebx, ecx, edx; + unsigned int family = 0; + unsigned int model = 0; + enum cpu_features_kind kind; + +#if !HAS_CPUID + if (__get_cpuid_max (0, 0) == 0) + { + kind = arch_kind_other; + goto no_cpuid; + } +#endif + + __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx); + + /* This spells out "GenuineIntel". */ + if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) + { + unsigned int extended_model; + + kind = arch_kind_intel; + + get_common_indeces (cpu_features, &family, &model, &extended_model); + + if (family == 0x06) + { + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; + model += extended_model; + switch (model) + { + case 0x1c: + case 0x26: + /* BSF is slow on Atom. */ + cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF; + break; + + case 0x57: + /* Knights Landing. Enable Silvermont optimizations. */ + cpu_features->feature[index_Prefer_No_VZEROUPPER] + |= bit_Prefer_No_VZEROUPPER; + + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Unaligned load versions are faster than SSSE3 + on Silvermont. */ +#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +#endif +#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 +# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 +#endif + cpu_features->feature[index_Fast_Unaligned_Load] + |= (bit_Fast_Unaligned_Load + | bit_Prefer_PMINUB_for_stringop + | bit_Slow_SSE4_2); + break; + + default: + /* Unknown family 0x06 processors. Assuming this is one + of Core i3/i5/i7 processors if AVX is available. */ + if ((ecx & bit_AVX) == 0) + break; + + case 0x1a: + case 0x1e: + case 0x1f: + case 0x25: + case 0x2c: + case 0x2e: + case 0x2f: + /* Rep string instructions, copy backward, unaligned loads + and pminub are fast on Intel Core i3, i5 and i7. */ +#if index_Fast_Rep_String != index_Fast_Copy_Backward +# error index_Fast_Rep_String != index_Fast_Copy_Backward +#endif +#if index_Fast_Rep_String != index_Fast_Unaligned_Load +# error index_Fast_Rep_String != index_Fast_Unaligned_Load +#endif +#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop +# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop +#endif + cpu_features->feature[index_Fast_Rep_String] + |= (bit_Fast_Rep_String + | bit_Fast_Copy_Backward + | bit_Fast_Unaligned_Load + | bit_Prefer_PMINUB_for_stringop); + break; + } + } + } + /* This spells out "AuthenticAMD". */ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + { + unsigned int extended_model; + + kind = arch_kind_amd; + + get_common_indeces (cpu_features, &family, &model, &extended_model); + + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; + + unsigned int eax; + __cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax >= 0x80000001) + __cpuid (0x80000001, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); + + if (family == 0x15) + { + /* "Excavator" */ + if (model >= 0x60 && model <= 0x7f) + cpu_features->feature[index_Fast_Unaligned_Load] + |= bit_Fast_Unaligned_Load; + } + } + else + kind = arch_kind_other; + + /* Support i586 if CX8 is available. */ + if (HAS_CPU_FEATURE (CX8)) + cpu_features->feature[index_I586] |= bit_I586; + + /* Support i686 if CMOV is available. */ + if (HAS_CPU_FEATURE (CMOV)) + cpu_features->feature[index_I686] |= bit_I686; + + if (cpu_features->max_cpuid >= 7) + __cpuid_count (7, 0, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); + + /* Can we call xgetbv? */ + if (HAS_CPU_FEATURE (OSXSAVE)) + { + unsigned int xcrlow; + unsigned int xcrhigh; + asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); + /* Is YMM and XMM state usable? */ + if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == + (bit_YMM_state | bit_XMM_state)) + { + /* Determine if AVX is usable. */ + if (HAS_CPU_FEATURE (AVX)) + cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable; +#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load +# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load +#endif + /* Determine if AVX2 is usable. Unaligned load with 256-bit + AVX registers are faster on processors with AVX2. */ + if (HAS_CPU_FEATURE (AVX2)) + cpu_features->feature[index_AVX2_Usable] + |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; + /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and + ZMM16-ZMM31 state are enabled. */ + if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state + | bit_ZMM16_31_state)) == + (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) + { + /* Determine if AVX512F is usable. */ + if (HAS_CPU_FEATURE (AVX512F)) + { + cpu_features->feature[index_AVX512F_Usable] + |= bit_AVX512F_Usable; + /* Determine if AVX512DQ is usable. */ + if (HAS_CPU_FEATURE (AVX512DQ)) + cpu_features->feature[index_AVX512DQ_Usable] + |= bit_AVX512DQ_Usable; + } + } + /* Determine if FMA is usable. */ + if (HAS_CPU_FEATURE (FMA)) + cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable; + /* Determine if FMA4 is usable. */ + if (HAS_CPU_FEATURE (FMA4)) + cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable; + } + } + +#if !HAS_CPUID +no_cpuid: +#endif + + cpu_features->family = family; + cpu_features->model = model; + cpu_features->kind = kind; +} diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h new file mode 100644 index 0000000000..e354920d5d --- /dev/null +++ b/sysdeps/x86/cpu-features.h @@ -0,0 +1,289 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2008-2016 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef cpu_features_h +#define cpu_features_h + +#define bit_Fast_Rep_String (1 << 0) +#define bit_Fast_Copy_Backward (1 << 1) +#define bit_Slow_BSF (1 << 2) +#define bit_Fast_Unaligned_Load (1 << 4) +#define bit_Prefer_PMINUB_for_stringop (1 << 5) +#define bit_AVX_Usable (1 << 6) +#define bit_FMA_Usable (1 << 7) +#define bit_FMA4_Usable (1 << 8) +#define bit_Slow_SSE4_2 (1 << 9) +#define bit_AVX2_Usable (1 << 10) +#define bit_AVX_Fast_Unaligned_Load (1 << 11) +#define bit_AVX512F_Usable (1 << 12) +#define bit_AVX512DQ_Usable (1 << 13) +#define bit_I586 (1 << 14) +#define bit_I686 (1 << 15) +#define bit_Prefer_MAP_32BIT_EXEC (1 << 16) +#define bit_Prefer_No_VZEROUPPER (1 << 17) + +/* CPUID Feature flags. */ + +/* COMMON_CPUID_INDEX_1. */ +#define bit_CX8 (1 << 8) +#define bit_CMOV (1 << 15) +#define bit_SSE2 (1 << 26) +#define bit_SSSE3 (1 << 9) +#define bit_SSE4_1 (1 << 19) +#define bit_SSE4_2 (1 << 20) +#define bit_OSXSAVE (1 << 27) +#define bit_AVX (1 << 28) +#define bit_POPCOUNT (1 << 23) +#define bit_FMA (1 << 12) +#define bit_FMA4 (1 << 16) + +/* COMMON_CPUID_INDEX_7. */ +#define bit_RTM (1 << 11) +#define bit_AVX2 (1 << 5) +#define bit_AVX512F (1 << 16) +#define bit_AVX512DQ (1 << 17) + +/* XCR0 Feature flags. */ +#define bit_XMM_state (1 << 1) +#define bit_YMM_state (2 << 1) +#define bit_Opmask_state (1 << 5) +#define bit_ZMM0_15_state (1 << 6) +#define bit_ZMM16_31_state (1 << 7) + +/* The integer bit array index for the first set of internal feature bits. */ +#define FEATURE_INDEX_1 0 + +/* The current maximum size of the feature integer bit array. */ +#define FEATURE_INDEX_MAX 1 + +#ifdef __ASSEMBLER__ + +# include <ifunc-defines.h> +# include <rtld-global-offsets.h> + +# define index_CX8 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_CMOV COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET + +# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE +# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE +# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE +# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_I586 FEATURE_INDEX_1*FEATURE_SIZE +# define index_I686 FEATURE_INDEX_1*FEATURE_SIZE +# define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE +# define index_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE + + +# if defined (_LIBC) && !IS_IN (nonlib) +# ifdef __x86_64__ +# ifdef SHARED +# if IS_IN (rtld) +# define LOAD_RTLD_GLOBAL_RO_RDX +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip) +# else +# define LOAD_RTLD_GLOBAL_RO_RDX \ + mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), \ + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx) +# endif +# else /* SHARED */ +# define LOAD_RTLD_GLOBAL_RO_RDX +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip) +# endif /* !SHARED */ +# else /* __x86_64__ */ +# ifdef SHARED +# define LOAD_FUNC_GOT_EAX(func) \ + leal func@GOTOFF(%edx), %eax +# if IS_IN (rtld) +# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ + LOAD_PIC_REG(dx) +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx) +# else +# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ + LOAD_PIC_REG(dx); \ + mov _rtld_global_ro@GOT(%edx), %ecx +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), \ + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx) +# endif +# else /* SHARED */ +# define LOAD_FUNC_GOT_EAX(func) \ + leal func, %eax +# define LOAD_GOT_AND_RTLD_GLOBAL_RO +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name) +# endif /* !SHARED */ +# endif /* !__x86_64__ */ +# else /* _LIBC && !nonlib */ +# error "Sorry, <cpu-features.h> is unimplemented for assembler" +# endif /* !_LIBC || nonlib */ + +/* HAS_* evaluates to true if we may use the feature at runtime. */ +# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, name) +# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name) + +#else /* __ASSEMBLER__ */ + +enum + { + COMMON_CPUID_INDEX_1 = 0, + COMMON_CPUID_INDEX_7, + COMMON_CPUID_INDEX_80000001, /* for AMD */ + /* Keep the following line at the end. */ + COMMON_CPUID_INDEX_MAX + }; + +struct cpu_features +{ + enum cpu_features_kind + { + arch_kind_unknown = 0, + arch_kind_intel, + arch_kind_amd, + arch_kind_other + } kind; + int max_cpuid; + struct cpuid_registers + { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + } cpuid[COMMON_CPUID_INDEX_MAX]; + unsigned int family; + unsigned int model; + unsigned int feature[FEATURE_INDEX_MAX]; +}; + +/* Used from outside of glibc to get access to the CPU features + structure. */ +extern const struct cpu_features *__get_cpu_features (void) + __attribute__ ((const)); + +# if defined (_LIBC) && !IS_IN (nonlib) +/* Unused for x86. */ +# define INIT_ARCH() +# define __get_cpu_features() (&GLRO(dl_x86_cpu_features)) +# endif + + +/* HAS_* evaluates to true if we may use the feature at runtime. */ +# define HAS_CPU_FEATURE(name) \ + ((__get_cpu_features ()->cpuid[index_##name].reg_##name & (bit_##name)) != 0) +# define HAS_ARCH_FEATURE(name) \ + ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) + +# define index_CX8 COMMON_CPUID_INDEX_1 +# define index_CMOV COMMON_CPUID_INDEX_1 +# define index_SSE2 COMMON_CPUID_INDEX_1 +# define index_SSSE3 COMMON_CPUID_INDEX_1 +# define index_SSE4_1 COMMON_CPUID_INDEX_1 +# define index_SSE4_2 COMMON_CPUID_INDEX_1 +# define index_AVX COMMON_CPUID_INDEX_1 +# define index_AVX2 COMMON_CPUID_INDEX_7 +# define index_AVX512F COMMON_CPUID_INDEX_7 +# define index_AVX512DQ COMMON_CPUID_INDEX_7 +# define index_RTM COMMON_CPUID_INDEX_7 +# define index_FMA COMMON_CPUID_INDEX_1 +# define index_FMA4 COMMON_CPUID_INDEX_80000001 +# define index_POPCOUNT COMMON_CPUID_INDEX_1 +# define index_OSXSAVE COMMON_CPUID_INDEX_1 + +# define reg_CX8 edx +# define reg_CMOV edx +# define reg_SSE2 edx +# define reg_SSSE3 ecx +# define reg_SSE4_1 ecx +# define reg_SSE4_2 ecx +# define reg_AVX ecx +# define reg_AVX2 ebx +# define reg_AVX512F ebx +# define reg_AVX512DQ ebx +# define reg_RTM ebx +# define reg_FMA ecx +# define reg_FMA4 ecx +# define reg_POPCOUNT ecx +# define reg_OSXSAVE ecx + +# define index_Fast_Rep_String FEATURE_INDEX_1 +# define index_Fast_Copy_Backward FEATURE_INDEX_1 +# define index_Slow_BSF FEATURE_INDEX_1 +# define index_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 +# define index_AVX_Usable FEATURE_INDEX_1 +# define index_FMA_Usable FEATURE_INDEX_1 +# define index_FMA4_Usable FEATURE_INDEX_1 +# define index_Slow_SSE4_2 FEATURE_INDEX_1 +# define index_AVX2_Usable FEATURE_INDEX_1 +# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_AVX512F_Usable FEATURE_INDEX_1 +# define index_AVX512DQ_Usable FEATURE_INDEX_1 +# define index_I586 FEATURE_INDEX_1 +# define index_I686 FEATURE_INDEX_1 +# define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1 +# define index_Prefer_No_VZEROUPPER FEATURE_INDEX_1 + +#endif /* !__ASSEMBLER__ */ + +#ifdef __x86_64__ +# define HAS_CPUID 1 +#elif defined __i586__ || defined __pentium__ +# define HAS_CPUID 1 +# define HAS_I586 1 +# define HAS_I686 HAS_ARCH_FEATURE (I686) +#elif (defined __i686__ || defined __pentiumpro__ \ + || defined __pentium4__ || defined __nocona__ \ + || defined __atom__ || defined __core2__ \ + || defined __corei7__ || defined __corei7_avx__ \ + || defined __core_avx2__ || defined __nehalem__ \ + || defined __sandybridge__ || defined __haswell__ \ + || defined __knl__ || defined __bonnell__ \ + || defined __silvermont__ \ + || defined __k6__ || defined __k8__ \ + || defined __athlon__ || defined __amdfam10__ \ + || defined __bdver1__ || defined __bdver2__ \ + || defined __bdver3__ || defined __bdver4__ \ + || defined __btver1__ || defined __btver2__) +# define HAS_CPUID 1 +# define HAS_I586 1 +# define HAS_I686 1 +#else +# define HAS_CPUID 0 +# define HAS_I586 HAS_ARCH_FEATURE (I586) +# define HAS_I686 HAS_ARCH_FEATURE (I686) +#endif + +#endif /* cpu_features_h */ diff --git a/sysdeps/x86/dl-get-cpu-features.c b/sysdeps/x86/dl-get-cpu-features.c new file mode 100644 index 0000000000..839c2a4bba --- /dev/null +++ b/sysdeps/x86/dl-get-cpu-features.c @@ -0,0 +1,27 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +#include <ldsodefs.h> + +#undef __get_cpu_features + +const struct cpu_features * +__get_cpu_features (void) +{ + return &GLRO(dl_x86_cpu_features); +} diff --git a/sysdeps/x86/elide.h b/sysdeps/x86/elide.h index 47e89cfc8d..8691e6673d 100644 --- a/sysdeps/x86/elide.h +++ b/sysdeps/x86/elide.h @@ -1,5 +1,5 @@ /* elide.h: Generic lock elision support. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -93,7 +93,9 @@ elision_adapt(signed char *adapt_count, unsigned int status) ret; \ }) -/* Returns true if lock defined by IS_LOCK_FREE was elided. */ +/* Returns true if lock defined by IS_LOCK_FREE was elided. The call + to _xend crashes if the application incorrectly tries to unlock a + lock which has not been locked. */ #define ELIDE_UNLOCK(is_lock_free) \ ({ \ diff --git a/sysdeps/x86/fpu/Makefile b/sysdeps/x86/fpu/Makefile index 9cb7bb2d85..b561995658 100644 --- a/sysdeps/x86/fpu/Makefile +++ b/sysdeps/x86/fpu/Makefile @@ -1,5 +1,7 @@ ifeq ($(subdir),math) libm-support += powl_helper -tests += test-fenv-sse +tests += test-fenv-sse test-fenv-clear-sse test-fenv-x87 test-fenv-sse-2 CFLAGS-test-fenv-sse.c += -msse2 -mfpmath=sse +CFLAGS-test-fenv-clear-sse.c += -msse2 -mfpmath=sse +CFLAGS-test-fenv-sse-2.c += -msse2 -mfpmath=sse endif diff --git a/sysdeps/x86/fpu/bits/fenv.h b/sysdeps/x86/fpu/bits/fenv.h index 5583ba8f05..8c8503bd7e 100644 --- a/sysdeps/x86/fpu/bits/fenv.h +++ b/sysdeps/x86/fpu/bits/fenv.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1997-2015 Free Software Foundation, Inc. +/* Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index f9e798b556..ca43cf4b9e 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -1,5 +1,5 @@ /* Platform-specific SIMD declarations of math functions. - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -28,6 +28,12 @@ # if defined _OPENMP && _OPENMP >= 201307 /* OpenMP case. */ # define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch") +# elif __GNUC_PREREQ (6,0) +/* W/o OpenMP use GCC 6.* __attribute__ ((__simd__)). */ +# define __DECL_SIMD_x86_64 __attribute__ ((__simd__ ("notinbranch"))) +# endif + +# ifdef __DECL_SIMD_x86_64 # undef __DECL_SIMD_cos # define __DECL_SIMD_cos __DECL_SIMD_x86_64 # undef __DECL_SIMD_cosf @@ -53,34 +59,5 @@ # undef __DECL_SIMD_powf # define __DECL_SIMD_powf __DECL_SIMD_x86_64 -/* Workaround to exclude unnecessary symbol aliases in libmvec - while GCC creates the vector names based on scalar asm name. - Corresponding discussion started at - <https://gcc.gnu.org/ml/gcc/2015-06/msg00173.html>. */ -__asm__ ("_ZGVbN2v___log_finite = _ZGVbN2v_log"); -__asm__ ("_ZGVcN4v___log_finite = _ZGVcN4v_log"); -__asm__ ("_ZGVdN4v___log_finite = _ZGVdN4v_log"); -__asm__ ("_ZGVeN8v___log_finite = _ZGVeN8v_log"); -__asm__ ("_ZGVbN4v___logf_finite = _ZGVbN4v_logf"); -__asm__ ("_ZGVcN8v___logf_finite = _ZGVcN8v_logf"); -__asm__ ("_ZGVdN8v___logf_finite = _ZGVdN8v_logf"); -__asm__ ("_ZGVeN16v___logf_finite = _ZGVeN16v_logf"); -__asm__ ("_ZGVbN2v___exp_finite = _ZGVbN2v_exp"); -__asm__ ("_ZGVcN4v___exp_finite = _ZGVcN4v_exp"); -__asm__ ("_ZGVdN4v___exp_finite = _ZGVdN4v_exp"); -__asm__ ("_ZGVeN8v___exp_finite = _ZGVeN8v_exp"); -__asm__ ("_ZGVbN4v___expf_finite = _ZGVbN4v_expf"); -__asm__ ("_ZGVcN8v___expf_finite = _ZGVcN8v_expf"); -__asm__ ("_ZGVdN8v___expf_finite = _ZGVdN8v_expf"); -__asm__ ("_ZGVeN16v___expf_finite = _ZGVeN16v_expf"); -__asm__ ("_ZGVbN2vv___pow_finite = _ZGVbN2vv_pow"); -__asm__ ("_ZGVcN4vv___pow_finite = _ZGVcN4vv_pow"); -__asm__ ("_ZGVdN4vv___pow_finite = _ZGVdN4vv_pow"); -__asm__ ("_ZGVeN8vv___pow_finite = _ZGVeN8vv_pow"); -__asm__ ("_ZGVbN4vv___powf_finite = _ZGVbN4vv_powf"); -__asm__ ("_ZGVcN8vv___powf_finite = _ZGVcN8vv_powf"); -__asm__ ("_ZGVdN8vv___powf_finite = _ZGVdN8vv_powf"); -__asm__ ("_ZGVeN16vv___powf_finite = _ZGVeN16vv_powf"); - # endif #endif diff --git a/sysdeps/x86/fpu/bits/mathinline.h b/sysdeps/x86/fpu/bits/mathinline.h index c87300f7b8..0ff1aa4cec 100644 --- a/sysdeps/x86/fpu/bits/mathinline.h +++ b/sysdeps/x86/fpu/bits/mathinline.h @@ -1,5 +1,5 @@ /* Inline math functions for i387 and SSE. - Copyright (C) 1995-2015 Free Software Foundation, Inc. + Copyright (C) 1995-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/include/bits/fenv.h b/sysdeps/x86/fpu/include/bits/fenv.h index c434117d01..6e8b733f33 100644 --- a/sysdeps/x86/fpu/include/bits/fenv.h +++ b/sysdeps/x86/fpu/include/bits/fenv.h @@ -1,5 +1,5 @@ /* Wrapper for x86 bits/fenv.h for use when building glibc. - Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1997-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/powl_helper.c b/sysdeps/x86/fpu/powl_helper.c index f2774b7059..7c5d2d1492 100644 --- a/sysdeps/x86/fpu/powl_helper.c +++ b/sysdeps/x86/fpu/powl_helper.c @@ -1,5 +1,5 @@ /* Implement powl for x86 using extra-precision log. - Copyright (C) 2012-2015 Free Software Foundation, Inc. + Copyright (C) 2012-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -229,6 +229,7 @@ __powl_helper (long double x, long double y) if (negate) res = -res; asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int)); + math_check_force_underflow (res); return res; } diff --git a/sysdeps/x86/fpu/test-fenv-clear-sse.c b/sysdeps/x86/fpu/test-fenv-clear-sse.c new file mode 100644 index 0000000000..cc4b3f04c7 --- /dev/null +++ b/sysdeps/x86/fpu/test-fenv-clear-sse.c @@ -0,0 +1,45 @@ +/* Test fesetenv (FE_DFL_ENV) and fesetenv (FE_NOMASK_ENV) clear + exceptions (bug 19181). SSE version. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <stdbool.h> + +static bool +have_sse2 (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return false; + + return (edx & bit_SSE2) != 0; +} + +#define CHECK_CAN_TEST \ + do \ + { \ + if (!have_sse2 ()) \ + { \ + puts ("CPU does not support SSE2, cannot test"); \ + return 0; \ + } \ + } \ + while (0) + +#include <test-fenv-clear-main.c> diff --git a/sysdeps/x86/fpu/test-fenv-sse-2.c b/sysdeps/x86/fpu/test-fenv-sse-2.c new file mode 100644 index 0000000000..d3197c3339 --- /dev/null +++ b/sysdeps/x86/fpu/test-fenv-sse-2.c @@ -0,0 +1,176 @@ +/* Test x86-specific floating-point environment (bug 16068): SSE part. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <fenv.h> +#include <float.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +static bool +have_sse2 (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return false; + + return (edx & bit_SSE2) != 0; +} + +static uint32_t +get_sse_mxcsr (void) +{ + uint32_t temp; + __asm__ __volatile__ ("stmxcsr %0" : "=m" (temp)); + return temp; +} + +static void +set_sse_mxcsr (uint32_t val) +{ + __asm__ __volatile__ ("ldmxcsr %0" : : "m" (val)); +} + +static void +set_sse_mxcsr_bits (uint32_t mask, uint32_t bits) +{ + uint32_t mxcsr = get_sse_mxcsr (); + mxcsr = (mxcsr & ~mask) | bits; + set_sse_mxcsr (mxcsr); +} + +static int +test_sse_mxcsr_bits (const char *test, uint32_t mask, uint32_t bits) +{ + uint32_t mxcsr = get_sse_mxcsr (); + printf ("Testing %s: mxcsr = %x\n", test, mxcsr); + if ((mxcsr & mask) == bits) + { + printf ("PASS: %s\n", test); + return 0; + } + else + { + printf ("FAIL: %s\n", test); + return 1; + } +} + +#define MXCSR_FZ 0x8000 +#define MXCSR_DAZ 0x40 +#define MXCSR_DE 0x2 +#define MXCSR_DM 0x100 + +static __attribute__ ((noinline)) int +sse_tests (void) +{ + int result = 0; + fenv_t env1, env2; + /* Test FZ bit. */ + fegetenv (&env1); + set_sse_mxcsr_bits (MXCSR_FZ, MXCSR_FZ); + fegetenv (&env2); + fesetenv (&env1); + result |= test_sse_mxcsr_bits ("fesetenv FZ restoration", + MXCSR_FZ, 0); + set_sse_mxcsr_bits (MXCSR_FZ, 0); + fesetenv (&env2); + result |= test_sse_mxcsr_bits ("fesetenv FZ restoration 2", + MXCSR_FZ, MXCSR_FZ); + set_sse_mxcsr_bits (MXCSR_FZ, MXCSR_FZ); + fesetenv (FE_NOMASK_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_NOMASK_ENV) FZ restoration", + MXCSR_FZ, 0); + set_sse_mxcsr_bits (MXCSR_FZ, MXCSR_FZ); + fesetenv (FE_DFL_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_DFL_ENV) FZ restoration", + MXCSR_FZ, 0); + /* Test DAZ bit. */ + set_sse_mxcsr_bits (MXCSR_DAZ, MXCSR_DAZ); + fegetenv (&env2); + fesetenv (&env1); + result |= test_sse_mxcsr_bits ("fesetenv DAZ restoration", + MXCSR_DAZ, 0); + set_sse_mxcsr_bits (MXCSR_DAZ, 0); + fesetenv (&env2); + result |= test_sse_mxcsr_bits ("fesetenv DAZ restoration 2", + MXCSR_DAZ, MXCSR_DAZ); + set_sse_mxcsr_bits (MXCSR_DAZ, MXCSR_DAZ); + fesetenv (FE_NOMASK_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_NOMASK_ENV) DAZ restoration", + MXCSR_DAZ, 0); + set_sse_mxcsr_bits (MXCSR_DAZ, MXCSR_DAZ); + fesetenv (FE_DFL_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_DFL_ENV) DAZ restoration", + MXCSR_DAZ, 0); + /* Test DM bit. */ + set_sse_mxcsr_bits (MXCSR_DM, 0); + fegetenv (&env2); + fesetenv (&env1); + result |= test_sse_mxcsr_bits ("fesetenv DM restoration", + MXCSR_DM, MXCSR_DM); + set_sse_mxcsr_bits (MXCSR_DM, MXCSR_DM); + fesetenv (&env2); + result |= test_sse_mxcsr_bits ("fesetenv DM restoration 2", + MXCSR_DM, 0); + set_sse_mxcsr_bits (MXCSR_DM, 0); + /* Presume FE_NOMASK_ENV should leave the "denormal operand" + exception masked, as not a standard exception. */ + fesetenv (FE_NOMASK_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_NOMASK_ENV) DM restoration", + MXCSR_DM, MXCSR_DM); + set_sse_mxcsr_bits (MXCSR_DM, 0); + fesetenv (FE_DFL_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_DFL_ENV) DM restoration", + MXCSR_DM, MXCSR_DM); + /* Test DE bit. */ + set_sse_mxcsr_bits (MXCSR_DE, MXCSR_DE); + fegetenv (&env2); + fesetenv (&env1); + result |= test_sse_mxcsr_bits ("fesetenv DE restoration", + MXCSR_DE, 0); + set_sse_mxcsr_bits (MXCSR_DE, 0); + fesetenv (&env2); + result |= test_sse_mxcsr_bits ("fesetenv DE restoration 2", + MXCSR_DE, MXCSR_DE); + set_sse_mxcsr_bits (MXCSR_DE, MXCSR_DE); + fesetenv (FE_NOMASK_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_NOMASK_ENV) DE restoration", + MXCSR_DE, 0); + set_sse_mxcsr_bits (MXCSR_DE, MXCSR_DE); + fesetenv (FE_DFL_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_DFL_ENV) DE restoration", + MXCSR_DE, 0); + return result; +} + +static int +do_test (void) +{ + if (!have_sse2 ()) + { + puts ("CPU does not support SSE2, cannot test"); + return 0; + } + return sse_tests (); +} + +#define TEST_FUNCTION do_test () +#include <test-skeleton.c> diff --git a/sysdeps/x86/fpu/test-fenv-sse.c b/sysdeps/x86/fpu/test-fenv-sse.c index 5b552a964b..4f4ff6a0a6 100644 --- a/sysdeps/x86/fpu/test-fenv-sse.c +++ b/sysdeps/x86/fpu/test-fenv-sse.c @@ -1,5 +1,5 @@ /* Test floating-point environment includes SSE state (bug 16064). - Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copyright (C) 2014-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/test-fenv-x87.c b/sysdeps/x86/fpu/test-fenv-x87.c new file mode 100644 index 0000000000..b6f0b6af78 --- /dev/null +++ b/sysdeps/x86/fpu/test-fenv-x87.c @@ -0,0 +1,169 @@ +/* Test x86-specific floating-point environment (bug 16068): x87 part. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <float.h> +#include <fpu_control.h> +#include <stdint.h> +#include <stdio.h> + +static uint16_t +get_x87_cw (void) +{ + fpu_control_t cw; + _FPU_GETCW (cw); + return cw; +} + +static void +set_x87_cw (uint16_t val) +{ + fpu_control_t cw = val; + _FPU_SETCW (cw); +} + +static void +set_x87_cw_bits (uint16_t mask, uint16_t bits) +{ + uint16_t cw = get_x87_cw (); + cw = (cw & ~mask) | bits; + set_x87_cw (cw); +} + +static int +test_x87_cw_bits (const char *test, uint16_t mask, uint16_t bits) +{ + uint16_t cw = get_x87_cw (); + printf ("Testing %s: cw = %x\n", test, cw); + if ((cw & mask) == bits) + { + printf ("PASS: %s\n", test); + return 0; + } + else + { + printf ("FAIL: %s\n", test); + return 1; + } +} + +static uint16_t +get_x87_sw (void) +{ + uint16_t temp; + __asm__ __volatile__ ("fnstsw %0" : "=a" (temp)); + return temp; +} + +static void +set_x87_sw_bits (uint16_t mask, uint16_t bits) +{ + fenv_t temp; + __asm__ __volatile__ ("fnstenv %0" : "=m" (temp)); + temp.__status_word = (temp.__status_word & ~mask) | bits; + __asm__ __volatile__ ("fldenv %0" : : "m" (temp)); +} + +static int +test_x87_sw_bits (const char *test, uint16_t mask, uint16_t bits) +{ + uint16_t sw = get_x87_sw (); + printf ("Testing %s: sw = %x\n", test, sw); + if ((sw & mask) == bits) + { + printf ("PASS: %s\n", test); + return 0; + } + else + { + printf ("FAIL: %s\n", test); + return 1; + } +} + +#define X87_CW_PREC_MASK _FPU_EXTENDED + +static int +do_test (void) +{ + int result = 0; + fenv_t env1, env2; + /* Test precision mask. */ + fegetenv (&env1); + set_x87_cw_bits (X87_CW_PREC_MASK, _FPU_SINGLE); + fegetenv (&env2); + fesetenv (&env1); + result |= test_x87_cw_bits ("fesetenv precision restoration", + X87_CW_PREC_MASK, _FPU_EXTENDED); + set_x87_cw_bits (X87_CW_PREC_MASK, _FPU_EXTENDED); + fesetenv (&env2); + result |= test_x87_cw_bits ("fesetenv precision restoration 2", + X87_CW_PREC_MASK, _FPU_SINGLE); + set_x87_cw_bits (X87_CW_PREC_MASK, _FPU_DOUBLE); + fesetenv (FE_NOMASK_ENV); + result |= test_x87_cw_bits ("fesetenv (FE_NOMASK_ENV) precision restoration", + X87_CW_PREC_MASK, _FPU_EXTENDED); + set_x87_cw_bits (X87_CW_PREC_MASK, _FPU_SINGLE); + fesetenv (FE_DFL_ENV); + result |= test_x87_cw_bits ("fesetenv (FE_DFL_ENV) precision restoration", + X87_CW_PREC_MASK, _FPU_EXTENDED); + /* Test x87 denormal operand masking. */ + set_x87_cw_bits (_FPU_MASK_DM, 0); + fegetenv (&env2); + fesetenv (&env1); + result |= test_x87_cw_bits ("fesetenv denormal mask restoration", + _FPU_MASK_DM, _FPU_MASK_DM); + set_x87_cw_bits (_FPU_MASK_DM, _FPU_MASK_DM); + fesetenv (&env2); + result |= test_x87_cw_bits ("fesetenv denormal mask restoration 2", + _FPU_MASK_DM, 0); + set_x87_cw_bits (_FPU_MASK_DM, 0); + /* Presume FE_NOMASK_ENV should leave the "denormal operand" + exception masked, as not a standard exception. */ + fesetenv (FE_NOMASK_ENV); + result |= test_x87_cw_bits ("fesetenv (FE_NOMASK_ENV) denormal mask " + "restoration", + _FPU_MASK_DM, _FPU_MASK_DM); + set_x87_cw_bits (_FPU_MASK_DM, 0); + fesetenv (FE_DFL_ENV); + result |= test_x87_cw_bits ("fesetenv (FE_DFL_ENV) denormal mask " + "restoration", + _FPU_MASK_DM, _FPU_MASK_DM); + /* Test x87 denormal operand exception. */ + set_x87_sw_bits (__FE_DENORM, __FE_DENORM); + fegetenv (&env2); + fesetenv (&env1); + result |= test_x87_sw_bits ("fesetenv denormal exception restoration", + __FE_DENORM, 0); + set_x87_sw_bits (__FE_DENORM, 0); + fesetenv (&env2); + result |= test_x87_sw_bits ("fesetenv denormal exception restoration 2", + __FE_DENORM, __FE_DENORM); + set_x87_sw_bits (__FE_DENORM, __FE_DENORM); + fesetenv (FE_NOMASK_ENV); + result |= test_x87_sw_bits ("fesetenv (FE_NOMASK_ENV) exception restoration", + __FE_DENORM, 0); + set_x87_sw_bits (__FE_DENORM, __FE_DENORM); + fesetenv (FE_DFL_ENV); + result |= test_x87_sw_bits ("fesetenv (FE_DFL_ENV) exception restoration", + __FE_DENORM, 0); + return result; +} + +#define TEST_FUNCTION do_test () +#include <test-skeleton.c> diff --git a/sysdeps/x86/fpu_control.h b/sysdeps/x86/fpu_control.h index cef2f651fb..4c960580a3 100644 --- a/sysdeps/x86/fpu_control.h +++ b/sysdeps/x86/fpu_control.h @@ -1,5 +1,5 @@ /* FPU control word bits. x86 version. - Copyright (C) 1993-2015 Free Software Foundation, Inc. + Copyright (C) 1993-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Olaf Flebbe. diff --git a/sysdeps/x86/init-arch.h b/sysdeps/x86/init-arch.h new file mode 100644 index 0000000000..17a38d2967 --- /dev/null +++ b/sysdeps/x86/init-arch.h @@ -0,0 +1,35 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2008-2016 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef __ASSEMBLER__ +# include <cpu-features.h> +#else +# include <ldsodefs.h> +#endif + +#ifndef __x86_64__ +/* Due to the reordering and the other nifty extensions in i686, it is + not really good to use heavily i586 optimized code on an i686. It's + better to use i486 code if it isn't an i586. */ +# if MINIMUM_ISA == 686 +# define USE_I586 0 +# define USE_I686 1 +# else +# define USE_I586 (HAS_ARCH_FEATURE (I586) && !HAS_ARCH_FEATURE (I686)) +# define USE_I686 HAS_ARCH_FEATURE (I686) +# endif +#endif diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c new file mode 100644 index 0000000000..3b5ea6e933 --- /dev/null +++ b/sysdeps/x86/libc-start.c @@ -0,0 +1,41 @@ +/* Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef SHARED +# include <csu/libc-start.c> +# else +/* The main work is done in the generic function. */ +# define LIBC_START_DISABLE_INLINE +# define LIBC_START_MAIN generic_start_main +# include <csu/libc-start.c> +# include <cpu-features.h> +# include <cpu-features.c> + +extern struct cpu_features _dl_x86_cpu_features; + +int +__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), + int argc, char **argv, + __typeof (main) init, + void (*fini) (void), + void (*rtld_fini) (void), void *stack_end) +{ + init_cpu_features (&_dl_x86_cpu_features); + return generic_start_main (main, argc, argv, init, fini, rtld_fini, + stack_end); +} +#endif diff --git a/sysdeps/x86/bits/linkmap.h b/sysdeps/x86/linkmap.h index dd0d140874..dd0d140874 100644 --- a/sysdeps/x86/bits/linkmap.h +++ b/sysdeps/x86/linkmap.h diff --git a/sysdeps/x86/rtld-global-offsets.sym b/sysdeps/x86/rtld-global-offsets.sym new file mode 100644 index 0000000000..a9d53d195f --- /dev/null +++ b/sysdeps/x86/rtld-global-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) diff --git a/sysdeps/x86/string_private.h b/sysdeps/x86/string_private.h new file mode 100644 index 0000000000..e7281eb4ea --- /dev/null +++ b/sysdeps/x86/string_private.h @@ -0,0 +1,20 @@ +/* Define _STRING_ARCH_unaligned. i486/x86-64 version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The ix86 processors can access unaligned multi-byte variables. */ +#define _STRING_ARCH_unaligned 1 diff --git a/sysdeps/x86/tst-get-cpu-features-static.c b/sysdeps/x86/tst-get-cpu-features-static.c new file mode 100644 index 0000000000..03f59060c5 --- /dev/null +++ b/sysdeps/x86/tst-get-cpu-features-static.c @@ -0,0 +1 @@ +#include "tst-get-cpu-features.c" diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c new file mode 100644 index 0000000000..da20063a00 --- /dev/null +++ b/sysdeps/x86/tst-get-cpu-features.c @@ -0,0 +1,31 @@ +/* Test case for x86 __get_cpu_features interface + Copyright (C) 2015-2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <cpu-features.h> + +static int +do_test (void) +{ + if (__get_cpu_features ()->kind == arch_kind_unknown) + abort (); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../../test-skeleton.c" diff --git a/sysdeps/x86/tst-ld-sse-use.sh b/sysdeps/x86/tst-ld-sse-use.sh deleted file mode 100755 index 839de18546..0000000000 --- a/sysdeps/x86/tst-ld-sse-use.sh +++ /dev/null @@ -1,103 +0,0 @@ -#! /bin/bash -# Make sure no code in ld.so uses xmm/ymm/zmm registers on x86-64. -# Copyright (C) 2009-2015 Free Software Foundation, Inc. -# This file is part of the GNU C Library. - -# The GNU C Library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. - -# The GNU C Library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with the GNU C Library; if not, see -# <http://www.gnu.org/licenses/>. - -set -e - -objpfx="$1" -NM="$2" -OBJDUMP="$3" -READELF="$4" - -tmp=$(mktemp ${objpfx}tst-ld-sse-use.XXXXXX) -trap 'rm -f "$tmp"' 1 2 3 15 - -# List of object files we have to test -rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os | - awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' | - sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,') -rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)" - -# OBJECT symbols can be ignored. -$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a | -egrep " OBJECT *GLOBAL " | -awk '{if ($7 != "ABS") print $8 }' | -sort -u > "$tmp" -declare -a objects -objects=($(cat "$tmp")) - -objs="dl-runtime.os" -tocheck="dl-runtime.os" - -while test -n "$objs"; do - this="$objs" - objs="" - - for f in $this; do - undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}') - if test -n "$undef"; then - for s in $undef; do - for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do - if test "$obj" = "$s"; then - continue 2 - fi - done - for o in $rtldobjs; do - ro=$(echo "$objpfx"../*/"$o") - if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then - if ! (echo "$tocheck $objs" | fgrep -qs "$o"); then - echo "$o needed for $s" - objs="$objs $o" - fi - break; - fi - done - done - fi - done - tocheck="$tocheck$objs" -done - -echo -echo -echo "object files needed: $tocheck" - -cp /dev/null "$tmp" -for f in $tocheck; do - $OBJDUMP -d "$objpfx"../*/"$f" | - awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xyz]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' | - while read fct; do - if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then - continue; - fi - echo "function $fct in $f modifies xmm/ymm/zmm" >> "$tmp" - result=1 - done -done - -if test -s "$tmp"; then - echo - echo - cat "$tmp" - result=1 -else - result=0 -fi - -rm "$tmp" -exit $result |