diff options
Diffstat (limited to 'sysdeps/x86')
82 files changed, 4398 insertions, 3530 deletions
diff --git a/sysdeps/x86/Makeconfig b/sysdeps/x86/Makeconfig new file mode 100644 index 0000000000..d701f8294d --- /dev/null +++ b/sysdeps/x86/Makeconfig @@ -0,0 +1 @@ +float64x-alias-fcts = yes diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 0de4f42168..337b0b63dc 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -1,10 +1,95 @@ ifeq ($(subdir),csu) -gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym +gen-as-const-headers += cpu-features-offsets.sym endif ifeq ($(subdir),elf) sysdep-dl-routines += dl-get-cpu-features -tests += tst-get-cpu-features +tests += tst-get-cpu-features tst-get-cpu-features-static tests-static += tst-get-cpu-features-static endif + +ifeq ($(subdir),setjmp) +gen-as-const-headers += jmp_buf-ssp.sym +sysdep_routines += __longjmp_cancel +endif + +ifeq ($(enable-cet),yes) +ifeq ($(subdir),elf) +sysdep-dl-routines += dl-cet + +tests += tst-cet-legacy-1 tst-cet-legacy-2 tst-cet-legacy-2a \ + tst-cet-legacy-3 tst-cet-legacy-4 +ifneq (no,$(have-tunables)) +tests += tst-cet-legacy-4a tst-cet-legacy-4b tst-cet-legacy-4c +endif +modules-names += tst-cet-legacy-mod-1 tst-cet-legacy-mod-2 \ + tst-cet-legacy-mod-4 + +CFLAGS-tst-cet-legacy-2.c += -fcf-protection=branch +CFLAGS-tst-cet-legacy-2a.c += -fcf-protection +CFLAGS-tst-cet-legacy-mod-1.c += -fcf-protection=none +CFLAGS-tst-cet-legacy-mod-2.c += -fcf-protection=none +CFLAGS-tst-cet-legacy-3.c += -fcf-protection=none +CFLAGS-tst-cet-legacy-4.c += -fcf-protection=branch +CFLAGS-tst-cet-legacy-4a.c += -fcf-protection +CFLAGS-tst-cet-legacy-4b.c += -fcf-protection +CFLAGS-tst-cet-legacy-mod-4.c += -fcf-protection=none + +$(objpfx)tst-cet-legacy-1: $(objpfx)tst-cet-legacy-mod-1.so \ + $(objpfx)tst-cet-legacy-mod-2.so +$(objpfx)tst-cet-legacy-2: $(objpfx)tst-cet-legacy-mod-2.so $(libdl) +$(objpfx)tst-cet-legacy-2.out: $(objpfx)tst-cet-legacy-mod-1.so +$(objpfx)tst-cet-legacy-2a: $(objpfx)tst-cet-legacy-mod-2.so $(libdl) +$(objpfx)tst-cet-legacy-2a.out: 
$(objpfx)tst-cet-legacy-mod-1.so +$(objpfx)tst-cet-legacy-4: $(libdl) +$(objpfx)tst-cet-legacy-4.out: $(objpfx)tst-cet-legacy-mod-4.so +ifneq (no,$(have-tunables)) +$(objpfx)tst-cet-legacy-4a: $(libdl) +$(objpfx)tst-cet-legacy-4a.out: $(objpfx)tst-cet-legacy-mod-4.so +tst-cet-legacy-4a-ENV = GLIBC_TUNABLES=glibc.tune.x86_shstk=permissive +$(objpfx)tst-cet-legacy-4b: $(libdl) +$(objpfx)tst-cet-legacy-4b.out: $(objpfx)tst-cet-legacy-mod-4.so +tst-cet-legacy-4b-ENV = GLIBC_TUNABLES=glibc.tune.x86_shstk=on +$(objpfx)tst-cet-legacy-4c: $(libdl) +$(objpfx)tst-cet-legacy-4c.out: $(objpfx)tst-cet-legacy-mod-4.so +tst-cet-legacy-4c-ENV = GLIBC_TUNABLES=glibc.tune.x86_shstk=off +endif +endif + +# Add -fcf-protection to CFLAGS when CET is enabled. +CFLAGS-.o += -fcf-protection +CFLAGS-.os += -fcf-protection +CFLAGS-.op += -fcf-protection +CFLAGS-.oS += -fcf-protection + +# Compile assembly codes with <cet.h> when CET is enabled. +asm-CPPFLAGS += -fcf-protection -include cet.h + +ifeq ($(subdir),elf) +ifeq (yes,$(build-shared)) +tests-special += $(objpfx)check-cet.out +endif + +# FIXME: Can't use all-built-dso in elf/Makefile since this file is +# processed before elf/Makefile. Duplicate it here. +cet-built-dso := $(common-objpfx)elf/ld.so $(common-objpfx)libc.so \ + $(filter-out $(common-objpfx)linkobj/libc.so, \ + $(sort $(wildcard $(addprefix $(common-objpfx), \ + */lib*.so \ + iconvdata/*.so)))) + +$(cet-built-dso:=.note): %.note: % + @rm -f $@T + LC_ALL=C $(READELF) -n $< > $@T + test -s $@T + mv -f $@T $@ +common-generated += $(cet-built-dso:$(common-objpfx)%=%.note) + +$(objpfx)check-cet.out: $(..)sysdeps/x86/check-cet.awk \ + $(cet-built-dso:=.note) + LC_ALL=C $(AWK) -f $^ > $@; \ + $(evaluate-test) +generated += check-cet.out +endif +endif diff --git a/sysdeps/x86/__longjmp_cancel.S b/sysdeps/x86/__longjmp_cancel.S new file mode 100644 index 0000000000..b57dbfa376 --- /dev/null +++ b/sysdeps/x86/__longjmp_cancel.S @@ -0,0 +1,20 @@ +/* __longjmp_cancel for x86. 
+ Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __longjmp __longjmp_cancel +#include <__longjmp.S> diff --git a/sysdeps/x86/bits/byteswap-16.h b/sysdeps/x86/bits/byteswap-16.h deleted file mode 100644 index e922e20b49..0000000000 --- a/sysdeps/x86/bits/byteswap-16.h +++ /dev/null @@ -1,49 +0,0 @@ -/* Macros to swap the order of bytes in 16-bit integer values. - Copyright (C) 2012-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _BITS_BYTESWAP_H -# error "Never use <bits/byteswap-16.h> directly; include <byteswap.h> instead." 
-#endif - -#ifdef __GNUC__ -# if __GNUC__ >= 2 -# define __bswap_16(x) \ - (__extension__ \ - ({ unsigned short int __v, __x = (unsigned short int) (x); \ - if (__builtin_constant_p (__x)) \ - __v = __bswap_constant_16 (__x); \ - else \ - __asm__ ("rorw $8, %w0" \ - : "=r" (__v) \ - : "0" (__x) \ - : "cc"); \ - __v; })) -# else -/* This is better than nothing. */ -# define __bswap_16(x) \ - (__extension__ \ - ({ unsigned short int __x = (unsigned short int) (x); \ - __bswap_constant_16 (__x); })) -# endif -#else -static __inline unsigned short int -__bswap_16 (unsigned short int __bsx) -{ - return __bswap_constant_16 (__bsx); -} -#endif diff --git a/sysdeps/x86/bits/byteswap.h b/sysdeps/x86/bits/byteswap.h deleted file mode 100644 index f783aeab1d..0000000000 --- a/sysdeps/x86/bits/byteswap.h +++ /dev/null @@ -1,155 +0,0 @@ -/* Macros to swap the order of bytes in integer values. - Copyright (C) 1997-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _BYTESWAP_H && !defined _NETINET_IN_H && !defined _ENDIAN_H -# error "Never use <bits/byteswap.h> directly; include <byteswap.h> instead." -#endif - -#ifndef _BITS_BYTESWAP_H -#define _BITS_BYTESWAP_H 1 - -#include <features.h> -#include <bits/types.h> -#include <bits/wordsize.h> - -/* Swap bytes in 16 bit value. 
*/ -#define __bswap_constant_16(x) \ - ((unsigned short int) ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))) - -/* Get __bswap_16. */ -#include <bits/byteswap-16.h> - -/* Swap bytes in 32 bit value. */ -#define __bswap_constant_32(x) \ - ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ - (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) - -#ifdef __GNUC__ -# if __GNUC_PREREQ (4, 3) -static __inline unsigned int -__bswap_32 (unsigned int __bsx) -{ - return __builtin_bswap32 (__bsx); -} -# elif __GNUC__ >= 2 -# if __WORDSIZE == 64 || (defined __i486__ || defined __pentium__ \ - || defined __pentiumpro__ || defined __pentium4__ \ - || defined __k8__ || defined __athlon__ \ - || defined __k6__ || defined __nocona__ \ - || defined __core2__ || defined __geode__ \ - || defined __amdfam10__) -/* To swap the bytes in a word the i486 processors and up provide the - `bswap' opcode. On i386 we have to use three instructions. */ -# define __bswap_32(x) \ - (__extension__ \ - ({ unsigned int __v, __x = (x); \ - if (__builtin_constant_p (__x)) \ - __v = __bswap_constant_32 (__x); \ - else \ - __asm__ ("bswap %0" : "=r" (__v) : "0" (__x)); \ - __v; })) -# else -# define __bswap_32(x) \ - (__extension__ \ - ({ unsigned int __v, __x = (x); \ - if (__builtin_constant_p (__x)) \ - __v = __bswap_constant_32 (__x); \ - else \ - __asm__ ("rorw $8, %w0;" \ - "rorl $16, %0;" \ - "rorw $8, %w0" \ - : "=r" (__v) \ - : "0" (__x) \ - : "cc"); \ - __v; })) -# endif -# else -# define __bswap_32(x) \ - (__extension__ \ - ({ unsigned int __x = (x); __bswap_constant_32 (__x); })) -# endif -#else -static __inline unsigned int -__bswap_32 (unsigned int __bsx) -{ - return __bswap_constant_32 (__bsx); -} -#endif - - -#if __GNUC_PREREQ (2, 0) -/* Swap bytes in 64 bit value. 
*/ -# define __bswap_constant_64(x) \ - (__extension__ ((((x) & 0xff00000000000000ull) >> 56) \ - | (((x) & 0x00ff000000000000ull) >> 40) \ - | (((x) & 0x0000ff0000000000ull) >> 24) \ - | (((x) & 0x000000ff00000000ull) >> 8) \ - | (((x) & 0x00000000ff000000ull) << 8) \ - | (((x) & 0x0000000000ff0000ull) << 24) \ - | (((x) & 0x000000000000ff00ull) << 40) \ - | (((x) & 0x00000000000000ffull) << 56))) - -# if __GNUC_PREREQ (4, 3) -static __inline __uint64_t -__bswap_64 (__uint64_t __bsx) -{ - return __builtin_bswap64 (__bsx); -} -# elif __WORDSIZE == 64 -# define __bswap_64(x) \ - (__extension__ \ - ({ __uint64_t __v, __x = (x); \ - if (__builtin_constant_p (__x)) \ - __v = __bswap_constant_64 (__x); \ - else \ - __asm__ ("bswap %q0" : "=r" (__v) : "0" (__x)); \ - __v; })) -# else -# define __bswap_64(x) \ - (__extension__ \ - ({ union { __extension__ __uint64_t __ll; \ - unsigned int __l[2]; } __w, __r; \ - if (__builtin_constant_p (x)) \ - __r.__ll = __bswap_constant_64 (x); \ - else \ - { \ - __w.__ll = (x); \ - __r.__l[0] = __bswap_32 (__w.__l[1]); \ - __r.__l[1] = __bswap_32 (__w.__l[0]); \ - } \ - __r.__ll; })) -# endif -#else -# define __bswap_constant_64(x) \ - ((((x) & 0xff00000000000000ull) >> 56) \ - | (((x) & 0x00ff000000000000ull) >> 40) \ - | (((x) & 0x0000ff0000000000ull) >> 24) \ - | (((x) & 0x000000ff00000000ull) >> 8) \ - | (((x) & 0x00000000ff000000ull) << 8) \ - | (((x) & 0x0000000000ff0000ull) << 24) \ - | (((x) & 0x000000000000ff00ull) << 40) \ - | (((x) & 0x00000000000000ffull) << 56)) - -static __inline __uint64_t -__bswap_64 (__uint64_t __bsx) -{ - return __bswap_constant_64 (__bsx); -} -#endif - -#endif /* _BITS_BYTESWAP_H */ diff --git a/sysdeps/x86/bits/floatn.h b/sysdeps/x86/bits/floatn.h new file mode 100644 index 0000000000..49c75f26c5 --- /dev/null +++ b/sysdeps/x86/bits/floatn.h @@ -0,0 +1,121 @@ +/* Macros to control TS 18661-3 glibc features on x86. + Copyright (C) 2017-2018 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_FLOATN_H +#define _BITS_FLOATN_H + +#include <features.h> + +/* Defined to 1 if the current compiler invocation provides a + floating-point type with the IEEE 754 binary128 format, and this + glibc includes corresponding *f128 interfaces for it. The required + libgcc support was added some time after the basic compiler + support, for x86_64 and x86. */ +#if (defined __x86_64__ \ + ? __GNUC_PREREQ (4, 3) \ + : (defined __GNU__ ? __GNUC_PREREQ (4, 5) : __GNUC_PREREQ (4, 4))) +# define __HAVE_FLOAT128 1 +#else +# define __HAVE_FLOAT128 0 +#endif + +/* Defined to 1 if __HAVE_FLOAT128 is 1 and the type is ABI-distinct + from the default float, double and long double types in this glibc. */ +#if __HAVE_FLOAT128 +# define __HAVE_DISTINCT_FLOAT128 1 +#else +# define __HAVE_DISTINCT_FLOAT128 0 +#endif + +/* Defined to 1 if the current compiler invocation provides a + floating-point type with the right format for _Float64x, and this + glibc includes corresponding *f64x interfaces for it. */ +#define __HAVE_FLOAT64X 1 + +/* Defined to 1 if __HAVE_FLOAT64X is 1 and _Float64x has the format + of long double. Otherwise, if __HAVE_FLOAT64X is 1, _Float64x has + the format of _Float128, which must be different from that of long + double. 
*/ +#define __HAVE_FLOAT64X_LONG_DOUBLE 1 + +#ifndef __ASSEMBLER__ + +/* Defined to concatenate the literal suffix to be used with _Float128 + types, if __HAVE_FLOAT128 is 1. */ +# if __HAVE_FLOAT128 +# if !__GNUC_PREREQ (7, 0) || defined __cplusplus +/* The literal suffix f128 exists only since GCC 7.0. */ +# define __f128(x) x##q +# else +# define __f128(x) x##f128 +# endif +# endif + +/* Defined to a complex binary128 type if __HAVE_FLOAT128 is 1. */ +# if __HAVE_FLOAT128 +# if !__GNUC_PREREQ (7, 0) || defined __cplusplus +/* Add a typedef for older GCC compilers which don't natively support + _Complex _Float128. */ +typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__))); +# define __CFLOAT128 __cfloat128 +# else +# define __CFLOAT128 _Complex _Float128 +# endif +# endif + +/* The remaining of this file provides support for older compilers. */ +# if __HAVE_FLOAT128 + +/* The type _Float128 exists only since GCC 7.0. */ +# if !__GNUC_PREREQ (7, 0) || defined __cplusplus +typedef __float128 _Float128; +# endif + +/* __builtin_huge_valf128 doesn't exist before GCC 7.0. */ +# if !__GNUC_PREREQ (7, 0) +# define __builtin_huge_valf128() ((_Float128) __builtin_huge_val ()) +# endif + +/* Older GCC has only a subset of built-in functions for _Float128 on + x86, and __builtin_infq is not usable in static initializers. + Converting a narrower sNaN to _Float128 produces a quiet NaN, so + attempts to use _Float128 sNaNs will not work properly with older + compilers. */ +# if !__GNUC_PREREQ (7, 0) +# define __builtin_copysignf128 __builtin_copysignq +# define __builtin_fabsf128 __builtin_fabsq +# define __builtin_inff128() ((_Float128) __builtin_inf ()) +# define __builtin_nanf128(x) ((_Float128) __builtin_nan (x)) +# define __builtin_nansf128(x) ((_Float128) __builtin_nans (x)) +# endif + +/* In math/math.h, __MATH_TG will expand signbit to __builtin_signbit*, + e.g.: __builtin_signbitf128, before GCC 6. 
However, there has never + been a __builtin_signbitf128 in GCC and the type-generic builtin is + only available since GCC 6. */ +# if !__GNUC_PREREQ (6, 0) +# define __builtin_signbitf128 __signbitf128 +# endif + +# endif + +#endif /* !__ASSEMBLER__. */ + +#include <bits/floatn-common.h> + +#endif /* _BITS_FLOATN_H */ diff --git a/sysdeps/x86/bits/flt-eval-method.h b/sysdeps/x86/bits/flt-eval-method.h new file mode 100644 index 0000000000..a6134a455f --- /dev/null +++ b/sysdeps/x86/bits/flt-eval-method.h @@ -0,0 +1,33 @@ +/* Define __GLIBC_FLT_EVAL_METHOD. x86 version. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/flt-eval-method.h> directly; include <math.h> instead." +#endif + +#ifdef __FLT_EVAL_METHOD__ +# if __FLT_EVAL_METHOD__ == -1 +# define __GLIBC_FLT_EVAL_METHOD 2 +# else +# define __GLIBC_FLT_EVAL_METHOD __FLT_EVAL_METHOD__ +# endif +#elif defined __x86_64__ +# define __GLIBC_FLT_EVAL_METHOD 0 +#else +# define __GLIBC_FLT_EVAL_METHOD 2 +#endif diff --git a/sysdeps/x86/bits/fp-logb.h b/sysdeps/x86/bits/fp-logb.h new file mode 100644 index 0000000000..267c7ec1e1 --- /dev/null +++ b/sysdeps/x86/bits/fp-logb.h @@ -0,0 +1,24 @@ +/* Define __FP_LOGB0_IS_MIN and __FP_LOGBNAN_IS_MIN. x86 version. 
+ Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/fp-logb.h> directly; include <math.h> instead." +#endif + +#define __FP_LOGB0_IS_MIN 1 +#define __FP_LOGBNAN_IS_MIN 1 diff --git a/sysdeps/x86/bits/huge_vall.h b/sysdeps/x86/bits/huge_vall.h deleted file mode 100644 index b19e0b4ec2..0000000000 --- a/sysdeps/x86/bits/huge_vall.h +++ /dev/null @@ -1,41 +0,0 @@ -/* `HUGE_VALL' constant for ix86 (where it is infinity). - Used by <stdlib.h> and <math.h> functions for overflow. - Copyright (C) 1992-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _MATH_H -# error "Never use <bits/huge_vall.h> directly; include <math.h> instead." -#endif - -#if __GNUC_PREREQ(3,3) -# define HUGE_VALL (__builtin_huge_vall()) -#elif __GNUC_PREREQ(2,96) -# define HUGE_VALL (__extension__ 0x1.0p32767L) -#else - -# define __HUGE_VALL_bytes { 0, 0, 0, 0, 0, 0, 0, 0x80, 0xff, 0x7f, 0, 0 } - -# define __huge_vall_t union { unsigned char __c[12]; long double __ld; } -# ifdef __GNUC__ -# define HUGE_VALL (__extension__ \ - ((__huge_vall_t) { __c: __HUGE_VALL_bytes }).__ld) -# else /* Not GCC. */ -static __huge_vall_t __huge_vall = { __HUGE_VALL_bytes }; -# define HUGE_VALL (__huge_vall.__ld) -# endif /* GCC. */ - -#endif /* GCC 2.95 */ diff --git a/sysdeps/x86/bits/indirect-return.h b/sysdeps/x86/bits/indirect-return.h new file mode 100644 index 0000000000..d1acaca3b9 --- /dev/null +++ b/sysdeps/x86/bits/indirect-return.h @@ -0,0 +1,37 @@ +/* Definition of __INDIRECT_RETURN. x86 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _UCONTEXT_H +# error "Never include <bits/indirect-return.h> directly; use <ucontext.h> instead." 
+#endif + +/* On x86, swapcontext returns via indirect branch when the shadow stack + is enabled. Define __INDIRECT_RETURN to indicate whether swapcontext + returns via indirect branch. */ +#if defined __CET__ && (__CET__ & 2) != 0 +# if __glibc_has_attribute (__indirect_return__) +# define __INDIRECT_RETURN __attribute__ ((__indirect_return__)) +# else +/* Newer compilers provide the indirect_return attribute, but without + it we can use returns_twice to affect the optimizer in the same + way and avoid unsafe optimizations. */ +# define __INDIRECT_RETURN __attribute__ ((__returns_twice__)) +# endif +#else +# define __INDIRECT_RETURN +#endif diff --git a/sysdeps/x86/bits/link.h b/sysdeps/x86/bits/link.h index 8dd11c2d24..a97c41162c 100644 --- a/sysdeps/x86/bits/link.h +++ b/sysdeps/x86/bits/link.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2016 Free Software Foundation, Inc. +/* Copyright (C) 2004-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/mathdef.h b/sysdeps/x86/bits/mathdef.h deleted file mode 100644 index e61c28aea3..0000000000 --- a/sysdeps/x86/bits/mathdef.h +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (C) 2001-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _MATH_H && !defined _COMPLEX_H -# error "Never use <bits/mathdef.h> directly; include <math.h> instead" -#endif - -#if defined __USE_ISOC99 && defined _MATH_H && !defined _MATH_H_MATHDEF -# define _MATH_H_MATHDEF 1 - -# if defined __x86_64__ || (defined __FLT_EVAL_METHOD__ && __FLT_EVAL_METHOD__ == 0) -/* The x86-64 architecture computes values with the precission of the - used type. Similarly for -m32 -mfpmath=sse. */ -typedef float float_t; /* `float' expressions are evaluated as `float'. */ -typedef double double_t; /* `double' expressions are evaluated - as `double'. */ -# else -/* The ix87 FPUs evaluate all values in the 80 bit floating-point format - which is also available for the user as `long double'. Therefore we - define: */ -typedef long double float_t; /* `float' expressions are evaluated as - `long double'. */ -typedef long double double_t; /* `double' expressions are evaluated as - `long double'. */ -# endif - -/* The values returned by `ilogb' for 0 and NaN respectively. */ -# define FP_ILOGB0 (-2147483647 - 1) -# define FP_ILOGBNAN (-2147483647 - 1) - -/* The GCC 4.6 compiler will define __FP_FAST_FMA{,F,L} if the fma{,f,l} - builtins are supported. */ -# ifdef __FP_FAST_FMA -# define FP_FAST_FMA 1 -# endif - -# ifdef __FP_FAST_FMAF -# define FP_FAST_FMAF 1 -# endif - -# ifdef __FP_FAST_FMAL -# define FP_FAST_FMAL 1 -# endif - -#endif /* ISO C99 */ diff --git a/sysdeps/x86/bits/pthreadtypes.h b/sysdeps/x86/bits/pthreadtypes.h deleted file mode 100644 index 16b8f4fbb1..0000000000 --- a/sysdeps/x86/bits/pthreadtypes.h +++ /dev/null @@ -1,258 +0,0 @@ -/* Copyright (C) 2002-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _BITS_PTHREADTYPES_H -#define _BITS_PTHREADTYPES_H 1 - -#include <bits/wordsize.h> - -#ifdef __x86_64__ -# if __WORDSIZE == 64 -# define __SIZEOF_PTHREAD_ATTR_T 56 -# define __SIZEOF_PTHREAD_MUTEX_T 40 -# define __SIZEOF_PTHREAD_MUTEXATTR_T 4 -# define __SIZEOF_PTHREAD_COND_T 48 -# define __SIZEOF_PTHREAD_CONDATTR_T 4 -# define __SIZEOF_PTHREAD_RWLOCK_T 56 -# define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 -# define __SIZEOF_PTHREAD_BARRIER_T 32 -# define __SIZEOF_PTHREAD_BARRIERATTR_T 4 -# else -# define __SIZEOF_PTHREAD_ATTR_T 32 -# define __SIZEOF_PTHREAD_MUTEX_T 32 -# define __SIZEOF_PTHREAD_MUTEXATTR_T 4 -# define __SIZEOF_PTHREAD_COND_T 48 -# define __SIZEOF_PTHREAD_CONDATTR_T 4 -# define __SIZEOF_PTHREAD_RWLOCK_T 44 -# define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 -# define __SIZEOF_PTHREAD_BARRIER_T 20 -# define __SIZEOF_PTHREAD_BARRIERATTR_T 4 -# endif -#else -# define __SIZEOF_PTHREAD_ATTR_T 36 -# define __SIZEOF_PTHREAD_MUTEX_T 24 -# define __SIZEOF_PTHREAD_MUTEXATTR_T 4 -# define __SIZEOF_PTHREAD_COND_T 48 -# define __SIZEOF_PTHREAD_CONDATTR_T 4 -# define __SIZEOF_PTHREAD_RWLOCK_T 32 -# define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 -# define __SIZEOF_PTHREAD_BARRIER_T 20 -# define __SIZEOF_PTHREAD_BARRIERATTR_T 4 -#endif - - -/* Thread identifiers. 
The structure of the attribute type is not - exposed on purpose. */ -typedef unsigned long int pthread_t; - - -union pthread_attr_t -{ - char __size[__SIZEOF_PTHREAD_ATTR_T]; - long int __align; -}; -#ifndef __have_pthread_attr_t -typedef union pthread_attr_t pthread_attr_t; -# define __have_pthread_attr_t 1 -#endif - - -#ifdef __x86_64__ -typedef struct __pthread_internal_list -{ - struct __pthread_internal_list *__prev; - struct __pthread_internal_list *__next; -} __pthread_list_t; -#else -typedef struct __pthread_internal_slist -{ - struct __pthread_internal_slist *__next; -} __pthread_slist_t; -#endif - - -/* Data structures for mutex handling. The structure of the attribute - type is not exposed on purpose. */ -typedef union -{ - struct __pthread_mutex_s - { - int __lock; - unsigned int __count; - int __owner; -#ifdef __x86_64__ - unsigned int __nusers; -#endif - /* KIND must stay at this position in the structure to maintain - binary compatibility. */ - int __kind; -#ifdef __x86_64__ - short __spins; - short __elision; - __pthread_list_t __list; -# define __PTHREAD_MUTEX_HAVE_PREV 1 -/* Mutex __spins initializer used by PTHREAD_MUTEX_INITIALIZER. */ -# define __PTHREAD_SPINS 0, 0 -#else - unsigned int __nusers; - __extension__ union - { - struct - { - short __espins; - short __elision; -# define __spins __elision_data.__espins -# define __elision __elision_data.__elision -# define __PTHREAD_SPINS { 0, 0 } - } __elision_data; - __pthread_slist_t __list; - }; -#endif - } __data; - char __size[__SIZEOF_PTHREAD_MUTEX_T]; - long int __align; -} pthread_mutex_t; - -typedef union -{ - char __size[__SIZEOF_PTHREAD_MUTEXATTR_T]; - int __align; -} pthread_mutexattr_t; - - -/* Data structure for conditional variable handling. The structure of - the attribute type is not exposed on purpose. 
*/ -typedef union -{ - struct - { - int __lock; - unsigned int __futex; - __extension__ unsigned long long int __total_seq; - __extension__ unsigned long long int __wakeup_seq; - __extension__ unsigned long long int __woken_seq; - void *__mutex; - unsigned int __nwaiters; - unsigned int __broadcast_seq; - } __data; - char __size[__SIZEOF_PTHREAD_COND_T]; - __extension__ long long int __align; -} pthread_cond_t; - -typedef union -{ - char __size[__SIZEOF_PTHREAD_CONDATTR_T]; - int __align; -} pthread_condattr_t; - - -/* Keys for thread-specific data */ -typedef unsigned int pthread_key_t; - - -/* Once-only execution */ -typedef int pthread_once_t; - - -#if defined __USE_UNIX98 || defined __USE_XOPEN2K -/* Data structure for read-write lock variable handling. The - structure of the attribute type is not exposed on purpose. */ -typedef union -{ -# ifdef __x86_64__ - struct - { - int __lock; - unsigned int __nr_readers; - unsigned int __readers_wakeup; - unsigned int __writer_wakeup; - unsigned int __nr_readers_queued; - unsigned int __nr_writers_queued; - int __writer; - int __shared; - signed char __rwelision; -# ifdef __ILP32__ - unsigned char __pad1[3]; -# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, { 0, 0, 0 } -# else - unsigned char __pad1[7]; -# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, { 0, 0, 0, 0, 0, 0, 0 } -# endif - unsigned long int __pad2; - /* FLAGS must stay at this position in the structure to maintain - binary compatibility. */ - unsigned int __flags; -# define __PTHREAD_RWLOCK_INT_FLAGS_SHARED 1 - } __data; -# else - struct - { - int __lock; - unsigned int __nr_readers; - unsigned int __readers_wakeup; - unsigned int __writer_wakeup; - unsigned int __nr_readers_queued; - unsigned int __nr_writers_queued; - /* FLAGS must stay at this position in the structure to maintain - binary compatibility. 
*/ - unsigned char __flags; - unsigned char __shared; - signed char __rwelision; -# define __PTHREAD_RWLOCK_ELISION_EXTRA 0 - unsigned char __pad2; - int __writer; - } __data; -# endif - char __size[__SIZEOF_PTHREAD_RWLOCK_T]; - long int __align; -} pthread_rwlock_t; - -typedef union -{ - char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; - long int __align; -} pthread_rwlockattr_t; -#endif - - -#ifdef __USE_XOPEN2K -/* POSIX spinlock data type. */ -typedef volatile int pthread_spinlock_t; - - -/* POSIX barriers data type. The structure of the type is - deliberately not exposed. */ -typedef union -{ - char __size[__SIZEOF_PTHREAD_BARRIER_T]; - long int __align; -} pthread_barrier_t; - -typedef union -{ - char __size[__SIZEOF_PTHREAD_BARRIERATTR_T]; - int __align; -} pthread_barrierattr_t; -#endif - - -#ifndef __x86_64__ -/* Extra attributes for the cleanup functions. */ -# define __cleanup_fct_attribute __attribute__ ((__regparm__ (1))) -#endif - -#endif /* bits/pthreadtypes.h */ diff --git a/sysdeps/x86/bits/select.h b/sysdeps/x86/bits/select.h index 6f090f14df..2c0a2b5eb3 100644 --- a/sysdeps/x86/bits/select.h +++ b/sysdeps/x86/bits/select.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1997-2016 Free Software Foundation, Inc. +/* Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/semaphore.h b/sysdeps/x86/bits/semaphore.h index c86dc6366c..1b8daf98be 100644 --- a/sysdeps/x86/bits/semaphore.h +++ b/sysdeps/x86/bits/semaphore.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2016 Free Software Foundation, Inc. +/* Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. 
diff --git a/sysdeps/x86/bits/setjmp.h b/sysdeps/x86/bits/setjmp.h index bc72814f44..e0c22ac78f 100644 --- a/sysdeps/x86/bits/setjmp.h +++ b/sysdeps/x86/bits/setjmp.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2001-2016 Free Software Foundation, Inc. +/* Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/bits/string.h b/sysdeps/x86/bits/string.h deleted file mode 100644 index e4e019f1fc..0000000000 --- a/sysdeps/x86/bits/string.h +++ /dev/null @@ -1,1994 +0,0 @@ -/* Optimized, inlined string functions. i486/x86-64 version. - Copyright (C) 2001-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _STRING_H -# error "Never use <bits/string.h> directly; include <string.h> instead." -#endif - -/* Use the unaligned string inline ABI. */ -#define _STRING_INLINE_unaligned 1 - -/* Enable inline functions only for i486 or better when compiling for - ia32. 
*/ -#if !defined __x86_64__ && (defined __i486__ || defined __pentium__ \ - || defined __pentiumpro__ || defined __pentium4__ \ - || defined __nocona__ || defined __atom__ \ - || defined __core2__ || defined __corei7__ \ - || defined __sandybridge__ || defined __haswell__ \ - || defined __bonnell__ || defined __silvermont__ \ - || defined __k6__ || defined __geode__ \ - || defined __k8__ || defined __athlon__ \ - || defined __amdfam10__ || defined __bdver1__ \ - || defined __bdver2__ || defined __bdver3__ \ - || defined __bdver4__ || defined __btver1__ \ - || defined __btver2__) - -/* We only provide optimizations if the user selects them and if - GNU CC is used. */ -# if !defined __NO_STRING_INLINES && defined __USE_STRING_INLINES \ - && defined __GNUC__ && __GNUC__ >= 2 - -# ifndef __STRING_INLINE -# ifndef __extern_inline -# define __STRING_INLINE inline -# else -# define __STRING_INLINE __extern_inline -# endif -# endif - -/* The macros are used in some of the optimized implementations below. */ -# define __STRING_SMALL_GET16(src, idx) \ - ((((const unsigned char *) (src))[idx + 1] << 8) \ - | ((const unsigned char *) (src))[idx]) -# define __STRING_SMALL_GET32(src, idx) \ - (((((const unsigned char *) (src))[idx + 3] << 8 \ - | ((const unsigned char *) (src))[idx + 2]) << 8 \ - | ((const unsigned char *) (src))[idx + 1]) << 8 \ - | ((const unsigned char *) (src))[idx]) - - -/* Copy N bytes of SRC to DEST. */ -# define _HAVE_STRING_ARCH_memcpy 1 -# define memcpy(dest, src, n) \ - (__extension__ (__builtin_constant_p (n) \ - ? __memcpy_c ((dest), (src), (n)) \ - : __memcpy_g ((dest), (src), (n)))) -# define __memcpy_c(dest, src, n) \ - ((n) == 0 \ - ? (dest) \ - : (((n) % 4 == 0) \ - ? __memcpy_by4 (dest, src, n) \ - : (((n) % 2 == 0) \ - ? 
__memcpy_by2 (dest, src, n) \ - : __memcpy_g (dest, src, n)))) - -__STRING_INLINE void *__memcpy_by4 (void *__dest, const void *__src, - size_t __n); - -__STRING_INLINE void * -__memcpy_by4 (void *__dest, const void *__src, size_t __n) -{ - register unsigned long int __d0, __d1; - register void *__tmp = __dest; - __asm__ __volatile__ - ("1:\n\t" - "movl (%2),%0\n\t" - "leal 4(%2),%2\n\t" - "movl %0,(%1)\n\t" - "leal 4(%1),%1\n\t" - "decl %3\n\t" - "jnz 1b" - : "=&r" (__d0), "=&r" (__tmp), "=&r" (__src), "=&r" (__d1) - : "1" (__tmp), "2" (__src), "3" (__n / 4) - : "memory", "cc"); - return __dest; -} - -__STRING_INLINE void *__memcpy_by2 (void *__dest, const void *__src, - size_t __n); - -__STRING_INLINE void * -__memcpy_by2 (void *__dest, const void *__src, size_t __n) -{ - register unsigned long int __d0, __d1; - register void *__tmp = __dest; - __asm__ __volatile__ - ("shrl $1,%3\n\t" - "jz 2f\n" /* only a word */ - "1:\n\t" - "movl (%2),%0\n\t" - "leal 4(%2),%2\n\t" - "movl %0,(%1)\n\t" - "leal 4(%1),%1\n\t" - "decl %3\n\t" - "jnz 1b\n" - "2:\n\t" - "movw (%2),%w0\n\t" - "movw %w0,(%1)" - : "=&q" (__d0), "=&r" (__tmp), "=&r" (__src), "=&r" (__d1) - : "1" (__tmp), "2" (__src), "3" (__n / 2) - : "memory", "cc"); - return __dest; -} - -__STRING_INLINE void *__memcpy_g (void *__dest, const void *__src, size_t __n); - -__STRING_INLINE void * -__memcpy_g (void *__dest, const void *__src, size_t __n) -{ - register unsigned long int __d0, __d1, __d2; - register void *__tmp = __dest; - __asm__ __volatile__ - ("cld\n\t" - "shrl $1,%%ecx\n\t" - "jnc 1f\n\t" - "movsb\n" - "1:\n\t" - "shrl $1,%%ecx\n\t" - "jnc 2f\n\t" - "movsw\n" - "2:\n\t" - "rep; movsl" - : "=&c" (__d0), "=&D" (__d1), "=&S" (__d2), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) - : "0" (__n), "1" (__tmp), "2" (__src), - "m" ( *(struct { __extension__ char __x[__n]; } *)__src) - : "cc"); - return __dest; -} - -# define _HAVE_STRING_ARCH_memmove 1 -# ifndef _FORCE_INLINES -/* Copy N bytes of 
SRC to DEST, guaranteeing - correct behavior for overlapping strings. */ -# define memmove(dest, src, n) __memmove_g (dest, src, n) - -__STRING_INLINE void *__memmove_g (void *, const void *, size_t) - __asm__ ("memmove"); - -__STRING_INLINE void * -__memmove_g (void *__dest, const void *__src, size_t __n) -{ - register unsigned long int __d0, __d1, __d2; - register void *__tmp = __dest; - if (__dest < __src) - __asm__ __volatile__ - ("cld\n\t" - "rep; movsb" - : "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) - : "0" (__n), "1" (__src), "2" (__tmp), - "m" ( *(struct { __extension__ char __x[__n]; } *)__src)); - else - __asm__ __volatile__ - ("decl %1\n\t" - "decl %2\n\t" - "std\n\t" - "rep; movsb\n\t" - "cld" - : "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) - : "0" (__n), "1" (__n + (const char *) __src), - "2" (__n + (char *) __tmp), - "m" ( *(struct { __extension__ char __x[__n]; } *)__src)); - return __dest; -} -# endif - -/* Compare N bytes of S1 and S2. */ -# define _HAVE_STRING_ARCH_memcmp 1 -# ifndef _FORCE_INLINES -# ifndef __PIC__ -/* gcc has problems to spill registers when using PIC. */ -__STRING_INLINE int -memcmp (const void *__s1, const void *__s2, size_t __n) -{ - register unsigned long int __d0, __d1, __d2; - register int __res; - __asm__ __volatile__ - ("cld\n\t" - "testl %3,%3\n\t" - "repe; cmpsb\n\t" - "je 1f\n\t" - "sbbl %0,%0\n\t" - "orl $1,%0\n" - "1:" - : "=&a" (__res), "=&S" (__d0), "=&D" (__d1), "=&c" (__d2) - : "0" (0), "1" (__s1), "2" (__s2), "3" (__n), - "m" ( *(struct { __extension__ char __x[__n]; } *)__s1), - "m" ( *(struct { __extension__ char __x[__n]; } *)__s2) - : "cc"); - return __res; -} -# endif -# endif - -/* Set N bytes of S to C. */ -# define _HAVE_STRING_ARCH_memset 1 -# define _USE_STRING_ARCH_memset 1 -# define memset(s, c, n) \ - (__extension__ (__builtin_constant_p (n) && (n) <= 16 \ - ? ((n) == 1 \ - ? 
__memset_c1 ((s), (c)) \ - : __memset_gc ((s), (c), (n))) \ - : (__builtin_constant_p (c) \ - ? (__builtin_constant_p (n) \ - ? __memset_ccn ((s), (c), (n)) \ - : memset ((s), (c), (n))) \ - : (__builtin_constant_p (n) \ - ? __memset_gcn ((s), (c), (n)) \ - : memset ((s), (c), (n)))))) - -# define __memset_c1(s, c) ({ void *__s = (s); \ - *((unsigned char *) __s) = (unsigned char) (c); \ - __s; }) - -# define __memset_gc(s, c, n) \ - ({ void *__s = (s); \ - union { \ - unsigned int __ui; \ - unsigned short int __usi; \ - unsigned char __uc; \ - } *__u = __s; \ - unsigned int __c = ((unsigned int) ((unsigned char) (c))) * 0x01010101; \ - \ - /* We apply a trick here. `gcc' would implement the following \ - assignments using immediate operands. But this uses to much \ - memory (7, instead of 4 bytes). So we force the value in a \ - registers. */ \ - if ((n) == 3 || (n) >= 5) \ - __asm__ __volatile__ ("" : "=r" (__c) : "0" (__c)); \ - \ - /* This `switch' statement will be removed at compile-time. 
*/ \ - switch (n) \ - { \ - case 15: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 11: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 7: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 3: \ - __u->__usi = (unsigned short int) __c; \ - __u = __extension__ ((void *) __u + 2); \ - __u->__uc = (unsigned char) __c; \ - break; \ - \ - case 14: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 10: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 6: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 2: \ - __u->__usi = (unsigned short int) __c; \ - break; \ - \ - case 13: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 9: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 5: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 1: \ - __u->__uc = (unsigned char) __c; \ - break; \ - \ - case 16: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 12: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 8: \ - __u->__ui = __c; \ - __u = __extension__ ((void *) __u + 4); \ - case 4: \ - __u->__ui = __c; \ - case 0: \ - break; \ - } \ - \ - __s; }) - -# define __memset_ccn(s, c, n) \ - (((n) % 4 == 0) \ - ? __memset_ccn_by4 (s, ((unsigned int) ((unsigned char) (c))) * 0x01010101,\ - n) \ - : (((n) % 2 == 0) \ - ? 
__memset_ccn_by2 (s, \ - ((unsigned int) ((unsigned char) (c))) * 0x01010101,\ - n) \ - : memset (s, c, n))) - -__STRING_INLINE void *__memset_ccn_by4 (void *__s, unsigned int __c, - size_t __n); - -__STRING_INLINE void * -__memset_ccn_by4 (void *__s, unsigned int __c, size_t __n) -{ - register void *__tmp = __s; - register unsigned long int __d0; -# ifdef __i686__ - __asm__ __volatile__ - ("cld\n\t" - "rep; stosl" - : "=&a" (__c), "=&D" (__tmp), "=&c" (__d0), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) - : "cc"); -# else - __asm__ __volatile__ - ("1:\n\t" - "movl %0,(%1)\n\t" - "addl $4,%1\n\t" - "decl %2\n\t" - "jnz 1b\n" - : "=&r" (__c), "=&r" (__tmp), "=&r" (__d0), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) - : "cc"); -# endif - return __s; -} - -__STRING_INLINE void *__memset_ccn_by2 (void *__s, unsigned int __c, - size_t __n); - -__STRING_INLINE void * -__memset_ccn_by2 (void *__s, unsigned int __c, size_t __n) -{ - register unsigned long int __d0, __d1; - register void *__tmp = __s; -# ifdef __i686__ - __asm__ __volatile__ - ("cld\n\t" - "rep; stosl\n" - "stosw" - : "=&a" (__d0), "=&D" (__tmp), "=&c" (__d1), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) - : "cc"); -# else - __asm__ __volatile__ - ("1:\tmovl %0,(%1)\n\t" - "leal 4(%1),%1\n\t" - "decl %2\n\t" - "jnz 1b\n" - "movw %w0,(%1)" - : "=&q" (__d0), "=&r" (__tmp), "=&r" (__d1), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) - : "cc"); -#endif - return __s; -} - -# define __memset_gcn(s, c, n) \ - (((n) % 4 == 0) \ - ? __memset_gcn_by4 (s, c, n) \ - : (((n) % 2 == 0) \ - ? 
__memset_gcn_by2 (s, c, n) \ - : memset (s, c, n))) - -__STRING_INLINE void *__memset_gcn_by4 (void *__s, int __c, size_t __n); - -__STRING_INLINE void * -__memset_gcn_by4 (void *__s, int __c, size_t __n) -{ - register void *__tmp = __s; - register unsigned long int __d0; - __asm__ __volatile__ - ("movb %b0,%h0\n" - "pushw %w0\n\t" - "shll $16,%0\n\t" - "popw %w0\n" - "1:\n\t" - "movl %0,(%1)\n\t" - "addl $4,%1\n\t" - "decl %2\n\t" - "jnz 1b\n" - : "=&q" (__c), "=&r" (__tmp), "=&r" (__d0), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) - : "cc"); - return __s; -} - -__STRING_INLINE void *__memset_gcn_by2 (void *__s, int __c, size_t __n); - -__STRING_INLINE void * -__memset_gcn_by2 (void *__s, int __c, size_t __n) -{ - register unsigned long int __d0, __d1; - register void *__tmp = __s; - __asm__ __volatile__ - ("movb %b0,%h0\n\t" - "pushw %w0\n\t" - "shll $16,%0\n\t" - "popw %w0\n" - "1:\n\t" - "movl %0,(%1)\n\t" - "leal 4(%1),%1\n\t" - "decl %2\n\t" - "jnz 1b\n" - "movw %w0,(%1)" - : "=&q" (__d0), "=&r" (__tmp), "=&r" (__d1), - "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) - : "cc"); - return __s; -} - - -/* Search N bytes of S for C. 
*/ -# define _HAVE_STRING_ARCH_memchr 1 -# ifndef _FORCE_INLINES -__STRING_INLINE void * -memchr (const void *__s, int __c, size_t __n) -{ - register unsigned long int __d0; -# ifdef __i686__ - register unsigned long int __d1; -# endif - register unsigned char *__res; - if (__n == 0) - return NULL; -# ifdef __i686__ - __asm__ __volatile__ - ("cld\n\t" - "repne; scasb\n\t" - "cmovne %2,%0" - : "=D" (__res), "=&c" (__d0), "=&r" (__d1) - : "a" (__c), "0" (__s), "1" (__n), "2" (1), - "m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "cc"); -# else - __asm__ __volatile__ - ("cld\n\t" - "repne; scasb\n\t" - "je 1f\n\t" - "movl $1,%0\n" - "1:" - : "=D" (__res), "=&c" (__d0) - : "a" (__c), "0" (__s), "1" (__n), - "m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "cc"); -# endif - return __res - 1; -} -# endif - -# define _HAVE_STRING_ARCH_memrchr 1 -# ifndef _FORCE_INLINES -__STRING_INLINE void *__memrchr (const void *__s, int __c, size_t __n); - -__STRING_INLINE void * -__memrchr (const void *__s, int __c, size_t __n) -{ - register unsigned long int __d0; -# ifdef __i686__ - register unsigned long int __d1; -# endif - register void *__res; - if (__n == 0) - return NULL; -# ifdef __i686__ - __asm__ __volatile__ - ("std\n\t" - "repne; scasb\n\t" - "cmovne %2,%0\n\t" - "cld\n\t" - "incl %0" - : "=D" (__res), "=&c" (__d0), "=&r" (__d1) - : "a" (__c), "0" (__s + __n - 1), "1" (__n), "2" (-1), - "m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "cc"); -# else - __asm__ __volatile__ - ("std\n\t" - "repne; scasb\n\t" - "je 1f\n\t" - "orl $-1,%0\n" - "1:\tcld\n\t" - "incl %0" - : "=D" (__res), "=&c" (__d0) - : "a" (__c), "0" (__s + __n - 1), "1" (__n), - "m" ( *(struct { __extension__ char __x[__n]; } *)__s) - : "cc"); -# endif - return __res; -} -# ifdef __USE_GNU -# define memrchr(s, c, n) __memrchr ((s), (c), (n)) -# endif -# endif - -/* Return pointer to C in S. 
*/ -# define _HAVE_STRING_ARCH_rawmemchr 1 -__STRING_INLINE void *__rawmemchr (const void *__s, int __c); - -# ifndef _FORCE_INLINES -__STRING_INLINE void * -__rawmemchr (const void *__s, int __c) -{ - register unsigned long int __d0; - register unsigned char *__res; - __asm__ __volatile__ - ("cld\n\t" - "repne; scasb\n\t" - : "=D" (__res), "=&c" (__d0) - : "a" (__c), "0" (__s), "1" (0xffffffff), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res - 1; -} -# ifdef __USE_GNU -__STRING_INLINE void * -rawmemchr (const void *__s, int __c) -{ - return __rawmemchr (__s, __c); -} -# endif /* use GNU */ -# endif - - -/* Return the length of S. */ -# define _HAVE_STRING_ARCH_strlen 1 -# define strlen(str) \ - (__extension__ (__builtin_constant_p (str) \ - ? __builtin_strlen (str) \ - : __strlen_g (str))) -__STRING_INLINE size_t __strlen_g (const char *__str); - -__STRING_INLINE size_t -__strlen_g (const char *__str) -{ - register char __dummy; - register const char *__tmp = __str; - __asm__ __volatile__ - ("1:\n\t" - "movb (%0),%b1\n\t" - "leal 1(%0),%0\n\t" - "testb %b1,%b1\n\t" - "jne 1b" - : "=r" (__tmp), "=&q" (__dummy) - : "0" (__str), - "m" ( *(struct { char __x[0xfffffff]; } *)__str) - : "cc" ); - return __tmp - __str - 1; -} - - -/* Copy SRC to DEST. */ -# define _HAVE_STRING_ARCH_strcpy 1 -# define strcpy(dest, src) \ - (__extension__ (__builtin_constant_p (src) \ - ? (sizeof ((src)[0]) == 1 && strlen (src) + 1 <= 8 \ - ? 
__strcpy_a_small ((dest), (src), strlen (src) + 1) \ - : (char *) memcpy ((char *) (dest), \ - (const char *) (src), \ - strlen (src) + 1)) \ - : __strcpy_g ((dest), (src)))) - -# define __strcpy_a_small(dest, src, srclen) \ - (__extension__ ({ char *__dest = (dest); \ - union { \ - unsigned int __ui; \ - unsigned short int __usi; \ - unsigned char __uc; \ - char __c; \ - } *__u = (void *) __dest; \ - switch (srclen) \ - { \ - case 1: \ - __u->__uc = '\0'; \ - break; \ - case 2: \ - __u->__usi = __STRING_SMALL_GET16 (src, 0); \ - break; \ - case 3: \ - __u->__usi = __STRING_SMALL_GET16 (src, 0); \ - __u = __extension__ ((void *) __u + 2); \ - __u->__uc = '\0'; \ - break; \ - case 4: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - break; \ - case 5: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - __u = __extension__ ((void *) __u + 4); \ - __u->__uc = '\0'; \ - break; \ - case 6: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - __u = __extension__ ((void *) __u + 4); \ - __u->__usi = __STRING_SMALL_GET16 (src, 4); \ - break; \ - case 7: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - __u = __extension__ ((void *) __u + 4); \ - __u->__usi = __STRING_SMALL_GET16 (src, 4); \ - __u = __extension__ ((void *) __u + 2); \ - __u->__uc = '\0'; \ - break; \ - case 8: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - __u = __extension__ ((void *) __u + 4); \ - __u->__ui = __STRING_SMALL_GET32 (src, 4); \ - break; \ - } \ - (char *) __dest; })) - -__STRING_INLINE char *__strcpy_g (char *__dest, const char *__src); - -__STRING_INLINE char * -__strcpy_g (char *__dest, const char *__src) -{ - register char *__tmp = __dest; - register char __dummy; - __asm__ __volatile__ - ( - "1:\n\t" - "movb (%0),%b2\n\t" - "leal 1(%0),%0\n\t" - "movb %b2,(%1)\n\t" - "leal 1(%1),%1\n\t" - "testb %b2,%b2\n\t" - "jne 1b" - : "=&r" (__src), "=&r" (__tmp), "=&q" (__dummy), - "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) - : "0" (__src), "1" (__tmp), - "m" ( *(struct { char 
__x[0xfffffff]; } *)__src) - : "cc"); - return __dest; -} - - -# ifdef __USE_GNU -# define _HAVE_STRING_ARCH_stpcpy 1 -/* Copy SRC to DEST. */ -# define __stpcpy(dest, src) \ - (__extension__ (__builtin_constant_p (src) \ - ? (strlen (src) + 1 <= 8 \ - ? __stpcpy_a_small ((dest), (src), strlen (src) + 1) \ - : __stpcpy_c ((dest), (src), strlen (src) + 1)) \ - : __stpcpy_g ((dest), (src)))) -# define __stpcpy_c(dest, src, srclen) \ - ((srclen) % 4 == 0 \ - ? __mempcpy_by4 (dest, src, srclen) - 1 \ - : ((srclen) % 2 == 0 \ - ? __mempcpy_by2 (dest, src, srclen) - 1 \ - : __mempcpy_byn (dest, src, srclen) - 1)) - -/* In glibc itself we use this symbol for namespace reasons. */ -# define stpcpy(dest, src) __stpcpy ((dest), (src)) - -# define __stpcpy_a_small(dest, src, srclen) \ - (__extension__ ({ union { \ - unsigned int __ui; \ - unsigned short int __usi; \ - unsigned char __uc; \ - char __c; \ - } *__u = (void *) (dest); \ - switch (srclen) \ - { \ - case 1: \ - __u->__uc = '\0'; \ - break; \ - case 2: \ - __u->__usi = __STRING_SMALL_GET16 (src, 0); \ - __u = __extension__ ((void *) __u + 1); \ - break; \ - case 3: \ - __u->__usi = __STRING_SMALL_GET16 (src, 0); \ - __u = __extension__ ((void *) __u + 2); \ - __u->__uc = '\0'; \ - break; \ - case 4: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - __u = __extension__ ((void *) __u + 3); \ - break; \ - case 5: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - __u = __extension__ ((void *) __u + 4); \ - __u->__uc = '\0'; \ - break; \ - case 6: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - __u = __extension__ ((void *) __u + 4); \ - __u->__usi = __STRING_SMALL_GET16 (src, 4); \ - __u = __extension__ ((void *) __u + 1); \ - break; \ - case 7: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); \ - __u = __extension__ ((void *) __u + 4); \ - __u->__usi = __STRING_SMALL_GET16 (src, 4); \ - __u = __extension__ ((void *) __u + 2); \ - __u->__uc = '\0'; \ - break; \ - case 8: \ - __u->__ui = __STRING_SMALL_GET32 (src, 0); 
\ - __u = __extension__ ((void *) __u + 4); \ - __u->__ui = __STRING_SMALL_GET32 (src, 4); \ - __u = __extension__ ((void *) __u + 3); \ - break; \ - } \ - (char *) __u; })) - -__STRING_INLINE char *__mempcpy_by4 (char *__dest, const char *__src, - size_t __srclen); - -__STRING_INLINE char * -__mempcpy_by4 (char *__dest, const char *__src, size_t __srclen) -{ - register char *__tmp = __dest; - register unsigned long int __d0, __d1; - __asm__ __volatile__ - ("1:\n\t" - "movl (%2),%0\n\t" - "leal 4(%2),%2\n\t" - "movl %0,(%1)\n\t" - "leal 4(%1),%1\n\t" - "decl %3\n\t" - "jnz 1b" - : "=&r" (__d0), "=r" (__tmp), "=&r" (__src), "=&r" (__d1) - : "1" (__tmp), "2" (__src), "3" (__srclen / 4) - : "memory", "cc"); - return __tmp; -} - -__STRING_INLINE char *__mempcpy_by2 (char *__dest, const char *__src, - size_t __srclen); - -__STRING_INLINE char * -__mempcpy_by2 (char *__dest, const char *__src, size_t __srclen) -{ - register char *__tmp = __dest; - register unsigned long int __d0, __d1; - __asm__ __volatile__ - ("shrl $1,%3\n\t" - "jz 2f\n" /* only a word */ - "1:\n\t" - "movl (%2),%0\n\t" - "leal 4(%2),%2\n\t" - "movl %0,(%1)\n\t" - "leal 4(%1),%1\n\t" - "decl %3\n\t" - "jnz 1b\n" - "2:\n\t" - "movw (%2),%w0\n\t" - "movw %w0,(%1)" - : "=&q" (__d0), "=r" (__tmp), "=&r" (__src), "=&r" (__d1), - "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) - : "1" (__tmp), "2" (__src), "3" (__srclen / 2), - "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) - : "cc"); - return __tmp + 2; -} - -__STRING_INLINE char *__mempcpy_byn (char *__dest, const char *__src, - size_t __srclen); - -__STRING_INLINE char * -__mempcpy_byn (char *__dest, const char *__src, size_t __srclen) -{ - register unsigned long __d0, __d1; - register char *__tmp = __dest; - __asm__ __volatile__ - ("cld\n\t" - "shrl $1,%%ecx\n\t" - "jnc 1f\n\t" - "movsb\n" - "1:\n\t" - "shrl $1,%%ecx\n\t" - "jnc 2f\n\t" - "movsw\n" - "2:\n\t" - "rep; movsl" - : "=D" (__tmp), "=&c" (__d0), "=&S" (__d1), - 
"=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) - : "0" (__tmp), "1" (__srclen), "2" (__src), - "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) - : "cc"); - return __tmp; -} - -__STRING_INLINE char *__stpcpy_g (char *__dest, const char *__src); - -__STRING_INLINE char * -__stpcpy_g (char *__dest, const char *__src) -{ - register char *__tmp = __dest; - register char __dummy; - __asm__ __volatile__ - ( - "1:\n\t" - "movb (%0),%b2\n\t" - "leal 1(%0),%0\n\t" - "movb %b2,(%1)\n\t" - "leal 1(%1),%1\n\t" - "testb %b2,%b2\n\t" - "jne 1b" - : "=&r" (__src), "=r" (__tmp), "=&q" (__dummy), - "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) - : "0" (__src), "1" (__tmp), - "m" ( *(struct { char __x[0xfffffff]; } *)__src) - : "cc"); - return __tmp - 1; -} -# endif - - -/* Copy no more than N characters of SRC to DEST. */ -# define _HAVE_STRING_ARCH_strncpy 1 -# define strncpy(dest, src, n) \ - (__extension__ (__builtin_constant_p (src) \ - ? ((strlen (src) + 1 >= ((size_t) (n)) \ - ? (char *) memcpy ((char *) (dest), \ - (const char *) (src), n) \ - : __strncpy_cg ((dest), (src), strlen (src) + 1, n))) \ - : __strncpy_gg ((dest), (src), n))) -# define __strncpy_cg(dest, src, srclen, n) \ - (((srclen) % 4 == 0) \ - ? __strncpy_by4 (dest, src, srclen, n) \ - : (((srclen) % 2 == 0) \ - ? 
__strncpy_by2 (dest, src, srclen, n) \ - : __strncpy_byn (dest, src, srclen, n))) - -__STRING_INLINE char *__strncpy_by4 (char *__dest, const char __src[], - size_t __srclen, size_t __n); - -__STRING_INLINE char * -__strncpy_by4 (char *__dest, const char __src[], size_t __srclen, size_t __n) -{ - register char *__tmp = __dest; - register int __dummy1, __dummy2; - __asm__ __volatile__ - ("1:\n\t" - "movl (%2),%0\n\t" - "leal 4(%2),%2\n\t" - "movl %0,(%1)\n\t" - "leal 4(%1),%1\n\t" - "decl %3\n\t" - "jnz 1b" - : "=&r" (__dummy1), "=r" (__tmp), "=&r" (__src), "=&r" (__dummy2), - "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) - : "1" (__tmp), "2" (__src), "3" (__srclen / 4), - "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) - : "cc"); - (void) memset (__tmp, '\0', __n - __srclen); - return __dest; -} - -__STRING_INLINE char *__strncpy_by2 (char *__dest, const char __src[], - size_t __srclen, size_t __n); - -__STRING_INLINE char * -__strncpy_by2 (char *__dest, const char __src[], size_t __srclen, size_t __n) -{ - register char *__tmp = __dest; - register int __dummy1, __dummy2; - __asm__ __volatile__ - ("shrl $1,%3\n\t" - "jz 2f\n" /* only a word */ - "1:\n\t" - "movl (%2),%0\n\t" - "leal 4(%2),%2\n\t" - "movl %0,(%1)\n\t" - "leal 4(%1),%1\n\t" - "decl %3\n\t" - "jnz 1b\n" - "2:\n\t" - "movw (%2),%w0\n\t" - "movw %w0,(%1)\n\t" - : "=&q" (__dummy1), "=r" (__tmp), "=&r" (__src), "=&r" (__dummy2), - "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) - : "1" (__tmp), "2" (__src), "3" (__srclen / 2), - "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) - : "cc"); - (void) memset (__tmp + 2, '\0', __n - __srclen); - return __dest; -} - -__STRING_INLINE char *__strncpy_byn (char *__dest, const char __src[], - size_t __srclen, size_t __n); - -__STRING_INLINE char * -__strncpy_byn (char *__dest, const char __src[], size_t __srclen, size_t __n) -{ - register unsigned long int __d0, __d1; - register char *__tmp = __dest; - 
__asm__ __volatile__ - ("cld\n\t" - "shrl $1,%1\n\t" - "jnc 1f\n\t" - "movsb\n" - "1:\n\t" - "shrl $1,%1\n\t" - "jnc 2f\n\t" - "movsw\n" - "2:\n\t" - "rep; movsl" - : "=D" (__tmp), "=&c" (__d0), "=&S" (__d1), - "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) - : "1" (__srclen), "0" (__tmp),"2" (__src), - "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) - : "cc"); - (void) memset (__tmp, '\0', __n - __srclen); - return __dest; -} - -__STRING_INLINE char *__strncpy_gg (char *__dest, const char *__src, - size_t __n); - -__STRING_INLINE char * -__strncpy_gg (char *__dest, const char *__src, size_t __n) -{ - register char *__tmp = __dest; - register char __dummy; - if (__n > 0) - __asm__ __volatile__ - ("1:\n\t" - "movb (%0),%2\n\t" - "incl %0\n\t" - "movb %2,(%1)\n\t" - "incl %1\n\t" - "decl %3\n\t" - "je 3f\n\t" - "testb %2,%2\n\t" - "jne 1b\n\t" - "2:\n\t" - "movb %2,(%1)\n\t" - "incl %1\n\t" - "decl %3\n\t" - "jne 2b\n\t" - "3:" - : "=&r" (__src), "=&r" (__tmp), "=&q" (__dummy), "=&r" (__n) - : "0" (__src), "1" (__tmp), "3" (__n) - : "memory", "cc"); - - return __dest; -} - - -/* Append SRC onto DEST. */ -# define _HAVE_STRING_ARCH_strcat 1 -# define strcat(dest, src) \ - (__extension__ (__builtin_constant_p (src) \ - ? 
__strcat_c ((dest), (src), strlen (src) + 1) \ - : __strcat_g ((dest), (src)))) - -__STRING_INLINE char *__strcat_c (char *__dest, const char __src[], - size_t __srclen); - -__STRING_INLINE char * -__strcat_c (char *__dest, const char __src[], size_t __srclen) -{ -# ifdef __i686__ - register unsigned long int __d0; - register char *__tmp; - __asm__ __volatile__ - ("repne; scasb" - : "=D" (__tmp), "=&c" (__d0), - "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) - : "0" (__dest), "1" (0xffffffff), "a" (0), - "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) - : "cc"); - --__tmp; -# else - register char *__tmp = __dest; - __asm__ __volatile__ - ("decl %0\n\t" - "1:\n\t" - "incl %0\n\t" - "cmpb $0,(%0)\n\t" - "jne 1b\n" - : "=r" (__tmp), - "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) - : "0" (__tmp), - "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) - : "cc"); -# endif - (void) memcpy (__tmp, __src, __srclen); - return __dest; -} - -__STRING_INLINE char *__strcat_g (char *__dest, const char *__src); - -__STRING_INLINE char * -__strcat_g (char *__dest, const char *__src) -{ - register char *__tmp = __dest; - register char __dummy; - __asm__ __volatile__ - ("decl %1\n\t" - "1:\n\t" - "incl %1\n\t" - "cmpb $0,(%1)\n\t" - "jne 1b\n" - "2:\n\t" - "movb (%2),%b0\n\t" - "incl %2\n\t" - "movb %b0,(%1)\n\t" - "incl %1\n\t" - "testb %b0,%b0\n\t" - "jne 2b\n" - : "=&q" (__dummy), "=&r" (__tmp), "=&r" (__src), - "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) - : "1" (__tmp), "2" (__src), - "m" ( *(struct { char __x[0xfffffff]; } *)__src) - : "memory", "cc"); - return __dest; -} - - -/* Append no more than N characters from SRC onto DEST. */ -# define _HAVE_STRING_ARCH_strncat 1 -# define strncat(dest, src, n) \ - (__extension__ ({ char *__dest = (dest); \ - __builtin_constant_p (src) && __builtin_constant_p (n) \ - ? (strlen (src) < ((size_t) (n)) \ - ? 
strcat (__dest, (src)) \ - : (*(char *)__mempcpy (strchr (__dest, '\0'), \ - (const char *) (src), \ - (n)) = 0, __dest)) \ - : __strncat_g (__dest, (src), (n)); })) - -__STRING_INLINE char *__strncat_g (char *__dest, const char __src[], - size_t __n); - -__STRING_INLINE char * -__strncat_g (char *__dest, const char __src[], size_t __n) -{ - register char *__tmp = __dest; - register char __dummy; -# ifdef __i686__ - __asm__ __volatile__ - ("repne; scasb\n" - "movl %4, %3\n\t" - "decl %1\n\t" - "1:\n\t" - "subl $1,%3\n\t" - "jc 2f\n\t" - "movb (%2),%b0\n\t" - "movsb\n\t" - "testb %b0,%b0\n\t" - "jne 1b\n\t" - "decl %1\n" - "2:\n\t" - "movb $0,(%1)" - : "=&a" (__dummy), "=&D" (__tmp), "=&S" (__src), "=&c" (__n) - : "g" (__n), "0" (0), "1" (__tmp), "2" (__src), "3" (0xffffffff) - : "memory", "cc"); -# else - --__tmp; - __asm__ __volatile__ - ("1:\n\t" - "cmpb $0,1(%1)\n\t" - "leal 1(%1),%1\n\t" - "jne 1b\n" - "2:\n\t" - "subl $1,%3\n\t" - "jc 3f\n\t" - "movb (%2),%b0\n\t" - "leal 1(%2),%2\n\t" - "movb %b0,(%1)\n\t" - "leal 1(%1),%1\n\t" - "testb %b0,%b0\n\t" - "jne 2b\n\t" - "decl %1\n" - "3:\n\t" - "movb $0,(%1)" - : "=&q" (__dummy), "=&r" (__tmp), "=&r" (__src), "=&r" (__n) - : "1" (__tmp), "2" (__src), "3" (__n) - : "memory", "cc"); -#endif - return __dest; -} - - -/* Compare S1 and S2. */ -# define _HAVE_STRING_ARCH_strcmp 1 -# define strcmp(s1, s2) \ - (__extension__ (__builtin_constant_p (s1) && __builtin_constant_p (s2) \ - && (sizeof ((s1)[0]) != 1 || strlen (s1) >= 4) \ - && (sizeof ((s2)[0]) != 1 || strlen (s2) >= 4) \ - ? memcmp ((const char *) (s1), (const char *) (s2), \ - (strlen (s1) < strlen (s2) \ - ? strlen (s1) : strlen (s2)) + 1) \ - : (__builtin_constant_p (s1) && sizeof ((s1)[0]) == 1 \ - && sizeof ((s2)[0]) == 1 && strlen (s1) < 4 \ - ? (__builtin_constant_p (s2) && sizeof ((s2)[0]) == 1 \ - ? 
__strcmp_cc ((const unsigned char *) (s1), \ - (const unsigned char *) (s2), \ - strlen (s1)) \ - : __strcmp_cg ((const unsigned char *) (s1), \ - (const unsigned char *) (s2), \ - strlen (s1))) \ - : (__builtin_constant_p (s2) && sizeof ((s1)[0]) == 1 \ - && sizeof ((s2)[0]) == 1 && strlen (s2) < 4 \ - ? (__builtin_constant_p (s1) \ - ? __strcmp_cc ((const unsigned char *) (s1), \ - (const unsigned char *) (s2), \ - strlen (s2)) \ - : __strcmp_gc ((const unsigned char *) (s1), \ - (const unsigned char *) (s2), \ - strlen (s2))) \ - : __strcmp_gg ((s1), (s2)))))) - -# define __strcmp_cc(s1, s2, l) \ - (__extension__ ({ register int __result = (s1)[0] - (s2)[0]; \ - if (l > 0 && __result == 0) \ - { \ - __result = (s1)[1] - (s2)[1]; \ - if (l > 1 && __result == 0) \ - { \ - __result = (s1)[2] - (s2)[2]; \ - if (l > 2 && __result == 0) \ - __result = (s1)[3] - (s2)[3]; \ - } \ - } \ - __result; })) - -# define __strcmp_cg(s1, s2, l1) \ - (__extension__ ({ const unsigned char *__s2 = (s2); \ - register int __result = (s1)[0] - __s2[0]; \ - if (l1 > 0 && __result == 0) \ - { \ - __result = (s1)[1] - __s2[1]; \ - if (l1 > 1 && __result == 0) \ - { \ - __result = (s1)[2] - __s2[2]; \ - if (l1 > 2 && __result == 0) \ - __result = (s1)[3] - __s2[3]; \ - } \ - } \ - __result; })) - -# define __strcmp_gc(s1, s2, l2) \ - (__extension__ ({ const unsigned char *__s1 = (s1); \ - register int __result = __s1[0] - (s2)[0]; \ - if (l2 > 0 && __result == 0) \ - { \ - __result = __s1[1] - (s2)[1]; \ - if (l2 > 1 && __result == 0) \ - { \ - __result = __s1[2] - (s2)[2]; \ - if (l2 > 2 && __result == 0) \ - __result = __s1[3] - (s2)[3]; \ - } \ - } \ - __result; })) - -__STRING_INLINE int __strcmp_gg (const char *__s1, const char *__s2); - -__STRING_INLINE int -__strcmp_gg (const char *__s1, const char *__s2) -{ - register int __res; - __asm__ __volatile__ - ("1:\n\t" - "movb (%1),%b0\n\t" - "leal 1(%1),%1\n\t" - "cmpb %b0,(%2)\n\t" - "jne 2f\n\t" - "leal 1(%2),%2\n\t" - "testb 
%b0,%b0\n\t" - "jne 1b\n\t" - "xorl %0,%0\n\t" - "jmp 3f\n" - "2:\n\t" - "movl $1,%0\n\t" - "jb 3f\n\t" - "negl %0\n" - "3:" - : "=q" (__res), "=&r" (__s1), "=&r" (__s2) - : "1" (__s1), "2" (__s2), - "m" ( *(struct { char __x[0xfffffff]; } *)__s1), - "m" ( *(struct { char __x[0xfffffff]; } *)__s2) - : "cc"); - return __res; -} - - -/* Compare N characters of S1 and S2. */ -# define _HAVE_STRING_ARCH_strncmp 1 -# define strncmp(s1, s2, n) \ - (__extension__ (__builtin_constant_p (s1) && strlen (s1) < ((size_t) (n)) \ - ? strcmp ((s1), (s2)) \ - : (__builtin_constant_p (s2) && strlen (s2) < ((size_t) (n))\ - ? strcmp ((s1), (s2)) \ - : __strncmp_g ((s1), (s2), (n))))) - -__STRING_INLINE int __strncmp_g (const char *__s1, const char *__s2, - size_t __n); - -__STRING_INLINE int -__strncmp_g (const char *__s1, const char *__s2, size_t __n) -{ - register int __res; - __asm__ __volatile__ - ("1:\n\t" - "subl $1,%3\n\t" - "jc 2f\n\t" - "movb (%1),%b0\n\t" - "incl %1\n\t" - "cmpb %b0,(%2)\n\t" - "jne 3f\n\t" - "incl %2\n\t" - "testb %b0,%b0\n\t" - "jne 1b\n" - "2:\n\t" - "xorl %0,%0\n\t" - "jmp 4f\n" - "3:\n\t" - "movl $1,%0\n\t" - "jb 4f\n\t" - "negl %0\n" - "4:" - : "=q" (__res), "=&r" (__s1), "=&r" (__s2), "=&r" (__n) - : "1" (__s1), "2" (__s2), "3" (__n), - "m" ( *(struct { __extension__ char __x[__n]; } *)__s1), - "m" ( *(struct { __extension__ char __x[__n]; } *)__s2) - : "cc"); - return __res; -} - - -/* Find the first occurrence of C in S. */ -# define _HAVE_STRING_ARCH_strchr 1 -# define _USE_STRING_ARCH_strchr 1 -# define strchr(s, c) \ - (__extension__ (__builtin_constant_p (c) \ - ? ((c) == '\0' \ - ? 
(char *) __rawmemchr ((s), (c)) \ - : __strchr_c ((s), ((c) & 0xff) << 8)) \ - : __strchr_g ((s), (c)))) - -__STRING_INLINE char *__strchr_c (const char *__s, int __c); - -__STRING_INLINE char * -__strchr_c (const char *__s, int __c) -{ - register unsigned long int __d0; - register char *__res; - __asm__ __volatile__ - ("1:\n\t" - "movb (%0),%%al\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "leal 1(%0),%0\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "xorl %0,%0\n" - "2:" - : "=r" (__res), "=&a" (__d0) - : "0" (__s), "1" (__c), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res; -} - -__STRING_INLINE char *__strchr_g (const char *__s, int __c); - -__STRING_INLINE char * -__strchr_g (const char *__s, int __c) -{ - register unsigned long int __d0; - register char *__res; - __asm__ __volatile__ - ("movb %%al,%%ah\n" - "1:\n\t" - "movb (%0),%%al\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "leal 1(%0),%0\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "xorl %0,%0\n" - "2:" - : "=r" (__res), "=&a" (__d0) - : "0" (__s), "1" (__c), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res; -} - - -/* Find the first occurrence of C in S or the final NUL byte. */ -# define _HAVE_STRING_ARCH_strchrnul 1 -# define __strchrnul(s, c) \ - (__extension__ (__builtin_constant_p (c) \ - ? ((c) == '\0' \ - ? 
(char *) __rawmemchr ((s), c) \ - : __strchrnul_c ((s), ((c) & 0xff) << 8)) \ - : __strchrnul_g ((s), c))) - -__STRING_INLINE char *__strchrnul_c (const char *__s, int __c); - -__STRING_INLINE char * -__strchrnul_c (const char *__s, int __c) -{ - register unsigned long int __d0; - register char *__res; - __asm__ __volatile__ - ("1:\n\t" - "movb (%0),%%al\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "leal 1(%0),%0\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "decl %0\n" - "2:" - : "=r" (__res), "=&a" (__d0) - : "0" (__s), "1" (__c), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res; -} - -__STRING_INLINE char *__strchrnul_g (const char *__s, int __c); - -__STRING_INLINE char * -__strchrnul_g (const char *__s, int __c) -{ - register unsigned long int __d0; - register char *__res; - __asm__ __volatile__ - ("movb %%al,%%ah\n" - "1:\n\t" - "movb (%0),%%al\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "leal 1(%0),%0\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "decl %0\n" - "2:" - : "=r" (__res), "=&a" (__d0) - : "0" (__s), "1" (__c), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res; -} -# ifdef __USE_GNU -# define strchrnul(s, c) __strchrnul ((s), (c)) -# endif - - -# if defined __USE_MISC || defined __USE_XOPEN_EXTENDED -/* Find the first occurrence of C in S. This is the BSD name. */ -# define _HAVE_STRING_ARCH_index 1 -# define index(s, c) \ - (__extension__ (__builtin_constant_p (c) \ - ? __strchr_c ((s), ((c) & 0xff) << 8) \ - : __strchr_g ((s), (c)))) -# endif - - -/* Find the last occurrence of C in S. */ -# define _HAVE_STRING_ARCH_strrchr 1 -# define strrchr(s, c) \ - (__extension__ (__builtin_constant_p (c) \ - ? 
__strrchr_c ((s), ((c) & 0xff) << 8) \ - : __strrchr_g ((s), (c)))) - -# ifdef __i686__ -__STRING_INLINE char *__strrchr_c (const char *__s, int __c); - -__STRING_INLINE char * -__strrchr_c (const char *__s, int __c) -{ - register unsigned long int __d0, __d1; - register char *__res; - __asm__ __volatile__ - ("cld\n" - "1:\n\t" - "lodsb\n\t" - "cmpb %h2,%b2\n\t" - "cmove %1,%0\n\t" - "testb %b2,%b2\n\t" - "jne 1b" - : "=d" (__res), "=&S" (__d0), "=&a" (__d1) - : "0" (1), "1" (__s), "2" (__c), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res - 1; -} - -__STRING_INLINE char *__strrchr_g (const char *__s, int __c); - -__STRING_INLINE char * -__strrchr_g (const char *__s, int __c) -{ - register unsigned long int __d0, __d1; - register char *__res; - __asm__ __volatile__ - ("movb %b2,%h2\n" - "cld\n\t" - "1:\n\t" - "lodsb\n\t" - "cmpb %h2,%b2\n\t" - "cmove %1,%0\n\t" - "testb %b2,%b2\n\t" - "jne 1b" - : "=d" (__res), "=&S" (__d0), "=&a" (__d1) - : "0" (1), "1" (__s), "2" (__c), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res - 1; -} -# else -__STRING_INLINE char *__strrchr_c (const char *__s, int __c); - -__STRING_INLINE char * -__strrchr_c (const char *__s, int __c) -{ - register unsigned long int __d0, __d1; - register char *__res; - __asm__ __volatile__ - ("cld\n" - "1:\n\t" - "lodsb\n\t" - "cmpb %%ah,%%al\n\t" - "jne 2f\n\t" - "leal -1(%%esi),%0\n" - "2:\n\t" - "testb %%al,%%al\n\t" - "jne 1b" - : "=d" (__res), "=&S" (__d0), "=&a" (__d1) - : "0" (0), "1" (__s), "2" (__c), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res; -} - -__STRING_INLINE char *__strrchr_g (const char *__s, int __c); - -__STRING_INLINE char * -__strrchr_g (const char *__s, int __c) -{ - register unsigned long int __d0, __d1; - register char *__res; - __asm__ __volatile__ - ("movb %%al,%%ah\n" - "cld\n\t" - "1:\n\t" - "lodsb\n\t" - "cmpb %%ah,%%al\n\t" - "jne 2f\n\t" - "leal -1(%%esi),%0\n" - "2:\n\t" - "testb 
%%al,%%al\n\t" - "jne 1b" - : "=r" (__res), "=&S" (__d0), "=&a" (__d1) - : "0" (0), "1" (__s), "2" (__c), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return __res; -} -# endif - - -# if defined __USE_MISC || defined __USE_XOPEN_EXTENDED -/* Find the last occurrence of C in S. This is the BSD name. */ -# define _HAVE_STRING_ARCH_rindex 1 -# define rindex(s, c) \ - (__extension__ (__builtin_constant_p (c) \ - ? __strrchr_c ((s), ((c) & 0xff) << 8) \ - : __strrchr_g ((s), (c)))) -# endif - - -/* Return the length of the initial segment of S which - consists entirely of characters not in REJECT. */ -# define _HAVE_STRING_ARCH_strcspn 1 -# define strcspn(s, reject) \ - (__extension__ (__builtin_constant_p (reject) && sizeof ((reject)[0]) == 1 \ - ? ((reject)[0] == '\0' \ - ? strlen (s) \ - : ((reject)[1] == '\0' \ - ? __strcspn_c1 ((s), (((reject)[0] << 8) & 0xff00)) \ - : __strcspn_cg ((s), (reject), strlen (reject)))) \ - : __strcspn_g ((s), (reject)))) - -__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject); - -# ifndef _FORCE_INLINES -__STRING_INLINE size_t -__strcspn_c1 (const char *__s, int __reject) -{ - register unsigned long int __d0; - register char *__res; - __asm__ __volatile__ - ("1:\n\t" - "movb (%0),%%al\n\t" - "leal 1(%0),%0\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "2:" - : "=r" (__res), "=&a" (__d0) - : "0" (__s), "1" (__reject), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return (__res - 1) - __s; -} -# endif - -__STRING_INLINE size_t __strcspn_cg (const char *__s, const char __reject[], - size_t __reject_len); - -__STRING_INLINE size_t -__strcspn_cg (const char *__s, const char __reject[], size_t __reject_len) -{ - register unsigned long int __d0, __d1, __d2; - register const char *__res; - __asm__ __volatile__ - ("cld\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %5,%%edi\n\t" - "movl %6,%%ecx\n\t" - "repne; scasb\n\t" - "jne 1b\n" - 
"2:" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) - : "0" (__s), "d" (__reject), "g" (__reject_len) - : "memory", "cc"); - return (__res - 1) - __s; -} - -__STRING_INLINE size_t __strcspn_g (const char *__s, const char *__reject); -# ifdef __PIC__ - -__STRING_INLINE size_t -__strcspn_g (const char *__s, const char *__reject) -{ - register unsigned long int __d0, __d1, __d2; - register const char *__res; - __asm__ __volatile__ - ("pushl %%ebx\n\t" - "movl %4,%%edi\n\t" - "cld\n\t" - "repne; scasb\n\t" - "notl %%ecx\n\t" - "leal -1(%%ecx),%%ebx\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %4,%%edi\n\t" - "movl %%ebx,%%ecx\n\t" - "repne; scasb\n\t" - "jne 1b\n" - "2:\n\t" - "popl %%ebx" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) - : "r" (__reject), "0" (__s), "1" (0), "2" (0xffffffff) - : "memory", "cc"); - return (__res - 1) - __s; -} -# else -__STRING_INLINE size_t -__strcspn_g (const char *__s, const char *__reject) -{ - register unsigned long int __d0, __d1, __d2, __d3; - register const char *__res; - __asm__ __volatile__ - ("cld\n\t" - "repne; scasb\n\t" - "notl %%ecx\n\t" - "leal -1(%%ecx),%%edx\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %%ebx,%%edi\n\t" - "movl %%edx,%%ecx\n\t" - "repne; scasb\n\t" - "jne 1b\n" - "2:" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2), "=&d" (__d3) - : "0" (__s), "1" (0), "2" (0xffffffff), "3" (__reject), "b" (__reject) - /* Clobber memory, otherwise GCC cannot handle this. */ - : "memory", "cc"); - return (__res - 1) - __s; -} -# endif - - -/* Return the length of the initial segment of S which - consists entirely of characters in ACCEPT. */ -# define _HAVE_STRING_ARCH_strspn 1 -# define strspn(s, accept) \ - (__extension__ (__builtin_constant_p (accept) && sizeof ((accept)[0]) == 1 \ - ? ((accept)[0] == '\0' \ - ? ((void) (s), 0) \ - : ((accept)[1] == '\0' \ - ? 
__strspn_c1 ((s), (((accept)[0] << 8 ) & 0xff00)) \ - : __strspn_cg ((s), (accept), strlen (accept)))) \ - : __strspn_g ((s), (accept)))) - -# ifndef _FORCE_INLINES -__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept); - -__STRING_INLINE size_t -__strspn_c1 (const char *__s, int __accept) -{ - register unsigned long int __d0; - register char *__res; - /* Please note that __accept never can be '\0'. */ - __asm__ __volatile__ - ("1:\n\t" - "movb (%0),%b1\n\t" - "leal 1(%0),%0\n\t" - "cmpb %h1,%b1\n\t" - "je 1b" - : "=r" (__res), "=&q" (__d0) - : "0" (__s), "1" (__accept), - "m" ( *(struct { char __x[0xfffffff]; } *)__s) - : "cc"); - return (__res - 1) - __s; -} -# endif - -__STRING_INLINE size_t __strspn_cg (const char *__s, const char __accept[], - size_t __accept_len); - -__STRING_INLINE size_t -__strspn_cg (const char *__s, const char __accept[], size_t __accept_len) -{ - register unsigned long int __d0, __d1, __d2; - register const char *__res; - __asm__ __volatile__ - ("cld\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %5,%%edi\n\t" - "movl %6,%%ecx\n\t" - "repne; scasb\n\t" - "je 1b\n" - "2:" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) - : "0" (__s), "g" (__accept), "g" (__accept_len), - /* Since we do not know how large the memory we access it, use a - really large amount. 
*/ - "m" ( *(struct { char __x[0xfffffff]; } *)__s), - "m" ( *(struct { __extension__ char __x[__accept_len]; } *)__accept) - : "cc"); - return (__res - 1) - __s; -} - -__STRING_INLINE size_t __strspn_g (const char *__s, const char *__accept); -# ifdef __PIC__ - -__STRING_INLINE size_t -__strspn_g (const char *__s, const char *__accept) -{ - register unsigned long int __d0, __d1, __d2; - register const char *__res; - __asm__ __volatile__ - ("pushl %%ebx\n\t" - "cld\n\t" - "repne; scasb\n\t" - "notl %%ecx\n\t" - "leal -1(%%ecx),%%ebx\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %%edx,%%edi\n\t" - "movl %%ebx,%%ecx\n\t" - "repne; scasb\n\t" - "je 1b\n" - "2:\n\t" - "popl %%ebx" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) - : "d" (__accept), "0" (__s), "1" (0), "2" (0xffffffff), "3" (__accept) - : "memory", "cc"); - return (__res - 1) - __s; -} -# else -__STRING_INLINE size_t -__strspn_g (const char *__s, const char *__accept) -{ - register unsigned long int __d0, __d1, __d2, __d3; - register const char *__res; - __asm__ __volatile__ - ("cld\n\t" - "repne; scasb\n\t" - "notl %%ecx\n\t" - "leal -1(%%ecx),%%edx\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %%ebx,%%edi\n\t" - "movl %%edx,%%ecx\n\t" - "repne; scasb\n\t" - "je 1b\n" - "2:" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2), "=&d" (__d3) - : "0" (__s), "1" (0), "2" (0xffffffff), "3" (__accept), "b" (__accept) - : "memory", "cc"); - return (__res - 1) - __s; -} -# endif - - -/* Find the first occurrence in S of any character in ACCEPT. */ -# define _HAVE_STRING_ARCH_strpbrk 1 -# define strpbrk(s, accept) \ - (__extension__ (__builtin_constant_p (accept) && sizeof ((accept)[0]) == 1 \ - ? ((accept)[0] == '\0' \ - ? ((void) (s), (char *) 0) \ - : ((accept)[1] == '\0' \ - ? 
strchr ((s), (accept)[0]) \ - : __strpbrk_cg ((s), (accept), strlen (accept)))) \ - : __strpbrk_g ((s), (accept)))) - -__STRING_INLINE char *__strpbrk_cg (const char *__s, const char __accept[], - size_t __accept_len); - -__STRING_INLINE char * -__strpbrk_cg (const char *__s, const char __accept[], size_t __accept_len) -{ - register unsigned long int __d0, __d1, __d2; - register char *__res; - __asm__ __volatile__ - ("cld\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %5,%%edi\n\t" - "movl %6,%%ecx\n\t" - "repne; scasb\n\t" - "jne 1b\n\t" - "decl %0\n\t" - "jmp 3f\n" - "2:\n\t" - "xorl %0,%0\n" - "3:" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) - : "0" (__s), "d" (__accept), "g" (__accept_len) - : "memory", "cc"); - return __res; -} - -__STRING_INLINE char *__strpbrk_g (const char *__s, const char *__accept); -# ifdef __PIC__ - -__STRING_INLINE char * -__strpbrk_g (const char *__s, const char *__accept) -{ - register unsigned long int __d0, __d1, __d2; - register char *__res; - __asm__ __volatile__ - ("pushl %%ebx\n\t" - "movl %%edx,%%edi\n\t" - "cld\n\t" - "repne; scasb\n\t" - "notl %%ecx\n\t" - "leal -1(%%ecx),%%ebx\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %%edx,%%edi\n\t" - "movl %%ebx,%%ecx\n\t" - "repne; scasb\n\t" - "jne 1b\n\t" - "decl %0\n\t" - "jmp 3f\n" - "2:\n\t" - "xorl %0,%0\n" - "3:\n\t" - "popl %%ebx" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) - : "d" (__accept), "0" (__s), "1" (0), "2" (0xffffffff) - : "memory", "cc"); - return __res; -} -# else -__STRING_INLINE char * -__strpbrk_g (const char *__s, const char *__accept) -{ - register unsigned long int __d0, __d1, __d2, __d3; - register char *__res; - __asm__ __volatile__ - ("movl %%ebx,%%edi\n\t" - "cld\n\t" - "repne; scasb\n\t" - "notl %%ecx\n\t" - "leal -1(%%ecx),%%edx\n" - "1:\n\t" - "lodsb\n\t" - "testb %%al,%%al\n\t" - "je 2f\n\t" - "movl %%ebx,%%edi\n\t" - "movl %%edx,%%ecx\n\t" - "repne; scasb\n\t" - 
"jne 1b\n\t" - "decl %0\n\t" - "jmp 3f\n" - "2:\n\t" - "xorl %0,%0\n" - "3:" - : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&d" (__d2), "=&D" (__d3) - : "0" (__s), "1" (0), "2" (0xffffffff), "b" (__accept) - : "memory", "cc"); - return __res; -} -# endif - - -/* Find the first occurrence of NEEDLE in HAYSTACK. */ -# define _HAVE_STRING_ARCH_strstr 1 -# define strstr(haystack, needle) \ - (__extension__ (__builtin_constant_p (needle) && sizeof ((needle)[0]) == 1 \ - ? ((needle)[0] == '\0' \ - ? (haystack) \ - : ((needle)[1] == '\0' \ - ? strchr ((haystack), (needle)[0]) \ - : __strstr_cg ((haystack), (needle), \ - strlen (needle)))) \ - : __strstr_g ((haystack), (needle)))) - -/* Please note that this function need not handle NEEDLEs with a - length shorter than two. */ -__STRING_INLINE char *__strstr_cg (const char *__haystack, - const char __needle[], - size_t __needle_len); - -__STRING_INLINE char * -__strstr_cg (const char *__haystack, const char __needle[], - size_t __needle_len) -{ - register unsigned long int __d0, __d1, __d2; - register char *__res; - __asm__ __volatile__ - ("cld\n" \ - "1:\n\t" - "movl %6,%%edi\n\t" - "movl %5,%%eax\n\t" - "movl %4,%%ecx\n\t" - "repe; cmpsb\n\t" - "je 2f\n\t" - "cmpb $0,-1(%%esi)\n\t" - "leal 1(%%eax),%5\n\t" - "jne 1b\n\t" - "xorl %%eax,%%eax\n" - "2:" - : "=&a" (__res), "=&S" (__d0), "=&D" (__d1), "=&c" (__d2) - : "g" (__needle_len), "1" (__haystack), "d" (__needle) - : "memory", "cc"); - return __res; -} - -__STRING_INLINE char *__strstr_g (const char *__haystack, - const char *__needle); -# ifdef __PIC__ - -__STRING_INLINE char * -__strstr_g (const char *__haystack, const char *__needle) -{ - register unsigned long int __d0, __d1, __d2; - register char *__res; - __asm__ __volatile__ - ("cld\n\t" - "repne; scasb\n\t" - "notl %%ecx\n\t" - "pushl %%ebx\n\t" - "decl %%ecx\n\t" /* NOTE! 
This also sets Z if searchstring='' */ - "movl %%ecx,%%ebx\n" - "1:\n\t" - "movl %%edx,%%edi\n\t" - "movl %%esi,%%eax\n\t" - "movl %%ebx,%%ecx\n\t" - "repe; cmpsb\n\t" - "je 2f\n\t" /* also works for empty string, see above */ - "cmpb $0,-1(%%esi)\n\t" - "leal 1(%%eax),%%esi\n\t" - "jne 1b\n\t" - "xorl %%eax,%%eax\n" - "2:\n\t" - "popl %%ebx" - : "=&a" (__res), "=&c" (__d0), "=&S" (__d1), "=&D" (__d2) - : "0" (0), "1" (0xffffffff), "2" (__haystack), "3" (__needle), - "d" (__needle) - : "memory", "cc"); - return __res; -} -# else -__STRING_INLINE char * -__strstr_g (const char *__haystack, const char *__needle) -{ - register unsigned long int __d0, __d1, __d2, __d3; - register char *__res; - __asm__ __volatile__ - ("cld\n\t" - "repne; scasb\n\t" - "notl %%ecx\n\t" - "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ - "movl %%ecx,%%edx\n" - "1:\n\t" - "movl %%ebx,%%edi\n\t" - "movl %%esi,%%eax\n\t" - "movl %%edx,%%ecx\n\t" - "repe; cmpsb\n\t" - "je 2f\n\t" /* also works for empty string, see above */ - "cmpb $0,-1(%%esi)\n\t" - "leal 1(%%eax),%%esi\n\t" - "jne 1b\n\t" - "xorl %%eax,%%eax\n" - "2:" - : "=&a" (__res), "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), "=&d" (__d3) - : "0" (0), "1" (0xffffffff), "2" (__haystack), "3" (__needle), - "b" (__needle) - : "memory", "cc"); - return __res; -} -# endif - - -/* Bit find functions. We define only the i686 version since for the other - processors gcc generates good code. */ -# if defined __USE_MISC || defined __USE_XOPEN_EXTENDED -# ifdef __i686__ -# define _HAVE_STRING_ARCH_ffs 1 -# define ffs(word) (__builtin_constant_p (word) \ - ? 
__builtin_ffs (word) \ - : ({ int __cnt, __tmp; \ - __asm__ __volatile__ \ - ("bsfl %2,%0\n\t" \ - "cmovel %1,%0" \ - : "=&r" (__cnt), "=r" (__tmp) \ - : "rm" (word), "1" (-1)); \ - __cnt + 1; })) - -# ifndef ffsl -# define ffsl(word) ffs(word) -# endif -# endif /* i686 */ -# endif /* Misc || X/Open */ - -# ifndef _FORCE_INLINES -# undef __STRING_INLINE -# endif - -# endif /* use string inlines && GNU CC */ - -#endif diff --git a/sysdeps/x86/bits/wordsize.h b/sysdeps/x86/bits/wordsize.h index e25af287e8..70f652bca1 100644 --- a/sysdeps/x86/bits/wordsize.h +++ b/sysdeps/x86/bits/wordsize.h @@ -4,10 +4,14 @@ # define __WORDSIZE 64 #else # define __WORDSIZE 32 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 #endif #ifdef __x86_64__ # define __WORDSIZE_TIME64_COMPAT32 1 /* Both x86-64 and x32 use the 64-bit system call interface. */ # define __SYSCALL_WORDSIZE 64 +#else +# define __WORDSIZE_TIME64_COMPAT32 0 #endif diff --git a/sysdeps/x86/bits/xtitypes.h b/sysdeps/x86/bits/xtitypes.h index 31c1210edd..4fd0fe788b 100644 --- a/sysdeps/x86/bits/xtitypes.h +++ b/sysdeps/x86/bits/xtitypes.h @@ -1,5 +1,5 @@ /* bits/xtitypes.h -- Define some types used by <bits/stropts.h>. x86-64. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c new file mode 100644 index 0000000000..b9444ddd52 --- /dev/null +++ b/sysdeps/x86/cacheinfo.c @@ -0,0 +1,791 @@ +/* x86_64 cache info. + Copyright (C) 2003-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> +#include <unistd.h> +#include <cpuid.h> +#include <init-arch.h> + +static const struct intel_02_cache_info +{ + unsigned char idx; + unsigned char assoc; + unsigned char linesize; + unsigned char rel_name; + unsigned int size; +} intel_02_known [] = + { +#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE) + { 0x06, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 8192 }, + { 0x08, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 16384 }, + { 0x09, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 32768 }, + { 0x0a, 2, 32, M(_SC_LEVEL1_DCACHE_SIZE), 8192 }, + { 0x0c, 4, 32, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, + { 0x0d, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, + { 0x0e, 6, 64, M(_SC_LEVEL1_DCACHE_SIZE), 24576 }, + { 0x21, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x22, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 }, + { 0x23, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 }, + { 0x25, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0x29, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0x2c, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE), 32768 }, + { 0x30, 8, 64, M(_SC_LEVEL1_ICACHE_SIZE), 32768 }, + { 0x39, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 }, + { 0x3a, 6, 64, M(_SC_LEVEL2_CACHE_SIZE), 196608 }, + { 0x3b, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 }, + { 0x3c, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x3d, 6, 64, M(_SC_LEVEL2_CACHE_SIZE), 393216 }, + { 0x3e, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x3f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x41, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 131072 }, + { 0x42, 4, 32, 
M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x43, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x44, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0x45, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 2097152 }, + { 0x46, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0x47, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 }, + { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE), 3145728 }, + { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE), 4194304 }, + { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 6291456 }, + { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 }, + { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 }, + { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 }, + { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE), 6291456 }, + { 0x60, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, + { 0x66, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 8192 }, + { 0x67, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, + { 0x68, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 32768 }, + { 0x78, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0x79, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 }, + { 0x7a, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x7b, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x7c, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0x7d, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 2097152 }, + { 0x7f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x80, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x82, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x83, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x84, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0x85, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 2097152 }, + { 0x86, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x87, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0xd0, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 }, + { 0xd1, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 }, + { 0xd2, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0xd6, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 }, + { 0xd7, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0xd8, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0xdc, 12, 64, 
M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 }, + { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 }, + { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 }, + { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 }, + { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 }, + }; + +#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0])) + +static int +intel_02_known_compare (const void *p1, const void *p2) +{ + const struct intel_02_cache_info *i1; + const struct intel_02_cache_info *i2; + + i1 = (const struct intel_02_cache_info *) p1; + i2 = (const struct intel_02_cache_info *) p2; + + if (i1->idx == i2->idx) + return 0; + + return i1->idx < i2->idx ? -1 : 1; +} + + +static long int +__attribute__ ((noinline)) +intel_check_word (int name, unsigned int value, bool *has_level_2, + bool *no_level_2_or_3, + const struct cpu_features *cpu_features) +{ + if ((value & 0x80000000) != 0) + /* The register value is reserved. */ + return 0; + + /* Fold the name. The _SC_ constants are always in the order SIZE, + ASSOC, LINESIZE. */ + int folded_rel_name = (M(name) / 3) * 3; + + while (value != 0) + { + unsigned int byte = value & 0xff; + + if (byte == 0x40) + { + *no_level_2_or_3 = true; + + if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) + /* No need to look further. */ + break; + } + else if (byte == 0xff) + { + /* CPUID leaf 0x4 contains all the information. We need to + iterate over it. */ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + unsigned int round = 0; + while (1) + { + __cpuid_count (4, round, eax, ebx, ecx, edx); + + enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f; + if (type == null) + /* That was the end. 
*/ + break; + + unsigned int level = (eax >> 5) & 0x7; + + if ((level == 1 && type == data + && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE)) + || (level == 1 && type == inst + && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE)) + || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE)) + || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) + || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE))) + { + unsigned int offset = M(name) - folded_rel_name; + + if (offset == 0) + /* Cache size. */ + return (((ebx >> 22) + 1) + * (((ebx >> 12) & 0x3ff) + 1) + * ((ebx & 0xfff) + 1) + * (ecx + 1)); + if (offset == 1) + return (ebx >> 22) + 1; + + assert (offset == 2); + return (ebx & 0xfff) + 1; + } + + ++round; + } + /* There is no other cache information anywhere else. */ + break; + } + else + { + if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) + { + /* Intel reused this value. For family 15, model 6 it + specifies the 3rd level cache. Otherwise the 2nd + level cache. */ + unsigned int family = cpu_features->family; + unsigned int model = cpu_features->model; + + if (family == 15 && model == 6) + { + /* The level 3 cache is encoded for this model like + the level 2 cache is for other models. Pretend + the caller asked for the level 2 cache. */ + name = (_SC_LEVEL2_CACHE_SIZE + + (name - _SC_LEVEL3_CACHE_SIZE)); + folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE); + } + } + + struct intel_02_cache_info *found; + struct intel_02_cache_info search; + + search.idx = byte; + found = bsearch (&search, intel_02_known, nintel_02_known, + sizeof (intel_02_known[0]), intel_02_known_compare); + if (found != NULL) + { + if (found->rel_name == folded_rel_name) + { + unsigned int offset = M(name) - folded_rel_name; + + if (offset == 0) + /* Cache size. 
*/ + return found->size; + if (offset == 1) + return found->assoc; + + assert (offset == 2); + return found->linesize; + } + + if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE)) + *has_level_2 = true; + } + } + + /* Next byte for the next round. */ + value >>= 8; + } + + /* Nothing found. */ + return 0; +} + + +static long int __attribute__ ((noinline)) +handle_intel (int name, const struct cpu_features *cpu_features) +{ + unsigned int maxidx = cpu_features->max_cpuid; + + /* Return -1 for older CPUs. */ + if (maxidx < 2) + return -1; + + /* OK, we can use the CPUID instruction to get all info about the + caches. */ + unsigned int cnt = 0; + unsigned int max = 1; + long int result = 0; + bool no_level_2_or_3 = false; + bool has_level_2 = false; + + while (cnt++ < max) + { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + __cpuid (2, eax, ebx, ecx, edx); + + /* The low byte of EAX in the first round contain the number of + rounds we have to make. At least one, the one we are already + doing. */ + if (cnt == 1) + { + max = eax & 0xff; + eax &= 0xffffff00; + } + + /* Process the individual registers' value. 
*/ + result = intel_check_word (name, eax, &has_level_2, + &no_level_2_or_3, cpu_features); + if (result != 0) + return result; + + result = intel_check_word (name, ebx, &has_level_2, + &no_level_2_or_3, cpu_features); + if (result != 0) + return result; + + result = intel_check_word (name, ecx, &has_level_2, + &no_level_2_or_3, cpu_features); + if (result != 0) + return result; + + result = intel_check_word (name, edx, &has_level_2, + &no_level_2_or_3, cpu_features); + if (result != 0) + return result; + } + + if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE + && no_level_2_or_3) + return -1; + + return 0; +} + + +static long int __attribute__ ((noinline)) +handle_amd (int name) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + __cpuid (0x80000000, eax, ebx, ecx, edx); + + /* No level 4 cache (yet). */ + if (name > _SC_LEVEL3_CACHE_LINESIZE) + return 0; + + unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); + if (eax < fn) + return 0; + + __cpuid (fn, eax, ebx, ecx, edx); + + if (name < _SC_LEVEL1_DCACHE_SIZE) + { + name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; + ecx = edx; + } + + switch (name) + { + case _SC_LEVEL1_DCACHE_SIZE: + return (ecx >> 14) & 0x3fc00; + + case _SC_LEVEL1_DCACHE_ASSOC: + ecx >>= 16; + if ((ecx & 0xff) == 0xff) + /* Fully associative. */ + return (ecx << 2) & 0x3fc00; + return ecx & 0xff; + + case _SC_LEVEL1_DCACHE_LINESIZE: + return ecx & 0xff; + + case _SC_LEVEL2_CACHE_SIZE: + return (ecx & 0xf000) == 0 ? 
0 : (ecx >> 6) & 0x3fffc00; + + case _SC_LEVEL2_CACHE_ASSOC: + switch ((ecx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (ecx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); + default: + return 0; + } + /* NOTREACHED */ + + case _SC_LEVEL2_CACHE_LINESIZE: + return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; + + case _SC_LEVEL3_CACHE_SIZE: + return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1; + + case _SC_LEVEL3_CACHE_ASSOC: + switch ((edx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (edx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); + default: + return 0; + } + /* NOTREACHED */ + + case _SC_LEVEL3_CACHE_LINESIZE: + return (edx & 0xf000) == 0 ? 0 : edx & 0xff; + + default: + assert (! "cannot happen"); + } + return -1; +} + + +/* Get the value of the system variable NAME. */ +long int +attribute_hidden +__cache_sysconf (int name) +{ + const struct cpu_features *cpu_features = __get_cpu_features (); + + if (cpu_features->kind == arch_kind_intel) + return handle_intel (name, cpu_features); + + if (cpu_features->kind == arch_kind_amd) + return handle_amd (name); + + // XXX Fill in more vendors. + + /* CPU not known, we have no information. */ + return 0; +} + + +/* Data cache size for use in memory and string routines, typically + L1 size, rounded to multiple of 256 bytes. */ +long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2; +long int __x86_data_cache_size attribute_hidden = 32 * 1024; +/* Similar to __x86_data_cache_size_half, but not rounded. 
*/ +long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2; +/* Similar to __x86_data_cache_size, but not rounded. */ +long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024; +/* Shared cache size for use in memory and string routines, typically + L2 or L3 size, rounded to multiple of 256 bytes. */ +long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; +long int __x86_shared_cache_size attribute_hidden = 1024 * 1024; +/* Similar to __x86_shared_cache_size_half, but not rounded. */ +long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; +/* Similar to __x86_shared_cache_size, but not rounded. */ +long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024; + +/* Threshold to use non temporal store. */ +long int __x86_shared_non_temporal_threshold attribute_hidden; + +#ifndef DISABLE_PREFETCHW +/* PREFETCHW support flag for use in memory and string routines. */ +int __x86_prefetchw attribute_hidden; +#endif + + +static void +__attribute__((constructor)) +init_cacheinfo (void) +{ + /* Find out what brand of processor. */ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + int max_cpuid_ex; + long int data = -1; + long int shared = -1; + unsigned int level; + unsigned int threads = 0; + const struct cpu_features *cpu_features = __get_cpu_features (); + int max_cpuid = cpu_features->max_cpuid; + + if (cpu_features->kind == arch_kind_intel) + { + data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features); + + long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features); + bool inclusive_cache = true; + + /* Try L3 first. */ + level = 3; + shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features); + + /* Number of logical processors sharing L2 cache. */ + int threads_l2; + + /* Number of logical processors sharing L3 cache. */ + int threads_l3; + + if (shared <= 0) + { + /* Try L2 otherwise. 
*/ + level = 2; + shared = core; + threads_l2 = 0; + threads_l3 = -1; + } + else + { + threads_l2 = 0; + threads_l3 = 0; + } + + /* A value of 0 for the HTT bit indicates there is only a single + logical processor. */ + if (HAS_CPU_FEATURE (HTT)) + { + /* Figure out the number of logical threads that share the + highest cache level. */ + if (max_cpuid >= 4) + { + unsigned int family = cpu_features->family; + unsigned int model = cpu_features->model; + + int i = 0; + + /* Query until cache level 2 and 3 are enumerated. */ + int check = 0x1 | (threads_l3 == 0) << 1; + do + { + __cpuid_count (4, i++, eax, ebx, ecx, edx); + + /* There seems to be a bug in at least some Pentium Ds + which sometimes fail to iterate all cache parameters. + Do not loop indefinitely here, stop in this case and + assume there is no such information. */ + if ((eax & 0x1f) == 0) + goto intel_bug_no_cache_info; + + switch ((eax >> 5) & 0x7) + { + default: + break; + case 2: + if ((check & 0x1)) + { + /* Get maximum number of logical processors + sharing L2 cache. */ + threads_l2 = (eax >> 14) & 0x3ff; + check &= ~0x1; + } + break; + case 3: + if ((check & (0x1 << 1))) + { + /* Get maximum number of logical processors + sharing L3 cache. */ + threads_l3 = (eax >> 14) & 0x3ff; + + /* Check if L2 and L3 caches are inclusive. */ + inclusive_cache = (edx & 0x2) != 0; + check &= ~(0x1 << 1); + } + break; + } + } + while (check); + + /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum + numbers of addressable IDs for logical processors sharing + the cache, instead of the maximum number of threads + sharing the cache. */ + if (max_cpuid >= 11) + { + /* Find the number of logical processors shipped in + one core and apply count mask. */ + i = 0; + + /* Count SMT only if there is L3 cache. Always count + core if there is no L3 cache. 
*/ + int count = ((threads_l2 > 0 && level == 3) + | ((threads_l3 > 0 + || (threads_l2 > 0 && level == 2)) << 1)); + + while (count) + { + __cpuid_count (11, i++, eax, ebx, ecx, edx); + + int shipped = ebx & 0xff; + int type = ecx & 0xff00; + if (shipped == 0 || type == 0) + break; + else if (type == 0x100) + { + /* Count SMT. */ + if ((count & 0x1)) + { + int count_mask; + + /* Compute count mask. */ + asm ("bsr %1, %0" + : "=r" (count_mask) : "g" (threads_l2)); + count_mask = ~(-1 << (count_mask + 1)); + threads_l2 = (shipped - 1) & count_mask; + count &= ~0x1; + } + } + else if (type == 0x200) + { + /* Count core. */ + if ((count & (0x1 << 1))) + { + int count_mask; + int threads_core + = (level == 2 ? threads_l2 : threads_l3); + + /* Compute count mask. */ + asm ("bsr %1, %0" + : "=r" (count_mask) : "g" (threads_core)); + count_mask = ~(-1 << (count_mask + 1)); + threads_core = (shipped - 1) & count_mask; + if (level == 2) + threads_l2 = threads_core; + else + threads_l3 = threads_core; + count &= ~(0x1 << 1); + } + } + } + } + if (threads_l2 > 0) + threads_l2 += 1; + if (threads_l3 > 0) + threads_l3 += 1; + if (level == 2) + { + if (threads_l2) + { + threads = threads_l2; + if (threads > 2 && family == 6) + switch (model) + { + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Silvermont has L2 cache shared by 2 cores. */ + threads = 2; + break; + default: + break; + } + } + } + else if (threads_l3) + threads = threads_l3; + } + else + { +intel_bug_no_cache_info: + /* Assume that all logical threads share the highest cache + level. */ + + threads + = ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx + >> 16) & 0xff); + } + + /* Cap usage of highest cache level to the number of supported + threads. */ + if (shared > 0 && threads > 0) + shared /= threads; + } + + /* Account for non-inclusive L2 and L3 caches. 
*/ + if (!inclusive_cache) + { + if (threads_l2 > 0) + core /= threads_l2; + shared += core; + } + } + else if (cpu_features->kind == arch_kind_amd) + { + data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); + long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); + shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); + + /* Get maximum extended function. */ + __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx); + + if (shared <= 0) + /* No shared L3 cache. All we have is the L2 cache. */ + shared = core; + else + { + /* Figure out the number of logical threads that share L3. */ + if (max_cpuid_ex >= 0x80000008) + { + /* Get width of APIC ID. */ + __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx); + threads = 1 << ((ecx >> 12) & 0x0f); + } + + if (threads == 0) + { + /* If APIC ID width is not available, use logical + processor count. */ + __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx); + + if ((edx & (1 << 28)) != 0) + threads = (ebx >> 16) & 0xff; + } + + /* Cap usage of highest cache level to the number of + supported threads. */ + if (threads > 0) + shared /= threads; + + /* Account for exclusive L2 and L3 caches. */ + shared += core; + } + +#ifndef DISABLE_PREFETCHW + if (max_cpuid_ex >= 0x80000001) + { + __cpuid (0x80000001, eax, ebx, ecx, edx); + /* PREFETCHW || 3DNow! */ + if ((ecx & 0x100) || (edx & 0x80000000)) + __x86_prefetchw = -1; + } +#endif + } + + if (cpu_features->data_cache_size != 0) + data = cpu_features->data_cache_size; + + if (data > 0) + { + __x86_raw_data_cache_size_half = data / 2; + __x86_raw_data_cache_size = data; + /* Round data cache size to multiple of 256 bytes. */ + data = data & ~255L; + __x86_data_cache_size_half = data / 2; + __x86_data_cache_size = data; + } + + if (cpu_features->shared_cache_size != 0) + shared = cpu_features->shared_cache_size; + + if (shared > 0) + { + __x86_raw_shared_cache_size_half = shared / 2; + __x86_raw_shared_cache_size = shared; + /* Round shared cache size to multiple of 256 bytes. 
*/ + shared = shared & ~255L; + __x86_shared_cache_size_half = shared / 2; + __x86_shared_cache_size = shared; + } + + /* The large memcpy micro benchmark in glibc shows that 6 times of + shared cache size is the approximate value above which non-temporal + store becomes faster on a 8-core processor. This is the 3/4 of the + total shared cache size. */ + __x86_shared_non_temporal_threshold + = (cpu_features->non_temporal_threshold != 0 + ? cpu_features->non_temporal_threshold + : __x86_shared_cache_size * threads * 3 / 4); +} + +#endif diff --git a/sysdeps/x86/cet-tunables.h b/sysdeps/x86/cet-tunables.h new file mode 100644 index 0000000000..ca023053ee --- /dev/null +++ b/sysdeps/x86/cet-tunables.h @@ -0,0 +1,29 @@ +/* x86 CET tuning. + This file is part of the GNU C Library. + Copyright (C) 2018 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Valid control values: + 0: Enable CET features based on ELF property note. + 1: Always disable CET features. + 2: Always enable CET features. + 3: Enable CET features permissively. 
+ */ +#define CET_ELF_PROPERTY 0 +#define CET_ALWAYS_OFF 1 +#define CET_ALWAYS_ON 2 +#define CET_PERMISSIVE 3 +#define CET_MAX CET_PERMISSIVE diff --git a/sysdeps/x86/check-cet.awk b/sysdeps/x86/check-cet.awk new file mode 100644 index 0000000000..380d998caf --- /dev/null +++ b/sysdeps/x86/check-cet.awk @@ -0,0 +1,53 @@ +# Verify that all shared objects contain the CET property. +# Copyright (C) 2018 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# This awk script expects to get command-line files that are each +# the output of 'readelf -n' on a single shared object. +# It exits successfully (0) if all of them contained the CET property. +# It fails (1) if any didn't contain the CET property +# It fails (2) if the input did not take the expected form. + +BEGIN { result = cet = sanity = 0 } + +function check_one(name) { + if (!sanity) { + print name ": *** input did not look like readelf -n output"; + result = 2; + } else if (cet) { + print name ": OK"; + } else { + print name ": *** no CET property found"; + result = result ? 
result : 1; + } + + cet = sanity = 0; +} + +FILENAME != lastfile { + if (lastfile) + check_one(lastfile); + lastfile = FILENAME; +} + +index ($0, "Displaying notes") != 0 { sanity = 1 } +index ($0, "IBT") != 0 && index ($0, "SHSTK") != 0 { cet = 1 } + +END { + check_one(lastfile); + exit(result); +} diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure new file mode 100644 index 0000000000..b1ff281249 --- /dev/null +++ b/sysdeps/x86/configure @@ -0,0 +1,69 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/x86. + +if test x"$enable_cet" = xyes; then + # Check if CET can be enabled. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether CET can be enabled" >&5 +$as_echo_n "checking whether CET can be enabled... " >&6; } +if ${libc_cv_x86_cet_available+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.c <<EOF +#if !defined __CET__ || __CET__ != 3 +# error CET isn't available. +#endif +EOF + if { ac_try='${CC-cc} -c $CFLAGS -fcf-protection -include cet.h conftest.c 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + libc_cv_x86_cet_available=yes + else + libc_cv_x86_cet_available=no + fi + rm -rf conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_cet_available" >&5 +$as_echo "$libc_cv_x86_cet_available" >&6; } + if test $libc_cv_x86_cet_available = yes; then + enable_cet=yes + else + if test x"$enable_cet" = xdefault; then + enable_cet=no + else + as_fn_error $? "$CC doesn't support CET" "$LINENO" 5 + fi + fi +fi +if test $enable_cet = yes; then + # Check if assembler supports CET. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $AS supports CET" >&5 +$as_echo_n "checking whether $AS supports CET... 
" >&6; } +if ${libc_cv_x86_cet_as+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.s <<EOF + incsspd %ecx +EOF + if { ac_try='${CC-cc} -c $CFLAGS conftest.s -o conftest.o 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + libc_cv_x86_cet_as=yes + else + libc_cv_x86_cet_as=no + fi + rm -rf conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_cet_as" >&5 +$as_echo "$libc_cv_x86_cet_as" >&6; } + if test $libc_cv_x86_cet_as = no; then + as_fn_error $? "$AS doesn't support CET" "$LINENO" 5 + fi +fi +config_vars="$config_vars +enable-cet = $enable_cet" diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac new file mode 100644 index 0000000000..a909b073af --- /dev/null +++ b/sysdeps/x86/configure.ac @@ -0,0 +1,46 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/x86. + +if test x"$enable_cet" = xyes; then + # Check if CET can be enabled. + AC_CACHE_CHECK(whether CET can be enabled, + libc_cv_x86_cet_available, [dnl +cat > conftest.c <<EOF +#if !defined __CET__ || __CET__ != 3 +# error CET isn't available. +#endif +EOF + if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS -fcf-protection -include cet.h conftest.c 1>&AS_MESSAGE_LOG_FD); then + libc_cv_x86_cet_available=yes + else + libc_cv_x86_cet_available=no + fi + rm -rf conftest*]) + if test $libc_cv_x86_cet_available = yes; then + enable_cet=yes + else + if test x"$enable_cet" = xdefault; then + enable_cet=no + else + AC_MSG_ERROR([$CC doesn't support CET]) + fi + fi +fi +if test $enable_cet = yes; then + # Check if assembler supports CET. 
+ AC_CACHE_CHECK(whether $AS supports CET, + libc_cv_x86_cet_as, [dnl +cat > conftest.s <<EOF + incsspd %ecx +EOF + if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.s -o conftest.o 1>&AS_MESSAGE_LOG_FD); then + libc_cv_x86_cet_as=yes + else + libc_cv_x86_cet_as=no + fi + rm -rf conftest*]) + if test $libc_cv_x86_cet_as = no; then + AC_MSG_ERROR([$AS doesn't support CET]) + fi +fi +LIBC_CONFIG_VAR([enable-cet], [$enable_cet]) diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym index a9d53d195f..33dd094e37 100644 --- a/sysdeps/x86/cpu-features-offsets.sym +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -5,3 +5,20 @@ #define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) + +CPU_FEATURES_SIZE sizeof (struct cpu_features) +CPUID_OFFSET offsetof (struct cpu_features, cpuid) +CPUID_SIZE sizeof (struct cpuid_registers) +CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) +CPUID_EBX_OFFSET offsetof (struct cpuid_registers, ebx) +CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx) +CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx) +FAMILY_OFFSET offsetof (struct cpu_features, family) +MODEL_OFFSET offsetof (struct cpu_features, model) +XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size) +FEATURE_OFFSET offsetof (struct cpu_features, feature) +FEATURE_SIZE sizeof (unsigned int) + +COMMON_CPUID_INDEX_1 +COMMON_CPUID_INDEX_7 +FEATURE_INDEX_1 diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 218ff2bd86..ea0b64fdb9 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -1,6 +1,6 @@ /* Initialize CPU feature data. This file is part of the GNU C Library. - Copyright (C) 2008-2016 Free Software Foundation, Inc. + Copyright (C) 2008-2018 Free Software Foundation, Inc. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -18,24 +18,195 @@ #include <cpuid.h> #include <cpu-features.h> +#include <dl-hwcap.h> +#include <libc-pointer-arith.h> -static inline void +#if HAVE_TUNABLES +# define TUNABLE_NAMESPACE tune +# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */ +# include <elf/dl-tunables.h> + +extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) + attribute_hidden; + +# if CET_ENABLED +extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *) + attribute_hidden; +extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *) + attribute_hidden; +# endif +#endif + +#if CET_ENABLED +# include <dl-cet.h> +# include <cet-tunables.h> +#endif + +static void +get_extended_indices (struct cpu_features *cpu_features) +{ + unsigned int eax, ebx, ecx, edx; + __cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax >= 0x80000001) + __cpuid (0x80000001, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); + +} + +static void get_common_indeces (struct cpu_features *cpu_features, unsigned int *family, unsigned int *model, - unsigned int *extended_model) + unsigned int *extended_model, unsigned int *stepping) { - unsigned int eax; - __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); - GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax; - *family = (eax >> 8) & 0x0f; - *model = (eax >> 4) & 0x0f; - *extended_model = (eax >> 12) & 0xf0; - if (*family == 0x0f) + if (family) { - *family += (eax >> 20) & 0xff; - *model += *extended_model; + unsigned int eax; + __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, + 
cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); + cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax; + *family = (eax >> 8) & 0x0f; + *model = (eax >> 4) & 0x0f; + *extended_model = (eax >> 12) & 0xf0; + *stepping = eax & 0x0f; + if (*family == 0x0f) + { + *family += (eax >> 20) & 0xff; + *model += *extended_model; + } + } + + if (cpu_features->max_cpuid >= 7) + __cpuid_count (7, 0, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); + + /* Can we call xgetbv? */ + if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) + { + unsigned int xcrlow; + unsigned int xcrhigh; + asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); + /* Is YMM and XMM state usable? */ + if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == + (bit_YMM_state | bit_XMM_state)) + { + /* Determine if AVX is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX)) + { + cpu_features->feature[index_arch_AVX_Usable] + |= bit_arch_AVX_Usable; + /* The following features depend on AVX being usable. */ + /* Determine if AVX2 is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX2)) + { + cpu_features->feature[index_arch_AVX2_Usable] + |= bit_arch_AVX2_Usable; + + /* Unaligned load with 256-bit AVX registers are faster on + Intel/AMD processors with AVX2. */ + cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load] + |= bit_arch_AVX_Fast_Unaligned_Load; + } + /* Determine if FMA is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, FMA)) + cpu_features->feature[index_arch_FMA_Usable] + |= bit_arch_FMA_Usable; + } + + /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and + ZMM16-ZMM31 state are enabled. */ + if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state + | bit_ZMM16_31_state)) == + (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) + { + /* Determine if AVX512F is usable. 
*/ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512F)) + { + cpu_features->feature[index_arch_AVX512F_Usable] + |= bit_arch_AVX512F_Usable; + /* Determine if AVX512DQ is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)) + cpu_features->feature[index_arch_AVX512DQ_Usable] + |= bit_arch_AVX512DQ_Usable; + } + } + } + + /* For _dl_runtime_resolve, set xsave_state_size to xsave area + size + integer register save size and align it to 64 bytes. */ + if (cpu_features->max_cpuid >= 0xd) + { + unsigned int eax, ebx, ecx, edx; + + __cpuid_count (0xd, 0, eax, ebx, ecx, edx); + if (ebx != 0) + { + unsigned int xsave_state_full_size + = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); + + cpu_features->xsave_state_size + = xsave_state_full_size; + cpu_features->xsave_state_full_size + = xsave_state_full_size; + + __cpuid_count (0xd, 1, eax, ebx, ecx, edx); + + /* Check if XSAVEC is available. */ + if ((eax & (1 << 1)) != 0) + { + unsigned int xstate_comp_offsets[32]; + unsigned int xstate_comp_sizes[32]; + unsigned int i; + + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = 160; + xstate_comp_offsets[2] = 576; + xstate_comp_sizes[0] = 160; + xstate_comp_sizes[1] = 256; + + for (i = 2; i < 32; i++) + { + if ((STATE_SAVE_MASK & (1 << i)) != 0) + { + __cpuid_count (0xd, i, eax, ebx, ecx, edx); + xstate_comp_sizes[i] = eax; + } + else + { + ecx = 0; + xstate_comp_sizes[i] = 0; + } + + if (i > 2) + { + xstate_comp_offsets[i] + = (xstate_comp_offsets[i - 1] + + xstate_comp_sizes[i -1]); + if ((ecx & (1 << 1)) != 0) + xstate_comp_offsets[i] + = ALIGN_UP (xstate_comp_offsets[i], 64); + } + } + + /* Use XSAVEC. 
*/ + unsigned int size + = xstate_comp_offsets[31] + xstate_comp_sizes[31]; + if (size) + { + cpu_features->xsave_state_size + = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); + cpu_features->feature[index_arch_XSAVEC_Usable] + |= bit_arch_XSAVEC_Usable; + } + } + } + } } } @@ -60,28 +231,37 @@ init_cpu_features (struct cpu_features *cpu_features) /* This spells out "GenuineIntel". */ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) { - unsigned int extended_model; + unsigned int extended_model, stepping; kind = arch_kind_intel; - get_common_indeces (cpu_features, &family, &model, &extended_model); + get_common_indeces (cpu_features, &family, &model, &extended_model, + &stepping); + + get_extended_indices (cpu_features); if (family == 0x06) { - ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; model += extended_model; switch (model) { case 0x1c: case 0x26: /* BSF is slow on Atom. */ - cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF; + cpu_features->feature[index_arch_Slow_BSF] + |= bit_arch_Slow_BSF; break; case 0x57: /* Knights Landing. Enable Silvermont optimizations. */ - cpu_features->feature[index_Prefer_No_VZEROUPPER] - |= bit_Prefer_No_VZEROUPPER; + + case 0x5c: + case 0x5f: + /* Unaligned load versions are faster than SSSE3 + on Goldmont. */ + + case 0x4c: + /* Airmont is a die shrink of Silvermont. */ case 0x37: case 0x4a: @@ -90,22 +270,26 @@ init_cpu_features (struct cpu_features *cpu_features) case 0x5d: /* Unaligned load versions are faster than SSSE3 on Silvermont. 
*/ -#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop +# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop +#endif +#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2 +# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2 #endif -#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 -# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 +#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy +# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy #endif - cpu_features->feature[index_Fast_Unaligned_Load] - |= (bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop - | bit_Slow_SSE4_2); + cpu_features->feature[index_arch_Fast_Unaligned_Load] + |= (bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); break; default: /* Unknown family 0x06 processors. Assuming this is one of Core i3/i5/i7 processors if AVX is available. */ - if ((ecx & bit_AVX) == 0) + if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) break; case 0x1a: @@ -115,124 +299,226 @@ init_cpu_features (struct cpu_features *cpu_features) case 0x2c: case 0x2e: case 0x2f: - /* Rep string instructions, copy backward, unaligned loads + /* Rep string instructions, unaligned load, unaligned copy, and pminub are fast on Intel Core i3, i5 and i7. 
*/ -#if index_Fast_Rep_String != index_Fast_Copy_Backward -# error index_Fast_Rep_String != index_Fast_Copy_Backward +#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load +# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load #endif -#if index_Fast_Rep_String != index_Fast_Unaligned_Load -# error index_Fast_Rep_String != index_Fast_Unaligned_Load +#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop +# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop #endif -#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop +#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy +# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy #endif - cpu_features->feature[index_Fast_Rep_String] - |= (bit_Fast_Rep_String - | bit_Fast_Copy_Backward - | bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop); + cpu_features->feature[index_arch_Fast_Rep_String] + |= (bit_arch_Fast_Rep_String + | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop); + break; + + case 0x3f: + /* Xeon E7 v3 with stepping >= 4 has working TSX. */ + if (stepping >= 4) + break; + case 0x3c: + case 0x45: + case 0x46: + /* Disable Intel TSX on Haswell processors (except Xeon E7 v3 + with stepping >= 4) to avoid TSX on kernels that weren't + updated with the latest microcode package (which disables + broken feature by default). */ + cpu_features->cpuid[index_cpu_RTM].reg_RTM &= ~bit_cpu_RTM; break; } } + + + /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER + if AVX512ER is available. Don't use AVX512 to avoid lower CPU + frequency if AVX512ER isn't available. 
*/ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER)) + cpu_features->feature[index_arch_Prefer_No_VZEROUPPER] + |= bit_arch_Prefer_No_VZEROUPPER; + else + cpu_features->feature[index_arch_Prefer_No_AVX512] + |= bit_arch_Prefer_No_AVX512; } /* This spells out "AuthenticAMD". */ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) { - unsigned int extended_model; + unsigned int extended_model, stepping; kind = arch_kind_amd; - get_common_indeces (cpu_features, &family, &model, &extended_model); + get_common_indeces (cpu_features, &family, &model, &extended_model, + &stepping); + + get_extended_indices (cpu_features); ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; - unsigned int eax; - __cpuid (0x80000000, eax, ebx, ecx, edx); - if (eax >= 0x80000001) - __cpuid (0x80000001, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); + if (HAS_ARCH_FEATURE (AVX_Usable)) + { + /* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and + FMA4 requires AVX, determine if FMA4 is usable here. 
*/ + if (CPU_FEATURES_CPU_P (cpu_features, FMA4)) + cpu_features->feature[index_arch_FMA4_Usable] + |= bit_arch_FMA4_Usable; + } if (family == 0x15) { +#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward +# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward +#endif /* "Excavator" */ if (model >= 0x60 && model <= 0x7f) - cpu_features->feature[index_Fast_Unaligned_Load] - |= bit_Fast_Unaligned_Load; + { + cpu_features->feature[index_arch_Fast_Unaligned_Load] + |= (bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Copy_Backward); + + /* Unaligned AVX loads are slower.*/ + cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load] + &= ~bit_arch_AVX_Fast_Unaligned_Load; + } } } else - kind = arch_kind_other; + { + kind = arch_kind_other; + get_common_indeces (cpu_features, NULL, NULL, NULL, NULL); + } /* Support i586 if CX8 is available. */ - if (HAS_CPU_FEATURE (CX8)) - cpu_features->feature[index_I586] |= bit_I586; + if (CPU_FEATURES_CPU_P (cpu_features, CX8)) + cpu_features->feature[index_arch_I586] |= bit_arch_I586; /* Support i686 if CMOV is available. */ - if (HAS_CPU_FEATURE (CMOV)) - cpu_features->feature[index_I686] |= bit_I686; + if (CPU_FEATURES_CPU_P (cpu_features, CMOV)) + cpu_features->feature[index_arch_I686] |= bit_arch_I686; - if (cpu_features->max_cpuid >= 7) - __cpuid_count (7, 0, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); +#if !HAS_CPUID +no_cpuid: +#endif - /* Can we call xgetbv? 
*/ - if (HAS_CPU_FEATURE (OSXSAVE)) + cpu_features->family = family; + cpu_features->model = model; + cpu_features->kind = kind; + +#if HAVE_TUNABLES + TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps)); + cpu_features->non_temporal_threshold + = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL); + cpu_features->data_cache_size + = TUNABLE_GET (x86_data_cache_size, long int, NULL); + cpu_features->shared_cache_size + = TUNABLE_GET (x86_shared_cache_size, long int, NULL); +#endif + + /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86. */ +#if !HAVE_TUNABLES && defined SHARED + /* The glibc.tune.hwcap_mask tunable is initialized already, so no need to do + this. */ + GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT; +#endif + +#ifdef __x86_64__ + GLRO(dl_hwcap) = HWCAP_X86_64; + if (cpu_features->kind == arch_kind_intel) { - unsigned int xcrlow; - unsigned int xcrhigh; - asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); - /* Is YMM and XMM state usable? */ - if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == - (bit_YMM_state | bit_XMM_state)) + const char *platform = NULL; + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) + && CPU_FEATURES_CPU_P (cpu_features, AVX512CD)) { - /* Determine if AVX is usable. */ - if (HAS_CPU_FEATURE (AVX)) - cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable; -#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -#endif - /* Determine if AVX2 is usable. Unaligned load with 256-bit - AVX registers are faster on processors with AVX2. */ - if (HAS_CPU_FEATURE (AVX2)) - cpu_features->feature[index_AVX2_Usable] - |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; - /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and - ZMM16-ZMM31 state are enabled. 
*/ - if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state - | bit_ZMM16_31_state)) == - (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) + if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER)) { - /* Determine if AVX512F is usable. */ - if (HAS_CPU_FEATURE (AVX512F)) - { - cpu_features->feature[index_AVX512F_Usable] - |= bit_AVX512F_Usable; - /* Determine if AVX512DQ is usable. */ - if (HAS_CPU_FEATURE (AVX512DQ)) - cpu_features->feature[index_AVX512DQ_Usable] - |= bit_AVX512DQ_Usable; - } + if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF)) + platform = "xeon_phi"; + } + else + { + if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW) + && CPU_FEATURES_CPU_P (cpu_features, AVX512DQ) + && CPU_FEATURES_CPU_P (cpu_features, AVX512VL)) + GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1; } - /* Determine if FMA is usable. */ - if (HAS_CPU_FEATURE (FMA)) - cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable; - /* Determine if FMA4 is usable. */ - if (HAS_CPU_FEATURE (FMA4)) - cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable; } + + if (platform == NULL + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI1) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && CPU_FEATURES_CPU_P (cpu_features, LZCNT) + && CPU_FEATURES_CPU_P (cpu_features, MOVBE) + && CPU_FEATURES_CPU_P (cpu_features, POPCNT)) + platform = "haswell"; + + if (platform != NULL) + GLRO(dl_platform) = platform; } +#else + GLRO(dl_hwcap) = 0; + if (CPU_FEATURES_CPU_P (cpu_features, SSE2)) + GLRO(dl_hwcap) |= HWCAP_X86_SSE2; -#if !HAS_CPUID -no_cpuid: + if (CPU_FEATURES_ARCH_P (cpu_features, I686)) + GLRO(dl_platform) = "i686"; + else if (CPU_FEATURES_ARCH_P (cpu_features, I586)) + GLRO(dl_platform) = "i586"; #endif - cpu_features->family = family; - cpu_features->model = model; - cpu_features->kind = kind; +#if CET_ENABLED +# if HAVE_TUNABLES + TUNABLE_GET (x86_ibt, tunable_val_t *, + TUNABLE_CALLBACK (set_x86_ibt)); + 
TUNABLE_GET (x86_shstk, tunable_val_t *, + TUNABLE_CALLBACK (set_x86_shstk)); +# endif + + /* Check CET status. */ + unsigned int cet_status = get_cet_status (); + + if (cet_status) + { + GL(dl_x86_feature_1)[0] = cet_status; + +# ifndef SHARED + /* Check if IBT and SHSTK are enabled by kernel. */ + if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) + || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK)) + { + /* Disable IBT and/or SHSTK if they are enabled by kernel, but + disabled by environment variable: + + GLIBC_TUNABLES=glibc.tune.hwcaps=-IBT,-SHSTK + */ + unsigned int cet_feature = 0; + if (!HAS_CPU_FEATURE (IBT)) + cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT; + if (!HAS_CPU_FEATURE (SHSTK)) + cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; + + if (cet_feature) + { + int res = dl_cet_disable_cet (cet_feature); + + /* Clear the disabled bits in dl_x86_feature_1. */ + if (res == 0) + GL(dl_x86_feature_1)[0] &= ~cet_feature; + } + + /* Lock CET if IBT or SHSTK is enabled in executable. Don't + lock CET if SHSTK is enabled permissively. */ + if (((GL(dl_x86_feature_1)[1] >> CET_MAX) + & ((1 << CET_MAX) - 1)) + != CET_PERMISSIVE) + dl_cet_lock_cet (); + } +# endif + } +#endif } diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h index e354920d5d..347a4b118d 100644 --- a/sysdeps/x86/cpu-features.h +++ b/sysdeps/x86/cpu-features.h @@ -1,5 +1,5 @@ /* This file is part of the GNU C Library. - Copyright (C) 2008-2016 Free Software Foundation, Inc. + Copyright (C) 2008-2018 Free Software Foundation, Inc. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -18,48 +18,70 @@ #ifndef cpu_features_h #define cpu_features_h -#define bit_Fast_Rep_String (1 << 0) -#define bit_Fast_Copy_Backward (1 << 1) -#define bit_Slow_BSF (1 << 2) -#define bit_Fast_Unaligned_Load (1 << 4) -#define bit_Prefer_PMINUB_for_stringop (1 << 5) -#define bit_AVX_Usable (1 << 6) -#define bit_FMA_Usable (1 << 7) -#define bit_FMA4_Usable (1 << 8) -#define bit_Slow_SSE4_2 (1 << 9) -#define bit_AVX2_Usable (1 << 10) -#define bit_AVX_Fast_Unaligned_Load (1 << 11) -#define bit_AVX512F_Usable (1 << 12) -#define bit_AVX512DQ_Usable (1 << 13) -#define bit_I586 (1 << 14) -#define bit_I686 (1 << 15) -#define bit_Prefer_MAP_32BIT_EXEC (1 << 16) -#define bit_Prefer_No_VZEROUPPER (1 << 17) +#define bit_arch_Fast_Rep_String (1 << 0) +#define bit_arch_Fast_Copy_Backward (1 << 1) +#define bit_arch_Slow_BSF (1 << 2) +#define bit_arch_Fast_Unaligned_Load (1 << 4) +#define bit_arch_Prefer_PMINUB_for_stringop (1 << 5) +#define bit_arch_AVX_Usable (1 << 6) +#define bit_arch_FMA_Usable (1 << 7) +#define bit_arch_FMA4_Usable (1 << 8) +#define bit_arch_Slow_SSE4_2 (1 << 9) +#define bit_arch_AVX2_Usable (1 << 10) +#define bit_arch_AVX_Fast_Unaligned_Load (1 << 11) +#define bit_arch_AVX512F_Usable (1 << 12) +#define bit_arch_AVX512DQ_Usable (1 << 13) +#define bit_arch_I586 (1 << 14) +#define bit_arch_I686 (1 << 15) +#define bit_arch_Prefer_MAP_32BIT_EXEC (1 << 16) +#define bit_arch_Prefer_No_VZEROUPPER (1 << 17) +#define bit_arch_Fast_Unaligned_Copy (1 << 18) +#define bit_arch_Prefer_ERMS (1 << 19) +#define bit_arch_Prefer_No_AVX512 (1 << 20) +#define bit_arch_MathVec_Prefer_No_AVX512 (1 << 21) +#define bit_arch_XSAVEC_Usable (1 << 22) +#define bit_arch_Prefer_FSRM (1 << 23) /* CPUID Feature flags. */ /* COMMON_CPUID_INDEX_1. 
*/ -#define bit_CX8 (1 << 8) -#define bit_CMOV (1 << 15) -#define bit_SSE2 (1 << 26) -#define bit_SSSE3 (1 << 9) -#define bit_SSE4_1 (1 << 19) -#define bit_SSE4_2 (1 << 20) -#define bit_OSXSAVE (1 << 27) -#define bit_AVX (1 << 28) -#define bit_POPCOUNT (1 << 23) -#define bit_FMA (1 << 12) -#define bit_FMA4 (1 << 16) +#define bit_cpu_CX8 (1 << 8) +#define bit_cpu_CMOV (1 << 15) +#define bit_cpu_SSE (1 << 25) +#define bit_cpu_SSE2 (1 << 26) +#define bit_cpu_SSSE3 (1 << 9) +#define bit_cpu_SSE4_1 (1 << 19) +#define bit_cpu_SSE4_2 (1 << 20) +#define bit_cpu_OSXSAVE (1 << 27) +#define bit_cpu_AVX (1 << 28) +#define bit_cpu_POPCOUNT (1 << 23) +#define bit_cpu_FMA (1 << 12) +#define bit_cpu_FMA4 (1 << 16) +#define bit_cpu_HTT (1 << 28) +#define bit_cpu_LZCNT (1 << 5) +#define bit_cpu_MOVBE (1 << 22) +#define bit_cpu_POPCNT (1 << 23) /* COMMON_CPUID_INDEX_7. */ -#define bit_RTM (1 << 11) -#define bit_AVX2 (1 << 5) -#define bit_AVX512F (1 << 16) -#define bit_AVX512DQ (1 << 17) +#define bit_cpu_BMI1 (1 << 3) +#define bit_cpu_BMI2 (1 << 8) +#define bit_cpu_ERMS (1 << 9) +#define bit_cpu_RTM (1 << 11) +#define bit_cpu_AVX2 (1 << 5) +#define bit_cpu_AVX512F (1 << 16) +#define bit_cpu_AVX512DQ (1 << 17) +#define bit_cpu_AVX512PF (1 << 26) +#define bit_cpu_AVX512ER (1 << 27) +#define bit_cpu_AVX512CD (1 << 28) +#define bit_cpu_AVX512BW (1 << 30) +#define bit_cpu_AVX512VL (1u << 31) +#define bit_cpu_IBT (1u << 20) +#define bit_cpu_SHSTK (1u << 7) +#define bit_cpu_FSRM (1 << 4) /* XCR0 Feature flags. */ -#define bit_XMM_state (1 << 1) -#define bit_YMM_state (2 << 1) +#define bit_XMM_state (1 << 1) +#define bit_YMM_state (1 << 2) #define bit_Opmask_state (1 << 5) #define bit_ZMM0_15_state (1 << 6) #define bit_ZMM16_31_state (1 << 7) @@ -70,98 +92,23 @@ /* The current maximum size of the feature integer bit array. 
*/ #define FEATURE_INDEX_MAX 1 -#ifdef __ASSEMBLER__ - -# include <ifunc-defines.h> -# include <rtld-global-offsets.h> - -# define index_CX8 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET -# define index_CMOV COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET -# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET -# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET - -# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE -# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE -# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE -# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE -# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_I586 FEATURE_INDEX_1*FEATURE_SIZE -# define index_I686 FEATURE_INDEX_1*FEATURE_SIZE -# define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE -# define index_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE - - -# if defined (_LIBC) && !IS_IN (nonlib) -# ifdef __x86_64__ -# ifdef SHARED -# if IS_IN (rtld) -# define LOAD_RTLD_GLOBAL_RO_RDX -# define HAS_FEATURE(offset, name) \ - testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip) -# else -# define LOAD_RTLD_GLOBAL_RO_RDX \ - mov 
_rtld_global_ro@GOTPCREL(%rip), %RDX_LP -# define HAS_FEATURE(offset, name) \ - testl $(bit_##name), \ - RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx) -# endif -# else /* SHARED */ -# define LOAD_RTLD_GLOBAL_RO_RDX -# define HAS_FEATURE(offset, name) \ - testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip) -# endif /* !SHARED */ -# else /* __x86_64__ */ -# ifdef SHARED -# define LOAD_FUNC_GOT_EAX(func) \ - leal func@GOTOFF(%edx), %eax -# if IS_IN (rtld) -# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ - LOAD_PIC_REG(dx) -# define HAS_FEATURE(offset, name) \ - testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx) -# else -# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ - LOAD_PIC_REG(dx); \ - mov _rtld_global_ro@GOT(%edx), %ecx -# define HAS_FEATURE(offset, name) \ - testl $(bit_##name), \ - RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx) -# endif -# else /* SHARED */ -# define LOAD_FUNC_GOT_EAX(func) \ - leal func, %eax -# define LOAD_GOT_AND_RTLD_GLOBAL_RO -# define HAS_FEATURE(offset, name) \ - testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name) -# endif /* !SHARED */ -# endif /* !__x86_64__ */ -# else /* _LIBC && !nonlib */ -# error "Sorry, <cpu-features.h> is unimplemented for assembler" -# endif /* !_LIBC || nonlib */ +/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need + space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be + aligned to 16 bytes for fxsave and 64 bytes for xsave. */ +#define STATE_SAVE_OFFSET (8 * 7 + 8) -/* HAS_* evaluates to true if we may use the feature at runtime. */ -# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, name) -# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name) +/* Save SSE, AVX, AVX512, mask and bound registers. 
*/ +#define STATE_SAVE_MASK \ + ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)) +#ifdef __ASSEMBLER__ +# include <cpu-features-offsets.h> #else /* __ASSEMBLER__ */ - enum { COMMON_CPUID_INDEX_1 = 0, COMMON_CPUID_INDEX_7, - COMMON_CPUID_INDEX_80000001, /* for AMD */ + COMMON_CPUID_INDEX_80000001, /* Keep the following line at the end. */ COMMON_CPUID_INDEX_MAX }; @@ -185,7 +132,27 @@ struct cpu_features } cpuid[COMMON_CPUID_INDEX_MAX]; unsigned int family; unsigned int model; + /* The state size for XSAVEC or XSAVE. The type must be unsigned long + int so that we use + + sub xsave_state_size_offset(%rip) %RSP_LP + + in _dl_runtime_resolve. */ + unsigned long int xsave_state_size; + /* The full state size for XSAVE when XSAVEC is disabled by + + GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable + */ + unsigned int xsave_state_full_size; unsigned int feature[FEATURE_INDEX_MAX]; + /* Data cache size for use in memory and string routines, typically + L1 size. */ + unsigned long int data_cache_size; + /* Shared cache size for use in memory and string routines, typically + L2 or L3 size. */ + unsigned long int shared_cache_size; + /* Threshold to use non temporal store. */ + unsigned long int non_temporal_threshold; }; /* Used from outside of glibc to get access to the CPU features @@ -200,30 +167,53 @@ extern const struct cpu_features *__get_cpu_features (void) # endif +/* Only used directly in cpu-features.c. */ +# define CPU_FEATURES_CPU_P(ptr, name) \ + ((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0) +# define CPU_FEATURES_ARCH_P(ptr, name) \ + ((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0) + /* HAS_* evaluates to true if we may use the feature at runtime. 
*/ # define HAS_CPU_FEATURE(name) \ - ((__get_cpu_features ()->cpuid[index_##name].reg_##name & (bit_##name)) != 0) + CPU_FEATURES_CPU_P (__get_cpu_features (), name) # define HAS_ARCH_FEATURE(name) \ - ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) + CPU_FEATURES_ARCH_P (__get_cpu_features (), name) -# define index_CX8 COMMON_CPUID_INDEX_1 -# define index_CMOV COMMON_CPUID_INDEX_1 -# define index_SSE2 COMMON_CPUID_INDEX_1 -# define index_SSSE3 COMMON_CPUID_INDEX_1 -# define index_SSE4_1 COMMON_CPUID_INDEX_1 -# define index_SSE4_2 COMMON_CPUID_INDEX_1 -# define index_AVX COMMON_CPUID_INDEX_1 -# define index_AVX2 COMMON_CPUID_INDEX_7 -# define index_AVX512F COMMON_CPUID_INDEX_7 -# define index_AVX512DQ COMMON_CPUID_INDEX_7 -# define index_RTM COMMON_CPUID_INDEX_7 -# define index_FMA COMMON_CPUID_INDEX_1 -# define index_FMA4 COMMON_CPUID_INDEX_80000001 -# define index_POPCOUNT COMMON_CPUID_INDEX_1 -# define index_OSXSAVE COMMON_CPUID_INDEX_1 +# define index_cpu_CX8 COMMON_CPUID_INDEX_1 +# define index_cpu_CMOV COMMON_CPUID_INDEX_1 +# define index_cpu_SSE COMMON_CPUID_INDEX_1 +# define index_cpu_SSE2 COMMON_CPUID_INDEX_1 +# define index_cpu_SSSE3 COMMON_CPUID_INDEX_1 +# define index_cpu_SSE4_1 COMMON_CPUID_INDEX_1 +# define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1 +# define index_cpu_AVX COMMON_CPUID_INDEX_1 +# define index_cpu_AVX2 COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512F COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512DQ COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512PF COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512ER COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512CD COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512BW COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512VL COMMON_CPUID_INDEX_7 +# define index_cpu_ERMS COMMON_CPUID_INDEX_7 +# define index_cpu_RTM COMMON_CPUID_INDEX_7 +# define index_cpu_FMA COMMON_CPUID_INDEX_1 +# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001 +# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1 +# define 
index_cpu_OSXSAVE COMMON_CPUID_INDEX_1 +# define index_cpu_HTT COMMON_CPUID_INDEX_1 +# define index_cpu_BMI1 COMMON_CPUID_INDEX_7 +# define index_cpu_BMI2 COMMON_CPUID_INDEX_7 +# define index_cpu_LZCNT COMMON_CPUID_INDEX_80000001 +# define index_cpu_MOVBE COMMON_CPUID_INDEX_1 +# define index_cpu_POPCNT COMMON_CPUID_INDEX_1 +# define index_cpu_IBT COMMON_CPUID_INDEX_7 +# define index_cpu_SHSTK COMMON_CPUID_INDEX_7 +# define index_cpu_FSRM COMMON_CPUID_INDEX_7 # define reg_CX8 edx # define reg_CMOV edx +# define reg_SSE edx # define reg_SSE2 edx # define reg_SSSE3 ecx # define reg_SSE4_1 ecx @@ -232,29 +222,50 @@ extern const struct cpu_features *__get_cpu_features (void) # define reg_AVX2 ebx # define reg_AVX512F ebx # define reg_AVX512DQ ebx +# define reg_AVX512PF ebx +# define reg_AVX512ER ebx +# define reg_AVX512CD ebx +# define reg_AVX512BW ebx +# define reg_AVX512VL ebx +# define reg_ERMS ebx # define reg_RTM ebx # define reg_FMA ecx # define reg_FMA4 ecx # define reg_POPCOUNT ecx # define reg_OSXSAVE ecx +# define reg_HTT edx +# define reg_BMI1 ebx +# define reg_BMI2 ebx +# define reg_LZCNT ecx +# define reg_MOVBE ecx +# define reg_POPCNT ecx +# define reg_IBT edx +# define reg_SHSTK ecx +# define reg_FSRM edx -# define index_Fast_Rep_String FEATURE_INDEX_1 -# define index_Fast_Copy_Backward FEATURE_INDEX_1 -# define index_Slow_BSF FEATURE_INDEX_1 -# define index_Fast_Unaligned_Load FEATURE_INDEX_1 -# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 -# define index_AVX_Usable FEATURE_INDEX_1 -# define index_FMA_Usable FEATURE_INDEX_1 -# define index_FMA4_Usable FEATURE_INDEX_1 -# define index_Slow_SSE4_2 FEATURE_INDEX_1 -# define index_AVX2_Usable FEATURE_INDEX_1 -# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 -# define index_AVX512F_Usable FEATURE_INDEX_1 -# define index_AVX512DQ_Usable FEATURE_INDEX_1 -# define index_I586 FEATURE_INDEX_1 -# define index_I686 FEATURE_INDEX_1 -# define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1 -# define 
index_Prefer_No_VZEROUPPER FEATURE_INDEX_1 +# define index_arch_Fast_Rep_String FEATURE_INDEX_1 +# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1 +# define index_arch_Slow_BSF FEATURE_INDEX_1 +# define index_arch_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 +# define index_arch_AVX_Usable FEATURE_INDEX_1 +# define index_arch_FMA_Usable FEATURE_INDEX_1 +# define index_arch_FMA4_Usable FEATURE_INDEX_1 +# define index_arch_Slow_SSE4_2 FEATURE_INDEX_1 +# define index_arch_AVX2_Usable FEATURE_INDEX_1 +# define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_arch_AVX512F_Usable FEATURE_INDEX_1 +# define index_arch_AVX512DQ_Usable FEATURE_INDEX_1 +# define index_arch_I586 FEATURE_INDEX_1 +# define index_arch_I686 FEATURE_INDEX_1 +# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1 +# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1 +# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1 +# define index_arch_Prefer_ERMS FEATURE_INDEX_1 +# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1 +# define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_1 +# define index_arch_XSAVEC_Usable FEATURE_INDEX_1 +# define index_arch_Prefer_FSRM FEATURE_INDEX_1 #endif /* !__ASSEMBLER__ */ diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c new file mode 100644 index 0000000000..69155a8f44 --- /dev/null +++ b/sysdeps/x86/cpu-tunables.c @@ -0,0 +1,385 @@ +/* x86 CPU feature tuning. + This file is part of the GNU C Library. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if HAVE_TUNABLES +# define TUNABLE_NAMESPACE tune +# include <stdbool.h> +# include <stdint.h> +# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */ +# include <elf/dl-tunables.h> +# include <string.h> +# include <cpu-features.h> +# include <ldsodefs.h> + +/* We can't use IFUNC memcmp nor strlen in init_cpu_features from libc.a + since IFUNC must be set up by init_cpu_features. */ +# if defined USE_MULTIARCH && !defined SHARED +# ifdef __x86_64__ +# define DEFAULT_MEMCMP __memcmp_sse2 +# else +# define DEFAULT_MEMCMP __memcmp_ia32 +# endif +extern __typeof (memcmp) DEFAULT_MEMCMP; +# else +# define DEFAULT_MEMCMP memcmp +# endif + +# define CHECK_GLIBC_IFUNC_CPU_OFF(f, cpu_features, name, len) \ + _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ + if (!DEFAULT_MEMCMP (f, #name, len)) \ + { \ + cpu_features->cpuid[index_cpu_##name].reg_##name \ + &= ~bit_cpu_##name; \ + break; \ + } + +/* Disable an ARCH feature NAME. We don't enable an ARCH feature which + isn't available. */ +# define CHECK_GLIBC_IFUNC_ARCH_OFF(f, cpu_features, name, len) \ + _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ + if (!DEFAULT_MEMCMP (f, #name, len)) \ + { \ + cpu_features->feature[index_arch_##name] \ + &= ~bit_arch_##name; \ + break; \ + } + +/* Enable/disable an ARCH feature NAME. 
*/ +# define CHECK_GLIBC_IFUNC_ARCH_BOTH(f, cpu_features, name, disable, \ + len) \ + _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ + if (!DEFAULT_MEMCMP (f, #name, len)) \ + { \ + if (disable) \ + cpu_features->feature[index_arch_##name] \ + &= ~bit_arch_##name; \ + else \ + cpu_features->feature[index_arch_##name] \ + |= bit_arch_##name; \ + break; \ + } + +/* Enable/disable an ARCH feature NAME. Enable an ARCH feature only + if the ARCH feature NEED is also enabled. */ +# define CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH(f, cpu_features, name, \ + need, disable, len) \ + _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ + if (!DEFAULT_MEMCMP (f, #name, len)) \ + { \ + if (disable) \ + cpu_features->feature[index_arch_##name] \ + &= ~bit_arch_##name; \ + else if (CPU_FEATURES_ARCH_P (cpu_features, need)) \ + cpu_features->feature[index_arch_##name] \ + |= bit_arch_##name; \ + break; \ + } + +/* Enable/disable an ARCH feature NAME. Enable an ARCH feature only + if the CPU feature NEED is also enabled. */ +# define CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH(f, cpu_features, name, \ + need, disable, len) \ + _Static_assert (sizeof (#name) - 1 == len, #name " != " #len); \ + if (!DEFAULT_MEMCMP (f, #name, len)) \ + { \ + if (disable) \ + cpu_features->feature[index_arch_##name] \ + &= ~bit_arch_##name; \ + else if (CPU_FEATURES_CPU_P (cpu_features, need)) \ + cpu_features->feature[index_arch_##name] \ + |= bit_arch_##name; \ + break; \ + } + +attribute_hidden +void +TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) +{ + /* The current IFUNC selection is based on microbenchmarks in glibc. + It should give the best performance for most workloads. But other + choices may have better performance for a particular workload or on + the hardware which wasn't available when the selection was made. + The environment variable: + + GLIBC_TUNABLES=glibc.tune.hwcaps=-xxx,yyy,-zzz,.... 
+ + can be used to enable CPU/ARCH feature yyy, disable CPU/ARCH feature + xxx and zzz, where the feature name is case-sensitive and has to + match the ones in cpu-features.h. It can be used by glibc developers + to tune for a new processor or override the IFUNC selection to + improve performance for a particular workload. + + NOTE: the IFUNC selection may change over time. Please check all + multiarch implementations when experimenting. */ + + const char *p = valp->strval; + struct cpu_features *cpu_features = &GLRO(dl_x86_cpu_features); + size_t len; + + do + { + const char *c, *n; + bool disable; + size_t nl; + + for (c = p; *c != ','; c++) + if (*c == '\0') + break; + + len = c - p; + disable = *p == '-'; + if (disable) + { + n = p + 1; + nl = len - 1; + } + else + { + n = p; + nl = len; + } + switch (nl) + { + default: + break; + case 3: + if (disable) + { + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX, 3); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, CX8, 3); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, FMA, 3); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, HTT, 3); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, IBT, 3); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, RTM, 3); + } + break; + case 4: + if (disable) + { + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX2, 4); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, BMI1, 4); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, BMI2, 4); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, CMOV, 4); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, ERMS, 4); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, FMA4, 4); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE2, 4); + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, I586, 4); + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, I686, 4); + } + break; + case 5: + if (disable) + { + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, LZCNT, 5); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, MOVBE, 5); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SHSTK, 5); + CHECK_GLIBC_IFUNC_CPU_OFF (n, 
cpu_features, SSSE3, 5); + } + break; + case 6: + if (disable) + { + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, POPCNT, 6); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE4_1, 6); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE4_2, 6); + } + break; + case 7: + if (disable) + { + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512F, 7); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, OSXSAVE, 7); + } + break; + case 8: + if (disable) + { + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512CD, 8); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512BW, 8); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512DQ, 8); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512ER, 8); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512PF, 8); + CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512VL, 8); + } + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Slow_BSF, + disable, 8); + break; + case 10: + if (disable) + { + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, AVX_Usable, + 10); + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, FMA_Usable, + 10); + } + break; + case 11: + if (disable) + { + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, AVX2_Usable, + 11); + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, FMA4_Usable, + 11); + } + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Prefer_ERMS, + disable, 11); + CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (n, cpu_features, + Slow_SSE4_2, SSE4_2, + disable, 11); + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Prefer_FSRM, + disable, 11); + break; + case 13: + if (disable) + { + /* Update xsave_state_size to XSAVE state size. 
*/ + cpu_features->xsave_state_size + = cpu_features->xsave_state_full_size; + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, + XSAVEC_Usable, 13); + } + break; + case 14: + if (disable) + { + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, + AVX512F_Usable, 14); + } + break; + case 15: + if (disable) + { + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, + AVX512DQ_Usable, 15); + } + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Fast_Rep_String, + disable, 15); + break; + case 16: + { + CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH + (n, cpu_features, Prefer_No_AVX512, AVX512F_Usable, + disable, 16); + } + break; + case 18: + { + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, + Fast_Copy_Backward, disable, + 18); + } + break; + case 19: + { + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, + Fast_Unaligned_Load, disable, + 19); + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, + Fast_Unaligned_Copy, disable, + 19); + } + break; + case 20: + { + CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH + (n, cpu_features, Prefer_No_VZEROUPPER, AVX_Usable, + disable, 20); + } + break; + case 21: + { + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, + Prefer_MAP_32BIT_EXEC, disable, + 21); + } + break; + case 23: + { + CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH + (n, cpu_features, AVX_Fast_Unaligned_Load, AVX_Usable, + disable, 23); + } + break; + case 24: + { + CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH + (n, cpu_features, MathVec_Prefer_No_AVX512, + AVX512F_Usable, disable, 24); + } + break; + case 26: + { + CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH + (n, cpu_features, Prefer_PMINUB_for_stringop, SSE2, + disable, 26); + } + break; + } + p += len + 1; + } + while (*p != '\0'); +} + +# if CET_ENABLED +# include <cet-tunables.h> + +attribute_hidden +void +TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *valp) +{ + if (DEFAULT_MEMCMP (valp->strval, "on", sizeof ("on")) == 0) + { + GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1); + GL(dl_x86_feature_1)[1] |= CET_ALWAYS_ON; + } + else if (DEFAULT_MEMCMP (valp->strval, "off", 
sizeof ("off")) == 0) + { + GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1); + GL(dl_x86_feature_1)[1] |= CET_ALWAYS_OFF; + } + else if (DEFAULT_MEMCMP (valp->strval, "permissive", + sizeof ("permissive")) == 0) + { + GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1); + GL(dl_x86_feature_1)[1] |= CET_PERMISSIVE; + } +} + +attribute_hidden +void +TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *valp) +{ + if (DEFAULT_MEMCMP (valp->strval, "on", sizeof ("on")) == 0) + { + GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX); + GL(dl_x86_feature_1)[1] |= (CET_ALWAYS_ON << CET_MAX); + } + else if (DEFAULT_MEMCMP (valp->strval, "off", sizeof ("off")) == 0) + { + GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX); + GL(dl_x86_feature_1)[1] |= (CET_ALWAYS_OFF << CET_MAX); + } + else if (DEFAULT_MEMCMP (valp->strval, "permissive", + sizeof ("permissive")) == 0) + { + GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX); + GL(dl_x86_feature_1)[1] |= (CET_PERMISSIVE << CET_MAX); + } +} +# endif +#endif diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c new file mode 100644 index 0000000000..b82ba14e75 --- /dev/null +++ b/sysdeps/x86/dl-cet.c @@ -0,0 +1,346 @@ +/* x86 CET initializers function. + Copyright (C) 2018 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <unistd.h> +#include <errno.h> +#include <libintl.h> +#include <ldsodefs.h> +#include <dl-cet.h> +#include <cet-tunables.h> + +/* GNU_PROPERTY_X86_FEATURE_1_IBT and GNU_PROPERTY_X86_FEATURE_1_SHSTK + are defined in <elf.h>, which are only available for C sources. + X86_FEATURE_1_IBT and X86_FEATURE_1_SHSTK are defined in <sysdep.h> + which are available for both C and asm sources. They must match. */ +#if GNU_PROPERTY_X86_FEATURE_1_IBT != X86_FEATURE_1_IBT +# error GNU_PROPERTY_X86_FEATURE_1_IBT != X86_FEATURE_1_IBT +#endif +#if GNU_PROPERTY_X86_FEATURE_1_SHSTK != X86_FEATURE_1_SHSTK +# error GNU_PROPERTY_X86_FEATURE_1_SHSTK != X86_FEATURE_1_SHSTK +#endif + +static int +dl_cet_mark_legacy_region (struct link_map *l) +{ + /* Mark PT_LOAD segments with PF_X in legacy code page bitmap. */ + size_t i, phnum = l->l_phnum; + const ElfW(Phdr) *phdr = l->l_phdr; +#ifdef __x86_64__ + typedef unsigned long long word_t; +#else + typedef unsigned long word_t; +#endif + unsigned int bits_to_set; + word_t mask_to_set; +#define BITS_PER_WORD (sizeof (word_t) * 8) +#define BITMAP_FIRST_WORD_MASK(start) \ + (~((word_t) 0) << ((start) & (BITS_PER_WORD - 1))) +#define BITMAP_LAST_WORD_MASK(nbits) \ + (~((word_t) 0) >> (-(nbits) & (BITS_PER_WORD - 1))) + + word_t *bitmap = (word_t *) GL(dl_x86_legacy_bitmap)[0]; + word_t bitmap_size = GL(dl_x86_legacy_bitmap)[1]; + word_t *p; + size_t page_size = GLRO(dl_pagesize); + + for (i = 0; i < phnum; i++) + if (phdr[i].p_type == PT_LOAD && (phdr[i].p_flags & PF_X)) + { + /* One bit in legacy bitmap represents a page. 
*/ + ElfW(Addr) start = (phdr[i].p_vaddr + l->l_addr) / page_size; + ElfW(Addr) len = (phdr[i].p_memsz + page_size - 1) / page_size; + ElfW(Addr) end = start + len; + + if ((end / 8) > bitmap_size) + return -EINVAL; + + p = bitmap + (start / BITS_PER_WORD); + bits_to_set = BITS_PER_WORD - (start % BITS_PER_WORD); + mask_to_set = BITMAP_FIRST_WORD_MASK (start); + + while (len >= bits_to_set) + { + *p |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_WORD; + mask_to_set = ~((word_t) 0); + p++; + } + if (len) + { + mask_to_set &= BITMAP_LAST_WORD_MASK (end); + *p |= mask_to_set; + } + } + + return 0; +} + +/* Check if object M is compatible with CET. */ + +static void +dl_cet_check (struct link_map *m, const char *program) +{ + /* Check how IBT should be enabled. */ + unsigned int enable_ibt_type + = GL(dl_x86_feature_1)[1] & ((1 << CET_MAX) - 1); + /* Check how SHSTK should be enabled. */ + unsigned int enable_shstk_type + = ((GL(dl_x86_feature_1)[1] >> CET_MAX) & ((1 << CET_MAX) - 1)); + + /* No legacy object check if both IBT and SHSTK are always on. */ + if (enable_ibt_type == CET_ALWAYS_ON + && enable_shstk_type == CET_ALWAYS_ON) + return; + + /* Check if IBT is enabled by kernel. */ + bool ibt_enabled + = (GL(dl_x86_feature_1)[0] & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0; + /* Check if SHSTK is enabled by kernel. */ + bool shstk_enabled + = (GL(dl_x86_feature_1)[0] & GNU_PROPERTY_X86_FEATURE_1_SHSTK) != 0; + + if (ibt_enabled || shstk_enabled) + { + struct link_map *l = NULL; + + /* Check if IBT and SHSTK are enabled in object. */ + bool enable_ibt = (ibt_enabled + && enable_ibt_type != CET_ALWAYS_OFF); + bool enable_shstk = (shstk_enabled + && enable_shstk_type != CET_ALWAYS_OFF); + if (program) + { + /* Enable IBT and SHSTK only if they are enabled in executable. 
+ NB: IBT and SHSTK may be disabled by environment variable: + + GLIBC_TUNABLES=glibc.tune.hwcaps=-IBT,-SHSTK + */ + enable_ibt &= (HAS_CPU_FEATURE (IBT) + && (enable_ibt_type == CET_ALWAYS_ON + || (m->l_cet & lc_ibt) != 0)); + enable_shstk &= (HAS_CPU_FEATURE (SHSTK) + && (enable_shstk_type == CET_ALWAYS_ON + || (m->l_cet & lc_shstk) != 0)); + } + + /* ld.so is CET-enabled by kernel. But shared objects may not + support IBT nor SHSTK. */ + if (enable_ibt || enable_shstk) + { + int res; + unsigned int i; + unsigned int first_legacy, last_legacy; + bool need_legacy_bitmap = false; + + i = m->l_searchlist.r_nlist; + while (i-- > 0) + { + /* Check each shared object to see if IBT and SHSTK are + enabled. */ + l = m->l_initfini[i]; + + if (l->l_init_called) + continue; + +#ifdef SHARED + /* Skip CET check for ld.so since ld.so is CET-enabled. + CET will be disabled later if CET isn't enabled in + executable. */ + if (l == &GL(dl_rtld_map) + || l->l_real == &GL(dl_rtld_map) + || (program && l == m)) + continue; +#endif + + if (enable_ibt + && enable_ibt_type != CET_ALWAYS_ON + && !(l->l_cet & lc_ibt)) + { + /* Remember the first and last legacy objects. */ + if (!need_legacy_bitmap) + last_legacy = i; + first_legacy = i; + need_legacy_bitmap = true; + } + + /* SHSTK is enabled only if it is enabled in executable as + well as all shared objects. */ + enable_shstk &= (enable_shstk_type == CET_ALWAYS_ON + || (l->l_cet & lc_shstk) != 0); + } + + if (need_legacy_bitmap) + { + if (GL(dl_x86_legacy_bitmap)[0]) + { + /* Change legacy bitmap to writable. */ + if (__mprotect ((void *) GL(dl_x86_legacy_bitmap)[0], + GL(dl_x86_legacy_bitmap)[1], + PROT_READ | PROT_WRITE) < 0) + { +mprotect_failure: + if (program) + _dl_fatal_printf ("%s: mprotect legacy bitmap failed\n", + l->l_name); + else + _dl_signal_error (EINVAL, l->l_name, "dlopen", + N_("mprotect legacy bitmap failed")); + } + } + else + { + /* Allocate legacy bitmap. 
*/ + int res = dl_cet_allocate_legacy_bitmap + (GL(dl_x86_legacy_bitmap)); + if (res != 0) + { + if (program) + _dl_fatal_printf ("%s: legacy bitmap isn't available\n", + l->l_name); + else + _dl_signal_error (EINVAL, l->l_name, "dlopen", + N_("legacy bitmap isn't available")); + } + } + + /* Put legacy shared objects in legacy bitmap. */ + for (i = first_legacy; i <= last_legacy; i++) + { + l = m->l_initfini[i]; + + if (l->l_init_called || (l->l_cet & lc_ibt)) + continue; + +#ifdef SHARED + if (l == &GL(dl_rtld_map) + || l->l_real == &GL(dl_rtld_map) + || (program && l == m)) + continue; +#endif + + /* If IBT is enabled in executable and IBT isn't enabled + in this shard object, mark PT_LOAD segments with PF_X + in legacy code page bitmap. */ + res = dl_cet_mark_legacy_region (l); + if (res != 0) + { + if (program) + _dl_fatal_printf ("%s: failed to mark legacy code region\n", + l->l_name); + else + _dl_signal_error (-res, l->l_name, "dlopen", + N_("failed to mark legacy code region")); + } + } + + /* Change legacy bitmap to read-only. */ + if (__mprotect ((void *) GL(dl_x86_legacy_bitmap)[0], + GL(dl_x86_legacy_bitmap)[1], PROT_READ) < 0) + goto mprotect_failure; + } + } + + bool cet_feature_changed = false; + + if (enable_ibt != ibt_enabled || enable_shstk != shstk_enabled) + { + if (!program + && enable_shstk_type != CET_PERMISSIVE) + { + /* When SHSTK is enabled, we can't dlopening a shared + object without SHSTK. */ + if (enable_shstk != shstk_enabled) + _dl_signal_error (EINVAL, l->l_name, "dlopen", + N_("shadow stack isn't enabled")); + return; + } + + /* Disable IBT and/or SHSTK if they are enabled by kernel, but + disabled in executable or shared objects. */ + unsigned int cet_feature = 0; + + /* Disable IBT only during program startup. 
*/ + if (program && !enable_ibt) + cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT; + if (!enable_shstk) + cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; + + int res = dl_cet_disable_cet (cet_feature); + if (res != 0) + { + if (program) + _dl_fatal_printf ("%s: can't disable CET\n", program); + else + _dl_signal_error (-res, l->l_name, "dlopen", + N_("can't disable CET")); + } + + /* Clear the disabled bits in dl_x86_feature_1. */ + GL(dl_x86_feature_1)[0] &= ~cet_feature; + + cet_feature_changed = true; + } + +#ifdef SHARED + if (program + && (!shstk_enabled + || enable_shstk_type != CET_PERMISSIVE) + && (ibt_enabled || shstk_enabled)) + { + /* Lock CET if IBT or SHSTK is enabled in executable. Don't + lock CET if SHSTK is enabled permissively. */ + int res = dl_cet_lock_cet (); + if (res != 0) + _dl_fatal_printf ("%s: can't lock CET\n", program); + + cet_feature_changed = true; + } +#endif + + if (cet_feature_changed) + { + unsigned int feature_1 = 0; + if (enable_ibt) + feature_1 |= GNU_PROPERTY_X86_FEATURE_1_IBT; + if (enable_shstk) + feature_1 |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; + struct pthread *self = THREAD_SELF; + THREAD_SETMEM (self, header.feature_1, feature_1); + } + } +} + +void +_dl_cet_open_check (struct link_map *l) +{ + dl_cet_check (l, NULL); +} + +#ifdef SHARED + +# ifndef LINKAGE +# define LINKAGE +# endif + +LINKAGE +void +_dl_cet_check (struct link_map *main_map, const char *program) +{ + dl_cet_check (main_map, program); +} +#endif /* SHARED */ diff --git a/sysdeps/x86/dl-get-cpu-features.c b/sysdeps/x86/dl-get-cpu-features.c index 839c2a4bba..49593f19c6 100644 --- a/sysdeps/x86/dl-get-cpu-features.c +++ b/sysdeps/x86/dl-get-cpu-features.c @@ -1,5 +1,5 @@ /* This file is part of the GNU C Library. - Copyright (C) 2015-2016 Free Software Foundation, Inc. + Copyright (C) 2015-2018 Free Software Foundation, Inc. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/sysdeps/x86/dl-hwcap.h b/sysdeps/x86/dl-hwcap.h new file mode 100644 index 0000000000..f5e9d542ca --- /dev/null +++ b/sysdeps/x86/dl-hwcap.h @@ -0,0 +1,77 @@ +/* x86 version of hardware capability information handling macros. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_HWCAP_H +#define _DL_HWCAP_H + +#if IS_IN (ldconfig) +/* Since ldconfig processes both i386 and x86-64 libraries, it needs + to cover all platforms and hardware capabilities. */ +# define HWCAP_PLATFORMS_START 0 +# define HWCAP_PLATFORMS_COUNT 4 +# define HWCAP_START 0 +# define HWCAP_COUNT 3 +# define HWCAP_IMPORTANT \ + (HWCAP_X86_SSE2 | HWCAP_X86_64 | HWCAP_X86_AVX512_1) +#elif defined __x86_64__ +/* For 64 bit, only cover x86-64 platforms and capabilities. */ +# define HWCAP_PLATFORMS_START 2 +# define HWCAP_PLATFORMS_COUNT 4 +# define HWCAP_START 1 +# define HWCAP_COUNT 3 +# define HWCAP_IMPORTANT (HWCAP_X86_64 | HWCAP_X86_AVX512_1) +#else +/* For 32 bit, only cover i586, i686 and SSE2. 
*/ +# define HWCAP_PLATFORMS_START 0 +# define HWCAP_PLATFORMS_COUNT 2 +# define HWCAP_START 0 +# define HWCAP_COUNT 1 +# define HWCAP_IMPORTANT (HWCAP_X86_SSE2) +#endif + +enum +{ + HWCAP_X86_SSE2 = 1 << 0, + HWCAP_X86_64 = 1 << 1, + HWCAP_X86_AVX512_1 = 1 << 2 +}; + +static inline const char * +__attribute__ ((unused)) +_dl_hwcap_string (int idx) +{ + return GLRO(dl_x86_hwcap_flags)[idx]; +}; + +static inline int +__attribute__ ((unused, always_inline)) +_dl_string_hwcap (const char *str) +{ + int i; + + for (i = HWCAP_START; i < HWCAP_COUNT; i++) + { + if (strcmp (str, GLRO(dl_x86_hwcap_flags)[i]) == 0) + return i; + } + return -1; +}; + +/* We cannot provide a general printing function. */ +#define _dl_procinfo(type, word) -1 + +#endif /* dl-hwcap.h */ diff --git a/sysdeps/x86/dl-procinfo.c b/sysdeps/x86/dl-procinfo.c new file mode 100644 index 0000000000..4b0538ede8 --- /dev/null +++ b/sysdeps/x86/dl-procinfo.c @@ -0,0 +1,88 @@ +/* Data for x86 version of processor capability information. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This information must be kept in sync with the _DL_HWCAP_COUNT, + HWCAP_PLATFORMS_START and HWCAP_PLATFORMS_COUNT definitions in + dl-hwcap.h. 
+ + If anything should be added here check whether the size of each string + is still ok with the given array size. + + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. + */ + +#if !IS_IN (ldconfig) +# if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_cpu_features +# else +PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features +# endif +# ifndef PROCINFO_DECL += { } +# endif +# if !defined SHARED || defined PROCINFO_DECL +; +# else +, +# endif +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_hwcap_flags +#else +PROCINFO_CLASS const char _dl_x86_hwcap_flags[3][9] +#endif +#ifndef PROCINFO_DECL += { + "sse2", "x86_64", "avx512_1" + } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_platforms +#else +PROCINFO_CLASS const char _dl_x86_platforms[4][9] +#endif +#ifndef PROCINFO_DECL += { + "i586", "i686", "haswell", "xeon_phi" + } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif diff --git a/sysdeps/x86/dl-procinfo.h b/sysdeps/x86/dl-procinfo.h new file mode 100644 index 0000000000..55cafc26e2 --- /dev/null +++ b/sysdeps/x86/dl-procinfo.h @@ -0,0 +1,48 @@ +/* x86 version of processor capability information handling macros. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_PROCINFO_H +#define _DL_PROCINFO_H 1 +#include <ldsodefs.h> +#include <dl-hwcap.h> + +#define _DL_HWCAP_COUNT HWCAP_COUNT +#define _DL_PLATFORMS_COUNT HWCAP_PLATFORMS_COUNT + +/* Start at 48 to reserve spaces for hardware capabilities. */ +#define _DL_FIRST_PLATFORM 48 +/* Mask to filter out platforms. */ +#define _DL_HWCAP_PLATFORM (((1ULL << _DL_PLATFORMS_COUNT) - 1) \ + << _DL_FIRST_PLATFORM) + +static inline int +__attribute__ ((unused, always_inline)) +_dl_string_platform (const char *str) +{ + int i; + + if (str != NULL) + for (i = HWCAP_PLATFORMS_START; i < HWCAP_PLATFORMS_COUNT; ++i) + { + if (strcmp (str, GLRO(dl_x86_platforms)[i]) == 0) + return _DL_FIRST_PLATFORM + i; + } + return -1; +}; + +#endif /* dl-procinfo.h */ diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c new file mode 100644 index 0000000000..eddbde6a31 --- /dev/null +++ b/sysdeps/x86/dl-procruntime.c @@ -0,0 +1,68 @@ +/* Data for processor runtime information. x86 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This information must be kept in sync with the _DL_HWCAP_COUNT, + HWCAP_PLATFORMS_START and HWCAP_PLATFORMS_COUNT definitions in + dl-hwcap.h. + + If anything should be added here check whether the size of each string + is still ok with the given array size. + + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. 
+ */ + +#ifndef PROCINFO_CLASS +# define PROCINFO_CLASS +#endif + +#if !IS_IN (ldconfig) +# if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_feature_1 +# else +PROCINFO_CLASS unsigned int _dl_x86_feature_1[2] +# endif +# if !defined SHARED || defined PROCINFO_DECL +; +# else +, +# endif + +# if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_legacy_bitmap +# else +PROCINFO_CLASS unsigned long _dl_x86_legacy_bitmap[2] +# endif +# if !defined SHARED || defined PROCINFO_DECL +; +# else +, +# endif +#endif diff --git a/sysdeps/x86/dl-prop.h b/sysdeps/x86/dl-prop.h new file mode 100644 index 0000000000..26c3131ac5 --- /dev/null +++ b/sysdeps/x86/dl-prop.h @@ -0,0 +1,164 @@ +/* Support for GNU properties. x86 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _DL_PROP_H +#define _DL_PROP_H + +#include <not-cancel.h> + +extern void _dl_cet_check (struct link_map *, const char *) + attribute_hidden; +extern void _dl_cet_open_check (struct link_map *) + attribute_hidden; + +static inline void __attribute__ ((always_inline)) +_rtld_main_check (struct link_map *m, const char *program) +{ +#if CET_ENABLED + _dl_cet_check (m, program); +#endif +} + +static inline void __attribute__ ((always_inline)) +_dl_open_check (struct link_map *m) +{ +#if CET_ENABLED + _dl_cet_open_check (m); +#endif +} + +static inline void __attribute__ ((unused)) +_dl_process_cet_property_note (struct link_map *l, + const ElfW(Nhdr) *note, + const ElfW(Addr) size, + const ElfW(Addr) align) +{ +#if CET_ENABLED + /* The NT_GNU_PROPERTY_TYPE_0 note must be aliged to 4 bytes in + 32-bit objects and to 8 bytes in 64-bit objects. Skip notes + with incorrect alignment. */ + if (align != (__ELF_NATIVE_CLASS / 8)) + return; + + const ElfW(Addr) start = (ElfW(Addr)) note; + + while ((ElfW(Addr)) (note + 1) - start < size) + { + /* Find the NT_GNU_PROPERTY_TYPE_0 note. */ + if (note->n_namesz == 4 + && note->n_type == NT_GNU_PROPERTY_TYPE_0 + && memcmp (note + 1, "GNU", 4) == 0) + { + /* Check for invalid property. */ + if (note->n_descsz < 8 + || (note->n_descsz % sizeof (ElfW(Addr))) != 0) + break; + + /* Start and end of property array. */ + unsigned char *ptr = (unsigned char *) (note + 1) + 4; + unsigned char *ptr_end = ptr + note->n_descsz; + + do + { + unsigned int type = *(unsigned int *) ptr; + unsigned int datasz = *(unsigned int *) (ptr + 4); + + ptr += 8; + if ((ptr + datasz) > ptr_end) + break; + + if (type == GNU_PROPERTY_X86_FEATURE_1_AND) + { + /* The size of GNU_PROPERTY_X86_FEATURE_1_AND is 4 + bytes. When seeing GNU_PROPERTY_X86_FEATURE_1_AND, + we stop the search regardless if its size is correct + or not. There is no point to continue if this note + is ill-formed. 
*/ + if (datasz == 4) + { + unsigned int feature_1 = *(unsigned int *) ptr; + if ((feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT)) + l->l_cet |= lc_ibt; + if ((feature_1 & GNU_PROPERTY_X86_FEATURE_1_SHSTK)) + l->l_cet |= lc_shstk; + } + return; + } + + /* Check the next property item. */ + ptr += ALIGN_UP (datasz, sizeof (ElfW(Addr))); + } + while ((ptr_end - ptr) >= 8); + } + + /* NB: Note sections like .note.ABI-tag and .note.gnu.build-id are + aligned to 4 bytes in 64-bit ELF objects. */ + note = ((const void *) note + + ELF_NOTE_NEXT_OFFSET (note->n_namesz, note->n_descsz, + align)); + } +#endif +} + +#ifdef FILEBUF_SIZE +static inline int __attribute__ ((unused)) +_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph, + int fd, struct filebuf *fbp) +{ +# if CET_ENABLED + const ElfW(Nhdr) *note; + ElfW(Nhdr) *note_malloced = NULL; + ElfW(Addr) size = ph->p_filesz; + + if (ph->p_offset + size <= (size_t) fbp->len) + note = (const void *) (fbp->buf + ph->p_offset); + else + { + if (size < __MAX_ALLOCA_CUTOFF) + note = alloca (size); + else + { + note_malloced = malloc (size); + note = note_malloced; + } + __lseek (fd, ph->p_offset, SEEK_SET); + if (__read_nocancel (fd, (void *) note, size) != size) + { + if (note_malloced) + free (note_malloced); + return -1; + } + } + + _dl_process_cet_property_note (l, note, size, ph->p_align); + if (note_malloced) + free (note_malloced); +# endif + return 0; +} +#endif + +static inline int __attribute__ ((unused)) +_rtld_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph) +{ + const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr); + _dl_process_cet_property_note (l, note, ph->p_memsz, ph->p_align); + return 0; +} + +#endif /* _DL_PROP_H */ diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list new file mode 100644 index 0000000000..73886b1352 --- /dev/null +++ b/sysdeps/x86/dl-tunables.list @@ -0,0 +1,40 @@ +# x86 specific tunables. 
+# Copyright (C) 2017-2018 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +glibc { + tune { + hwcaps { + type: STRING + } + x86_ibt { + type: STRING + } + x86_shstk { + type: STRING + } + x86_non_temporal_threshold { + type: SIZE_T + } + x86_data_cache_size { + type: SIZE_T + } + x86_shared_cache_size { + type: SIZE_T + } + } +} diff --git a/sysdeps/x86/elide.h b/sysdeps/x86/elide.h index 8691e6673d..8d5589902f 100644 --- a/sysdeps/x86/elide.h +++ b/sysdeps/x86/elide.h @@ -1,5 +1,5 @@ /* elide.h: Generic lock elision support. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,8 +20,8 @@ #include <hle.h> #include <elision-conf.h> +#include <atomic.h> -#define ACCESS_ONCE(x) (* (volatile typeof(x) *) &(x)) /* Adapt elision with ADAPT_COUNT and STATUS and decide retries. */ @@ -35,28 +35,35 @@ elision_adapt(signed char *adapt_count, unsigned int status) { /* Right now we skip here. Better would be to wait a bit and retry. This likely needs some spinning. Be careful - to avoid writing the lock. 
*/ - if (*adapt_count != __elision_aconf.skip_lock_busy) - ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_busy; + to avoid writing the lock. + Using relaxed MO and separate atomic accesses is sufficient because + adapt_count is just a hint. */ + if (atomic_load_relaxed (adapt_count) != __elision_aconf.skip_lock_busy) + atomic_store_relaxed (adapt_count, __elision_aconf.skip_lock_busy); } /* Internal abort. There is no chance for retry. Use the normal locking and next time use lock. - Be careful to avoid writing to the lock. */ - else if (*adapt_count != __elision_aconf.skip_lock_internal_abort) - ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_internal_abort; + Be careful to avoid writing to the lock. See above for MO. */ + else if (atomic_load_relaxed (adapt_count) + != __elision_aconf.skip_lock_internal_abort) + atomic_store_relaxed (adapt_count, + __elision_aconf.skip_lock_internal_abort); return true; } /* is_lock_free must be executed inside the transaction */ /* Returns true if lock defined by IS_LOCK_FREE was elided. - ADAPT_COUNT is a pointer to per-lock state variable. */ + ADAPT_COUNT is a per-lock state variable; it must be accessed atomically + to avoid data races but is just a hint, so using relaxed MO and separate + atomic loads and stores instead of atomic read-modify-write operations is + sufficient. */ #define ELIDE_LOCK(adapt_count, is_lock_free) \ ({ \ int ret = 0; \ \ - if ((adapt_count) <= 0) \ + if (atomic_load_relaxed (&(adapt_count)) <= 0) \ { \ for (int i = __elision_aconf.retry_try_xbegin; i > 0; i--) \ { \ @@ -75,12 +82,13 @@ elision_adapt(signed char *adapt_count, unsigned int status) } \ } \ else \ - (adapt_count)--; /* missing updates ok */ \ + atomic_store_relaxed (&(adapt_count), \ + atomic_load_relaxed (&(adapt_count)) - 1); \ ret; \ }) /* Returns true if lock defined by IS_LOCK_FREE was try-elided. - ADAPT_COUNT is a pointer to per-lock state variable. */ + ADAPT_COUNT is a per-lock state variable. 
*/ #define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) ({ \ int ret = 0; \ diff --git a/sysdeps/x86/float128-abi.h b/sysdeps/x86/float128-abi.h new file mode 100644 index 0000000000..6b954cc673 --- /dev/null +++ b/sysdeps/x86/float128-abi.h @@ -0,0 +1,2 @@ +/* ABI version for _Float128 ABI introduction. */ +#define FLOAT128_VERSION GLIBC_2.26 diff --git a/sysdeps/x86/fpu/Makefile b/sysdeps/x86/fpu/Makefile index b561995658..600e42c3db 100644 --- a/sysdeps/x86/fpu/Makefile +++ b/sysdeps/x86/fpu/Makefile @@ -1,7 +1,14 @@ ifeq ($(subdir),math) +# sqrtf128 requires soft-fp. +CPPFLAGS += -I../soft-fp + libm-support += powl_helper -tests += test-fenv-sse test-fenv-clear-sse test-fenv-x87 test-fenv-sse-2 +tests += test-fenv-sse test-fenv-clear-sse test-fenv-x87 test-fenv-sse-2 \ + test-flt-eval-method-387 test-flt-eval-method-sse CFLAGS-test-fenv-sse.c += -msse2 -mfpmath=sse CFLAGS-test-fenv-clear-sse.c += -msse2 -mfpmath=sse CFLAGS-test-fenv-sse-2.c += -msse2 -mfpmath=sse +CFLAGS-test-flt-eval-method-387.c += -fexcess-precision=standard -mfpmath=387 +CFLAGS-test-flt-eval-method-sse.c += -fexcess-precision=standard -msse2 \ + -mfpmath=sse endif diff --git a/sysdeps/x86/fpu/bits/fenv.h b/sysdeps/x86/fpu/bits/fenv.h index 8c8503bd7e..4103982d8c 100644 --- a/sysdeps/x86/fpu/bits/fenv.h +++ b/sysdeps/x86/fpu/bits/fenv.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1997-2016 Free Software Foundation, Inc. +/* Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -101,12 +101,28 @@ fenv_t; # define FE_NOMASK_ENV ((const fenv_t *) -2) #endif +#if __GLIBC_USE (IEC_60559_BFP_EXT) +/* Type representing floating-point control modes. */ +typedef struct + { + unsigned short int __control_word; + unsigned short int __glibc_reserved; + unsigned int __mxcsr; + } +femode_t; + +/* Default floating-point control modes. 
*/ +# define FE_DFL_MODE ((const femode_t *) -1L) +#endif + #ifdef __USE_EXTERN_INLINES __BEGIN_DECLS /* Optimized versions. */ +#ifndef _LIBC extern int __REDIRECT_NTH (__feraiseexcept_renamed, (int), feraiseexcept); +#endif __extern_always_inline void __NTH (__feraiseexcept_invalid_divbyzero (int __excepts)) { diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index ca43cf4b9e..3d229d8705 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -1,5 +1,5 @@ /* Platform-specific SIMD declarations of math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/bits/mathinline.h b/sysdeps/x86/fpu/bits/mathinline.h index 0ff1aa4cec..91ece8dfb8 100644 --- a/sysdeps/x86/fpu/bits/mathinline.h +++ b/sysdeps/x86/fpu/bits/mathinline.h @@ -1,5 +1,5 @@ /* Inline math functions for i387 and SSE. - Copyright (C) 1995-2016 Free Software Foundation, Inc. + Copyright (C) 1995-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -26,364 +26,6 @@ # define __MATH_INLINE __extern_always_inline #endif - -#if defined __USE_ISOC99 && defined __GNUC__ && __GNUC__ >= 2 -/* GCC 2.97 and up have builtins that actually can be used. */ -# if !__GNUC_PREREQ (2,97) -/* ISO C99 defines some macros to perform unordered comparisons. The - ix87 FPU supports this with special opcodes and we should use them. - These must not be inline functions since we have to be able to handle - all floating-point types. */ -# undef isgreater -# undef isgreaterequal -# undef isless -# undef islessequal -# undef islessgreater -# undef isunordered -# ifdef __i686__ -/* For the PentiumPro and more recent processors we can provide - better code. 
*/ -# define isgreater(x, y) \ - ({ register char __result; \ - __asm__ ("fucomip %%st(1), %%st; seta %%al" \ - : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \ - __result; }) -# define isgreaterequal(x, y) \ - ({ register char __result; \ - __asm__ ("fucomip %%st(1), %%st; setae %%al" \ - : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \ - __result; }) - -# define isless(x, y) \ - ({ register char __result; \ - __asm__ ("fucomip %%st(1), %%st; seta %%al" \ - : "=a" (__result) : "u" (x), "t" (y) : "cc", "st"); \ - __result; }) - -# define islessequal(x, y) \ - ({ register char __result; \ - __asm__ ("fucomip %%st(1), %%st; setae %%al" \ - : "=a" (__result) : "u" (x), "t" (y) : "cc", "st"); \ - __result; }) - -# define islessgreater(x, y) \ - ({ register char __result; \ - __asm__ ("fucomip %%st(1), %%st; setne %%al" \ - : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \ - __result; }) - -# define isunordered(x, y) \ - ({ register char __result; \ - __asm__ ("fucomip %%st(1), %%st; setp %%al" \ - : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \ - __result; }) -# else -/* This is the dumb, portable code for i386 and above. 
*/ -# define isgreater(x, y) \ - ({ register char __result; \ - __asm__ ("fucompp; fnstsw; testb $0x45, %%ah; setz %%al" \ - : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \ - __result; }) - -# define isgreaterequal(x, y) \ - ({ register char __result; \ - __asm__ ("fucompp; fnstsw; testb $0x05, %%ah; setz %%al" \ - : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \ - __result; }) - -# define isless(x, y) \ - ({ register char __result; \ - __asm__ ("fucompp; fnstsw; testb $0x45, %%ah; setz %%al" \ - : "=a" (__result) : "u" (x), "t" (y) : "cc", "st", "st(1)"); \ - __result; }) - -# define islessequal(x, y) \ - ({ register char __result; \ - __asm__ ("fucompp; fnstsw; testb $0x05, %%ah; setz %%al" \ - : "=a" (__result) : "u" (x), "t" (y) : "cc", "st", "st(1)"); \ - __result; }) - -# define islessgreater(x, y) \ - ({ register char __result; \ - __asm__ ("fucompp; fnstsw; testb $0x44, %%ah; setz %%al" \ - : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \ - __result; }) - -# define isunordered(x, y) \ - ({ register char __result; \ - __asm__ ("fucompp; fnstsw; sahf; setp %%al" \ - : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \ - __result; }) -# endif /* __i686__ */ -# endif /* GCC 2.97 */ - -/* The gcc, version 2.7 or below, has problems with all this inlining - code. So disable it for this version of the compiler. */ -# if __GNUC_PREREQ (2, 8) -__BEGIN_NAMESPACE_C99 - -/* Test for negative number. Used in the signbit() macro. 
*/ -__MATH_INLINE int -__NTH (__signbitf (float __x)) -{ -# ifdef __SSE2_MATH__ - int __m; - __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); - return (__m & 0x8) != 0; -# else - __extension__ union { float __f; int __i; } __u = { __f: __x }; - return __u.__i < 0; -# endif -} -__MATH_INLINE int -__NTH (__signbit (double __x)) -{ -# ifdef __SSE2_MATH__ - int __m; - __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); - return (__m & 0x80) != 0; -# else - __extension__ union { double __d; int __i[2]; } __u = { __d: __x }; - return __u.__i[1] < 0; -# endif -} -__MATH_INLINE int -__NTH (__signbitl (long double __x)) -{ - __extension__ union { long double __l; int __i[3]; } __u = { __l: __x }; - return (__u.__i[2] & 0x8000) != 0; -} - -__END_NAMESPACE_C99 -# endif -#endif - - -/* The gcc, version 2.7 or below, has problems with all this inlining - code. So disable it for this version of the compiler. */ -#if __GNUC_PREREQ (2, 8) -# if !__GNUC_PREREQ (3, 4) && !defined __NO_MATH_INLINES \ - && defined __OPTIMIZE__ -/* GCC 3.4 introduced builtins for all functions below, so - there's no need to define any of these inline functions. */ - -# ifdef __USE_ISOC99 -__BEGIN_NAMESPACE_C99 - -/* Round to nearest integer. */ -# ifdef __SSE_MATH__ -__MATH_INLINE long int -__NTH (lrintf (float __x)) -{ - long int __res; - /* Mark as volatile since the result is dependent on the state of - the SSE control register (the rounding mode). Otherwise GCC might - remove these assembler instructions since it does not know about - the rounding mode change and cannot currently be told. */ - __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x)); - return __res; -} -# endif -# ifdef __SSE2_MATH__ -__MATH_INLINE long int -__NTH (lrint (double __x)) -{ - long int __res; - /* Mark as volatile since the result is dependent on the state of - the SSE control register (the rounding mode). 
Otherwise GCC might - remove these assembler instructions since it does not know about - the rounding mode change and cannot currently be told. */ - __asm __volatile__ ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x)); - return __res; -} -# endif -# ifdef __x86_64__ -__extension__ -__MATH_INLINE long long int -__NTH (llrintf (float __x)) -{ - long long int __res; - /* Mark as volatile since the result is dependent on the state of - the SSE control register (the rounding mode). Otherwise GCC might - remove these assembler instructions since it does not know about - the rounding mode change and cannot currently be told. */ - __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x)); - return __res; -} -__extension__ -__MATH_INLINE long long int -__NTH (llrint (double __x)) -{ - long long int __res; - /* Mark as volatile since the result is dependent on the state of - the SSE control register (the rounding mode). Otherwise GCC might - remove these assembler instructions since it does not know about - the rounding mode change and cannot currently be told. */ - __asm __volatile__ ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x)); - return __res; -} -# endif - -# if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \ - && defined __SSE2_MATH__ -/* Determine maximum of two values. */ -__MATH_INLINE float -__NTH (fmaxf (float __x, float __y)) -{ -# ifdef __AVX__ - float __res; - __asm ("vmaxss %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); - return __res; -# else - __asm ("maxss %1, %0" : "+x" (__x) : "xm" (__y)); - return __x; -# endif -} -__MATH_INLINE double -__NTH (fmax (double __x, double __y)) -{ -# ifdef __AVX__ - float __res; - __asm ("vmaxsd %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); - return __res; -# else - __asm ("maxsd %1, %0" : "+x" (__x) : "xm" (__y)); - return __x; -# endif -} - -/* Determine minimum of two values. 
*/ -__MATH_INLINE float -__NTH (fminf (float __x, float __y)) -{ -# ifdef __AVX__ - float __res; - __asm ("vminss %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); - return __res; -# else - __asm ("minss %1, %0" : "+x" (__x) : "xm" (__y)); - return __x; -# endif -} -__MATH_INLINE double -__NTH (fmin (double __x, double __y)) -{ -# ifdef __AVX__ - float __res; - __asm ("vminsd %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); - return __res; -# else - __asm ("minsd %1, %0" : "+x" (__x) : "xm" (__y)); - return __x; -# endif -} -# endif - -__END_NAMESPACE_C99 -# endif - -# if defined __SSE4_1__ && defined __SSE2_MATH__ -# if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99 -__BEGIN_NAMESPACE_C99 - -/* Round to nearest integer. */ -__MATH_INLINE double -__NTH (rint (double __x)) -{ - double __res; - /* Mark as volatile since the result is dependent on the state of - the SSE control register (the rounding mode). Otherwise GCC might - remove these assembler instructions since it does not know about - the rounding mode change and cannot currently be told. */ - __asm __volatile__ ("roundsd $4, %1, %0" : "=x" (__res) : "xm" (__x)); - return __res; -} -__MATH_INLINE float -__NTH (rintf (float __x)) -{ - float __res; - /* Mark as volatile since the result is dependent on the state of - the SSE control register (the rounding mode). Otherwise GCC might - remove these assembler instructions since it does not know about - the rounding mode change and cannot currently be told. */ - __asm __volatile__ ("roundss $4, %1, %0" : "=x" (__res) : "xm" (__x)); - return __res; -} - -# ifdef __USE_ISOC99 -/* Round to nearest integer without raising inexact exception. */ -__MATH_INLINE double -__NTH (nearbyint (double __x)) -{ - double __res; - /* Mark as volatile since the result is dependent on the state of - the SSE control register (the rounding mode). 
Otherwise GCC might - remove these assembler instructions since it does not know about - the rounding mode change and cannot currently be told. */ - __asm __volatile__ ("roundsd $0xc, %1, %0" : "=x" (__res) : "xm" (__x)); - return __res; -} -__MATH_INLINE float -__NTH (nearbyintf (float __x)) -{ - float __res; - /* Mark as volatile since the result is dependent on the state of - the SSE control register (the rounding mode). Otherwise GCC might - remove these assembler instructions since it does not know about - the rounding mode change and cannot currently be told. */ - __asm __volatile__ ("roundss $0xc, %1, %0" : "=x" (__res) : "xm" (__x)); - return __res; -} -# endif - -__END_NAMESPACE_C99 -# endif - -__BEGIN_NAMESPACE_STD -/* Smallest integral value not less than X. */ -__MATH_INLINE double -__NTH (ceil (double __x)) -{ - double __res; - __asm ("roundsd $2, %1, %0" : "=x" (__res) : "xm" (__x)); - return __res; -} -__END_NAMESPACE_STD - -__BEGIN_NAMESPACE_C99 -__MATH_INLINE float -__NTH (ceilf (float __x)) -{ - float __res; - __asm ("roundss $2, %1, %0" : "=x" (__res) : "xm" (__x)); - return __res; -} -__END_NAMESPACE_C99 - -__BEGIN_NAMESPACE_STD -/* Largest integer not greater than X. */ -__MATH_INLINE double -__NTH (floor (double __x)) -{ - double __res; - __asm ("roundsd $1, %1, %0" : "=x" (__res) : "xm" (__x)); - return __res; -} -__END_NAMESPACE_STD - -__BEGIN_NAMESPACE_C99 -__MATH_INLINE float -__NTH (floorf (float __x)) -{ - float __res; - __asm ("roundss $1, %1, %0" : "=x" (__res) : "xm" (__x)); - return __res; -} -__END_NAMESPACE_C99 -# endif -# endif -#endif - /* Disable x87 inlines when -fpmath=sse is passed and also when we're building on x86_64. Older gcc (gcc-3.2 for example) does not define __SSE2_MATH__ for x86_64. */ @@ -532,50 +174,6 @@ __END_NAMESPACE_C99 /* __FAST_MATH__ is defined by gcc -ffast-math. 
*/ # ifdef __FAST_MATH__ -# ifdef __USE_GNU -# define __sincos_code \ - register long double __cosr; \ - register long double __sinr; \ - register unsigned int __swtmp; \ - __asm __volatile__ \ - ("fsincos\n\t" \ - "fnstsw %w2\n\t" \ - "testl $0x400, %2\n\t" \ - "jz 1f\n\t" \ - "fldpi\n\t" \ - "fadd %%st(0)\n\t" \ - "fxch %%st(1)\n\t" \ - "2: fprem1\n\t" \ - "fnstsw %w2\n\t" \ - "testl $0x400, %2\n\t" \ - "jnz 2b\n\t" \ - "fstp %%st(1)\n\t" \ - "fsincos\n\t" \ - "1:" \ - : "=t" (__cosr), "=u" (__sinr), "=a" (__swtmp) : "0" (__x)); \ - *__sinx = __sinr; \ - *__cosx = __cosr - -__MATH_INLINE void -__NTH (__sincos (double __x, double *__sinx, double *__cosx)) -{ - __sincos_code; -} - -__MATH_INLINE void -__NTH (__sincosf (float __x, float *__sinx, float *__cosx)) -{ - __sincos_code; -} - -__MATH_INLINE void -__NTH (__sincosl (long double __x, long double *__sinx, long double *__cosx)) -{ - __sincos_code; -} -# endif - - /* Optimized inline implementation, sometimes with reduced precision and/or argument range. 
*/ @@ -631,50 +229,9 @@ __inline_mathcodeNP_ (long double, __expl, __x, return __builtin_expl (__x)) __inline_mathcodeNP (exp, __x, __exp_code) __inline_mathcodeNP_ (long double, __expl, __x, __exp_code) # endif - - -# if !__GNUC_PREREQ (3, 5) -__inline_mathcodeNP (tan, __x, \ - register long double __value; \ - register long double __value2 __attribute__ ((__unused__)); \ - __asm __volatile__ \ - ("fptan" \ - : "=t" (__value2), "=u" (__value) : "0" (__x)); \ - return __value) -# endif # endif /* __FAST_MATH__ */ -# if __GNUC_PREREQ (3, 4) -__inline_mathcodeNP2_ (long double, __atan2l, __y, __x, - return __builtin_atan2l (__y, __x)) -# else -# define __atan2_code \ - register long double __value; \ - __asm __volatile__ \ - ("fpatan" \ - : "=t" (__value) : "0" (__x), "u" (__y) : "st(1)"); \ - return __value -# ifdef __FAST_MATH__ -__inline_mathcodeNP2 (atan2, __y, __x, __atan2_code) -# endif -__inline_mathcodeNP2_ (long double, __atan2l, __y, __x, __atan2_code) -# endif - - -# if defined __FAST_MATH__ && !__GNUC_PREREQ (3, 5) -__inline_mathcodeNP2 (fmod, __x, __y, \ - register long double __value; \ - __asm __volatile__ \ - ("1: fprem\n\t" \ - "fnstsw %%ax\n\t" \ - "sahf\n\t" \ - "jp 1b" \ - : "=t" (__value) : "0" (__x), "u" (__y) : "ax", "cc"); \ - return __value) -# endif - - # ifdef __FAST_MATH__ # if !__GNUC_PREREQ (3,3) __inline_mathopNP (sqrt, "fsqrt") @@ -697,28 +254,6 @@ __inline_mathop (fabs, "fabs") __inline_mathop_ (long double, __fabsl, "fabs") # endif -# ifdef __FAST_MATH__ -# if !__GNUC_PREREQ (3, 4) -/* The argument range of this inline version is reduced. */ -__inline_mathopNP (sin, "fsin") -/* The argument range of this inline version is reduced. 
*/ -__inline_mathopNP (cos, "fcos") - -__inline_mathop_declNP (log, "fldln2; fxch; fyl2x", "0" (__x) : "st(1)") -# endif - -# if !__GNUC_PREREQ (3, 5) -__inline_mathop_declNP (log10, "fldlg2; fxch; fyl2x", "0" (__x) : "st(1)") - -__inline_mathcodeNP (asin, __x, return __atan2l (__x, __libc_sqrtl (1.0 - __x * __x))) -__inline_mathcodeNP (acos, __x, return __atan2l (__libc_sqrtl (1.0 - __x * __x), __x)) -# endif - -# if !__GNUC_PREREQ (3, 4) -__inline_mathop_declNP (atan, "fld1; fpatan", "0" (__x) : "st(1)") -# endif -# endif /* __FAST_MATH__ */ - __inline_mathcode_ (long double, __sgn1l, __x, \ __extension__ union { long double __xld; unsigned int __xi[3]; } __n = \ { __xld: __x }; \ @@ -743,57 +278,6 @@ __inline_mathcodeNP (tanh, __x, \ return __exm1 / (__exm1 + 2.0) * __sgn1l (-__x)) # endif -__inline_mathcodeNP (floor, __x, \ - register long double __value; \ - register int __ignore; \ - unsigned short int __cw; \ - unsigned short int __cwtmp; \ - __asm __volatile ("fnstcw %3\n\t" \ - "movzwl %3, %1\n\t" \ - "andl $0xf3ff, %1\n\t" \ - "orl $0x0400, %1\n\t" /* rounding down */ \ - "movw %w1, %2\n\t" \ - "fldcw %2\n\t" \ - "frndint\n\t" \ - "fldcw %3" \ - : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ - "=m" (__cw) \ - : "0" (__x)); \ - return __value) - -__inline_mathcodeNP (ceil, __x, \ - register long double __value; \ - register int __ignore; \ - unsigned short int __cw; \ - unsigned short int __cwtmp; \ - __asm __volatile ("fnstcw %3\n\t" \ - "movzwl %3, %1\n\t" \ - "andl $0xf3ff, %1\n\t" \ - "orl $0x0800, %1\n\t" /* rounding up */ \ - "movw %w1, %2\n\t" \ - "fldcw %2\n\t" \ - "frndint\n\t" \ - "fldcw %3" \ - : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ - "=m" (__cw) \ - : "0" (__x)); \ - return __value) - -# ifdef __FAST_MATH__ -# define __ldexp_code \ - register long double __value; \ - __asm __volatile__ \ - ("fscale" \ - : "=t" (__value) : "0" (__x), "u" ((long double) __y)); \ - return __value - -__MATH_INLINE double -__NTH (ldexp (double 
__x, int __y)) -{ - __ldexp_code; -} -# endif - /* Optimized versions for some non-standardized functions. */ # ifdef __USE_ISOC99 @@ -801,25 +285,6 @@ __NTH (ldexp (double __x, int __y)) # ifdef __FAST_MATH__ __inline_mathcodeNP (expm1, __x, __expm1_code) -/* We cannot rely on M_SQRT being defined. So we do it for ourself - here. */ -# define __M_SQRT2 1.41421356237309504880L /* sqrt(2) */ - -# if !__GNUC_PREREQ (3, 5) -__inline_mathcodeNP (log1p, __x, \ - register long double __value; \ - if (__fabsl (__x) >= 1.0 - 0.5 * __M_SQRT2) \ - __value = logl (1.0 + __x); \ - else \ - __asm __volatile__ \ - ("fldln2\n\t" \ - "fxch\n\t" \ - "fyl2xp1" \ - : "=t" (__value) : "0" (__x) : "st(1)"); \ - return __value) -# endif - - /* The argument range of the inline version of asinhl is slightly reduced. */ __inline_mathcodeNP (asinh, __x, \ register long double __y = __fabsl (__x); \ @@ -837,126 +302,14 @@ __inline_mathcodeNP (atanh, __x, \ __inline_mathcodeNP2 (hypot, __x, __y, return __libc_sqrtl (__x * __x + __y * __y)) -# if !__GNUC_PREREQ (3, 5) -__inline_mathcodeNP(logb, __x, \ - register long double __value; \ - register long double __junk; \ - __asm __volatile__ \ - ("fxtract\n\t" \ - : "=t" (__junk), "=u" (__value) : "0" (__x)); \ - return __value) -# endif - # endif # endif -# ifdef __USE_ISOC99 -# ifdef __FAST_MATH__ - -# if !__GNUC_PREREQ (3, 5) -__inline_mathop_declNP (log2, "fld1; fxch; fyl2x", "0" (__x) : "st(1)") -# endif - -__MATH_INLINE float -__NTH (ldexpf (float __x, int __y)) -{ - __ldexp_code; -} - -__MATH_INLINE long double -__NTH (ldexpl (long double __x, int __y)) -{ - __ldexp_code; -} - -__inline_mathopNP (rint, "frndint") -# endif /* __FAST_MATH__ */ - -# define __lrint_code \ - long int __lrintres; \ - __asm__ __volatile__ \ - ("fistpl %0" \ - : "=m" (__lrintres) : "t" (__x) : "st"); \ - return __lrintres -__MATH_INLINE long int -__NTH (lrintf (float __x)) -{ - __lrint_code; -} -__MATH_INLINE long int -__NTH (lrint (double __x)) -{ - __lrint_code; 
-} -__MATH_INLINE long int -__NTH (lrintl (long double __x)) -{ - __lrint_code; -} -# undef __lrint_code - -# define __llrint_code \ - long long int __llrintres; \ - __asm__ __volatile__ \ - ("fistpll %0" \ - : "=m" (__llrintres) : "t" (__x) : "st"); \ - return __llrintres -__extension__ -__MATH_INLINE long long int -__NTH (llrintf (float __x)) -{ - __llrint_code; -} -__extension__ -__MATH_INLINE long long int -__NTH (llrint (double __x)) -{ - __llrint_code; -} -__extension__ -__MATH_INLINE long long int -__NTH (llrintl (long double __x)) -{ - __llrint_code; -} -# undef __llrint_code - -# endif - - -# ifdef __USE_MISC - -# if defined __FAST_MATH__ && !__GNUC_PREREQ (3, 5) -__inline_mathcodeNP2 (drem, __x, __y, \ - register double __value; \ - register int __clobbered; \ - __asm __volatile__ \ - ("1: fprem1\n\t" \ - "fstsw %%ax\n\t" \ - "sahf\n\t" \ - "jp 1b" \ - : "=t" (__value), "=&a" (__clobbered) : "0" (__x), "u" (__y) : "cc"); \ - return __value) -# endif - - -/* This function is used in the `isfinite' macro. */ -__MATH_INLINE int -__NTH (__finite (double __x)) -{ - return (__extension__ - (((((union { double __d; int __i[2]; }) {__d: __x}).__i[1] - | 0x800fffffu) + 1) >> 31)); -} - -# endif /* __USE_MISC */ /* Undefine some of the large macros which are not used anymore. */ -# undef __atan2_code # ifdef __FAST_MATH__ # undef __expm1_code # undef __exp_code -# undef __sincos_code # endif /* __FAST_MATH__ */ # endif /* __NO_MATH_INLINES */ @@ -964,7 +317,6 @@ __NTH (__finite (double __x)) /* This code is used internally in the GNU libc. 
*/ # ifdef __LIBC_INTERNAL_MATH_INLINES -__inline_mathop (__ieee754_sqrt, "fsqrt") __inline_mathcode2_ (long double, __ieee754_atan2l, __y, __x, register long double __value; __asm __volatile__ ("fpatan\n\t" diff --git a/sysdeps/x86/fpu/e_sqrtf128.c b/sysdeps/x86/fpu/e_sqrtf128.c new file mode 100644 index 0000000000..cac5f63527 --- /dev/null +++ b/sysdeps/x86/fpu/e_sqrtf128.c @@ -0,0 +1,47 @@ +/* soft-fp sqrt for _Float128 + Return sqrt(a) + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file into + combinations with other programs, and to distribute those + combinations without any restriction coming from the use of this + file. (The Lesser General Public License restrictions do apply in + other respects; for example, they cover modification of the file, + and distribution when not linked into a combine executable.) + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <soft-fp.h> +#include <quad.h> + +__float128 +__ieee754_sqrtf128 (__float128 a) +{ + FP_DECL_EX; + FP_DECL_Q (A); + FP_DECL_Q (R); + __float128 r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_Q (A, a); + FP_SQRT_Q (R, A); + FP_PACK_Q (r, R); + FP_HANDLE_EXCEPTIONS; + return r; +} +strong_alias (__ieee754_sqrtf128, __sqrtf128_finite) diff --git a/sysdeps/x86/fpu/fix-fp-int-compare-invalid.h b/sysdeps/x86/fpu/fix-fp-int-compare-invalid.h new file mode 100644 index 0000000000..6bad27d0fa --- /dev/null +++ b/sysdeps/x86/fpu/fix-fp-int-compare-invalid.h @@ -0,0 +1,32 @@ +/* Fix for missing "invalid" exceptions from floating-point + comparisons. x86 version. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FIX_FP_INT_COMPARE_INVALID_H +#define FIX_FP_INT_COMPARE_INVALID_H 1 + +/* Before GCC 8, both x87 and SSE comparisons use unordered comparison + instructions when they should use ordered comparisons + <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52451>. 
*/ +#if __GNUC_PREREQ (8, 0) +# define FIX_COMPARE_INVALID 0 +#else +# define FIX_COMPARE_INVALID 1 +#endif + +#endif /* fix-fp-int-compare-invalid.h */ diff --git a/sysdeps/x86/fpu/include/bits/fenv.h b/sysdeps/x86/fpu/include/bits/fenv.h index 6e8b733f33..3d2483b0bf 100644 --- a/sysdeps/x86/fpu/include/bits/fenv.h +++ b/sysdeps/x86/fpu/include/bits/fenv.h @@ -1,5 +1,5 @@ /* Wrapper for x86 bits/fenv.h for use when building glibc. - Copyright (C) 1997-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,8 +16,20 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#ifndef _BITS_FENV_H + +#if defined _LIBC && defined __USE_EXTERN_INLINES +# if defined SHARED && !defined NO_HIDDEN && IS_IN (libm) +extern int __REDIRECT_NTH (__feraiseexcept_renamed, (int), __GI_feraiseexcept); +# else +extern int __REDIRECT_NTH (__feraiseexcept_renamed, (int), feraiseexcept); +# endif +#endif + #include_next <bits/fenv.h> +# ifndef _ISOMAC + /* Ensure __feraiseexcept calls in glibc are optimized the same as feraiseexcept calls. */ @@ -40,3 +52,6 @@ __NTH (__feraiseexcept (int __excepts)) __END_DECLS #endif + +# endif /* _ISOMAC */ +#endif /* bits/fenv.h */ diff --git a/sysdeps/x86/fpu/math-barriers.h b/sysdeps/x86/fpu/math-barriers.h new file mode 100644 index 0000000000..1e1fabdb92 --- /dev/null +++ b/sysdeps/x86/fpu/math-barriers.h @@ -0,0 +1,61 @@ +/* Control when floating-point expressions are evaluated. x86 version. + Copyright (C) 2007-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef X86_MATH_BARRIERS_H +#define X86_MATH_BARRIERS_H 1 + +#ifdef __SSE2_MATH__ +# define math_opt_barrier(x) \ + ({ __typeof(x) __x; \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm ("" : "=x" (__x) : "0" (x)); \ + else \ + __asm ("" : "=t" (__x) : "0" (x)); \ + __x; }) +# define math_force_eval(x) \ + do { \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm __volatile ("" : : "x" (x)); \ + else \ + __asm __volatile ("" : : "f" (x)); \ + } while (0) +#else +# define math_opt_barrier(x) \ + ({ __typeof (x) __x; \ + if (__builtin_types_compatible_p (__typeof (x), _Float128)) \ + { \ + __x = (x); \ + __asm ("" : "+m" (__x)); \ + } \ + else \ + __asm ("" : "=t" (__x) : "0" (x)); \ + __x; }) +# define math_force_eval(x) \ + do { \ + __typeof (x) __x = (x); \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm __volatile ("" : : "m" (__x)); \ + else \ + __asm __volatile ("" : : "f" (__x)); \ + } while (0) +#endif + +#endif diff --git a/sysdeps/x86/fpu/powl_helper.c b/sysdeps/x86/fpu/powl_helper.c index 7c5d2d1492..651eedd792 100644 --- a/sysdeps/x86/fpu/powl_helper.c +++ b/sysdeps/x86/fpu/powl_helper.c @@ -1,5 +1,5 @@ /* Implement powl for x86 using extra-precision log. - Copyright (C) 2012-2016 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -18,6 +18,7 @@ #include <math.h> #include <math_private.h> +#include <math-underflow.h> #include <stdbool.h> /* High parts and low parts of -log (k/16), for integer k from 12 to @@ -120,7 +121,7 @@ __powl_helper (long double x, long double y) corrected for by adding log2 (e) * X_FRAC_LOW to the final result. */ int32_t se; - u_int32_t i0, i1; + uint32_t i0, i1; GET_LDOUBLE_WORDS (se, i0, i1, x_frac); x_frac_low = x_frac; i1 &= 0xffffffe0; @@ -139,7 +140,7 @@ __powl_helper (long double x, long double y) long double w = x_frac - 1; long double w_hi, w_lo; int32_t se; - u_int32_t i0, i1; + uint32_t i0, i1; GET_LDOUBLE_WORDS (se, i0, i1, w); i0 &= 0xffff0000; i1 = 0; diff --git a/sysdeps/x86/fpu/sfp-machine.h b/sysdeps/x86/fpu/sfp-machine.h new file mode 100644 index 0000000000..df8906acb4 --- /dev/null +++ b/sysdeps/x86/fpu/sfp-machine.h @@ -0,0 +1,209 @@ +/* Configure soft-fp for building sqrtf128. Based on sfp-machine.h in + libgcc, with soft-float and other irrelevant parts removed. */ + +/* The type of the result of a floating point comparison. This must + match `__libgcc_cmp_return__' in GCC for the target. 
*/ +typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#define CMPtype __gcc_CMPtype + +#ifdef __x86_64__ +# define _FP_W_TYPE_SIZE 64 +# define _FP_W_TYPE unsigned long long +# define _FP_WS_TYPE signed long long +# define _FP_I_TYPE long long + +typedef int TItype __attribute__ ((mode (TI))); +typedef unsigned int UTItype __attribute__ ((mode (TI))); + +# define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype)) + +# define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +# define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +# define _FP_NANFRAC_S _FP_QNANBIT_S +# define _FP_NANFRAC_D _FP_QNANBIT_D +# define _FP_NANFRAC_E _FP_QNANBIT_E, 0 +# define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0 + +# define FP_EX_SHIFT 7 + +# define _FP_DECL_EX \ + unsigned int _fcw __attribute__ ((unused)) = FP_RND_NEAREST; + +# define FP_RND_NEAREST 0 +# define FP_RND_ZERO 0x6000 +# define FP_RND_PINF 0x4000 +# define FP_RND_MINF 0x2000 + +# define FP_RND_MASK 0x6000 + +# define FP_INIT_ROUNDMODE \ + do { \ + __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (_fcw)); \ + } while (0) +#else +# define _FP_W_TYPE_SIZE 32 +# define _FP_W_TYPE unsigned int +# define _FP_WS_TYPE signed int +# define _FP_I_TYPE int + +# define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ + __asm__ ("add{l} {%11,%3|%3,%11}\n\t" \ + "adc{l} {%9,%2|%2,%9}\n\t" \ + "adc{l} {%7,%1|%1,%7}\n\t" \ + "adc{l} {%5,%0|%0,%5}" \ + : "=r" ((USItype) (r3)), \ + "=&r" ((USItype) (r2)), \ + "=&r" ((USItype) (r1)), \ + "=&r" ((USItype) (r0)) \ + : "%0" ((USItype) (x3)), \ + "g" ((USItype) (y3)), \ + "%1" ((USItype) (x2)), \ + "g" ((USItype) (y2)), \ + "%2" ((USItype) (x1)), \ + "g" ((USItype) (y1)), \ + "%3" ((USItype) (x0)), \ + "g" ((USItype) (y0))) +# define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ + __asm__ ("add{l} {%8,%2|%2,%8}\n\t" \ + "adc{l} {%6,%1|%1,%6}\n\t" \ + "adc{l} {%4,%0|%0,%4}" \ + : "=r" ((USItype) (r2)), \ + "=&r" ((USItype) (r1)), \ + "=&r" 
((USItype) (r0)) \ + : "%0" ((USItype) (x2)), \ + "g" ((USItype) (y2)), \ + "%1" ((USItype) (x1)), \ + "g" ((USItype) (y1)), \ + "%2" ((USItype) (x0)), \ + "g" ((USItype) (y0))) +# define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ + __asm__ ("sub{l} {%11,%3|%3,%11}\n\t" \ + "sbb{l} {%9,%2|%2,%9}\n\t" \ + "sbb{l} {%7,%1|%1,%7}\n\t" \ + "sbb{l} {%5,%0|%0,%5}" \ + : "=r" ((USItype) (r3)), \ + "=&r" ((USItype) (r2)), \ + "=&r" ((USItype) (r1)), \ + "=&r" ((USItype) (r0)) \ + : "0" ((USItype) (x3)), \ + "g" ((USItype) (y3)), \ + "1" ((USItype) (x2)), \ + "g" ((USItype) (y2)), \ + "2" ((USItype) (x1)), \ + "g" ((USItype) (y1)), \ + "3" ((USItype) (x0)), \ + "g" ((USItype) (y0))) +# define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ + __asm__ ("sub{l} {%8,%2|%2,%8}\n\t" \ + "sbb{l} {%6,%1|%1,%6}\n\t" \ + "sbb{l} {%4,%0|%0,%4}" \ + : "=r" ((USItype) (r2)), \ + "=&r" ((USItype) (r1)), \ + "=&r" ((USItype) (r0)) \ + : "0" ((USItype) (x2)), \ + "g" ((USItype) (y2)), \ + "1" ((USItype) (x1)), \ + "g" ((USItype) (y1)), \ + "2" ((USItype) (x0)), \ + "g" ((USItype) (y0))) +# define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \ + __asm__ ("add{l} {%4,%3|%3,%4}\n\t" \ + "adc{l} {$0,%2|%2,0}\n\t" \ + "adc{l} {$0,%1|%1,0}\n\t" \ + "adc{l} {$0,%0|%0,0}" \ + : "+r" ((USItype) (x3)), \ + "+&r" ((USItype) (x2)), \ + "+&r" ((USItype) (x1)), \ + "+&r" ((USItype) (x0)) \ + : "g" ((USItype) (i))) + + +# define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +# define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +# define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +# define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +# define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +# define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +# define _FP_NANFRAC_S _FP_QNANBIT_S +# define _FP_NANFRAC_D _FP_QNANBIT_D, 0 +/* Even if XFmode is 12byte, we have to pad it to + 16byte since 
soft-fp emulation is done in 16byte. */ +# define _FP_NANFRAC_E _FP_QNANBIT_E, 0, 0, 0 +# define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0 + +# define FP_EX_SHIFT 0 + +# define _FP_DECL_EX \ + unsigned short _fcw __attribute__ ((unused)) = FP_RND_NEAREST; + +# define FP_RND_NEAREST 0 +# define FP_RND_ZERO 0xc00 +# define FP_RND_PINF 0x800 +# define FP_RND_MINF 0x400 + +# define FP_RND_MASK 0xc00 + +# define FP_INIT_ROUNDMODE \ + do { \ + __asm__ __volatile__ ("fnstcw\t%0" : "=m" (_fcw)); \ + } while (0) +#endif + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +#define _FP_NANSIGN_S 1 +#define _FP_NANSIGN_D 1 +#define _FP_NANSIGN_E 1 +#define _FP_NANSIGN_Q 1 + +/* Here is something Intel misdesigned: the specs don't define + the case where we have two NaNs with same mantissas, but + different sign. Different operations pick up different NaNs. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if (_FP_FRAC_GT_##wc(X, Y) \ + || (_FP_FRAC_EQ_##wc(X,Y) && (OP == '+' || OP == '*'))) \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + else \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define FP_EX_INVALID 0x01 +#define FP_EX_DENORM 0x02 +#define FP_EX_DIVZERO 0x04 +#define FP_EX_OVERFLOW 0x08 +#define FP_EX_UNDERFLOW 0x10 +#define FP_EX_INEXACT 0x20 +#define FP_EX_ALL \ + (FP_EX_INVALID | FP_EX_DENORM | FP_EX_DIVZERO | FP_EX_OVERFLOW \ + | FP_EX_UNDERFLOW | FP_EX_INEXACT) + +void __sfp_handle_exceptions (int); + +#define FP_HANDLE_EXCEPTIONS \ + do { \ + if (__builtin_expect (_fex, 0)) \ + __sfp_handle_exceptions (_fex); \ + } while (0); + +#define FP_TRAPPING_EXCEPTIONS ((~_fcw >> FP_EX_SHIFT) & FP_EX_ALL) + +#define FP_ROUNDMODE (_fcw & FP_RND_MASK) + +#define _FP_TININESS_AFTER_ROUNDING 1 diff --git a/sysdeps/x86/fpu/test-fenv-clear-sse.c b/sysdeps/x86/fpu/test-fenv-clear-sse.c index cc4b3f04c7..c67a3ba7c9 100644 --- a/sysdeps/x86/fpu/test-fenv-clear-sse.c +++ 
b/sysdeps/x86/fpu/test-fenv-clear-sse.c @@ -1,6 +1,6 @@ /* Test fesetenv (FE_DFL_ENV) and fesetenv (FE_NOMASK_ENV) clear exceptions (bug 19181). SSE version. - Copyright (C) 2015-2016 Free Software Foundation, Inc. + Copyright (C) 2015-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/test-fenv-sse-2.c b/sysdeps/x86/fpu/test-fenv-sse-2.c index d3197c3339..fcb1011555 100644 --- a/sysdeps/x86/fpu/test-fenv-sse-2.c +++ b/sysdeps/x86/fpu/test-fenv-sse-2.c @@ -1,5 +1,5 @@ /* Test x86-specific floating-point environment (bug 16068): SSE part. - Copyright (C) 2015-2016 Free Software Foundation, Inc. + Copyright (C) 2015-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/test-fenv-sse.c b/sysdeps/x86/fpu/test-fenv-sse.c index 4f4ff6a0a6..5462315811 100644 --- a/sysdeps/x86/fpu/test-fenv-sse.c +++ b/sysdeps/x86/fpu/test-fenv-sse.c @@ -1,5 +1,5 @@ /* Test floating-point environment includes SSE state (bug 16064). - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/test-fenv-x87.c b/sysdeps/x86/fpu/test-fenv-x87.c index b6f0b6af78..c9cd84e9c7 100644 --- a/sysdeps/x86/fpu/test-fenv-x87.c +++ b/sysdeps/x86/fpu/test-fenv-x87.c @@ -1,5 +1,5 @@ /* Test x86-specific floating-point environment (bug 16068): x87 part. - Copyright (C) 2015-2016 Free Software Foundation, Inc. + Copyright (C) 2015-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/fpu/test-flt-eval-method-387.c b/sysdeps/x86/fpu/test-flt-eval-method-387.c new file mode 100644 index 0000000000..2fb7acfb76 --- /dev/null +++ b/sysdeps/x86/fpu/test-flt-eval-method-387.c @@ -0,0 +1 @@ +#include <test-flt-eval-method.c> diff --git a/sysdeps/x86/fpu/test-flt-eval-method-sse.c b/sysdeps/x86/fpu/test-flt-eval-method-sse.c new file mode 100644 index 0000000000..2fb7acfb76 --- /dev/null +++ b/sysdeps/x86/fpu/test-flt-eval-method-sse.c @@ -0,0 +1 @@ +#include <test-flt-eval-method.c> diff --git a/sysdeps/x86/fpu/test-math-vector-sincos.h b/sysdeps/x86/fpu/test-math-vector-sincos.h new file mode 100644 index 0000000000..d422ffa4a7 --- /dev/null +++ b/sysdeps/x86/fpu/test-math-vector-sincos.h @@ -0,0 +1,98 @@ +/* Wrappers definitions for tests of ABI of vector sincos/sincosf having + vector declaration "#pragma omp declare simd notinbranch". + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define INIT_VEC_PTRS_LOOP(vec, val, len) \ + do \ + { \ + union { VEC_INT_TYPE v; __typeof__ ((val)[0]) *a[(len)]; } u; \ + for (i = 0; i < len; i++) \ + u.a[i] = &(val)[i]; \ + (vec) = u.v; \ + } \ + while (0) + +/* Wrapper for vector sincos/sincosf compatible with x86_64 and x32 variants + of _ZGVbN2vvv_sincos, _ZGVdN4vvv_sincos, _ZGVeN8vvv_sincos; + x32 variants of _ZGVbN4vvv_sincosf, _ZGVcN4vvv_sincos, _ZGVdN8vvv_sincosf, + _ZGVeN16vvv_sincosf. */ +#define VECTOR_WRAPPER_fFF_2(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN], r1_loc[VEC_LEN]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (mr, r_loc, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (mr1, r1_loc, VEC_LEN); \ + vector_func (mx, mr, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} + +/* Wrapper for vector sincos/sincosf compatible with x86_64 variants of + _ZGVcN4vvv_sincos, _ZGVeN16vvv_sincosf, _ZGVbN4vvv_sincosf, + _ZGVdN8vvv_sincosf, _ZGVcN8vvv_sincosf. */ +#define VECTOR_WRAPPER_fFF_3(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN/2], r1_loc[VEC_LEN/2]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (mr, r_loc, VEC_LEN/2); \ + INIT_VEC_PTRS_LOOP (mr1, r1_loc, VEC_LEN/2); \ + vector_func (mx, mr, mr, mr1, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN/2); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN/2); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} + +/* Wrapper for vector sincosf compatible with x86_64 variant of + _ZGVcN8vvv_sincosf. 
*/ +#define VECTOR_WRAPPER_fFF_4(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN/4], r1_loc[VEC_LEN/4]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (mr, r_loc, VEC_LEN/4); \ + INIT_VEC_PTRS_LOOP (mr1, r1_loc, VEC_LEN/4); \ + vector_func (mx, mr, mr, mr, mr, mr1, mr1, mr1, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN/4); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN/4); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} diff --git a/sysdeps/x86/fpu_control.h b/sysdeps/x86/fpu_control.h index 4c960580a3..4cb98c5679 100644 --- a/sysdeps/x86/fpu_control.h +++ b/sysdeps/x86/fpu_control.h @@ -1,5 +1,5 @@ /* FPU control word bits. x86 version. - Copyright (C) 1993-2016 Free Software Foundation, Inc. + Copyright (C) 1993-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Olaf Flebbe. diff --git a/sysdeps/x86/init-arch.h b/sysdeps/x86/init-arch.h index 17a38d2967..a81ca8a4eb 100644 --- a/sysdeps/x86/init-arch.h +++ b/sysdeps/x86/init-arch.h @@ -1,5 +1,5 @@ /* This file is part of the GNU C Library. - Copyright (C) 2008-2016 Free Software Foundation, Inc. + Copyright (C) 2008-2018 Free Software Foundation, Inc. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,6 +20,7 @@ #else # include <ldsodefs.h> #endif +#include <ifunc-init.h> #ifndef __x86_64__ /* Due to the reordering and the other nifty extensions in i686, it is diff --git a/sysdeps/x86/jmp_buf-ssp.sym b/sysdeps/x86/jmp_buf-ssp.sym new file mode 100644 index 0000000000..1aaaedc9ec --- /dev/null +++ b/sysdeps/x86/jmp_buf-ssp.sym @@ -0,0 +1 @@ +-- FIXME: Define SHADOW_STACK_POINTER_OFFSET to support shadow stack. 
diff --git a/sysdeps/x86/ldsodefs.h b/sysdeps/x86/ldsodefs.h new file mode 100644 index 0000000000..0616215b7a --- /dev/null +++ b/sysdeps/x86/ldsodefs.h @@ -0,0 +1,66 @@ +/* Run-time dynamic linker data structures for loaded ELF shared objects. + X86 version. + Copyright (C) 1995-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _X86_LDSODEFS_H +#define _X86_LDSODEFS_H 1 + +#include <elf.h> +#include <cpu-features.h> + +struct La_i86_regs; +struct La_i86_retval; +struct La_x86_64_regs; +struct La_x86_64_retval; +struct La_x32_regs; +struct La_x32_retval; + +#define ARCH_PLTENTER_MEMBERS \ + Elf32_Addr (*i86_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, struct La_i86_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf64_Addr (*x86_64_gnu_pltenter) (Elf64_Sym *, unsigned int, \ + uintptr_t *, \ + uintptr_t *, struct La_x86_64_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf32_Addr (*x32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, struct La_x32_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep) + +#define ARCH_PLTEXIT_MEMBERS \ + unsigned int (*i86_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, const struct La_i86_regs *, \ + struct La_i86_retval *, const char *); \ + unsigned int (*x86_64_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + const struct La_x86_64_regs *, \ + struct La_x86_64_retval *, \ + const char *); \ + unsigned int (*x32_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, \ + const struct La_x32_regs *, \ + struct La_x86_64_retval *, \ + const char *) + +#include_next <ldsodefs.h> + +#endif diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c index 3b5ea6e933..eb5335c154 100644 --- a/sysdeps/x86/libc-start.c +++ b/sysdeps/x86/libc-start.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2016 Free Software Foundation, Inc. +/* Copyright (C) 2015-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -15,27 +15,17 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#ifdef SHARED -# include <csu/libc-start.c> -# else -/* The main work is done in the generic function. */ -# define LIBC_START_DISABLE_INLINE -# define LIBC_START_MAIN generic_start_main -# include <csu/libc-start.c> +#ifndef SHARED +/* Define I386_USE_SYSENTER to support syscall during startup in static + PIE. */ +# include <startup.h> +# include <ldsodefs.h> # include <cpu-features.h> # include <cpu-features.c> extern struct cpu_features _dl_x86_cpu_features; -int -__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), - int argc, char **argv, - __typeof (main) init, - void (*fini) (void), - void (*rtld_fini) (void), void *stack_end) -{ - init_cpu_features (&_dl_x86_cpu_features); - return generic_start_main (main, argc, argv, init, fini, rtld_fini, - stack_end); -} -#endif +# define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_x86_cpu_features) + +#endif /* !SHARED */ +#include <csu/libc-start.c> diff --git a/sysdeps/x86/libc-start.h b/sysdeps/x86/libc-start.h new file mode 100644 index 0000000000..6f44262bf4 --- /dev/null +++ b/sysdeps/x86/libc-start.h @@ -0,0 +1,25 @@ +/* X86 definitions for libc main startup. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef SHARED +# define ARCH_SETUP_IREL() apply_irel () +# define ARCH_APPLY_IREL() +# ifndef ARCH_SETUP_TLS +# define ARCH_SETUP_TLS() __libc_setup_tls () +# endif +#endif /* !SHARED */ diff --git a/sysdeps/x86/link_map.h b/sysdeps/x86/link_map.h new file mode 100644 index 0000000000..ef1206a9d2 --- /dev/null +++ b/sysdeps/x86/link_map.h @@ -0,0 +1,26 @@ +/* Additional fields in struct link_map. Linux/x86 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* If this object is enabled with CET. */ +enum + { + lc_none = 0, /* Not enabled with CET. */ + lc_ibt = 1 << 0, /* Enabled with IBT. */ + lc_shstk = 1 << 1, /* Enabled with SHSTK. */ + lc_ibt_and_shstk = lc_ibt | lc_shstk /* Enabled with both. */ + } l_cet:2; diff --git a/sysdeps/x86/longjmp.c b/sysdeps/x86/longjmp.c new file mode 100644 index 0000000000..a53f31e1dd --- /dev/null +++ b/sysdeps/x86/longjmp.c @@ -0,0 +1,45 @@ +/* __libc_siglongjmp for x86. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __libc_longjmp __redirect___libc_longjmp +#include <setjmp/longjmp.c> +#undef __libc_longjmp + +extern void __longjmp_cancel (__jmp_buf __env, int __val) + __attribute__ ((__noreturn__)) attribute_hidden; + +/* Since __libc_longjmp is a private interface for cancellation + implementation in libpthread, there is no need to restore shadow + stack register. */ + +void +__libc_longjmp (sigjmp_buf env, int val) +{ + /* Perform any cleanups needed by the frames being unwound. */ + _longjmp_unwind (env, val); + + if (env[0].__mask_was_saved) + /* Restore the saved signal mask. */ + (void) __sigprocmask (SIG_SETMASK, + (sigset_t *) &env[0].__saved_mask, + (sigset_t *) NULL); + + /* Call the machine-dependent function to restore machine state + without shadow stack. */ + __longjmp_cancel (env[0].__jmpbuf, val ?: 1); +} diff --git a/sysdeps/x86/math-tests.h b/sysdeps/x86/math-tests.h new file mode 100644 index 0000000000..43c7ebe337 --- /dev/null +++ b/sysdeps/x86/math-tests.h @@ -0,0 +1,25 @@ +/* Configuration for math tests. x86 version. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Before GCC 7, there is no built-in function to provide a __float128 + sNaN, so most sNaN tests for this type cannot work. */ +#if !__GNUC_PREREQ (7, 0) +# define SNAN_TESTS_float128 0 +#endif + +#include_next <math-tests.h> diff --git a/sysdeps/x86/nptl/bits/pthreadtypes-arch.h b/sysdeps/x86/nptl/bits/pthreadtypes-arch.h new file mode 100644 index 0000000000..290f2f4640 --- /dev/null +++ b/sysdeps/x86/nptl/bits/pthreadtypes-arch.h @@ -0,0 +1,106 @@ +/* Copyright (C) 2002-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _BITS_PTHREADTYPES_ARCH_H +#define _BITS_PTHREADTYPES_ARCH_H 1 + +#include <bits/wordsize.h> + +#ifdef __x86_64__ +# if __WORDSIZE == 64 +# define __SIZEOF_PTHREAD_MUTEX_T 40 +# define __SIZEOF_PTHREAD_ATTR_T 56 +# define __SIZEOF_PTHREAD_MUTEX_T 40 +# define __SIZEOF_PTHREAD_RWLOCK_T 56 +# define __SIZEOF_PTHREAD_BARRIER_T 32 +# else +# define __SIZEOF_PTHREAD_MUTEX_T 32 +# define __SIZEOF_PTHREAD_ATTR_T 32 +# define __SIZEOF_PTHREAD_MUTEX_T 32 +# define __SIZEOF_PTHREAD_RWLOCK_T 44 +# define __SIZEOF_PTHREAD_BARRIER_T 20 +# endif +#else +# define __SIZEOF_PTHREAD_MUTEX_T 24 +# define __SIZEOF_PTHREAD_ATTR_T 36 +# define __SIZEOF_PTHREAD_MUTEX_T 24 +# define __SIZEOF_PTHREAD_RWLOCK_T 32 +# define __SIZEOF_PTHREAD_BARRIER_T 20 +#endif +#define __SIZEOF_PTHREAD_MUTEXATTR_T 4 +#define __SIZEOF_PTHREAD_COND_T 48 +#define __SIZEOF_PTHREAD_CONDATTR_T 4 +#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 +#define __SIZEOF_PTHREAD_BARRIERATTR_T 4 + +/* Definitions for internal mutex struct. */ +#define __PTHREAD_COMPAT_PADDING_MID +#define __PTHREAD_COMPAT_PADDING_END +#define __PTHREAD_MUTEX_LOCK_ELISION 1 +#ifdef __x86_64__ +# define __PTHREAD_MUTEX_NUSERS_AFTER_KIND 0 +# define __PTHREAD_MUTEX_USE_UNION 0 +#else +# define __PTHREAD_MUTEX_NUSERS_AFTER_KIND 1 +# define __PTHREAD_MUTEX_USE_UNION 1 +#endif + +#define __LOCK_ALIGNMENT +#define __ONCE_ALIGNMENT + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; +#ifdef __x86_64__ + int __cur_writer; + int __shared; + signed char __rwelision; +# ifdef __ILP32__ + unsigned char __pad1[3]; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, { 0, 0, 0 } +# else + unsigned char __pad1[7]; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, { 0, 0, 0, 0, 0, 0, 0 } +# endif + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. 
*/ + unsigned int __flags; +# define __PTHREAD_RWLOCK_INT_FLAGS_SHARED 1 +#else + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned char __flags; + unsigned char __shared; + signed char __rwelision; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0 + unsigned char __pad2; + int __cur_writer; +#endif +}; + +#ifndef __x86_64__ +/* Extra attributes for the cleanup functions. */ +# define __cleanup_fct_attribute __attribute__ ((__regparm__ (1))) +#endif + +#endif /* bits/pthreadtypes-arch.h */ diff --git a/sysdeps/x86/nptl/pt-longjmp.c b/sysdeps/x86/nptl/pt-longjmp.c new file mode 100644 index 0000000000..6165c7d4a7 --- /dev/null +++ b/sysdeps/x86/nptl/pt-longjmp.c @@ -0,0 +1,71 @@ +/* ABI compatibility for 'longjmp' and 'siglongjmp' symbols in libpthread ABI. + X86 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <pthreadP.h> +#include <jmp_buf-ssp.h> + +#ifdef __x86_64__ +# define SHADOW_STACK_POINTER_SIZE 8 +#else +# define SHADOW_STACK_POINTER_SIZE 4 +#endif + +/* Assert that the priv field in struct pthread_unwind_buf has space + to store shadow stack pointer. 
*/ +_Static_assert ((offsetof (struct pthread_unwind_buf, priv) + <= SHADOW_STACK_POINTER_OFFSET) + && ((offsetof (struct pthread_unwind_buf, priv) + + sizeof (((struct pthread_unwind_buf *) 0)->priv)) + >= (SHADOW_STACK_POINTER_OFFSET + + SHADOW_STACK_POINTER_SIZE)), + "Shadow stack pointer is not within private storage " + "of pthread_unwind_buf."); + +#include <shlib-compat.h> + +/* libpthread once had its own longjmp (and siglongjmp alias), though there + was no apparent reason for it. There is no use in having a separate + symbol in libpthread, but the historical ABI requires it. For static + linking, there is no need to provide anything here--the libc version + will be linked in. For shared library ABI compatibility, there must be + longjmp and siglongjmp symbols in libpthread.so. + + With an IFUNC resolver, it would be possible to avoid the indirection, + but the IFUNC resolver might run before the __libc_longjmp symbol has + been relocated, in which case the IFUNC resolver would not be able to + provide the correct address. */ + +#if SHLIB_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_22) + +static void __attribute__ ((noreturn, used)) +longjmp_compat (jmp_buf env, int val) +{ + /* NB: We call __libc_siglongjmp, instead of __libc_longjmp, since + __libc_longjmp is a private interface for cancellation which + doesn't restore shadow stack register. */ + __libc_siglongjmp (env, val); +} + +strong_alias (longjmp_compat, longjmp_alias) +compat_symbol (libpthread, longjmp_alias, longjmp, GLIBC_2_0); + +strong_alias (longjmp_alias, siglongjmp_alias) +compat_symbol (libpthread, siglongjmp_alias, siglongjmp, GLIBC_2_0); + +#endif diff --git a/sysdeps/x86/nptl/tls-setup.h b/sysdeps/x86/nptl/tls-setup.h new file mode 100644 index 0000000000..ef5a4df78c --- /dev/null +++ b/sysdeps/x86/nptl/tls-setup.h @@ -0,0 +1,23 @@ +/* Definitions to set up thread-local data. x86 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +static inline void __attribute__ ((always_inline)) +tls_setup_tcbhead (struct pthread *pd) +{ + pd->header.feature_1 = THREAD_GETMEM (THREAD_SELF, header.feature_1); +} diff --git a/sysdeps/x86/rtld-global-offsets.sym b/sysdeps/x86/rtld-global-offsets.sym deleted file mode 100644 index a9d53d195f..0000000000 --- a/sysdeps/x86/rtld-global-offsets.sym +++ /dev/null @@ -1,7 +0,0 @@ -#define SHARED 1 - -#include <ldsodefs.h> - -#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) - -RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) diff --git a/sysdeps/x86/string_private.h b/sysdeps/x86/string_private.h index e7281eb4ea..4bc45f63d8 100644 --- a/sysdeps/x86/string_private.h +++ b/sysdeps/x86/string_private.h @@ -1,5 +1,5 @@ /* Define _STRING_ARCH_unaligned. i486/x86-64 version. - Copyright (C) 2016 Free Software Foundation, Inc. + Copyright (C) 2016-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h new file mode 100644 index 0000000000..8776ad8374 --- /dev/null +++ b/sysdeps/x86/sysdep.h @@ -0,0 +1,104 @@ +/* Assembler macros for x86. + Copyright (C) 2017-2018 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86_SYSDEP_H +#define _X86_SYSDEP_H 1 + +#include <sysdeps/generic/sysdep.h> + +/* __CET__ is defined by GCC with Control-Flow Protection values: + +enum cf_protection_level +{ + CF_NONE = 0, + CF_BRANCH = 1 << 0, + CF_RETURN = 1 << 1, + CF_FULL = CF_BRANCH | CF_RETURN, + CF_SET = 1 << 2 +}; +*/ + +/* Set if CF_BRANCH (IBT) is enabled. */ +#define X86_FEATURE_1_IBT (1U << 0) +/* Set if CF_RETURN (SHSTK) is enabled. */ +#define X86_FEATURE_1_SHSTK (1U << 1) + +#ifdef __CET__ +# define CET_ENABLED 1 +# define IBT_ENABLED (__CET__ & X86_FEATURE_1_IBT) +# define SHSTK_ENABLED (__CET__ & X86_FEATURE_1_SHSTK) +#else +# define CET_ENABLED 0 +# define IBT_ENABLED 0 +# define SHSTK_ENABLED 0 +#endif + +#ifdef __ASSEMBLER__ + +/* Syntactic details of assembler. */ + +#ifdef _CET_ENDBR +# define _CET_NOTRACK notrack +#else +# define _CET_ENDBR +# define _CET_NOTRACK +#endif + +/* ELF uses byte-counts for .align, most others use log2 of count of bytes. */ +#define ALIGNARG(log2) 1<<log2 +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name; + +/* Define an entry point visible from C. 
*/ +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ + .align ALIGNARG(4); \ + C_LABEL(name) \ + cfi_startproc; \ + _CET_ENDBR; \ + CALL_MCOUNT + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(name) + +#define ENTRY_CHK(name) ENTRY (name) +#define END_CHK(name) END (name) + +/* Since C identifiers are not normally prefixed with an underscore + on this system, the asm identifier `syscall_error' intrudes on the + C name space. Make sure we use an innocuous name. */ +#define syscall_error __syscall_error +#define mcount _mcount + +#undef PSEUDO_END +#define PSEUDO_END(name) \ + END (name) + +/* Local label name for asm code. */ +#ifndef L +/* ELF-like local names start with `.L'. */ +# define L(name) .L##name +#endif + +#define atom_text_section .section ".text.atom", "ax" + +#endif /* __ASSEMBLER__ */ + +#endif /* _X86_SYSDEP_H */ diff --git a/sysdeps/x86/tst-cet-legacy-1.c b/sysdeps/x86/tst-cet-legacy-1.c new file mode 100644 index 0000000000..861c09a26e --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-1.c @@ -0,0 +1,44 @@ +/* Check compatibility of CET-enabled executable linked with legacy + shared object. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <stdio.h> +#include <stdlib.h> + +extern int in_dso_1 (void); +extern int in_dso_2 (void); + +static int +do_test (void) +{ + if (in_dso_1 () != 0x1234678) + { + puts ("in_dso_1 () != 0x1234678"); + exit (1); + } + + if (in_dso_2 () != 0xbadbeef) + { + puts ("in_dso_2 () != 0xbadbeef"); + exit (1); + } + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/x86/tst-cet-legacy-2.c b/sysdeps/x86/tst-cet-legacy-2.c new file mode 100644 index 0000000000..e039a16797 --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-2.c @@ -0,0 +1,64 @@ +/* Check compatibility of CET-enabled executable with dlopened legacy + shared object. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <dlfcn.h> +#include <stdio.h> +#include <stdlib.h> + +extern int in_dso_2 (void); + +static int +do_test (void) +{ + static const char modname[] = "tst-cet-legacy-mod-1.so"; + int (*fp) (void); + void *h; + + h = dlopen (modname, RTLD_LAZY); + if (h == NULL) + { + printf ("cannot open '%s': %s\n", modname, dlerror ()); + exit (1); + } + + fp = dlsym (h, "in_dso_1"); + if (fp == NULL) + { + printf ("cannot get symbol 'in_dso': %s\n", dlerror ()); + exit (1); + } + + if (fp () != 0x1234678) + { + puts ("in_dso_1 () != 0x1234678"); + exit (1); + } + + if (in_dso_2 () != 0xbadbeef) + { + puts ("in_dso_2 () != 0xbadbeef"); + exit (1); + } + + dlclose (h); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/x86/tst-cet-legacy-2a.c b/sysdeps/x86/tst-cet-legacy-2a.c new file mode 100644 index 0000000000..d5aead4303 --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-2a.c @@ -0,0 +1 @@ +#include "tst-cet-legacy-2.c" diff --git a/sysdeps/x86/tst-cet-legacy-3.c b/sysdeps/x86/tst-cet-legacy-3.c new file mode 100644 index 0000000000..bab9faa8b0 --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-3.c @@ -0,0 +1,37 @@ +/* Check compatibility of CET-enabled executable with dlopened legacy + shared object. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> + +int +test (void) +{ + printf ("PASS\n"); + return 0; +} + +#ifndef TEST_MODULE +static int +do_test (void) +{ + return test (); +} + +#include <support/test-driver.c> +#endif diff --git a/sysdeps/x86/tst-cet-legacy-4.c b/sysdeps/x86/tst-cet-legacy-4.c new file mode 100644 index 0000000000..3ead63dd24 --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-4.c @@ -0,0 +1,56 @@ +/* Check compatibility of CET-enabled executable with dlopened legacy + shared object. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <dlfcn.h> +#include <stdio.h> +#include <stdlib.h> + +static int +do_test (void) +{ + static const char modname[] = "tst-cet-legacy-mod-4.so"; + int (*fp) (void); + void *h; + + h = dlopen (modname, RTLD_LAZY); + if (h == NULL) + { + printf ("cannot open '%s': %s\n", modname, dlerror ()); + exit (1); + } + + fp = dlsym (h, "test"); + if (fp == NULL) + { + printf ("cannot get symbol 'test': %s\n", dlerror ()); + exit (1); + } + + if (fp () != 0) + { + puts ("test () != 0"); + exit (1); + } + + dlclose (h); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/x86/tst-cet-legacy-4a.c b/sysdeps/x86/tst-cet-legacy-4a.c new file mode 100644 index 0000000000..b9bb18c36b --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-4a.c @@ -0,0 +1 @@ +#include "tst-cet-legacy-4.c" diff --git a/sysdeps/x86/tst-cet-legacy-4b.c b/sysdeps/x86/tst-cet-legacy-4b.c new file mode 100644 index 0000000000..b9bb18c36b --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-4b.c @@ -0,0 +1 @@ +#include "tst-cet-legacy-4.c" diff --git a/sysdeps/x86/tst-cet-legacy-4c.c b/sysdeps/x86/tst-cet-legacy-4c.c new file mode 100644 index 0000000000..b9bb18c36b --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-4c.c @@ -0,0 +1 @@ +#include "tst-cet-legacy-4.c" diff --git a/sysdeps/x86/tst-cet-legacy-mod-1.c b/sysdeps/x86/tst-cet-legacy-mod-1.c new file mode 100644 index 0000000000..09762bce13 --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-mod-1.c @@ -0,0 +1,24 @@ +/* Check compatibility of CET-enabled executable with legacy shared + object. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +int +in_dso_1 (void) +{ + return 0x1234678; +} diff --git a/sysdeps/x86/tst-cet-legacy-mod-2.c b/sysdeps/x86/tst-cet-legacy-mod-2.c new file mode 100644 index 0000000000..1c8de443f6 --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-mod-2.c @@ -0,0 +1,24 @@ +/* Check compatibility of CET-enabled executable with legacy shared + object. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +int +in_dso_2 (void) +{ + return 0xbadbeef; +} diff --git a/sysdeps/x86/tst-cet-legacy-mod-4.c b/sysdeps/x86/tst-cet-legacy-mod-4.c new file mode 100644 index 0000000000..a93c2fe4a7 --- /dev/null +++ b/sysdeps/x86/tst-cet-legacy-mod-4.c @@ -0,0 +1,2 @@ +#define TEST_MODULE +#include "tst-cet-legacy-3.c" diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c index da20063a00..b2fac197da 100644 --- a/sysdeps/x86/tst-get-cpu-features.c +++ b/sysdeps/x86/tst-get-cpu-features.c @@ -1,5 +1,5 @@ /* Test case for x86 __get_cpu_features interface - Copyright (C) 2015-2016 Free Software Foundation, Inc. + Copyright (C) 2015-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or |