From c86ed71d633c22d6f638576f7660c52a5f783d66 Mon Sep 17 00:00:00 2001
From: Joseph Myers
Date: Mon, 26 Jun 2017 22:01:27 +0000
Subject: Add float128 support for x86_64, x86.

This patch enables float128 support for x86_64 and x86.  All GCC versions that can build glibc provide the required support, but since GCC 6 and before don't provide __builtin_nanq / __builtin_nansq, sNaN tests and some tests of NaN payloads need to be disabled with such compilers (this does not affect the generated glibc binaries at all, just the tests).

bits/floatn.h declares float128 support to be available for GCC versions that provide the required libgcc support (4.3 for x86_64, 4.4 for i386 GNU/Linux, 4.5 for i386 GNU/Hurd); compilation-only support was present some time before then, but not really useful without the libgcc functions.

fenv_private.h needed updating to avoid trying to put _Float128 values in registers.  I make no assertion of optimality of the math_opt_barrier / math_force_eval definitions for this case; they are simply intended to be sufficient to work correctly.

Tested for x86_64 and x86, with GCC 7 and GCC 6.  (Testing for x32 was compilation tests only with build-many-glibcs.py to verify the ABI baseline updates.  I have not done any testing for Hurd, although the float128 support is enabled there as for GNU/Linux.)

* sysdeps/i386/Implies: Add ieee754/float128.
* sysdeps/x86_64/Implies: Likewise.
* sysdeps/x86/bits/floatn.h: New file.
* sysdeps/x86/float128-abi.h: Likewise.
* manual/math.texi (Mathematics): Document support for _Float128 on x86_64 and x86.
* sysdeps/i386/fpu/fenv_private.h: Include .
(math_opt_barrier): Do not put _Float128 values in floating-point registers.
(math_force_eval): Likewise.
[__x86_64__] (SET_RESTORE_ROUNDF128): New macro.
* sysdeps/x86/fpu/Makefile [$(subdir) = math] (CPPFLAGS): Append to Makefile variable.
* sysdeps/x86/fpu/e_sqrtf128.c: New file.
* sysdeps/x86/fpu/sfp-machine.h: Likewise.  Based on libgcc.
* sysdeps/x86/math-tests.h: New file.
* math/libm-test-support.h (XFAIL_FLOAT128_PAYLOAD): New macro.
* math/libm-test-getpayload.inc (getpayload_test_data): Use XFAIL_FLOAT128_PAYLOAD.
* math/libm-test-setpayload.inc (setpayload_test_data): Likewise.
* math/libm-test-totalorder.inc (totalorder_test_data): Likewise.
* math/libm-test-totalordermag.inc (totalordermag_test_data): Likewise.
* sysdeps/unix/sysv/linux/i386/libc.abilist: Update.
* sysdeps/unix/sysv/linux/i386/libm.abilist: Likewise.
* sysdeps/unix/sysv/linux/x86_64/64/libc.abilist: Likewise.
* sysdeps/unix/sysv/linux/x86_64/64/libm.abilist: Likewise.
* sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist: Likewise.
* sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist: Likewise.
* sysdeps/i386/fpu/libm-test-ulps: Likewise.
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
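As an illustration of how these barrier macros are used (a sketch assuming glibc's internal math_opt_barrier / math_force_eval are in scope, i.e. code inside libm; the helper name below is hypothetical and not from this patch): the reason _Float128 must go through SSE registers or memory rather than the x87 stack is simply that the operand does not fit in an x87 register, while the usage idiom itself stays the same across types.

/* Hypothetical helper showing the barrier idiom: evaluate a scaled
   value purely for its side effects, so that any inexact or underflow
   exceptions from the multiplication are actually raised.  */
static void
force_eval_scaled (_Float128 x)
{
  /* math_opt_barrier keeps the compiler from optimizing the multiply
     away using its knowledge of x; math_force_eval makes sure the
     result is evaluated here even though it is otherwise unused.  */
  _Float128 tiny = math_opt_barrier (x) * 0x1p-16400f128;
  math_force_eval (tiny);
}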
--- sysdeps/i386/fpu/fenv_private.h | 61 ++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 22 deletions(-) (limited to 'sysdeps/i386/fpu/fenv_private.h') diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h index e20e1f1662..38fd0b92b5 100644 --- a/sysdeps/i386/fpu/fenv_private.h +++ b/sysdeps/i386/fpu/fenv_private.h @@ -1,36 +1,46 @@ #ifndef FENV_PRIVATE_H #define FENV_PRIVATE_H 1 +#include #include #include #ifdef __SSE2_MATH__ -# define math_opt_barrier(x) \ - ({ __typeof(x) __x; \ - if (sizeof (x) <= sizeof (double)) \ - __asm ("" : "=x" (__x) : "0" (x)); \ - else \ - __asm ("" : "=t" (__x) : "0" (x)); \ +# define math_opt_barrier(x) \ + ({ __typeof(x) __x; \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm ("" : "=x" (__x) : "0" (x)); \ + else \ + __asm ("" : "=t" (__x) : "0" (x)); \ __x; }) -# define math_force_eval(x) \ - do { \ - if (sizeof (x) <= sizeof (double)) \ - __asm __volatile ("" : : "x" (x)); \ - else \ - __asm __volatile ("" : : "f" (x)); \ +# define math_force_eval(x) \ + do { \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm __volatile ("" : : "x" (x)); \ + else \ + __asm __volatile ("" : : "f" (x)); \ } while (0) #else -# define math_opt_barrier(x) \ - ({ __typeof (x) __x; \ - __asm ("" : "=t" (__x) : "0" (x)); \ +# define math_opt_barrier(x) \ + ({ __typeof (x) __x; \ + if (__builtin_types_compatible_p (__typeof (x), _Float128)) \ + { \ + __x = (x); \ + __asm ("" : "+m" (__x)); \ + } \ + else \ + __asm ("" : "=t" (__x) : "0" (x)); \ __x; }) -# define math_force_eval(x) \ - do { \ - __typeof (x) __x = (x); \ - if (sizeof (x) <= sizeof (double)) \ - __asm __volatile ("" : : "m" (__x)); \ - else \ - __asm __volatile ("" : : "f" (__x)); \ +# define math_force_eval(x) \ + do { \ + __typeof (x) __x = (x); \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm __volatile ("" : : "m" (__x)); \ + else \ + __asm __volatile ("" : : "f" (__x)); \ } while (0) #endif @@ -322,6 +332,13 @@ libc_feresetround_387 (fenv_t *e) # define libc_feholdsetround_53bit libc_feholdsetround_387_53bit #endif +#ifdef __x86_64__ +/* The SSE rounding mode is used by soft-fp (libgcc and glibc) on + x86_64, so that must be set for float128 computations. */ +# define SET_RESTORE_ROUNDF128(RM) \ + SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) +#endif + /* We have support for rounding mode context. */ #define HAVE_RM_CTX 1 -- cgit v1.2.3 From 63716ab270df6846233e82c8b7ef494be90b2c45 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Fri, 9 Feb 2018 21:18:52 +0000 Subject: Add build infrastructure for narrowing libm functions. TS 18661-1 defines libm functions that carry out an operation (+ - * / sqrt fma) on their arguments and return a result rounded to a (usually) narrower type, as if the original result were computed to infinite precision and then rounded directly to the result type without any intermediate rounding to the argument type. For example, fadd, faddl and daddl for addition. These are the last remaining TS 18661-1 functions left to be added to glibc. TS 18661-3 extends this to corresponding functions for _FloatN and _FloatNx types. As functions parametrized by two rather than one varying floating-point types, these functions require infrastructure in glibc that was not required for previous libm functions. 
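To make the intended semantics concrete, here is a small illustration (not part of the patch; it assumes a glibc recent enough to declare fadd and _GNU_SOURCE to expose it): rounding the exact sum once to float can differ from rounding to double first and then to float.

/* The exact sum 1 + 2^-24 + 2^-55 lies just above the midpoint
   between the floats 1 and 1 + 2^-23, so a single rounding goes up;
   rounding to double first drops the 2^-55 bit, leaving an exact
   midpoint that then rounds to even, i.e. down to 1.0f.  */
#define _GNU_SOURCE 1
#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 1.0;
  double y = 0x1p-24 + 0x1p-55;
  float twice_rounded = (float) (x + y);	/* 0x1p+0 */
  float once_rounded = fadd (x, y);		/* 0x1.000002p+0 */
  printf ("%a %a\n", (double) twice_rounded, (double) once_rounded);
  return 0;
}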
This patch provides such infrastructure - excluding test support, and actual function implementations, which will be in subsequent patches. Declaring the functions uses a header bits/mathcalls-narrow.h, which is included many times, for each relevant pair of types. This will end up containing macro calls of the form __MATHCALL_NARROW (__MATHCALL_NAME (add), __MATHCALL_REDIR_NAME (add), 2); for each family of narrowing functions. (The structure of this macro call, with the calls to __MATHCALL_NAME and __MATHCALL_REDIR_NAME there rather than in the definition of __MATHCALL_NARROW, arises from the names such as "add" *not* themselves being reserved identifiers - meaning it's necessary to avoid any indirection that would result in a user-defined "add" macro being expanded.) Whereas for existing functions declaring long double functions is disabled if _LIBC in the case where they alias double functions, to facilitate defining the long double functions as aliases of the double ones, there is no such logic for the narrowing functions in this patch. Rather, the files defining such functions are expected to use #define to hide the original declarations of the alias names, to avoid errors about defining aliases with incompatible types. math/Makefile support is added for building the functions (listed in libm-narrow-fns, currently empty) for all relevant pairs of types. An internal header math-narrow.h is added for macros shared between multiple function implementations - currently a ROUND_TO_ODD macro to facilitate writing functions using the round-to-odd implementation approach, and alias macros to create all the required function aliases. libc_feholdexcept_setroundf128 and libc_feupdateenv_testf128 are added for use when required (only for x86_64). float128_private.h support is added for ldbl-128 narrowing functions to be used for _Float128. Certain things are specifically omitted from this patch and the immediate followups. tgmath.h support is deferred; there remain unresolved questions about how the type-generic macros for these functions are supposed to work, especially in the case of arguments of integer type. The math.h / bits/mathcalls-narrow.h logic, and the logic for determining what functions / aliases to define, will need some adjustments to support the sqrt and fma functions, where e.g. f32xsqrtf64 can just be an alias for sqrt rather than a separate function. TS 18661-1 defines FP_FAST_* macros but no support is included for defining them (they won't in general be true without architecture-specific optimized function versions). For each of the function groups (add sub mul div sqrt fma) there are always six functions present (e.g. fadd, faddl, daddl, f32addf64, f32addf32x, f32xaddf64). When _Float64x and _Float128 are supported, there are seven more (e.g. f32addf64x, f32addf128, f64addf64x, f64addf128, f32xaddf64x, f32xaddf128, f64xaddf128). In addition, in the ldbl-opt case there are function names such as __nldbl_daddl (an alias for f32xaddf64, which is not a reserved name in TS 18661-1, only in TS 18661-3), for calls to daddl to be mapped to in the -mlong-double-64 case. (Calls to faddl just get mapped to fadd, and for sqrt and fma there won't be __nldbl_* functions because dsqrtl and dfmal can just be mapped to sqrt and fma with -mlong-double-64.) While there are six or thirteen functions present in each group (plus __nldbl_* names only as an ABI, not an API), not all are distinct; they fall in various groups of aliases. 
There are two distinct versions built if long double has the same format as double; four if they have distinct formats but there is no _Float64x or _Float128 support; five if long double has binary128 format; seven when _Float128 is distinct from long double. Architecture-specific optimized versions are possible, but not included in my patches. For example, IA64 generally supports narrowing the result of most floating-point instructions; Power ISA 2.07 (POWER8) supports double values as arguments to float instructions, with the results narrowed as expected; Power ISA 3 (POWER9) supports round-to-odd for float128 instructions, so meaning that approach can be used without needing to set and restore the rounding mode and test "inexact". I intend to leave any such optimized versions to the architecture maintainers. Generally in such cases it would also make sense for calls to these functions to be expanded inline (given -fno-math-errno); I put a suggestion for TS 18661-1 built-in functions at . Tested for x86_64 (this patch in isolation, as well as testing for various configurations in conjunction with further patches). * math/bits/mathcalls-narrow.h: New file. * include/bits/mathcalls-narrow.h: Likewise. * math/math-narrow.h: Likewise. * math/math.h (__MATHCALL_NARROW_ARGS_1): New macro. (__MATHCALL_NARROW_ARGS_2): Likewise. (__MATHCALL_NARROW_ARGS_3): Likewise. (__MATHCALL_NARROW_NORMAL): Likewise. (__MATHCALL_NARROW_REDIR): Likewise. (__MATHCALL_NARROW): Likewise. [__GLIBC_USE (IEC_60559_BFP_EXT)]: Repeatedly include with _Mret_, _Marg_ and __MATHCALL_NAME defined. [__GLIBC_USE (IEC_60559_TYPES_EXT)]: Likewise. * math/Makefile (headers): Add bits/mathcalls-narrow.h. (libm-narrow-fns): New variable. (libm-narrow-types-basic): Likewise. (libm-narrow-types-ldouble-yes): Likewise. (libm-narrow-types-float128-yes): Likewise. (libm-narrow-types-float128-alias-yes): Likewise. (libm-narrow-types): Likewise. (libm-routines): Add narrowing functions. * sysdeps/i386/fpu/fenv_private.h [__x86_64__] (libc_feholdexcept_setroundf128): New macro. [__x86_64__] (libc_feupdateenv_testf128): Likewise. * sysdeps/ieee754/float128/float128_private.h: Include . [libc_feholdexcept_setroundf128] (libc_feholdexcept_setroundl): Undefine and redefine. [libc_feupdateenv_testf128] (libc_feupdateenv_testl): Likewise. (libm_alias_float_ldouble): Undefine and redefine. (libm_alias_double_ldouble): Likewise. --- ChangeLog | 32 ++++ include/bits/mathcalls-narrow.h | 1 + math/Makefile | 14 +- math/bits/mathcalls-narrow.h | 21 ++ math/math-narrow.h | 146 ++++++++++++++ math/math.h | 284 ++++++++++++++++++++++++++++ sysdeps/i386/fpu/fenv_private.h | 2 + sysdeps/ieee754/float128/float128_private.h | 17 ++ 8 files changed, 516 insertions(+), 1 deletion(-) create mode 100644 include/bits/mathcalls-narrow.h create mode 100644 math/bits/mathcalls-narrow.h create mode 100644 math/math-narrow.h (limited to 'sysdeps/i386/fpu/fenv_private.h') diff --git a/ChangeLog b/ChangeLog index ff718e73a2..7aeb699cc3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,37 @@ 2018-02-09 Joseph Myers + * math/bits/mathcalls-narrow.h: New file. + * include/bits/mathcalls-narrow.h: Likewise. + * math/math-narrow.h: Likewise. + * math/math.h (__MATHCALL_NARROW_ARGS_1): New macro. + (__MATHCALL_NARROW_ARGS_2): Likewise. + (__MATHCALL_NARROW_ARGS_3): Likewise. + (__MATHCALL_NARROW_NORMAL): Likewise. + (__MATHCALL_NARROW_REDIR): Likewise. + (__MATHCALL_NARROW): Likewise. 
+ [__GLIBC_USE (IEC_60559_BFP_EXT)]: Repeatedly include + with _Mret_, _Marg_ and __MATHCALL_NAME + defined. + [__GLIBC_USE (IEC_60559_TYPES_EXT)]: Likewise. + * math/Makefile (headers): Add bits/mathcalls-narrow.h. + (libm-narrow-fns): New variable. + (libm-narrow-types-basic): Likewise. + (libm-narrow-types-ldouble-yes): Likewise. + (libm-narrow-types-float128-yes): Likewise. + (libm-narrow-types-float128-alias-yes): Likewise. + (libm-narrow-types): Likewise. + (libm-routines): Add narrowing functions. + * sysdeps/i386/fpu/fenv_private.h [__x86_64__] + (libc_feholdexcept_setroundf128): New macro. + [__x86_64__] (libc_feupdateenv_testf128): Likewise. + * sysdeps/ieee754/float128/float128_private.h: Include + . + [libc_feholdexcept_setroundf128] (libc_feholdexcept_setroundl): + Undefine and redefine. + [libc_feupdateenv_testf128] (libc_feupdateenv_testl): Likewise. + (libm_alias_float_ldouble): Undefine and redefine. + (libm_alias_double_ldouble): Likewise. + * math/Makefile [$(PERL) != no] (libm-test-incs): Remove variable. 2018-02-09 Wilco Dijkstra diff --git a/include/bits/mathcalls-narrow.h b/include/bits/mathcalls-narrow.h new file mode 100644 index 0000000000..0c66ecf889 --- /dev/null +++ b/include/bits/mathcalls-narrow.h @@ -0,0 +1 @@ +#include diff --git a/math/Makefile b/math/Makefile index 5538aa2207..ee0cd6fce1 100644 --- a/math/Makefile +++ b/math/Makefile @@ -29,7 +29,7 @@ headers := math.h bits/mathcalls.h bits/mathinline.h \ bits/libm-simd-decl-stubs.h bits/iscanonical.h \ bits/flt-eval-method.h bits/fp-fast.h bits/fp-logb.h \ bits/long-double.h bits/mathcalls-helper-functions.h \ - bits/floatn.h bits/floatn-common.h + bits/floatn.h bits/floatn-common.h bits/mathcalls-narrow.h # FPU support code. aux := setfpucw fpu_control @@ -89,6 +89,16 @@ libm-compat-calls = \ w_lgammaF_r_compat w_lgammaF_compat2 w_expF_compat \ w_lgamma_compatF k_standardF +libm-narrow-fns = +libm-narrow-types-basic = s_fF s_f32xFf64 +libm-narrow-types-ldouble-yes = s_fFl s_dFl +libm-narrow-types-float128-yes = s_f32Ff128 s_f64Ff128 s_f64xFf128 +libm-narrow-types-float128-alias-yes = s_f64xFf128 +libm-narrow-types = $(libm-narrow-types-basic) \ + $(libm-narrow-types-ldouble-$(long-double-fcts)) \ + $(libm-narrow-types-float128-$(float128-fcts)) \ + $(libm-narrow-types-float128-alias-$(float128-alias-fcts)) + # Type specific routine support. # # The following three variables control what is included for each type: @@ -148,6 +158,8 @@ libm-routines = $(strip $(libm-support) \ $(libm-compat-calls)) \ $(call type-foreach, $(libm-calls)) \ $(foreach t, $(types), $(type-$(t)-routines))) \ + $(foreach f,$(libm-narrow-fns), \ + $(subst F,$(f),$(libm-narrow-types))) # These functions are in libc instead of libm because __printf_fp # calls them, so any program using printf will need them linked in, diff --git a/math/bits/mathcalls-narrow.h b/math/bits/mathcalls-narrow.h new file mode 100644 index 0000000000..0f1f0510a0 --- /dev/null +++ b/math/bits/mathcalls-narrow.h @@ -0,0 +1,21 @@ +/* Declare functions returning a narrower type. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _MATH_H +# error "Never include directly; include instead." +#endif diff --git a/math/math-narrow.h b/math/math-narrow.h new file mode 100644 index 0000000000..1a3a5c57d7 --- /dev/null +++ b/math/math-narrow.h @@ -0,0 +1,146 @@ +/* Helper macros for functions returning a narrower type. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _MATH_NARROW_H +#define _MATH_NARROW_H 1 + +#include +#include +#include +#include +#include +#include + +/* Carry out a computation using round-to-odd. The computation is + EXPR; the union type in which to store the result is UNION and the + subfield of the "ieee" field of that union with the low part of the + mantissa is MANTISSA; SUFFIX is the suffix for the libc_fe* macros + to ensure that the correct rounding mode is used, for platforms + with multiple rounding modes where those macros set only the + relevant mode. This macro does not work correctly if the sign of + an exact zero result depends on the rounding mode, so that case + must be checked for separately. */ +#define ROUND_TO_ODD(EXPR, UNION, SUFFIX, MANTISSA) \ + ({ \ + fenv_t env; \ + UNION u; \ + \ + libc_feholdexcept_setround ## SUFFIX (&env, FE_TOWARDZERO); \ + u.d = (EXPR); \ + math_force_eval (u.d); \ + u.ieee.MANTISSA \ + |= libc_feupdateenv_test ## SUFFIX (&env, FE_INEXACT) != 0; \ + \ + u.d; \ + }) + +/* The following macros declare aliases for a narrowing function. The + sole argument is the base name of a family of functions, such as + "add". If any platform changes long double format after the + introduction of narrowing functions, in a way requiring symbol + versioning compatibility, additional variants of these macros will + be needed. 
*/ + +#define libm_alias_float_double_main(func) \ + weak_alias (__f ## func, f ## func) \ + weak_alias (__f ## func, f32 ## func ## f64) \ + weak_alias (__f ## func, f32 ## func ## f32x) + +#ifdef NO_LONG_DOUBLE +# define libm_alias_float_double(func) \ + libm_alias_float_double_main (func) \ + weak_alias (__f ## func, f ## func ## l) +#else +# define libm_alias_float_double(func) \ + libm_alias_float_double_main (func) +#endif + +#define libm_alias_float32x_float64_main(func) \ + weak_alias (__f32x ## func ## f64, f32x ## func ## f64) + +#ifdef NO_LONG_DOUBLE +# define libm_alias_float32x_float64(func) \ + libm_alias_float32x_float64_main (func) \ + weak_alias (__f32x ## func ## f64, d ## func ## l) +#elif defined __LONG_DOUBLE_MATH_OPTIONAL +# define libm_alias_float32x_float64(func) \ + libm_alias_float32x_float64_main (func) \ + weak_alias (__f32x ## func ## f64, __nldbl_d ## func ## l) +#else +# define libm_alias_float32x_float64(func) \ + libm_alias_float32x_float64_main (func) +#endif + +#if __HAVE_FLOAT128 && !__HAVE_DISTINCT_FLOAT128 +# define libm_alias_float_ldouble_f128(func) \ + weak_alias (__f ## func ## l, f32 ## func ## f128) +# define libm_alias_double_ldouble_f128(func) \ + weak_alias (__d ## func ## l, f32x ## func ## f128) \ + weak_alias (__d ## func ## l, f64 ## func ## f128) +#else +# define libm_alias_float_ldouble_f128(func) +# define libm_alias_double_ldouble_f128(func) +#endif + +#if __HAVE_FLOAT64X_LONG_DOUBLE +# define libm_alias_float_ldouble_f64x(func) \ + weak_alias (__f ## func ## l, f32 ## func ## f64x) +# define libm_alias_double_ldouble_f64x(func) \ + weak_alias (__d ## func ## l, f32x ## func ## f64x) \ + weak_alias (__d ## func ## l, f64 ## func ## f64x) +#else +# define libm_alias_float_ldouble_f64x(func) +# define libm_alias_double_ldouble_f64x(func) +#endif + +#define libm_alias_float_ldouble(func) \ + weak_alias (__f ## func ## l, f ## func ## l) \ + libm_alias_float_ldouble_f128 (func) \ + libm_alias_float_ldouble_f64x (func) + +#define libm_alias_double_ldouble(func) \ + weak_alias (__d ## func ## l, d ## func ## l) \ + libm_alias_double_ldouble_f128 (func) \ + libm_alias_double_ldouble_f64x (func) + +#define libm_alias_float64x_float128(func) \ + weak_alias (__f64x ## func ## f128, f64x ## func ## f128) + +#define libm_alias_float32_float128_main(func) \ + weak_alias (__f32 ## func ## f128, f32 ## func ## f128) + +#define libm_alias_float64_float128_main(func) \ + weak_alias (__f64 ## func ## f128, f64 ## func ## f128) \ + weak_alias (__f64 ## func ## f128, f32x ## func ## f128) + +#if __HAVE_FLOAT64X_LONG_DOUBLE +# define libm_alias_float32_float128(func) \ + libm_alias_float32_float128_main (func) +# define libm_alias_float64_float128(func) \ + libm_alias_float64_float128_main (func) +#else +# define libm_alias_float32_float128(func) \ + libm_alias_float32_float128_main (func) \ + weak_alias (__f32 ## func ## f128, f32 ## func ## f64x) +# define libm_alias_float64_float128(func) \ + libm_alias_float64_float128_main (func) \ + weak_alias (__f64 ## func ## f128, f64 ## func ## f64x) \ + weak_alias (__f64 ## func ## f128, f32x ## func ## f64x) +#endif + +#endif /* math-narrow.h. */ diff --git a/math/math.h b/math/math.h index 3c515f817f..2e2696854b 100644 --- a/math/math.h +++ b/math/math.h @@ -483,6 +483,290 @@ extern long double __REDIRECT_NTH (nexttowardl, #undef __MATHDECL #undef __MATHCALL +/* Declare functions returning a narrower type. 
*/ +#define __MATHCALL_NARROW_ARGS_1 (_Marg_ __x) +#define __MATHCALL_NARROW_ARGS_2 (_Marg_ __x, _Marg_ __y) +#define __MATHCALL_NARROW_ARGS_3 (_Marg_ __x, _Marg_ __y, _Marg_ __z) +#define __MATHCALL_NARROW_NORMAL(func, nargs) \ + extern _Mret_ func __MATHCALL_NARROW_ARGS_ ## nargs __THROW +#define __MATHCALL_NARROW_REDIR(func, redir, nargs) \ + extern _Mret_ __REDIRECT_NTH (func, __MATHCALL_NARROW_ARGS_ ## nargs, \ + redir) +#define __MATHCALL_NARROW(func, redir, nargs) \ + __MATHCALL_NARROW_NORMAL (func, nargs) + +#if __GLIBC_USE (IEC_60559_BFP_EXT) + +# define _Mret_ float +# define _Marg_ double +# define __MATHCALL_NAME(name) f ## name +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME + +# define _Mret_ float +# define _Marg_ long double +# define __MATHCALL_NAME(name) f ## name ## l +# ifdef __LDBL_COMPAT +# define __MATHCALL_REDIR_NAME(name) f ## name +# undef __MATHCALL_NARROW +# define __MATHCALL_NARROW(func, redir, nargs) \ + __MATHCALL_NARROW_REDIR (func, redir, nargs) +# endif +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# ifdef __LDBL_COMPAT +# undef __MATHCALL_REDIR_NAME +# undef __MATHCALL_NARROW +# define __MATHCALL_NARROW(func, redir, nargs) \ + __MATHCALL_NARROW_NORMAL (func, nargs) +# endif + +# define _Mret_ double +# define _Marg_ long double +# define __MATHCALL_NAME(name) d ## name ## l +# ifdef __LDBL_COMPAT +# define __MATHCALL_REDIR_NAME(name) __nldbl_d ## name ## l +# undef __MATHCALL_NARROW +# define __MATHCALL_NARROW(func, redir, nargs) \ + __MATHCALL_NARROW_REDIR (func, redir, nargs) +# endif +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# ifdef __LDBL_COMPAT +# undef __MATHCALL_REDIR_NAME +# undef __MATHCALL_NARROW +# define __MATHCALL_NARROW(func, redir, nargs) \ + __MATHCALL_NARROW_NORMAL (func, nargs) +# endif + +#endif + +#if __GLIBC_USE (IEC_60559_TYPES_EXT) + +# if __HAVE_FLOAT16 && __HAVE_FLOAT32 +# define _Mret_ _Float16 +# define _Marg_ _Float32 +# define __MATHCALL_NAME(name) f16 ## name ## f32 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT16 && __HAVE_FLOAT32X +# define _Mret_ _Float16 +# define _Marg_ _Float32x +# define __MATHCALL_NAME(name) f16 ## name ## f32x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT16 && __HAVE_FLOAT64 +# define _Mret_ _Float16 +# define _Marg_ _Float64 +# define __MATHCALL_NAME(name) f16 ## name ## f64 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT16 && __HAVE_FLOAT64X +# define _Mret_ _Float16 +# define _Marg_ _Float64x +# define __MATHCALL_NAME(name) f16 ## name ## f64x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT16 && __HAVE_FLOAT128 +# define _Mret_ _Float16 +# define _Marg_ _Float128 +# define __MATHCALL_NAME(name) f16 ## name ## f128 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT16 && __HAVE_FLOAT128X +# define _Mret_ _Float16 +# define _Marg_ _Float128x +# define __MATHCALL_NAME(name) f16 ## name ## f128x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32 && __HAVE_FLOAT32X +# define _Mret_ _Float32 +# define _Marg_ _Float32x +# define __MATHCALL_NAME(name) f32 ## name ## f32x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32 && __HAVE_FLOAT64 +# define _Mret_ _Float32 +# define _Marg_ _Float64 +# 
define __MATHCALL_NAME(name) f32 ## name ## f64 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32 && __HAVE_FLOAT64X +# define _Mret_ _Float32 +# define _Marg_ _Float64x +# define __MATHCALL_NAME(name) f32 ## name ## f64x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32 && __HAVE_FLOAT128 +# define _Mret_ _Float32 +# define _Marg_ _Float128 +# define __MATHCALL_NAME(name) f32 ## name ## f128 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32 && __HAVE_FLOAT128X +# define _Mret_ _Float32 +# define _Marg_ _Float128x +# define __MATHCALL_NAME(name) f32 ## name ## f128x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32X && __HAVE_FLOAT64 +# define _Mret_ _Float32x +# define _Marg_ _Float64 +# define __MATHCALL_NAME(name) f32x ## name ## f64 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32X && __HAVE_FLOAT64X +# define _Mret_ _Float32x +# define _Marg_ _Float64x +# define __MATHCALL_NAME(name) f32x ## name ## f64x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32X && __HAVE_FLOAT128 +# define _Mret_ _Float32x +# define _Marg_ _Float128 +# define __MATHCALL_NAME(name) f32x ## name ## f128 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT32X && __HAVE_FLOAT128X +# define _Mret_ _Float32x +# define _Marg_ _Float128x +# define __MATHCALL_NAME(name) f32x ## name ## f128x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT64 && __HAVE_FLOAT64X +# define _Mret_ _Float64 +# define _Marg_ _Float64x +# define __MATHCALL_NAME(name) f64 ## name ## f64x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT64 && __HAVE_FLOAT128 +# define _Mret_ _Float64 +# define _Marg_ _Float128 +# define __MATHCALL_NAME(name) f64 ## name ## f128 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT64 && __HAVE_FLOAT128X +# define _Mret_ _Float64 +# define _Marg_ _Float128x +# define __MATHCALL_NAME(name) f64 ## name ## f128x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT64X && __HAVE_FLOAT128 +# define _Mret_ _Float64x +# define _Marg_ _Float128 +# define __MATHCALL_NAME(name) f64x ## name ## f128 +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT64X && __HAVE_FLOAT128X +# define _Mret_ _Float64x +# define _Marg_ _Float128x +# define __MATHCALL_NAME(name) f64x ## name ## f128x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +# if __HAVE_FLOAT128 && __HAVE_FLOAT128X +# define _Mret_ _Float128 +# define _Marg_ _Float128x +# define __MATHCALL_NAME(name) f128 ## name ## f128x +# include +# undef _Mret_ +# undef _Marg_ +# undef __MATHCALL_NAME +# endif + +#endif + +#undef __MATHCALL_NARROW_ARGS_1 +#undef __MATHCALL_NARROW_ARGS_2 +#undef __MATHCALL_NARROW_ARGS_3 +#undef __MATHCALL_NARROW_NORMAL +#undef __MATHCALL_NARROW_REDIR +#undef __MATHCALL_NARROW #if defined __USE_MISC || defined __USE_XOPEN /* This variable is used by `gamma' and `lgamma'. 
*/ diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h index 38fd0b92b5..a258f48e07 100644 --- a/sysdeps/i386/fpu/fenv_private.h +++ b/sysdeps/i386/fpu/fenv_private.h @@ -337,6 +337,8 @@ libc_feresetround_387 (fenv_t *e) x86_64, so that must be set for float128 computations. */ # define SET_RESTORE_ROUNDF128(RM) \ SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) +# define libc_feholdexcept_setroundf128 libc_feholdexcept_setround_sse +# define libc_feupdateenv_testf128 libc_feupdateenv_test_sse #endif /* We have support for rounding mode context. */ diff --git a/sysdeps/ieee754/float128/float128_private.h b/sysdeps/ieee754/float128/float128_private.h index c9d9924169..d0d41c3cef 100644 --- a/sysdeps/ieee754/float128/float128_private.h +++ b/sysdeps/ieee754/float128/float128_private.h @@ -54,6 +54,16 @@ # define SET_RESTORE_ROUNDL(RM) SET_RESTORE_ROUNDF128 (RM) #endif +#ifdef libc_feholdexcept_setroundf128 +# undef libc_feholdexcept_setroundl +# define libc_feholdexcept_setroundl(ENV, RM) \ + libc_feholdexcept_setroundf128 (ENV, RM) +#endif + +#ifdef libc_feupdateenv_testf128 +# undef libc_feupdateenv_testl +# define libc_feupdateenv_testl(ENV, EX) libc_feupdateenv_testf128 (ENV, EX) +#endif /* misc macros from the header below. */ #include @@ -122,6 +132,13 @@ #define libm_alias_ldouble_r(from, to, r) libm_alias_float128_r (from, to, r) +#include +#undef libm_alias_float_ldouble +#define libm_alias_float_ldouble(func) libm_alias_float32_float128 (func) +#undef libm_alias_double_ldouble +#define libm_alias_double_ldouble(func) libm_alias_float64_float128 (func) + + /* IEEE function renames. */ #define __ieee754_acoshl __ieee754_acoshf128 #define __ieee754_acosl __ieee754_acosf128 -- cgit v1.2.3 From e2bcf6a8551c6b6a7eeed8f84be42b29eef4d0e2 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Wed, 28 Feb 2018 21:55:51 +0000 Subject: Fix i386 fenv_private.h float128 for 32-bit --with-fpmath=sse (bug 22902). As discussed in bug 22902, the i386 fenv_private.h implementation has problems for float128 for the case of 32-bit glibc built with libgcc from GCC configured using --with-fpmath=sse. The optimized floating-point state handling in fenv_private.h needs to know which floating-point state - x87 or SSE - is used for each floating-point type, so that only one state needs updating / testing for libm code using that state internally. On 32-bit x86, the x87 rounding mode is always used for float128, but the x87 exception flags are only used when libgcc is built using x87 floating-point arithmetic; if libgcc is built for SSE arithmetic, the SSE exception flags are used. The choice of arithmetic with which libgcc is built is independent of that with which glibc is built. Thus, since glibc cannot tell the choice used in libgcc, the default implementations of libc_feholdexcept_setroundf128 and libc_feupdateenv_testf128 (which use the functions, thus using both x87 and SSE state on processors that have both) need to be used; this patch updates the code accordingly. Tested for 32-bit x86; HJ reports testing in the --with-fpmath=sse case. [BZ #22902] * sysdeps/i386/fpu/fenv_private.h [!__x86_64__] (libc_feholdexcept_setroundf128): New macro. [!__x86_64__] (libc_feupdateenv_testf128): Likewise. 
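For reference, the round-to-odd approach that ROUND_TO_ODD and these libc_feholdexcept_setroundf128 / libc_feupdateenv_testf128 hooks support can be sketched with only the public fenv.h interfaces. This is a simplified illustration, not glibc's code: it ignores the exact-zero sign caveat noted in the ROUND_TO_ODD comment and uses volatile where glibc uses its barrier macros.

/* Narrow a double sum to float via round-to-odd: compute toward
   zero, force the low mantissa bit to 1 if that computation was
   inexact, then let the final conversion round once in the caller's
   rounding mode.  With at least two extra bits of precision in the
   wide type this avoids double rounding.  */
#include <fenv.h>
#include <stdint.h>
#include <string.h>

static float
add_narrow (double x, double y)
{
  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TOWARDZERO);
  volatile double sum = x + y;
  int inexact = fetestexcept (FE_INEXACT);
  double s = sum;
  uint64_t bits;
  memcpy (&bits, &s, sizeof bits);
  bits |= (inexact != 0);		/* Round to odd.  */
  memcpy (&s, &bits, sizeof bits);
  feupdateenv (&env);			/* Restore mode, merge exceptions.  */
  return (float) s;			/* Single final rounding.  */
}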
--- ChangeLog | 5 +++++ sysdeps/i386/fpu/fenv_private.h | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'sysdeps/i386/fpu/fenv_private.h') diff --git a/ChangeLog b/ChangeLog index 56ef28678a..8f641de741 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2018-02-28 Joseph Myers + [BZ #22902] + * sysdeps/i386/fpu/fenv_private.h [!__x86_64__] + (libc_feholdexcept_setroundf128): New macro. + [!__x86_64__] (libc_feupdateenv_testf128): Likewise. + [BZ #15105] * sysdeps/wordsize-32/strtoumax.c (strtoumax): Use libc_hidden_def. diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h index a258f48e07..357613a500 100644 --- a/sysdeps/i386/fpu/fenv_private.h +++ b/sysdeps/i386/fpu/fenv_private.h @@ -339,6 +339,12 @@ libc_feresetround_387 (fenv_t *e) SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) # define libc_feholdexcept_setroundf128 libc_feholdexcept_setround_sse # define libc_feupdateenv_testf128 libc_feupdateenv_test_sse +#else +/* The 387 rounding mode is used by soft-fp for 32-bit, but whether + 387 or SSE exceptions are used depends on whether libgcc was built + for SSE math, which is not known when glibc is being built. */ +# define libc_feholdexcept_setroundf128 default_libc_feholdexcept_setround +# define libc_feupdateenv_testf128 default_libc_feupdateenv_test #endif /* We have support for rounding mode context. */ -- cgit v1.2.3 From 9ed2e15ff4c9ff27c09103fa13a051e3605cbe5f Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Wed, 9 May 2018 19:45:47 +0000 Subject: Move math_opt_barrier, math_force_eval to separate math-barriers.h. This patch continues cleaning up math_private.h by moving the math_opt_barrier and math_force_eval macros to a separate header math-barriers.h. At present, those macros are inside a "#ifndef math_opt_barrier" in math_private.h to allow architectures to override them and then use a separate math-barriers.h header, no such #ifndef or #include_next is needed; architectures just have their own alternative version of math-barriers.h when providing their own optimized versions that avoid going through memory unnecessarily. The generic math-barriers.h has a comment added to document these two macros. In this patch, math_private.h is made to #include , so files using these macros do not need updating yet. That is because of uses of math_force_eval in math_check_force_underflow and math_check_force_underflow_nonneg, which are still defined in math_private.h. Once those are moved out to a separate header, that separate header can be made to include , as can the other files directly using these barrier macros, and then the include of from math_private.h can be removed. Tested for x86_64 and x86. Also tested with build-many-glibcs.py that installed stripped shared libraries are unchanged by this patch. * sysdeps/generic/math-barriers.h: New file. * sysdeps/generic/math_private.h [!math_opt_barrier] (math_opt_barrier): Move to math-barriers.h. [!math_opt_barrier] (math_force_eval): Likewise. * sysdeps/aarch64/fpu/math-barriers.h: New file. * sysdeps/aarch64/fpu/math_private.h (math_opt_barrier): Move to math-barriers.h. (math_force_eval): Likewise. * sysdeps/alpha/fpu/math-barriers.h: New file. * sysdeps/alpha/fpu/math_private.h (math_opt_barrier): Move to math-barriers.h. (math_force_eval): Likewise. * sysdeps/x86/fpu/math-barriers.h: New file. * sysdeps/i386/fpu/fenv_private.h (math_opt_barrier): Move to math-barriers.h. (math_force_eval): Likewise. * sysdeps/m68k/m680x0/fpu/math_private.h: Move to.... 
* sysdeps/m68k/m680x0/fpu/math-barriers.h: ... here. Adjust multiple-include guard for rename. * sysdeps/powerpc/fpu/math-barriers.h: New file. * sysdeps/powerpc/fpu/math_private.h (math_opt_barrier): Move to math-barriers.h. (math_force_eval): Likewise. --- ChangeLog | 26 ++++++++++++++ sysdeps/aarch64/fpu/math-barriers.h | 27 +++++++++++++++ sysdeps/aarch64/fpu/math_private.h | 5 --- sysdeps/alpha/fpu/math-barriers.h | 28 +++++++++++++++ sysdeps/alpha/fpu/math_private.h | 6 ---- sysdeps/generic/math-barriers.h | 37 ++++++++++++++++++++ sysdeps/generic/math_private.h | 7 +--- sysdeps/i386/fpu/fenv_private.h | 39 --------------------- sysdeps/m68k/m680x0/fpu/math-barriers.h | 37 ++++++++++++++++++++ sysdeps/m68k/m680x0/fpu/math_private.h | 20 ----------- sysdeps/powerpc/fpu/math-barriers.h | 28 +++++++++++++++ sysdeps/powerpc/fpu/math_private.h | 6 ---- sysdeps/x86/fpu/math-barriers.h | 61 +++++++++++++++++++++++++++++++++ 13 files changed, 245 insertions(+), 82 deletions(-) create mode 100644 sysdeps/aarch64/fpu/math-barriers.h create mode 100644 sysdeps/alpha/fpu/math-barriers.h create mode 100644 sysdeps/generic/math-barriers.h create mode 100644 sysdeps/m68k/m680x0/fpu/math-barriers.h delete mode 100644 sysdeps/m68k/m680x0/fpu/math_private.h create mode 100644 sysdeps/powerpc/fpu/math-barriers.h create mode 100644 sysdeps/x86/fpu/math-barriers.h (limited to 'sysdeps/i386/fpu/fenv_private.h') diff --git a/ChangeLog b/ChangeLog index 4164b32036..7ab225e320 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +2018-05-09 Joseph Myers + + * sysdeps/generic/math-barriers.h: New file. + * sysdeps/generic/math_private.h [!math_opt_barrier] + (math_opt_barrier): Move to math-barriers.h. + [!math_opt_barrier] (math_force_eval): Likewise. + * sysdeps/aarch64/fpu/math-barriers.h: New file. + * sysdeps/aarch64/fpu/math_private.h (math_opt_barrier): Move to + math-barriers.h. + (math_force_eval): Likewise. + * sysdeps/alpha/fpu/math-barriers.h: New file. + * sysdeps/alpha/fpu/math_private.h (math_opt_barrier): Move to + math-barriers.h. + (math_force_eval): Likewise. + * sysdeps/x86/fpu/math-barriers.h: New file. + * sysdeps/i386/fpu/fenv_private.h (math_opt_barrier): Move to + math-barriers.h. + (math_force_eval): Likewise. + * sysdeps/m68k/m680x0/fpu/math_private.h: Move to.... + * sysdeps/m68k/m680x0/fpu/math-barriers.h: ... here. Adjust + multiple-include guard for rename. + * sysdeps/powerpc/fpu/math-barriers.h: New file. + * sysdeps/powerpc/fpu/math_private.h (math_opt_barrier): Move to + math-barriers.h. + (math_force_eval): Likewise. + 2018-05-09 Paul Pluzhnikov [BZ #22786] diff --git a/sysdeps/aarch64/fpu/math-barriers.h b/sysdeps/aarch64/fpu/math-barriers.h new file mode 100644 index 0000000000..7db937bbf4 --- /dev/null +++ b/sysdeps/aarch64/fpu/math-barriers.h @@ -0,0 +1,27 @@ +/* Control when floating-point expressions are evaluated. AArch64 version. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef AARCH64_MATH_BARRIERS_H +#define AARCH64_MATH_BARRIERS_H 1 + +#define math_opt_barrier(x) \ + ({ __typeof (x) __x = (x); __asm ("" : "+w" (__x)); __x; }) +#define math_force_eval(x) \ + ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "w" (__x)); }) + +#endif diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h index d9c2d710a9..fcd02c0654 100644 --- a/sysdeps/aarch64/fpu/math_private.h +++ b/sysdeps/aarch64/fpu/math_private.h @@ -22,11 +22,6 @@ #include #include -#define math_opt_barrier(x) \ -({ __typeof (x) __x = (x); __asm ("" : "+w" (__x)); __x; }) -#define math_force_eval(x) \ -({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "w" (__x)); }) - static __always_inline void libc_feholdexcept_aarch64 (fenv_t *envp) { diff --git a/sysdeps/alpha/fpu/math-barriers.h b/sysdeps/alpha/fpu/math-barriers.h new file mode 100644 index 0000000000..27a64d789d --- /dev/null +++ b/sysdeps/alpha/fpu/math-barriers.h @@ -0,0 +1,28 @@ +/* Control when floating-point expressions are evaluated. Alpha version. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef ALPHA_MATH_BARRIERS_H +#define ALPHA_MATH_BARRIERS_H 1 + +/* Generic code forces values to memory; we don't need to do that. */ +#define math_opt_barrier(x) \ + ({ __typeof (x) __x = (x); __asm ("" : "+frm" (__x)); __x; }) +#define math_force_eval(x) \ + ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "frm" (__x)); }) + +#endif diff --git a/sysdeps/alpha/fpu/math_private.h b/sysdeps/alpha/fpu/math_private.h index 95dc32c969..c71447abf0 100644 --- a/sysdeps/alpha/fpu/math_private.h +++ b/sysdeps/alpha/fpu/math_private.h @@ -13,12 +13,6 @@ # define __isnanf __isnanf #endif -/* Generic code forces values to memory; we don't need to do that. */ -#define math_opt_barrier(x) \ - ({ __typeof (x) __x = (x); __asm ("" : "+frm" (__x)); __x; }) -#define math_force_eval(x) \ - ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "frm" (__x)); }) - #include_next #endif /* ALPHA_MATH_PRIVATE_H */ diff --git a/sysdeps/generic/math-barriers.h b/sysdeps/generic/math-barriers.h new file mode 100644 index 0000000000..425173149c --- /dev/null +++ b/sysdeps/generic/math-barriers.h @@ -0,0 +1,37 @@ +/* Control when floating-point expressions are evaluated. Generic version. + Copyright (C) 2007-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _MATH_BARRIERS_H +#define _MATH_BARRIERS_H 1 + +/* math_opt_barrier evaluates and returns its floating-point argument + and ensures that the evaluation of any expression using the result + of math_opt_barrier is not moved before the call. math_force_eval + ensures that its floating-point argument is evaluated for its side + effects even if its value is apparently unused, and that the + evaluation of its argument is not moved after the call. Both these + macros are used to ensure the correct ordering of floating-point + expression evaluations with respect to accesses to the + floating-point environment. */ + +#define math_opt_barrier(x) \ + ({ __typeof (x) __x = (x); __asm ("" : "+m" (__x)); __x; }) +#define math_force_eval(x) \ + ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "m" (__x)); }) + +#endif /* math-barriers.h */ diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index 703f506ea0..f6faf71895 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -263,12 +263,7 @@ extern double __mpsin (double __x, double __dx, bool __range_reduce); extern double __mpcos (double __x, double __dx, bool __range_reduce); extern void __docos (double __x, double __dx, double __v[]); -#ifndef math_opt_barrier -# define math_opt_barrier(x) \ -({ __typeof (x) __x = (x); __asm ("" : "+m" (__x)); __x; }) -# define math_force_eval(x) \ -({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "m" (__x)); }) -#endif +#include #define fabs_tg(x) __MATH_TG ((x), (__typeof (x)) __builtin_fabs, (x)) diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h index 357613a500..59e83d858a 100644 --- a/sysdeps/i386/fpu/fenv_private.h +++ b/sysdeps/i386/fpu/fenv_private.h @@ -5,45 +5,6 @@ #include #include -#ifdef __SSE2_MATH__ -# define math_opt_barrier(x) \ - ({ __typeof(x) __x; \ - if (sizeof (x) <= sizeof (double) \ - || __builtin_types_compatible_p (__typeof (x), _Float128)) \ - __asm ("" : "=x" (__x) : "0" (x)); \ - else \ - __asm ("" : "=t" (__x) : "0" (x)); \ - __x; }) -# define math_force_eval(x) \ - do { \ - if (sizeof (x) <= sizeof (double) \ - || __builtin_types_compatible_p (__typeof (x), _Float128)) \ - __asm __volatile ("" : : "x" (x)); \ - else \ - __asm __volatile ("" : : "f" (x)); \ - } while (0) -#else -# define math_opt_barrier(x) \ - ({ __typeof (x) __x; \ - if (__builtin_types_compatible_p (__typeof (x), _Float128)) \ - { \ - __x = (x); \ - __asm ("" : "+m" (__x)); \ - } \ - else \ - __asm ("" : "=t" (__x) : "0" (x)); \ - __x; }) -# define math_force_eval(x) \ - do { \ - __typeof (x) __x = (x); \ - if (sizeof (x) <= sizeof (double) \ - || __builtin_types_compatible_p (__typeof (x), _Float128)) \ - __asm __volatile ("" : : "m" (__x)); \ - else \ - __asm __volatile ("" : : "f" (__x)); \ - } while (0) -#endif - /* This file is used by both the 32- and 64-bit ports. The 64-bit port has a field in the fenv_t for the mxcsr; the 32-bit port does not. Instead, we (ab)use the only 32-bit field extant in the struct. 
*/ diff --git a/sysdeps/m68k/m680x0/fpu/math-barriers.h b/sysdeps/m68k/m680x0/fpu/math-barriers.h new file mode 100644 index 0000000000..b1b78d5f11 --- /dev/null +++ b/sysdeps/m68k/m680x0/fpu/math-barriers.h @@ -0,0 +1,37 @@ +/* Control when floating-point expressions are evaluated. M68k version. + Copyright (C) 2011-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef M68K_MATH_BARRIERS_H +#define M68K_MATH_BARRIERS_H 1 + +#define math_opt_barrier(x) \ +({ __typeof (x) __x; \ + __asm ("" : "=f" (__x) : "0" (x)); \ + __x; }) +#define math_force_eval(x) \ +do \ + { \ + __typeof (x) __x = (x); \ + if (sizeof (x) <= sizeof (double)) \ + __asm __volatile ("" : : "m" (__x)); \ + else \ + __asm __volatile ("" : : "f" (__x)); \ + } \ +while (0) + +#endif diff --git a/sysdeps/m68k/m680x0/fpu/math_private.h b/sysdeps/m68k/m680x0/fpu/math_private.h deleted file mode 100644 index 2b7909535e..0000000000 --- a/sysdeps/m68k/m680x0/fpu/math_private.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef M68K_MATH_PRIVATE_H -#define M68K_MATH_PRIVATE_H 1 - -#define math_opt_barrier(x) \ -({ __typeof (x) __x; \ - __asm ("" : "=f" (__x) : "0" (x)); \ - __x; }) -#define math_force_eval(x) \ -do \ - { \ - __typeof (x) __x = (x); \ - if (sizeof (x) <= sizeof (double)) \ - __asm __volatile ("" : : "m" (__x)); \ - else \ - __asm __volatile ("" : : "f" (__x)); \ - } \ -while (0) - -#include_next -#endif diff --git a/sysdeps/powerpc/fpu/math-barriers.h b/sysdeps/powerpc/fpu/math-barriers.h new file mode 100644 index 0000000000..6da210219e --- /dev/null +++ b/sysdeps/powerpc/fpu/math-barriers.h @@ -0,0 +1,28 @@ +/* Control when floating-point expressions are evaluated. PowerPC version. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef POWERPC_MATH_BARRIERS_H +#define POWERPC_MATH_BARRIERS_H 1 + +/* Avoid putting floating point values in memory. 
*/ +# define math_opt_barrier(x) \ + ({ __typeof (x) __x = (x); __asm ("" : "+dwa" (__x)); __x; }) +# define math_force_eval(x) \ + ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "dwa" (__x)); }) + +#endif diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h index b9fc721257..e642d6c823 100644 --- a/sysdeps/powerpc/fpu/math_private.h +++ b/sysdeps/powerpc/fpu/math_private.h @@ -24,12 +24,6 @@ #include #include -/* Avoid putting floating point values in memory. */ -# define math_opt_barrier(x) \ - ({ __typeof (x) __x = (x); __asm ("" : "+dwa" (__x)); __x; }) -# define math_force_eval(x) \ - ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "dwa" (__x)); }) - #include_next #if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128 diff --git a/sysdeps/x86/fpu/math-barriers.h b/sysdeps/x86/fpu/math-barriers.h new file mode 100644 index 0000000000..1e1fabdb92 --- /dev/null +++ b/sysdeps/x86/fpu/math-barriers.h @@ -0,0 +1,61 @@ +/* Control when floating-point expressions are evaluated. x86 version. + Copyright (C) 2007-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef X86_MATH_BARRIERS_H +#define X86_MATH_BARRIERS_H 1 + +#ifdef __SSE2_MATH__ +# define math_opt_barrier(x) \ + ({ __typeof(x) __x; \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm ("" : "=x" (__x) : "0" (x)); \ + else \ + __asm ("" : "=t" (__x) : "0" (x)); \ + __x; }) +# define math_force_eval(x) \ + do { \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm __volatile ("" : : "x" (x)); \ + else \ + __asm __volatile ("" : : "f" (x)); \ + } while (0) +#else +# define math_opt_barrier(x) \ + ({ __typeof (x) __x; \ + if (__builtin_types_compatible_p (__typeof (x), _Float128)) \ + { \ + __x = (x); \ + __asm ("" : "+m" (__x)); \ + } \ + else \ + __asm ("" : "=t" (__x) : "0" (x)); \ + __x; }) +# define math_force_eval(x) \ + do { \ + __typeof (x) __x = (x); \ + if (sizeof (x) <= sizeof (double) \ + || __builtin_types_compatible_p (__typeof (x), _Float128)) \ + __asm __volatile ("" : : "m" (__x)); \ + else \ + __asm __volatile ("" : : "f" (__x)); \ + } while (0) +#endif + +#endif -- cgit v1.2.3 From f496b28e61d0342f579bf794c71b80e9c7d0b1b5 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Thu, 21 Jun 2018 08:04:29 +0200 Subject: math: Set 387 and SSE2 rounding mode for tgamma on i386 [BZ #23253] Previously, only the SSE2 rounding mode was set, so the assembler implementations using 387 were not following the expecting rounding mode. 
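Note the contrast with the public API: fesetround from fenv.h on 32-bit x86 updates both the 387 control word and the SSE MXCSR, which is the behaviour the MATH_SET_BOTH_ROUNDING_MODES fallback restores for code, like tgamma, that mixes SSE-compiled C with 387 assembler. A minimal standalone illustration (assuming an x86 glibc; not code from the patch, and strictly this kind of fenv access wants -frounding-math or FENV_ACCESS):

/* Divide at run time under an explicitly chosen rounding mode; with
   fesetround both the 387 and SSE rounding modes change, so the
   result is rounded upward regardless of which unit performs the
   division.  */
#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  volatile double one = 1.0, three = 3.0;
  int old_mode = fegetround ();
  fesetround (FE_UPWARD);
  volatile double third = one / three;	/* Runtime division, rounded up.  */
  fesetround (old_mode);
  printf ("1/3 rounded upward: %.17g\n", third);
  return 0;
}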
--- ChangeLog | 22 +++++++++++++ sysdeps/generic/math_private.h | 68 ++++++++++++++++++++++------------------- sysdeps/i386/Makefile | 8 +++++ sysdeps/i386/fpu/fenv_private.h | 18 ++++++++--- 4 files changed, 80 insertions(+), 36 deletions(-) (limited to 'sysdeps/i386/fpu/fenv_private.h') diff --git a/ChangeLog b/ChangeLog index a514dd69a6..ff5cbeb265 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +2018-06-21 Florian Weimer + + [BZ #23253] + * sysdeps/generic/math_private.h (default_libc_feholdsetround_ctx): + Renamed from libc_feholdsetround_ctx. + (default_libc_feresetround_ctx): Renamed from + libc_feresetround_ctx. + (default_libc_feholdsetround_noex_ctx): Renamed from + libc_feholdsetround_noex_ctx. + (default_libc_feresetround_noex_ctx): Renamed from + libc_feresetround_noex_ctx. + [!HAVE_RM_CTX] (libc_feholdsetround_ctx, libc_feresetround_ctx) + (libc_feholdsetround_noex_ctx, libc_feresetround_noex_ctx): Macros + forwardning to the old implementations under the new names. + * sysdeps/i386/fpu/fenv_private.h [__SSE_MATH__] + (libc_feholdexcept_setround_ctx, libc_fesetenv_ctx) + (libc_feupdateenv_ctx, libc_feholdsetround_ctx) + (libc_feresetround_ctx): Forward to default implements for i386 + and MATH_SET_BOTH_ROUNDING_MODES. + * sysdeps/i386/Makefile [$(subdir) == math] (CFLAGS-e_gamma_r.c): + Add -DMATH_SET_BOTH_ROUNDING_MODES. + 2018-06-20 Joseph Myers * string/tst-cmp.c: Include . diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index b6612ba6bf..1212abaf47 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -428,33 +428,6 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) # define HAVE_RM_CTX 0 #endif -#if HAVE_RM_CTX -/* Set/Restore Rounding Modes only when necessary. If defined, these functions - set/restore floating point state only if the state needed within the lexical - block is different from the current state. This saves a lot of time when - the floating point unit is much slower than the fixed point units. */ - -# ifndef libc_feholdsetround_noex_ctx -# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx -# endif -# ifndef libc_feholdsetround_noexf_ctx -# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx -# endif -# ifndef libc_feholdsetround_noexl_ctx -# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx -# endif - -# ifndef libc_feresetround_noex_ctx -# define libc_feresetround_noex_ctx libc_fesetenv_ctx -# endif -# ifndef libc_feresetround_noexf_ctx -# define libc_feresetround_noexf_ctx libc_fesetenvf_ctx -# endif -# ifndef libc_feresetround_noexl_ctx -# define libc_feresetround_noexl_ctx libc_fesetenvl_ctx -# endif - -#else /* Default implementation using standard fenv functions. Avoid unnecessary rounding mode changes by first checking the @@ -462,7 +435,7 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) important for performance. */ static __always_inline void -libc_feholdsetround_ctx (struct rm_ctx *ctx, int round) +default_libc_feholdsetround_ctx (struct rm_ctx *ctx, int round) { ctx->updated_status = false; @@ -476,7 +449,7 @@ libc_feholdsetround_ctx (struct rm_ctx *ctx, int round) } static __always_inline void -libc_feresetround_ctx (struct rm_ctx *ctx) +default_libc_feresetround_ctx (struct rm_ctx *ctx) { /* Restore the rounding mode if updated. 
*/ if (__glibc_unlikely (ctx->updated_status)) @@ -484,7 +457,7 @@ libc_feresetround_ctx (struct rm_ctx *ctx) } static __always_inline void -libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round) +default_libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round) { /* Save exception flags and rounding mode, and disable exception traps. */ @@ -496,12 +469,45 @@ libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round) } static __always_inline void -libc_feresetround_noex_ctx (struct rm_ctx *ctx) +default_libc_feresetround_noex_ctx (struct rm_ctx *ctx) { /* Restore exception flags and rounding mode. */ __fesetenv (&ctx->env); } +#if HAVE_RM_CTX +/* Set/Restore Rounding Modes only when necessary. If defined, these functions + set/restore floating point state only if the state needed within the lexical + block is different from the current state. This saves a lot of time when + the floating point unit is much slower than the fixed point units. */ + +# ifndef libc_feholdsetround_noex_ctx +# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx +# endif +# ifndef libc_feholdsetround_noexf_ctx +# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx +# endif +# ifndef libc_feholdsetround_noexl_ctx +# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx +# endif + +# ifndef libc_feresetround_noex_ctx +# define libc_feresetround_noex_ctx libc_fesetenv_ctx +# endif +# ifndef libc_feresetround_noexf_ctx +# define libc_feresetround_noexf_ctx libc_fesetenvf_ctx +# endif +# ifndef libc_feresetround_noexl_ctx +# define libc_feresetround_noexl_ctx libc_fesetenvl_ctx +# endif + +#else + +# define libc_feholdsetround_ctx default_libc_feholdsetround_ctx +# define libc_feresetround_ctx default_libc_feresetround_ctx +# define libc_feholdsetround_noex_ctx default_libc_feholdsetround_noex_ctx +# define libc_feresetround_noex_ctx default_libc_feresetround_noex_ctx + # define libc_feholdsetroundf_ctx libc_feholdsetround_ctx # define libc_feholdsetroundl_ctx libc_feholdsetround_ctx # define libc_feresetroundf_ctx libc_feresetround_ctx diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile index 1682394e76..c0a4fe15d4 100644 --- a/sysdeps/i386/Makefile +++ b/sysdeps/i386/Makefile @@ -5,6 +5,14 @@ asm-CPPFLAGS += -DGAS_SYNTAX # The i386 `long double' is a distinct type we support. long-double-fcts = yes +ifeq ($(subdir),math) +# These functions change the rounding mode internally and need to +# update both the SSE2 rounding mode and the 387 rounding mode. See +# the handling of MATH_SET_BOTH_ROUNDING_MODES in +# sysdeps/i386/fpu/fenv_private.h. 
+CFLAGS-e_gamma_r.c += -DMATH_SET_BOTH_ROUNDING_MODES +endif + ifeq ($(subdir),string) sysdep_routines += cacheinfo endif diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h index 59e83d858a..637dae5f05 100644 --- a/sysdeps/i386/fpu/fenv_private.h +++ b/sysdeps/i386/fpu/fenv_private.h @@ -460,11 +460,19 @@ libc_feupdateenv_387_ctx (struct rm_ctx *ctx) #endif /* __SSE_MATH__ */ #ifdef __SSE2_MATH__ -# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_sse_ctx -# define libc_fesetenv_ctx libc_fesetenv_sse_ctx -# define libc_feupdateenv_ctx libc_feupdateenv_sse_ctx -# define libc_feholdsetround_ctx libc_feholdsetround_sse_ctx -# define libc_feresetround_ctx libc_feresetround_sse_ctx +# if defined (__x86_64__) || !defined (MATH_SET_BOTH_ROUNDING_MODES) +# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_sse_ctx +# define libc_fesetenv_ctx libc_fesetenv_sse_ctx +# define libc_feupdateenv_ctx libc_feupdateenv_sse_ctx +# define libc_feholdsetround_ctx libc_feholdsetround_sse_ctx +# define libc_feresetround_ctx libc_feresetround_sse_ctx +# else +# define libc_feholdexcept_setround_ctx default_libc_feholdexcept_setround_ctx +# define libc_fesetenv_ctx default_libc_fesetenv_ctx +# define libc_feupdateenv_ctx default_libc_feupdateenv_ctx +# define libc_feholdsetround_ctx default_libc_feholdsetround_ctx +# define libc_feresetround_ctx default_libc_feresetround_ctx +# endif #else # define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_387_ctx # define libc_feupdateenv_ctx libc_feupdateenv_387_ctx -- cgit v1.2.3
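As a reference point for the default_libc_* context functions shown above, the rm_ctx pattern boils down to recording whether the rounding mode actually had to change and restoring it only in that case. Below is a simplified, self-contained equivalent using only public fenv.h calls; the names are hypothetical, not glibc's internal API.

#include <fenv.h>
#include <stdbool.h>

struct rm_ctx_sketch
{
  int old_round;
  bool updated;
};

/* Switch to ROUND only if the current mode differs, and remember
   whether a change was made so the restore can be skipped.  */
static inline void
hold_setround_sketch (struct rm_ctx_sketch *ctx, int round)
{
  ctx->old_round = fegetround ();
  ctx->updated = (ctx->old_round != round);
  if (ctx->updated)
    fesetround (round);
}

static inline void
resetround_sketch (struct rm_ctx_sketch *ctx)
{
  if (ctx->updated)
    fesetround (ctx->old_round);
}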