/* 128-bit floating point to 32-bit floating point. Copyright (C) 1997 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include /* float _q_qtos(const long double *a); Convert 'a' to float. Round as per current rounding flags. Input Rounding Output +/-0 * +/-0 +/-Inf * +/-Inf +/-NaN * +/-NaN (with mantissa truncated) +/-SNaN * +/-NaN (with mantissa truncated, MSB of mantissa <- 1) && raise VXSNAN [Note: just truncating the mantissa may not give you a SNaN!] |a|>=2^128 Nearest +/-Inf && raise overflow && raise inexact |a|>=2^128 Truncate +/-(2^128-2^104) && raise overflow && raise inexact a>=2^128 +Inf +Inf && raise overflow && raise inexact a<=-2^128 +Inf -(2^128-2^104) && raise overflow && raise inexact a>=2^128 -Inf +(2^128-2^104) && raise overflow && raise inexact a<=-2^128 -Inf -Inf && raise overflow && raise inexact We also need to raise 'inexact' if the result will be inexact, which depends on the current rounding mode. To avoid having to deal with all that, we convert to a 'double' that will round correctly (but is not itself rounded correctly), and convert that to a float. This makes this procedure much simpler and much faster. */ float __q_qtos(const unsigned long long a[2]) { unsigned long long a0,d; union { double d; unsigned long long ull; } u; a0 = a[0]; /* Truncate the mantissa to 48 bits. */ d = a0 << 4; /* Set the low bit in the mantissa if any of the bits we are dropping were 1. This ensures correct rounding, and also distinguishes 0 and Inf from denormalised numbers and SNaN (respectively). */ d |= a[1] != 0; /* Copy the sign bit. */ d = d & 0x7fffffffffffffffULL | a0 & 0x8000000000000000ULL; /* Now, we need to fix the exponent. If the exponent of a was in the range +127 to -152, or was +16384 or -16383, it is already correct in 'd'. Otherwise, we need to ensure that the new exponent is in the range +1023 to +128, or -153 to -1022, with the same sign as the exponent of 'a'. We can do this by setting bits 1-3 (the second through fourth-most significant bit) of 'd' to 101 if bit 1 of 'a' is 1, or 010 if bit 1 of 'a' is 0. */ if ((a0 >> 56 & 0x7f) - 0x3f > 1) { unsigned t = (a0 >> 32+2 & 2 << 31-1-2)*3 + (2 << 31-2); d = (d & 0x8fffffffffffffffULL | (unsigned long long)t<<32 & 0x7000000000000000ULL); } u.ull = d; return (float)u.d; }