summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Roland McGrath <roland@gnu.org> 2005-02-16 19:57:14 +0000
committer: Roland McGrath <roland@gnu.org> 2005-02-16 19:57:14 +0000
commit: 7cd274587760436effbfce65cfdbd51f761acd67 (patch)
tree: 8620654b8197c81aded8a206680e53b4294baccf
parent: aa29d418cf3ee21f906247b6ab49e8aab6fd19dc (diff)
Updated to fedora-glibc-2_3-20050216T1256
-rw-r--r--ChangeLog6074
-rw-r--r--ChangeLog.1583
-rw-r--r--Makeconfig17
-rw-r--r--catgets/gencat.c2
-rw-r--r--csu/Makefile2
-rw-r--r--csu/version.c2
-rwxr-xr-xdebug/catchsegv.sh2
-rwxr-xr-xdebug/xtrace.sh4
-rw-r--r--dirent/tst-seekdir.c28
-rw-r--r--elf/Makefile12
-rw-r--r--elf/dl-load.c22
-rw-r--r--elf/ldconfig.c2
-rw-r--r--elf/ldd.bash.in4
-rw-r--r--elf/rtld.c4
-rw-r--r--elf/sprof.c2
-rw-r--r--fedora/branch.mk10
-rw-r--r--fedora/glibc.spec.in51
-rw-r--r--hurd/sigunwind.c2
-rw-r--r--iconv/Makefile12
-rw-r--r--iconv/iconv_prog.c2
-rw-r--r--iconv/iconvconfig.c2
-rw-r--r--iconv/strtab.c2
-rw-r--r--include/signal.h3
-rw-r--r--libio/fmemopen.c22
-rw-r--r--libio/iofopncook.c20
-rw-r--r--linuxthreads/ChangeLog22
-rw-r--r--linuxthreads/Makefile2
-rw-r--r--linuxthreads/descr.h4
-rw-r--r--linuxthreads/specific.c14
-rw-r--r--linuxthreads/sysdeps/i386/Makefile1
-rw-r--r--locale/programs/locale.c2
-rw-r--r--locale/programs/localedef.c2
-rw-r--r--localedata/ChangeLog4
-rw-r--r--localedata/gen-unicode-ctype.c2
-rw-r--r--malloc/malloc.h2
-rwxr-xr-xmalloc/memusage.sh4
-rw-r--r--malloc/mtrace.pl2
-rw-r--r--math/libm-test.inc112
-rw-r--r--misc/efgcvt_r.c23
-rw-r--r--misc/qefgcvt_r.c12
-rw-r--r--misc/syslog.c414
-rw-r--r--misc/tst-efgcvt.c5
-rw-r--r--nptl/ChangeLog38
-rw-r--r--nptl/Makefile2
-rw-r--r--nptl/init.c2
-rw-r--r--nptl/sysdeps/i386/Makefile2
-rw-r--r--nptl/sysdeps/i386/tls.h7
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S9
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S9
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S9
-rw-r--r--nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S9
-rw-r--r--nscd/Makefile4
-rw-r--r--nscd/nscd.c5
-rw-r--r--nscd/nscd.init10
-rw-r--r--nscd/nscd_getai.c9
-rw-r--r--nscd/nscd_initgroups.c12
-rw-r--r--nscd/nscd_nischeck.c2
-rw-r--r--nss/getent.c2
-rw-r--r--posix/Makefile11
-rw-r--r--posix/bug-regex19.c20
-rw-r--r--posix/execl.c48
-rw-r--r--posix/execle.c51
-rw-r--r--posix/execlp.c48
-rw-r--r--posix/execvp.c96
-rw-r--r--posix/getconf.c2
-rw-r--r--posix/regcomp.c22
-rw-r--r--posix/regex_internal.h7
-rw-r--r--posix/rxspencer/tests9
-rw-r--r--posix/tst-rxspencer.c18
-rw-r--r--posix/unistd.h16
-rw-r--r--stdio-common/Makefile2
-rw-r--r--stdlib/Makefile28
-rw-r--r--stdlib/fmtmsg.c23
-rw-r--r--stdlib/random_r.c9
-rw-r--r--stdlib/tst-fmtmsg.c32
-rw-r--r--sunrpc/openchild.c2
-rw-r--r--sunrpc/svc_tcp.c2
-rw-r--r--sunrpc/svc_unix.c2
-rw-r--r--sysdeps/generic/dl-tls.c11
-rw-r--r--sysdeps/generic/libc-start.c2
-rw-r--r--sysdeps/generic/wordexp.c58
-rw-r--r--sysdeps/i386/Makefile4
-rw-r--r--sysdeps/i386/dl-machine.h10
-rw-r--r--sysdeps/ia64/fpu/Makefile35
-rw-r--r--sysdeps/ia64/fpu/e_acos.S1500
-rw-r--r--sysdeps/ia64/fpu/e_acosf.S79
-rw-r--r--sysdeps/ia64/fpu/e_acosl.S2916
-rw-r--r--sysdeps/ia64/fpu/e_asin.S1466
-rw-r--r--sysdeps/ia64/fpu/e_asinf.S73
-rw-r--r--sysdeps/ia64/fpu/e_asinl.S2833
-rw-r--r--sysdeps/ia64/fpu/e_atan2.S736
-rw-r--r--sysdeps/ia64/fpu/e_atan2f.S87
-rw-r--r--sysdeps/ia64/fpu/e_cosh.S1477
-rw-r--r--sysdeps/ia64/fpu/e_coshf.S1447
-rw-r--r--sysdeps/ia64/fpu/e_coshl.S1661
-rw-r--r--sysdeps/ia64/fpu/e_exp.S887
-rw-r--r--sysdeps/ia64/fpu/e_expf.S949
-rw-r--r--sysdeps/ia64/fpu/e_fmod.S219
-rw-r--r--sysdeps/ia64/fpu/e_fmodf.S226
-rw-r--r--sysdeps/ia64/fpu/e_fmodl.S221
-rw-r--r--sysdeps/ia64/fpu/e_hypot.S73
-rw-r--r--sysdeps/ia64/fpu/e_hypotf.S74
-rw-r--r--sysdeps/ia64/fpu/e_hypotl.S71
-rw-r--r--sysdeps/ia64/fpu/e_log.S2454
-rw-r--r--sysdeps/ia64/fpu/e_logf.S1787
-rw-r--r--sysdeps/ia64/fpu/e_logl.c1
-rw-r--r--sysdeps/ia64/fpu/e_pow.S1633
-rw-r--r--sysdeps/ia64/fpu/e_powf.S1573
-rw-r--r--sysdeps/ia64/fpu/e_powl.S4076
-rw-r--r--sysdeps/ia64/fpu/e_remainder.S114
-rw-r--r--sysdeps/ia64/fpu/e_remainderf.S114
-rw-r--r--sysdeps/ia64/fpu/e_remainderl.S116
-rw-r--r--sysdeps/ia64/fpu/e_scalb.S69
-rw-r--r--sysdeps/ia64/fpu/e_scalbf.S69
-rw-r--r--sysdeps/ia64/fpu/e_scalbl.S69
-rw-r--r--sysdeps/ia64/fpu/e_sinh.S1652
-rw-r--r--sysdeps/ia64/fpu/e_sinhf.S1614
-rw-r--r--sysdeps/ia64/fpu/e_sinhl.S1778
-rw-r--r--sysdeps/ia64/fpu/e_sqrt.S69
-rw-r--r--sysdeps/ia64/fpu/e_sqrtf.S70
-rw-r--r--sysdeps/ia64/fpu/e_sqrtl.S68
-rw-r--r--sysdeps/ia64/fpu/libm_atan2_reg.S1234
-rw-r--r--sysdeps/ia64/fpu/libm_error.c1789
-rw-r--r--sysdeps/ia64/fpu/libm_reduce.S1492
-rw-r--r--sysdeps/ia64/fpu/libm_support.h570
-rw-r--r--sysdeps/ia64/fpu/s_atan.S1193
-rw-r--r--sysdeps/ia64/fpu/s_atanf.S75
-rw-r--r--sysdeps/ia64/fpu/s_atanl.S2157
-rw-r--r--sysdeps/ia64/fpu/s_cbrt.S1224
-rw-r--r--sysdeps/ia64/fpu/s_cbrtf.S1226
-rw-r--r--sysdeps/ia64/fpu/s_cbrtl.S64
-rw-r--r--sysdeps/ia64/fpu/s_ceil.S274
-rw-r--r--sysdeps/ia64/fpu/s_ceilf.S274
-rw-r--r--sysdeps/ia64/fpu/s_ceill.S276
-rw-r--r--sysdeps/ia64/fpu/s_copysign.S6
-rw-r--r--sysdeps/ia64/fpu/s_cos.S3482
-rw-r--r--sysdeps/ia64/fpu/s_cosf.S1181
-rw-r--r--sysdeps/ia64/fpu/s_cosl.S2756
-rw-r--r--sysdeps/ia64/fpu/s_expm1.S2142
-rw-r--r--sysdeps/ia64/fpu/s_expm1f.S2062
-rw-r--r--sysdeps/ia64/fpu/s_expm1l.S1950
-rw-r--r--sysdeps/ia64/fpu/s_fabs.S116
-rw-r--r--sysdeps/ia64/fpu/s_fabsf.S83
-rw-r--r--sysdeps/ia64/fpu/s_fabsl.S83
-rw-r--r--sysdeps/ia64/fpu/s_floor.S252
-rw-r--r--sysdeps/ia64/fpu/s_floorf.S250
-rw-r--r--sysdeps/ia64/fpu/s_floorl.S250
-rw-r--r--sysdeps/ia64/fpu/s_frexp.c33
-rw-r--r--sysdeps/ia64/fpu/s_frexpf.c33
-rw-r--r--sysdeps/ia64/fpu/s_frexpl.c33
-rw-r--r--sysdeps/ia64/fpu/s_ilogb.S306
-rw-r--r--sysdeps/ia64/fpu/s_ilogbf.S306
-rw-r--r--sysdeps/ia64/fpu/s_ilogbl.S306
-rw-r--r--sysdeps/ia64/fpu/s_ldexp.S380
-rw-r--r--sysdeps/ia64/fpu/s_ldexpf.S379
-rw-r--r--sysdeps/ia64/fpu/s_ldexpl.S379
-rw-r--r--sysdeps/ia64/fpu/s_log1p.S2312
-rw-r--r--sysdeps/ia64/fpu/s_log1pf.S2028
-rw-r--r--sysdeps/ia64/fpu/s_log1pl.S2067
-rw-r--r--sysdeps/ia64/fpu/s_logb.S315
-rw-r--r--sysdeps/ia64/fpu/s_logbf.S334
-rw-r--r--sysdeps/ia64/fpu/s_logbl.S311
-rw-r--r--sysdeps/ia64/fpu/s_modf.S47
-rw-r--r--sysdeps/ia64/fpu/s_modff.S47
-rw-r--r--sysdeps/ia64/fpu/s_modfl.S49
-rw-r--r--sysdeps/ia64/fpu/s_nearbyint.S46
-rw-r--r--sysdeps/ia64/fpu/s_nearbyintf.S46
-rw-r--r--sysdeps/ia64/fpu/s_nearbyintl.S46
-rw-r--r--sysdeps/ia64/fpu/s_nextafterl.c1
-rw-r--r--sysdeps/ia64/fpu/s_nexttoward.c1
-rw-r--r--sysdeps/ia64/fpu/s_nexttowardf.c1
-rw-r--r--sysdeps/ia64/fpu/s_rint.S287
-rw-r--r--sysdeps/ia64/fpu/s_rintf.S289
-rw-r--r--sysdeps/ia64/fpu/s_rintl.S289
-rw-r--r--sysdeps/ia64/fpu/s_round.S316
-rw-r--r--sysdeps/ia64/fpu/s_roundf.S316
-rw-r--r--sysdeps/ia64/fpu/s_roundl.S316
-rw-r--r--sysdeps/ia64/fpu/s_scalbn.S379
-rw-r--r--sysdeps/ia64/fpu/s_scalbnf.S379
-rw-r--r--sysdeps/ia64/fpu/s_scalbnl.S379
-rw-r--r--sysdeps/ia64/fpu/s_significand.S61
-rw-r--r--sysdeps/ia64/fpu/s_significandf.S61
-rw-r--r--sysdeps/ia64/fpu/s_significandl.S61
-rw-r--r--sysdeps/ia64/fpu/s_sincos.c10
-rw-r--r--sysdeps/ia64/fpu/s_sincosf.c10
-rw-r--r--sysdeps/ia64/fpu/s_sincosl.c10
-rw-r--r--sysdeps/ia64/fpu/s_tan.S554
-rw-r--r--sysdeps/ia64/fpu/s_tanf.S1003
-rw-r--r--sysdeps/ia64/fpu/s_tanl.S3028
-rw-r--r--sysdeps/ia64/fpu/s_trunc.S207
-rw-r--r--sysdeps/ia64/fpu/s_truncf.S207
-rw-r--r--sysdeps/ia64/fpu/s_truncl.S209
-rw-r--r--sysdeps/mips/Makefile4
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_ceil.S27
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_ceilf.S49
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_floor.S7
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_floorf.S25
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_rint.S11
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_rintf.S29
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_round.S23
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_roundf.S53
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_trunc.S23
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_truncf.S41
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_ceil.S13
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_ceilf.S17
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_floor.S9
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_floorf.S13
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_rint.S13
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_rintf.S17
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_round.S15
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_roundf.S23
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_trunc.S13
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_truncf.S17
-rw-r--r--sysdeps/s390/bits/string.h14
-rw-r--r--sysdeps/unix/alarm.c5
-rw-r--r--sysdeps/unix/i386/sysdep.S10
-rw-r--r--sysdeps/unix/rewinddir.c1
-rw-r--r--sysdeps/unix/sysv/linux/futimes.c64
-rw-r--r--sysdeps/unix/sysv/linux/i386/clone.S2
-rw-r--r--sysdeps/unix/sysv/linux/i386/sysdep.h33
-rw-r--r--sysdeps/unix/sysv/linux/kernel-features.h5
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S8
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S8
-rw-r--r--sysdeps/x86_64/dl-machine.h12
-rw-r--r--time/strptime_l.c10
-rw-r--r--time/tst-strptime.c22
-rw-r--r--timezone/asia135
-rw-r--r--timezone/backward20
-rw-r--r--timezone/europe18
-rw-r--r--timezone/leapseconds11
-rw-r--r--timezone/northamerica39
-rw-r--r--timezone/private.h13
-rw-r--r--timezone/southamerica17
-rw-r--r--timezone/tzfile.h17
-rw-r--r--timezone/zdump.c377
-rw-r--r--timezone/zic.c89
236 files changed, 37486 insertions, 50557 deletions
diff --git a/ChangeLog b/ChangeLog
index 8bd99f21f1..226211550c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5553 +1,687 @@
-2004-12-19 Roland McGrath <roland@redhat.com>
+2005-02-09 Jakub Jelinek <jakub@redhat.com>
- * iconv/iconvconfig.c (nostdlib, output_file, output_file_len):
- New variables.
- (options, parse_opt, main): Take new options --nostdlib and
- --output/-o to set them. Under --nostdlib, skip GCONV_PATH dirs.
- (write_output): If output_file is set, write the output there.
+ [BZ #710]
+ * stdlib/random_r.c (__initstate_r): Save old state.
+ * stdlib/Makefile (tests): Add tst-random2.
+ * stdlib/tst-random2.c: New test.
+ Reported by Peter Bergner <bergner@vnet.ibm.com>.
-2004-12-19 Andreas Jaeger <aj@suse.de>
+2005-02-07 Ulrich Drepper <drepper@redhat.com>
- [BZ #560]
- * inet/netinet/in.h: Use __interface_addr instead of __interface.
+ * elf/dl-load.c (_dl_map_object_from_fd): Make sure registers are
+ set correctly.
- [BZ #573]
- * sunrpc/xcrypt.c (passwd2des_internal): Make it hidden instead of
- internal linkage.
+2005-01-07 Jakub Jelinek <jakub@redhat.com>
-2004-12-19 Roland McGrath <roland@frob.com>
+ [BZ #738]
+ * elf/dl-load.c (open_path): If rtld_search_dirs is in RELRO segment,
+ avoid writing to it if none of the standard search directories exist.
- * version.h (VERSION): 2.3.4.
- * README.template: Various updates.
- * README: Regenerated.
- * NEWS: Mention ports.
- * README-alpha: File removed.
+2005-02-07 Steven Munroe <sjmunroe@us.ibm.com>
- [BZ #416]
- * locale/langinfo.h: Comment fixes.
-
-2004-12-17 Ulrich Drepper <drepper@redhat.com>
-
- * po/ja.po: Update from translation team.
-
-2004-12-17 Richard Henderson <rth@redhat.com>
-
- * sysdeps/unix/sysv/linux/alpha/clone.S (__clone): Add support
- for NPTL where the PID is stored at userlevel and needs to be
- reset when CLONE_THREAD is not used.
-
-2004-12-17 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/sparc/sparc64/fpu/libm-test-ulps: Update.
-
-2004-12-17 Andreas Jaeger <aj@suse.de>
-
- * math/libm-test.inc (atan2_test): Compute value with 36 digits.
- * sysdeps/alpha/fpu/libm-test-ulps: Adjust for changed result.
- * sysdeps/powerpc/fpu/libm-test-ulps: Likewise.
- * sysdeps/s390/fpu/libm-test-ulps: Likewise.
- * sysdeps/sparc/sparc32/fpu/libm-test-ulps: Likewise.
- * sysdeps/sparc/sparc64/fpu/libm-test-ulps: Likewise.
- * sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
-
-2004-12-16 Ulrich Drepper <drepper@redhat.com>
-
- * stdlib/tst-setcontext.c: Enlarge st1 and st2 arrays.
-
-2004-09-02 Steven Munroe <sjmunroe@us.ibm.com>
-
- [BZ #610]
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/getcontext.S
- (__novec_getcontext): Fix typo in store of fp29.
- (__getcontext): Fix typo in store of fp29.
+ [BZ #700]
+ * sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
+ (__novec_setcontext, __setcontext): Fix typo so CCR is restored.
+ Load MSR as a doubleword.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
- (__novec_swapcontext): Fix typo in store of fp29.
- (__swapcontext): Fix typo in store of fp29.
-
-2004-12-17 GOTO Masanori <gotom@debian.or.jp>
-
- * sysdeps/unix/sysv/linux/dl-osinfo.h (DL_SYSDEP_OSCHECK): Fix
- vague message.
-
-2004-12-16 Roland McGrath <roland@redhat.com>
-
- * nscd/Makefile ($(objpfx)nscd): Don't depend on $(selinux-LIBS),
- which is usually a -lselinux that make will resolve wrongly.
-
-2004-12-16 Andreas Jaeger <aj@suse.de>
-
- * math/math.h: Use #if defined to not receive warnings about
- undefined symbols.
-
-2004-12-16 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/sparc/sparc32/clone.S (__clone): Add support
- for NPTL where the PID is stored at userlevel and needs to be reset
- when CLONE_THREAD is not used.
-
- * sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h
- (SYSCALL_ERROR_HANDLER): If RTLD_PRIVATE_ERRNO, use rtld_errno
- instead of errno.
- * sysdeps/unix/sysv/linux/sparc/sparc64/socket.S: Include
- sysdep-cancel.h instead of sysdep.h. Handle cancellation.
- * sysdeps/sparc/sparc64/fpu/libm-test-ulps: Regenerate.
-
- * sysdeps/ieee754/ldbl-128/e_expl.c: Include stdlib.h.
-
-2004-12-15 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/gethostid.c: Make bi-arch safe.
-
-2004-12-15 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/clone.S: Correct stack
- alignment. Clean up flag bit tests.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: Correct stack
- alignment. Clean up flag bit tests. Remove redundant SP assignment.
- Add TOC register save/restore around function call.
-
- * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Make no_vmx symbol
- local.
- * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Make no_vmx symbol
- local.
- * sysdeps/powerpc/powerpc64/__longjmp-common.S: Make no_vmx symbol
- local.
- * sysdeps/powerpc/powerpc64/setjmp-common.S: Make no_vmx and
- aligned_save_vmx symbol local.
-
-2004-12-15 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/i386/i486/bits/string.h (__strncat_g): Fix i686
- implementation.
-
-2004-09-08 H.J. Lu <hongjiu.lu@intel.com>
-
- * Makeconfig (libunwind): New.
- (libgcc_eh): Add $(libunwind).
- (gnulib): Always set to -lgcc $(libgcc_eh).
- (static-gnulib): Always set to -lgcc -lgcc_eh $(libunwind).
- (libc.so-gnulib): New.
- * Makerules (LDLIBS-c.so): Use $(libc.so-gnulib) instead of
- $(static-gnulib).
- * configure.in (libc_cv_cc_with_libunwind): Set to yes if gcc
- uses -lunwind for static binaries.
-
-2004-06-05 Joseph S. Myers <jsm@polyomino.org.uk>
-
- * malloc/Makefile (install-bin): Remove memusage.
- (install-bin-script): Add memusage.
-
-2004-12-15 Jakub Jelinek <jakub@redhat.com>
-
- * nis/nis_domain_of_r.c (nis_domain_of_r): Use libnsl_hidden_def,
- not libnsl_hidden_proto.
-
- * sysdeps/unix/sysv/linux/s390/s390-32/clone.S (__clone): Add support
- for NPTL where the PID is stored at userlevel and needs to be reset
- when CLONE_THREAD is not used.
- * sysdeps/unix/sysv/linux/s390/s390-64/clone.S (__clone): Likewise.
-
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S (__clone): Save
- and restore r2 around call to fn.
-
-2004-12-15 Andreas Jaeger <aj@suse.de>
-
- * sysdeps/ia64/dl-machine.h (elf_machine_rela): Mark auto instead
- of static, add always_inline attribute.
- (elf_machine_rela_relative): Likewise.
- (elf_machine_lazy_rel): Likewise.
-
-2004-12-15 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/clone.S: Add support
- for pid caching in nptl.
-
-2004-12-14 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: Add support
- for pid caching in nptl.
-
-2004-10-18 Maciej W. Rozycki <macro@mips.com>
-
- * sysdeps/unix/sysv/linux/mips/bits/socket.h (__cmsg_nxthdr): Use
- __NTH instead of __THROW in the inline definition.
- * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Likewise.
-
- * sysdeps/mips/bits/dlfcn.h (RTLD_DEEPBIND): New macro.
-
- * sysdeps/unix/sysv/linux/mips/bits/mman.h
- (PROT_GROWSDOWN, PROT_GROWSUP): New macros.
-
-2004-10-06 Alan Modra <amodra@bigpond.net.au>
-
- * sysdeps/powerpc/powerpc64/ppc-mcount.S (PROF): Don't undef.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S: Invoke CALL_MCOUNT.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/getcontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/vfork.S: Likewise.
-
-2004-10-19 Wolfram Gloger <wg@malloc.de>
-
- * malloc/hooks.c (mem2chunk_check, top_check): Handle
- non-contiguous arena. Reported by Michael Dalton
- <mwdalton@stanford.edu> [BZ #457]. Add further checks for top chunk.
-
-2004-12-14 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/posix/sysconf.c (__sysconf_check_spec): Remove leading
- underscore from GETCONF_DIR filenames.
-
-2004-12-13 Ulrich Drepper <drepper@redhat.com>
-
- * po/de.po: Update from translation team.
-
- * nss/getnssent.c (__nss_getent): Double buffer size each round to
- avoid problems with delays for some people's huge entries.
- * nss/getXXbyYY.c (FUNCTION_NAME): Likewise.
-
-2004-12-13 Jakub Jelinek <jakub@redhat.com>
-
- * posix/getconf.c (main): Prepend just $GETCONF_DIR/ instead of
- $GETCONF_DIR/_ to spec.
- * posix/confstr.c (confstr): Remove leading underscores for
- _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS.
-
-2004-12-12 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-load.c (_dl_map_object_from_fd): Fix computation of
- mapping start. It must take the actual pagesize into account, not
- the alignment in the file.
-
-2004-12-11 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (_int_realloc): Add checks for corrupted memory.
- (_int_free): Make clear message are result of free() calls.
-
- * malloc/malloc.c (_int_realloc): Remove unnecessary tests for
- oldmem and size == 0.
-
-2004-12-10 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/arena.c (arena_get2): Prevent endless loop if arenas and
- list lock are taken.
-
-2004-12-08 Thorsten Kukuk <kukuk@suse.de>
-
- * nis/nss_nisplus/nisplus-netgrp.c (_nss_nisplus_getnetgrent_r):
- Add check if the value is not an empty string. [BZ #597]
-
-2004-11-29 Jakub Jelinek <jakub@redhat.com>
-
- * stdlib/strtod_l.c (INTERNAL (__STRTOF)): If densize > 2
- and numsize < densize, always shift num up by empty + 1 limbs.
-
-2004-12-07 Paolo Bonzini <bonzini@gnu.org>
-
- * posix/regexec.c (proceed_next_node): Simplify treatment of epsilon
- nodes. Pass the pushed node to push_fail_stack.
- (push_fail_stack): Accept a single node rather than an array
- of two epsilon destinations.
- (build_sifted_states): Only walk non-epsilon nodes.
- (check_arrival): Don't pass epsilon nodes to
- check_arrival_add_next_nodes.
- (check_arrival_add_next_nodes) [DEBUG]: Abort if an epsilon node is
- found.
- (check_node_accept): Do expensive checks later.
- (add_epsilon_src_nodes): Cache result of merging the inveclosures.
- * posix/regex_internal.h (re_dfastate_t): Add non_eps_nodes and
- inveclosure.
- (re_string_elem_size_at, re_string_char_size_at, re_string_wchar_at,
- re_string_context_at, re_string_peek_byte_case,
- re_string_fetch_byte_case, re_node_set_compare, re_node_set_contains):
- Declare as pure.
- * posix/regex_internal.c (create_newstate_common): Remove.
- (register_state): Move part of it here. Initialize non_eps_nodes.
- (free_state): Free inveclosure and non_eps_nodes.
- (create_cd_newstate, create_ci_newstate): Allocate the new
- re_dfastate_t here.
-
-2004-12-09 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (public_rEALLOc): Add parameter checks.
- (_int_free): Provide better error message for invalid pointers.
-
-2004-12-01 Jakub Jelinek <jakub@redhat.com>
-
- * posix/tst-regex.c: Use defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
- conditionals instead of defined _POSIX_CPUTIME.
- (main): If _POSIX_CPUTIME == 0, call sysconf to see if CPUTIME
- option is available.
- * posix/tst-regex2.c: Use defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
- conditionals instead of defined _POSIX_CPUTIME.
- (do_test): If _POSIX_CPUTIME == 0, call sysconf to see if CPUTIME
- option is available.
- * sysdeps/posix/sysconf.c (__sysconf): If _POSIX_CPUTIME resp.
- _POSIX_THREAD_CPUTIME is defined to 0, return -1 for the corresponding
- _SC_ argument.
-
-2004-12-08 Jakub Jelinek <jakub@redhat.com>
-
- * elf/ldd.bash.in: When set -o pipefail is available, use that for
- piping to cat; when not, don't use the pipe at all.
- Pipe to cat in all cases of running the executable.
- When direct running exits with code 5, retry running via ${RTLD}.
- * elf/rtld.c (process_envvars): If __libc_enable_secure and
- mode != normal, exit with exitcode 5.
-
-2004-12-07 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/posix/sysconf.c (__sysconf_check_spec): Only define
- if it will be actually used.
-
-2004-12-07 Roland McGrath <roland@redhat.com>
-
- * rt/tst-timer5.c (setup_test): New function.
- (TEST_CLOCK_MISSING): Use it to punt test if timer_create does not
- support CLOCK_MONOTONIC.
-
-2004-12-07 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/ia64/clone2.S (__clone2): Add support for
- NPTL where the PID is stored at userlevel and needs to be reset when
- CLONE_THREAD is not used. Restore gp before calling _exit.
-
-2004-12-07 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/unix/sysv/linux/sh/clone.S: Clear the frame pointer when
- starting a new thread. Add support for NPTL where the PID is stored
- at userlevel and needs to be reset when CLONE_THREAD is not used.
-
-2004-12-01 Jakub Jelinek <jakub@redhat.com>
-
- * elf/rtld.c (process_envvars): Don't consider LD_SHOW_AUXV
- and LD_DYNAMIC_WEAK if __libc_enable_secure.
- If __libc_enable_secure, /etc/suid-debug doesn't exist and
- program will be actually run, turn off all debugging.
- * sysdeps/generic/unsecvars.h (UNSECURE_ENVVARS): Add LD_DEBUG,
- LD_DYNAMIC_WEAK and LD_SHOW_AUXV.
-
-2004-12-06 Jakub Jelinek <jakub@redhat.com>
-
- * time/tzset.c (tzset_internal): If + or - is seen,
- but no offset after it, reset offset to 0. [BZ #601]
-
-2004-12-06 Ulrich Drepper <drepper@redhat.com>
-
- * libio/ioseekpos.c (_IO_seekpos_unlocked): Call _IO_SEEKOFF not
- _IO_SEEKPOS, saving one indirect jump.
-
- * libio/fileops.c (_IO_new_file_seekoff): Fix optimization of in-buffer
- seek. Remove dead code.
-
-2004-12-02 Jakub Jelinek <jakub@redhat.com>
-
- * libio/Makefile (tests): Add bug-ungetc4.
- * libio/bug-ungetc4.c: New test.
-
-2004-12-06 Roland McGrath <roland@redhat.com>
-
- * sysdeps/unix/clock_nanosleep.c (clock_nanosleep): Diagnose EINVAL
- for CLOCK_THREAD_CPUTIME_ID, not ENOTSUP.
- Use SYSDEP_NANOSLEEP handler before validating CLOCK_ID value.
-
- * rt/tst-timer4.c (TEST_CLOCK, TEST_CLOCK_NANOSLEEP): New macros.
- Use them throughout in place of CLOCK_REALTIME and nanosleep.
- (do_test) [TEST_CLOCK_MISSING]: Call this macro and if it returns
- non-null, punt the test with a message using the string returned.
- * rt/tst-timer5.c: New file.
- * rt/Makefile (tests): Add it.
-
-2004-12-01 Paolo Bonzini <bonzini@gnu.org>
-
- * posix/regcomp.c (free_dfa_content, init_dfa): Remove
- references to re_dfa_t's subexps field.
- (parse_sub_exp, parse_expression): Do not use it. Use
- completed_bkref_map instead.
- (create_initial_state, peek_token): Store a backreference \N
- with opr.idx = N-1.
- * posix/regexec.c (proceed_next_node, check_dst_limits, get_subexp):
- Likewise.
- (check_subexp_limits): Remove useless condition.
- * posix/regex_internal.h (re_subexp_t): Remove.
- (re_dfa_t): Remove subexps and subexps_alloc field, add
- completed_bkref_map.
-
-2004-12-05 Roland McGrath <roland@frob.com>
-
- * Makeconfig: Comment typo fix.
-
-2004-11-30 Andreas Schwab <schwab@suse.de>
-
- * nis/ypclnt.c (ypprot_err): Remove unused entries.
-
-2004-11-30 Paolo Bonzini <bonzini@gnu.org>
-
- * posix/regexec.c (check_dst_limits_calc_pos_1): Check for
- bkref_idx == -1, and don't recurse in that case.
-
-2004-11-30 GOTO Masanori <gotom@debian.or.jp>
-
- * posix/confstr.c: Enclose #error message with double quote
- for gcc-3.3 and lower.
-
-2004-12-04 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/i386/clone.S: Add support for NPTL where
- the PID is stored at userlevel and needs to be reset when CLONE_THREAD
- is not used.
- * sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
-
-2004-11-18 Daniel Jacobowitz <dan@codesourcery.com>
-
- * sysdeps/arm/sysdep.h: Define __USE_BX__ if bx is available.
- Use it instead of __THUMB_INTERWORK__. Make RETINSTR take
- only a condition and a register.
- * sysdeps/arm/dl-machine.h: Use __USE_BX__ instead of
- __THUMB_INTERWORK__.
- (_dl_start_user): Use BX.
- * sysdeps/arm/strlen.S: Use DO_RET.
- * sysdeps/unix/arm/brk.S: Likewise.
- * sysdeps/unix/arm/fork.S: Likewise.
- * sysdeps/unix/arm/sysdep.S: Likewise.
- * sysdeps/unix/arm/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/arm/clone.S: Update uses of RETINSTR.
- * sysdeps/unix/sysv/linux/arm/mmap.S: Likewise.
- * sysdeps/unix/sysv/linux/arm/mmap64.S: Likewise.
- * sysdeps/unix/sysv/linux/arm/socket.S: Likewise.
- * sysdeps/unix/sysv/linux/arm/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/arm/vfork.S: Likewise.
-
-2004-12-02 Roland McGrath <roland@redhat.com>
-
- * extra-lib.mk (object-suffixes-$(lib)): Add .oS when
- $(lib)-static-only-routines is nonempty.
- (extra-objs, o-iterator.mk rule): Filter out .oS from generators.
- Add a special rule for .oS objects -> _nonshared.a library.
-
-2004-12-01 Jakub Jelinek <jakub@redhat.com>
-
- * time/mktime.c (__mktime_internal): If SEC_REQUESTED != SEC,
- convert T2, not T.
- * time/Makefile (tests): Add tst-mktime3.
- * time/tst-mktime3.c: New test.
-
-2004-12-01 Jakub Jelinek <jakub@redhat.com>
-
- * stdlib/stdlib.h (realpath): Remove nonnull attribute.
-
- * posix/fnmatch_loop.c (internal_fnmatch): Clear is_seqval after
- normal_bracket label.
-
- * time/tst-mktime2.c (bigtime_test): Initialize tm.tm_isdst to -1.
-
-2004-12-01 Roland McGrath <roland@redhat.com>
-
- * sysdeps/gnu/Makefile ($(objpfx)errlist-compat.c):
- Do $(make-target-directory).
-
-2004-11-29 Roland McGrath <roland@redhat.com>
-
- * posix/confstr.c: Avoid punctuation in #error text.
-
-2004-11-27 Ulrich Drepper <drepper@redhat.com>
-
- * posix/Makefile (tests): Add tst-sysconf.
- * posix/tst-sysconf.c: New file.
-
- * posix/getconf.c (vars): Add support for _SC_IPV6 and
- _SC_RAW_SOCKETS.
- * sysdeps/posix/sysconf.c (__sysconf): Add support for _SC_IPV6
- and _SC_RAW_SOCKETS.
- * sysdeps/generic/bits/confname.h: Define _SC_IPV6 and
- _SC_RAW_SOCKETS.
-
-2004-11-26 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/generic/unsecvars.h (UNSECURE_ENVVARS): Add GETCONF_DIR.
-
-2004-11-26 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/unix/sysv/linux/mips/pread.c: Include sgidefs.h only if
- NO_SGIDEFS_H isn't defined. Don't include sgidefs.h twice.
- * sysdeps/unix/sysv/linux/mips/pwrite.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pread64.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pwrite64.c: Likewise.
-
-2004-11-27 Ulrich Drepper <drepper@redhat.com>
-
- * include/dlfcn.h (__libc_dlopen): Add __RTLD_DLOPEN to the mode
- parameter. Reported by VY Newsum <newsum@fel.tno.nl>.
-
-2004-11-10 Daniel Jacobowitz <dan@debian.org>
-
- * libio/iolibio.h (_IO_fclose, _IO_new_fclose, _IO_old_fclose,
- _IO_fflush, _IO_fgetpos, _IO_fgetpos64, _IO_fgets, _IO_fopen,
- _IO_old_fopen, _IO_new_fopen, _IO_fopen64, __fopen_internal,
- _IO_fprintf, _IO_fputs, _IO_fsetpos, _IO_fsetpos64, _IO_ftell,
- _IO_fread, _IO_fwrite, _IO_gets, _IO_printf, _IO_puts, _IO_scanf,
- _IO_fflush_internal, _IO_ftell_internal, _IO_fputs_internal,
- _IO_fwrite_internal): Remove incorrect __THROW.
- * libio/libioP.h (_IO_default_xsputn, _IO_wdefault_xsputn,
- _IO_default_xsgetn, _IO_wdefault_xsgetn, _IO_default_write,
- _IO_default_read, _IO_getline, _IO_getline_info, _IO_getwline,
- _IO_getwline_info, _IO_file_read_internal, _IO_sgetn_internal,
- _IO_wdo_write_internal, _IO_do_write_internal,
- _IO_getline_info_internal, _IO_getline_internal,
- _IO_vfprintf_internal, _IO_putc_internal, _IO_read, _IO_write,
- _IO_close): Likewise.
-
-2004-11-26 Jakub Jelinek <jakub@redhat.com>
-
- * posix/Makefile (generated): Add getconf.speclist.
- ($(inst_libexecdir)/getconf): Use getconf.speclist instead of
- getconf output.
- ($(objpfx)getconf.speclist): New rule.
- * posix/getconf.speclist.h: New file.
-
-2004-11-26 Jakub Jelinek <jakub@redhat.com>
-
- * posix/Makefile (install-others): Add $(inst_libexecdir)/getconf.
- (CFLAGS-sysconf.c): Add -D_GETCONF_DIR.
- (CFLAGS-getconf.c): New.
- ($(inst_libexecdir)/getconf): New.
- * posix/confstr.c (confstr): Use __sysconf to query specifications
- that don't have _POSIX_V6_* macros defined. Use __*_{C,LD}FLAGS
- macros defined in bits/environments.h.
- * sysdeps/posix/sysconf.c: Include stdlib.h, string.h and
- sys/stat.h.
- (__sysconf_check_spec): New routine.
- (__sysconf): Use it.
- * posix/getconf.c (specs): Change into structure array.
- (main): If -v is not given, try to get default from
- $(libexecdir)/getconf/default. If specification is not
- supported by this getconf, try to execute
- $(libexecdir)/getconf/$(specification).
- * sysdeps/unix/bsd/bsd4.4/freebsd/bits/environments.h: New file.
- * sysdeps/unix/sysv/linux/s390/bits/environments.h: New file.
- * sysdeps/unix/sysv/linux/powerpc/bits/environments.h: New file.
- * sysdeps/unix/sysv/linux/sparc/bits/environments.h: New file.
- * sysdeps/unix/sysv/linux/i386/bits/environments.h: New file.
- * sysdeps/unix/sysv/linux/x86_64/bits/environments.h: New file.
- * sysdeps/generic/bits/environments.h (__ILP32_OFFBIG_CFLAGS):
- Define.
-
- * stdlib/stdlib.h (setenv): Use nonnull only for second argument.
- (unsetenv): Remove.
-
-2004-11-23 Paolo Bonzini <bonzini@gnu.org>
-
- * posix/regcomp.c (analyze_tree): Always call calc_epsdest.
- (calc_inveclosure): Use re_node_set_insert_last.
- (parse_dup_op): Lower X{1,5} to (X(X(X(XX?)?)?)?)?
- rather than X?X?X?X?X?.
- * posix/regex_internal.h (re_node_set_insert_last): New declaration.
- * posix/regex_internal.c (re_node_set_insert_last): New function.
- * posix/PCRE.tests: Add testcases.
-
-2004-11-25 Ulrich Drepper <drepper@redhat.com>
-
- * dlfcn/dlfcn.h: Remove nonnull attribute from dlopen.
-
- * posix/confstr.c (confstr): Return appropriate strings for
- _CS_LFS_LINTFLAGS, _CS_LFS64_CFLAGS, and _CS_LFS64_LINTFLAGS.
-
-2004-11-05 Maciej W. Rozycki <macro@mips.com>
-
- * sysdeps/mips/dl-machine.h: Include <sgidefs.h>. Use _ABIO32,
- _ABIN32 and _ABI64 for ABI selection throughout.
- * sysdeps/mips/elf/start.S: Likewise.
- * sysdeps/mips/mips64/__longjmp.c: Likewise.
- * sysdeps/mips/mips64/bsd-_setjmp.S: Likewise.
- * sysdeps/mips/mips64/bsd-setjmp.S: Likewise.
- * sysdeps/mips/mips64/setjmp.S: Likewise.
- * sysdeps/mips/mips64/setjmp_aux.c: Likewise.
- * sysdeps/mips/sys/regdef.h: Likewise.
- * sysdeps/mips/sys/ucontext.h: Likewise.
- * sysdeps/unix/mips/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/kernel-features.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/pread.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pread64.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/ptrace.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pwrite.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pwrite64.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/sigaction.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/procfs.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/ucontext.h: Likewise.
-
- * sysdeps/mips/atomicity.h: Use _ABIO32, _ABIN32 and _ABI64 for
- ABI selection throughout.
- * sysdeps/mips/bits/setjmp.h: Likewise.
- * sysdeps/mips/fpu/bits/mathdef.h: Likewise.
- * sysdeps/mips/machine-gmon.h: Likewise.
- * sysdeps/mips/sys/asm.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/fcntl.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/sigcontext.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/stat.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/kernel_stat.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sigcontextinfo.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/ptrace.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/tas.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/user.h: Likewise.
-
- * sysdeps/mips/sgidefs.h: Prevent <asm/sgidefs.h> from being
- included by kernel headers and undo its settings if already
- included. Define _ABIO32, _ABIN32 and _ABI64 if missing and use
- them to define _MIPS_SIM_ABI32, _MIPS_SIM_NABI32 and
- _MIPS_SIM_ABI64 for compatibility.
- * sysdeps/unix/sysv/linux/mips/Makefile: Use _ABIO32, _ABIN32 and
- _ABI64 for ABI selection in generated syscall-list.h.
- * sysdeps/unix/sysv/linux/mips/configure.in: Use _ABIO32, _ABIN32
- and _ABI64 for ABI selection in generated asm-unistd.h.
- * sysdeps/unix/sysv/linux/mips/configure: Regenerate.
-
- * sysdeps/unix/sysv/linux/mips/pwrite.c (__libc_pwrite): Correct
- an inverted _MIPS_SIM conditional.
-
-2004-11-23 Alexandre Oliva <aoliva@redhat.com>
-
- * sysdeps/generic/dl-tls.c (_dl_determine_tlsoffset): Use free
- range even if it doesn't match exactly.
-
-2004-11-23 Jakub Jelinek <jakub@redhat.com>
-
- * nss/nss_files/files-XXX.c (internal_getent): If parse_line returned
- -1, also do H_ERRNO_SET (NETDB_INTERNAL).
-
-2004-11-22 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/i386/fpu_control.h: Add volatile to the asms.
- Patch by Alexander Stohr.
-
-2004-11-22 Jakub Jelinek <jakub@redhat.com>
-
- * nscd/nscd_getai.c (__nscd_getai): Avoid memory and file descriptor
- leaks.
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Free air.
-
-2004-11-15 Maciej W. Rozycki <macro@mips.com>
-
- * sysdeps/unix/sysv/linux/mips/bits/siginfo.h (__SI_MAX_SIZE):
- Define appropriately based on __WORDSIZE.
- [struct siginfo] (__pad0): Add for explicit padding.
-
- * sysdeps/unix/sysv/linux/mips/bits/siginfo.h: Formatting fixes
- throughout.
-
-2004-11-22 Ulrich Drepper <drepper@redhat.com>
-
- * dirent/dirent.h: Add nonnull attributes.
- * dlfcn/dlfcn.h: Likewise.
-
-2004-11-20 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/ieee754/k_standard.c: Document code 50.
- (__kernel_standard) <case 50>: Avoid raising div-by-zero
- exception again.
-
-2004-11-19 H.J. Lu <hongjiu.lu@intel.com>
-
- [BZ #552]
- * math/libm-test.inc (tgamma_test): Update tgamma (0) and
- tgamma (-0).
- * sysdeps/generic/w_tgamma.c (__tgamma): Properly handle |x| == 0.
- * sysdeps/generic/w_tgammaf.c (__tgammaf): Likewise.
- * sysdeps/generic/w_tgammal.c (__tgammal): Likewise.
- * sysdeps/ieee754/dbl-64/e_gamma_r.c (__ieee754_gamma_r): Likewise.
- * sysdeps/ieee754/flt-32/e_gammaf_r.c: Likewise.
- * sysdeps/ieee754/ldbl-128/e_gammal_r.c: Likewise.
- * sysdeps/ieee754/ldbl-96/e_gammal_r.c: Likewise.
- * sysdeps/ieee754/k_standard.c (__kernel_standard): Handle
- tgamma (0) and tgamma (-0).
-
-2004-11-20 Ulrich Drepper <drepper@redhat.com>
-
- * time/tzfile.c (__tzfile_read): Avoid open for checking whether
- the file we already use changed.
-
- * misc/syslog.c: Remove !USE_IN_LIBIO code.
-
-2004-11-20 Jakub Jelinek <jakub@redhat.com>
-
- * signal/signal.h (__sysv_signal, sysv_signal, signal, bsd_signal,
- ssignal): Remove __nonnull attribute.
-
-2004-11-20 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/unix/sysv/linux/sh/sys/procfs.h: New file.
-
-2004-11-20 Ulrich Drepper <drepper@redhat.com>
-
- * signal/signal.h: Add nonnull attributes.
-
- * signal/signal.h: Add deprecated attributes to sigstack,
- sigpause, sigblock, sigsetmask, siggetmask.
-
-2004-11-20 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/bits/socket.h (SCM_RIGHTS): Avoid
- comma at the end of enum if __USE_BSD is not defined.
-
-2004-11-19 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (_int_malloc): Check for corruption of chunk
- which is about to be returned.
-
- * malloc/malloc.c (_int_free): Add a few more cheap tests for
- corruption.
-
-2004-11-17 Randolph Chung <tausq@debian.org>
-
- * sysdeps/hppa/dl-machine.h (TRAMPOLINE_TEMPLATE): Add unwind
- annotations.
-
-2004-11-18 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #544]
- * posix/regex.h (RE_NO_SUB): New define.
- * posix/regex_internal.h (OP_DELETED_SUBEXP): New.
- (re_dfa_t): Add subexp_map.
- * posix/regcomp.c (struct subexp_optimize): New type.
- (optimize_subexps): New routine.
- (re_compile_internal): Call it.
- (re_compile_pattern): Set preg->no_sub to 1 if RE_NO_SUB.
- (free_dfa_content): Free subexp_map.
- (calc_inveclosure, calc_eclosure): Skip OP_DELETED_SUBEXP nodes.
- * posix/regexec.c (re_search_internal): If subexp_map
- is not NULL, duplicate registers as needed.
- * posix/Makefile: Add rules to build and run tst-regex2.
- * posix/tst-regex2.c: New test.
- * posix/rxspencer/tests: Fix last two tests (\0 -> \1).
- Add some new tests for nested subexpressions.
-
-2004-11-18 Ulrich Drepper <drepper@redhat.com>
-
- * libio/libio.h (_IO_FLAGS2_FORTIFY): Renamed from
- _IO_FLAGS2_CHECK_PERCENT_N.
- * debug/fprintf_chk.c: Adjust all users.
- * debug/printf_chk.c: Likewise.
- * debug/vfprintf_chk.c: Likewise.
- * debug/vprintf_chk.c: Likewise.
- * debug/vsnprintf_chk.c: Likewise.
- * debug/vsprintf_chk.c: Likewise.
- * stdio-common/vfprintf.c: Likewise. Detect missing %N$ formats.
- * debug/tst-chk1.c: Test detection of missing %N$ formats.
-
-2004-11-15 Jakub Jelinek <jakub@redhat.com>
-
- * posix/bug-regex24.c: Include string.h.
-
- * nis/nis_clone_obj.c (nis_clone_object): Rename out3 label to out2
- and out2 to out. Remove out label. Formatting.
-
-2004-11-15 Ulrich Drepper <drepper@redhat.com>
-
- * include/stdio.h: Do not mark __libc_message as noreturn.
- * sysdeps/unix/sysv/linux/libc_fatal.c (__libc_fatal): Add loop to
- fool gcc. Include <stdbool.h>.
- * sysdeps/posix/libc_fatal.c (__libc_fatal): Add loop to fool gcc.
- (__libc_message): Fix typo.
-
-2004-11-13 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (malloc_state): stat_lock_* elements need only
- be defined if THREAD_STATS is defined. Remove pad0_ since it does
- not align with cache line sizes in general anyway.
-
-2004-11-13 Jakub Jelinek <jakub@redhat.com>
-
- * elf/rtld.c (print_statistics): Avoid segfaults if not all namespaces
- are used. Fix computation of num_relative_relocations on RELA
- architectures other than IA-64 and Alpha.
-
-2004-11-13 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (_int_free): Use munmap_chunk for handling
- mmaped memory.
-
-2004-11-12 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (_int_free): Remove test for NULL parameter.
- (_int_realloc): Call _int_free only if memory parameter is not NULL.
-
- * sysdeps/unix/sysv/linux/libc_fatal.c: Add new function __libc_message
- which performs the printing and simple format string handling. The
- string is written to tty, stderr, syslog in this order, stopping after
- the first successful output.
- (__libc_fatal): Call __libc_message.
- * include/stdio.h: Declare __libc_message.
- * malloc/malloc.c (malloc_printerr): Use __libc_message.
- * debug/chk_fail.c: Also print message with __libc_message.
- * debug/test-strcpy_chk.c: Ensure that debug messages are not printed
- to the terminal or stderr.
- * debug/tst-chk1.c: Likewise.
-
- * posix/Makefile: Remove gpl2lgpl variable.
-
-2004-11-12 Martin Schwidefsky <schwidefsky@de.ibm.com>
-
- * elf/elf.h: Add 20 bit relocations R_390_*20.
-
-2004-11-12 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/i386/setuid.c: Include linux/posix_types.h.
- * sysdeps/unix/sysv/linux/i386/setgid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setreuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setregid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setresuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setresgid.c: Likewise.
-
-2004-11-12 Andreas Schwab <schwab@suse.de>
-
- * nis/ypclnt.c (ypprot_err): Fix "minor optimizations".
-
-2004-11-12 Ulrich Drepper <drepper@redhat.com>
-
- * posix/Makefile (tests): Add bug-regex24.
- * posix/bug-regex24.c: New file.
-
-2004-11-12 Paolo Bonzini <bonzini@gnu.org>
-
- * posix/regexec.c (check_dst_limits_calc_pos_1): Use the map to
- cut recursive paths. Make exit condition more precise.
- (match_ctx_add_entry): Initialize the map.
- * posix/regex_internal.h (struct re_backref_cache_entry): Add a map of
- reachable subexpression nodes from each backreference cache entry.
-
-2004-11-10 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/setreuid.c: Remove sys/syscall.h,
- sys/types.h, linux/posix_types.h, sysdep.h and pthread-functions.h
- includes. Include setxid.h. Use INLINE_SETXID_SYSCALL macro
- instead of INLINE_SYSCALL, kill the HAVE_PTR__NPTL_SETXID guarded
- snippets.
- * sysdeps/unix/sysv/linux/setegid.c: Likewise.
- * sysdeps/unix/sysv/linux/setuid.c: Likewise.
- * sysdeps/unix/sysv/linux/seteuid.c: Likewise.
- * sysdeps/unix/sysv/linux/setgid.c: Likewise.
- * sysdeps/unix/sysv/linux/setresuid.c: Likewise.
- * sysdeps/unix/sysv/linux/setresgid.c: Likewise.
- * sysdeps/unix/sysv/linux/setregid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setegid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setreuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/seteuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setgid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setresuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setresgid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setregid.c: Likewise.
- * sysdeps/unix/sysv/linux/alpha/setreuid.c: Likewise.
- Formatting. Change signed int into int.
- * sysdeps/unix/sysv/linux/alpha/setresuid.c: Likewise.
- * sysdeps/unix/sysv/linux/alpha/setresgid.c: Likewise.
- * sysdeps/unix/sysv/linux/alpha/setregid.c: Likewise.
- * sysdeps/unix/sysv/linux/syscalls.list (setresuid, setresgid):
- Remove.
- * sysdeps/unix/setxid.h: New file.
-
- * Rules (binaries-static): Add xtests-static.
- * Makeconfig (run-program-prefix): Filter also xtests-static.
-
-2004-11-09 Paul Eggert <eggert@cs.ucla.edu>
-
- [BZ #535]
- * time/difftime.c: Fix a double-rounding bug on hosts with
- 64-bit time_t and long double being IEEE double. Also, port
- to more valid C99 hosts, even those that have padding bits.
- Don't include <values.h> since it is marked as an obsolescent
- interface. Include <limits.h>, <float.h>, and <stdint.h> instead.
- (TYPE_BITS, TYPE_FLOATING, TYPE_SIGNED): New macros.
- (subtract): New static function, that works correctly without
- double-rounding, even on hosts with 64-bit time_t. Also cater
- to hosts with padding bits.
- (__difftime): Use it. Use DBL_MANT_DIG and LDBL_MANT_DIG to
- determine whether floating types are wide enough: the old
- test (which used sizeof) could in theory report the wrong results
- on hosts with padding bits in floating-point values.
-
-2004-11-11 Simon Josefsson <jas@extundo.com>
-
- [BZ #542]
- * sysdeps/generic/strtok_r.c [HAVE_CONFIG_H]: Include config.h.
- [!_LIBC]: Include strtok_r.h (in gnulib), map __strtok_r to
- strtok_r and __rawmemchr to strchr.
- (__strtok_r): Use C89 prototype.
- [weak_alias]: Move calls to libc_hidden_def and weak_alias into
- this #ifdef.
-
-2004-11-10 Paul Eggert <eggert@cs.ucla.edu>
-
- [BZ #541]
- * time/mktime.c (SHR): New macro, which is a portable
- substitute for >> that should work even on Crays.
- (TIME_T_MIDPOINT, ydhms_diff, __mktime_internal): Use it.
- Problem reported by Mark D. Baushke in
- <http://lists.gnu.org/archive/html/bug-gnulib/2004-11/msg00071.html>.
-
-2004-11-09 Paolo Bonzini <bonzini@gnu.org>
-
- * posix/regexec.c (match_ctx_free_subtops): Remove, merge into...
- (match_ctx_clean): ... this function.
- (match_ctx_free): Call match_ctx_clean.
-
- * posix/regexec.c (transit_state): Remove the check for
- out-of-bounds buffers.
- (check_matching): Check here for out-of-bounds buffers.
- (re_search_internal): Store into match_kind a set of bits
- indicating which incantation of fastmap scanning must be
- used. Use a switch statement instead of multiple ifs.
- Exit the final "for (;;)" with goto free_return unless
- the match succeeded, thus simplifying some conditionals.
-
- * posix/regex_internal.c (re_string_reconstruct,
- re_string_context_at): Add several branch predictions for
- case-sensitive matching and no transition table being used.
-
-2004-11-10 Ulrich Drepper <drepper@redhat.com>
-
- * posix/tst-waitid.c: Don't use error to print error messages, they
- won't end up in the .out file.
-
-2004-11-09 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd-client.h (libc_locked_map_ptr): Add new first
- parameter, used as class for definition.
- * nscd/nscd_getpw_r.c: Adjust for libc_locked_map_ptr change.
- (pw_map_free): Ensure no crash after memory is freed.
- * nscd/nscd_getgr_r.c: Likewise. Make map externally visible.
- * nscd/nscd_gethst_r.c: Likewise.
- * nscd/nscd_getai.c: Use map from nscd_gethost.c.
- * nscd/nscd_initgroups.c: Use map from nscd_getgr.c.
-
- * nscd/nscd_getai.c: Add some checks to detect corrupt databases.
- * nscd/nscd_getgr_r.c: Likewise.
- * nscd/nscd_gethst_r.c: Likewise.
- * nscd/nscd_getpw_r.c: Likewise.
-
-2004-11-09 Jakub Jelinek <jakub@redhat.com>
-
- * posix/regcomp.c (calc_eclosure_iter): Don't access
- dfa->edests[node].elems[0] if dfa->edests[node].nelem == 0.
- * posix/rxspencer/tests: Add 5 new tests.
-
-2004-11-09 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/ifaddrs.c: Determine sin6_scope_id field
- value correctly. Patch by Mitsuru Kanda <mk@karaba.org>.
-
-2004-11-04 Jakub Jelinek <jakub@redhat.com>
-
- * libio/fileops.c (_IO_new_file_seekoff): If mode is 0 and
- fp->_offset == _IO_pos_BAD, just call _IO_SYSSEEK (fp, 0, dir)
- and if successful set fp->_offset.
- * libio/Makefile (tests): Add bug-ungetc3.
- * libio/bug-ungetc3.c: New test.
-
-2004-11-03 Marcus Brinkmann <marcus@gnu.org>
-
- * sysdeps/gnu/_G_config.h (_G_HAVE_MREMAP): Define symbol.
- * sysdeps/mach/hurd/_G_config.h: New file.
- * libio/fileops.c (mmap_remap_check) [__linux__]: Replaced with
- [_G_HAVE_MREMAP].
-
-2004-11-08 Ulrich Drepper <drepper@redhat.com>
-
- * posix/regcomp.c (utf8_sb_map): Define.
- (free_dfa_content): Don't free dfa->sb_char if it's a pointer to
- utf8_sb_map.
- (init_dfa): Use utf8_sb_map instead of initializing memory when the
- encoding is UTF-8.
-
-2004-11-03 Paolo Bonzini <bonzini@gnu.org>
-
- * posix/regcomp.c (init_dfa): Get the codeset name outside glibc as
- well. Check if it is spelled UTF8 as well as UTF-8, and check
- case-insensitively. Set dfa->map_notascii manually when outside
- glibc.
- * posix/regex_internal.c (build_wcs_upper_buffer) [!_LIBC]: Enable
- optimizations based on map_notascii.
- * posix/regex_internal.h [HAVE_LANGINFO_H || HAVE_LANGINFO_CODESET
- || _LIBC]: Include langinfo.h.
-
- * posix/regex_internal.h (struct re_backref_cache_entry): Add "more"
- field.
- * posix/regexec.c (check_dst_limits): Hoist computation of the source
- and destination bkref_idx out of the loop. Pass it to
- check_dst_limits_calc_pos.
- (check_dst_limits_calc_pos_1): New function, containing the recursive
- loop of check_dst_limits_calc_pos; uses the "more" field of
- struct re_backref_cache to control the loop.
- (check_dst_limits_calc_pos): Store into "boundaries" the position
- relative to lim's start and end positions. Do not accept eclosures,
- accept bkref_idx instead. Call check_dst_limits_calc_pos_1 to do the
- work.
- (sift_states_bkref): Use the "more" field of struct re_backref_cache
- to control the loop. A big "if" was turned into a continue and the
- function was reindented.
- (get_subexp): Use the "more" field of struct re_backref_cache
- to control the loop.
- (match_ctx_add_entry): Initialize the bkref_ents' "more" field.
- (search_cur_bkref_entry): Return -1 if out of bounds.
-
- * posix/regexec.c (empty_set): Remove.
- (sift_states_backward): Remove cur_src variable. Move inner loop
- to build_sifted_states.
- (build_sifted_states): Extract from sift_states_backward. Do not
- use empty_set.
- (update_cur_sifted_state): Do not use empty_set. Special case
- dest_nodes->nelem == 0.
-
- * posix/regex_internal.h (struct re_backref_cache_entry): Remove flag
- field.
- (struct re_sift_context_t): Remove cur_bkref, cls_subexp_idx,
- check_subexp fields. Move limits last.
- * posix/regexec.c (match_ctx_clear_flag): Remove.
- (sift_ctx_init): Remove check_subexp parameter. Do not set removed
- fields. Callers adjusted.
- (expand_bkref_cache): Remove last_str parameter. Callers adjusted.
- (re_search_internal): Remove fast_translate variable.
- (update_cur_sifted_state): Pass candidates as the final parameter
- to sift_states_bkref.
- (sift_states_bkref): Change last unused parameter to be "candidates",
- do not fetch candidates into a local variable.
- Remove dead test for "node == sctx->bkref", and the cur_bkref_idx
- variable.
- Remove loops that set/reset the flag field of backref cache entries.
- (check_arrival_add_next_nodes): Use a signed int to hold the return
- value of re_node_set_insert.
- (group_nodes_into_DFAstates): Likewise.
- (match_ctx_add_entry): Do not set the flag field of the new entry.
-
-2004-11-05 Roland McGrath <roland@redhat.com>
-
- * sysdeps/generic/ldsodefs.h (struct rtld_global_ro): Define
- _dl_sysinfo_dso under [NEED_DL_SYSINFO_DSO] as well.
- * elf/rtld.c (dl_main): Set up GLRO(dl_sysinfo_dso) under
- [NEED_DL_SYSINFO_DSO] as well.
- * sysdeps/generic/dl-sysdep.c (_dl_show_auxv): Always include
- AT_SYSINFO and AT_SYSINFO_EHDR in name table.
- (_dl_sysdep_start) [NEED_DL_SYSINFO_DSO]: Match AT_SYSINFO_EHDR.
- * elf/dl-support.c (_dl_sysinfo_dso): Define also under
- [NEED_DL_SYSINFO_DSO].
- (_dl_aux_init) [NEED_DL_SYSINFO || NEED_DL_SYSINFO_DSO]:
- Match AT_SYSINFO_EHDR and set GL(dl_sysinfo_dso).
-
-2004-11-05 Roland McGrath <roland@redhat.com>
-
- * manual/errno.texi (Error Codes): Revert last change for now.
- * sysdeps/gnu/errlist.c: Regenerated.
-
-2004-11-04 Roland McGrath <roland@frob.com>
-
- * Makeconfig (link-libc, rpath-dirs): Remove AIX cruft definitions.
- (LDFLAGS-rpath-ORIGIN, LDFLAGS-soname-fname): Likewise.
- (LDFLAGS-rdynamic, LDFLAGS-Bsymbolic): Likewise.
- ($(common-objpfx)gnu/lib-names.stmp): Likewise.
-
-2004-11-01 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/x86_64/sys/procfs.h [__WORDSIZE == 32]
- (elf_fpxregset_t): New type.
- (struct elf_prpsinfo): If __WORDSIZE == 32, change pr_[ug]id type
- to unsigned short int.
- * sysdeps/unix/sysv/linux/x86_64/sys/user.h
- (struct user_fpregs_struct): Fix comment.
- * sysdeps/i386/fpu/bits/mathdef.h (float_t, double_t): If
- __FLOAT_EVAL_METHOD__ is defined and 0, typedef to float resp. double.
- * sysdeps/x86_64/fpu/bits/mathdef.h: Include bits/wordsize.h.
- (float_t, double_t): If -m32 and not -mfpmath=sse, typedef to
- long double.
- * sysdeps/x86_64/fpu/bits/fenv.h: Include bits/wordsize.h.
- (fenv_t): Remove __mxcsr field for -m32.
-
-2004-11-04 Jakub Jelinek <jakub@redhat.com>
-
- * libio/ftello.c (ftello): Don't subtract save_end - save_base
- if pos is _IO_pos_BAD.
- * libio/ftello64.c (ftello64): Likewise.
- * libio/iofgetpos.c (_IO_new_fgetpos): Likewise.
- * libio/iofgetpos64.c (_IO_new_fgetpos64): Likewise.
- * libio/oldiofgetpos.c (_IO_old_fgetpos): Likewise.
- * libio/oldiofgetpos64.c (_IO_old_fgetpos64): Likewise.
- * libio/ioftell.c (_IO_ftell): Likewise.
- Cast to long int instead of off_t when checking for overflow.
-
-2004-11-04 Richard Henderson <rth@redhat.com>
-
- * sysdeps/unix/sysv/linux/alpha/register-dump.h (regnames): Align.
- (linefeed): Remove.
- (register_dump): Rewrite to generate into a flat buffer instead
- of into iovecs.
-
-2004-11-02 Jakub Jelinek <jakub@redhat.com>
-
- * debug/tst-chk1.c (ret): New volatile variable.
- (CHK_FAIL_END): Remove redundant ret setting.
- (do_test): Remove ret variable.
-
-2004-01-03 Paolo Bonzini <bonzini@gnu.org>
-
- * posix/regex_internal.h (__regfree) [!_LIBC]: Define to regfree.
-
-2004-11-03 Marcus Brinkmann <marcus@gnu.org>
-
- * sysdeps/generic/utime.c: Include <stddef.h>.
-
- * sysdeps/generic/sysconf.c: Include <grp.h> and <pwd.h>.
-
- * sysdeps/generic/tempname.c (__path_search): Add missing argument
- TRY_TMPDIR.
-
-2004-11-02 Jakub Jelinek <jakub@redhat.com>
-
- * include/features.h (__USE_FORTIFY_LEVEL): Also set for Red Hat
- GCC 3.4.x-RH >= 3.4.2-8.
- * libio/bits/features.h (printf, fprintf, vprintf, vfprintf): For
- GCC 3.4.x-RH use __builtin___{,v}{,f}printf_chk instead of
- __{,v}{,f}printf_chk.
- * debug/tst-chk1.c (do_test): Deal with GCC 3.4.x-RH not
- being able to recognize subobjects.
-
-2004-10-31 Mariusz Mazur <mmazur@kernel.pl>
-
- * sysdeps/unix/sysv/linux/alpha/setregid.c: New file.
- * sysdeps/unix/sysv/linux/alpha/setresgid.c: New file.
- * sysdeps/unix/sysv/linux/alpha/setresuid.c: New file.
- * sysdeps/unix/sysv/linux/alpha/setreuid.c: New file.
-
-2004-10-27 Derek R. Price <derek@ximbiot.com>
-
- [BZ #487] This change is imported from gnulib.
- * time/mktime.c (not_equal_tm) [DEBUG]: Remove redundant check.
-
-2004-10-24 Paul Eggert <eggert@cs.ucla.edu>
-
- [BZ #473]
- * time/tst-mktime.c (main): Don't assume that mktime fails
- when given time stamps before 1970. It returns negative
- time_t values instead, for compatibility with BSD.
-
- * time/tst-mktime2.c: New file.
- * time/Makefile (tests): Add it.
-
- [BZ #473] Import from gnulib. Revamp to avoid several problems near
- time_t extrema, and on hosts with 64-bit time_t and 32-bit int.
- This fixes Debian bug 177940.
- * time/mktime.c (TIME_T_MIDPOINT): New macro.
- (ydhms_diff): Renamed from ydhms_tm_diff, with a new signature,
- which avoids overflow problems on hosts with 64-bit time_t and
- 32-bit int. All callers changed. Now an inline function.
- Verify at compile-time that long int is wide enough to avoid
- these overflow problems.
- (guess_time_tm): New function.
- (__mktime_internal): Use it. Avoid overflow when computing yday on
- hosts with 64-bit long and 32-bit int. Remove tests for 69;
- no longer needed. Use if rather than #ifdef for LEAP_SECONDS_POSSIBLE
- so that the code is checked by more compilers.
- Do not rely on floating point to probe: stick to integer arithmetic,
- to avoid potential porting problems.
- Repair potential overflow correctly in the Southern Hemisphere.
- (localtime_offset): Add a FIXME for the case where time_t is unsigned.
-
-2004-10-30 Andreas Schwab <schwab@suse.de>
-
- * sysdeps/m68k/dl-machine.h (elf_machine_rela)
- (elf_machine_rela_relative, elf_machine_lazy_rel): Mark auto
- instead of static.
-
-2004-10-30 Andreas Schwab <schwab@suse.de>
-
- * sysdeps/unix/sysv/linux/waitid.c: Include <stddef.h> for NULL.
-
-2004-10-30 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (_int_free): Use unique comments for the error
- cases.
-
-2004-10-28 Roland McGrath <roland@frob.com>
-
- * sysdeps/mach/hurd/i386/tls.h (_hurd_tls_fork): Use i386_thread_state
- instead of machine_thread_state.
-
-2004-10-28 Roland McGrath <roland@redhat.com>
-
- * sysdeps/unix/sysv/linux/syscalls.list: Remove setaltroot.
-
-2004-10-28 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-open.c (dl_open_worker): Remove reference to glibcbug script.
-
-2004-10-27 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-load.c (_dl_map_object): Use cache_rpath to check for
- existing rpath in main executable, not explicit test.
-
-2004-10-27 Jakub Jelinek <jakub@redhat.com>
-
- * include/resolv.h (_res_opcodes): New extern.
- Add libresolv_hidden_proto.
- * resolv/res_debug.c (_res_opcodes): Remove.
- (__p_class_syms, __p_type_syms): Add libresolv_hidden_proto
- and libresolv_hidden_data_def. Remove attribute_hidden.
- * resolv/res_mkquery.c (_res_opcodes): Remove.
- * resolv/res_data.c (_res_opcodes): Remove attribute_hidden.
- Add libresolv_hidden_data_def.
-
-2004-10-27 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-open.c (_dl_open): Don't allow explicitly opening a DSO
- into an empty namespace.
-
- * elf/dl-fini.c (_dl_fini): Fix search for map in maps array.
- Reverse order of namespaces.
- * elf/Makefile: Add rules to build and run tst-dlmopen3.
- * elf/tst-dlmopen3.c: New file.
- * elf/tst-dlmopen1mod.c: Add check whether constructor runs.
-
-2004-10-27 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/generic/glob.c (globfree): Clear gl_pathv after freeing it.
- * posix/Makefile: Add rules to build and run bug-glob2 test.
- * posix/bug-glob2.c: New test.
-
-2004-10-27 Roland McGrath <roland@frob.com>
-
- * sysdeps/mach/hurd/i386/tls.h (HURD_TLS_DESC_DECL): New macro.
- (_hurd_tls_init): Use it.
- (_hurd_tls_fork): New function.
- * sysdeps/mach/hurd/fork.c (__fork) [USE_TLS]: Call it.
-
-2004-10-26 Roland McGrath <roland@frob.com>
-
- * sysdeps/mach/hurd/i386/tls.h (_hurd_tls_init): Initialize TCB->tcb.
-
-2004-10-26 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/gnu/netinet/udp.h (struct udphdr): Use u_int16_t
- type instead of uint16_t. Formatting.
-
-2004-10-25 Roland McGrath <roland@redhat.com>
-
- * login/openpty.c (openpty): Add libutil_hidden_def.
-
- * nss/nss_files/files-parse.c (nss_files_parse_hidden_def): Define to
- either libc_hidden_def or libnss_files_hidden_def, not hidden_def.
- This file is also compiled into libnss_hesiod by #include.
-
-2004-10-25 Roland McGrath <roland@frob.com>
-
- * sysdeps/unix/bsd/hp: Directory and all files removed.
- These are now in the ports repository.
- * sysdeps/unix/bsd/osf: Likewise.
- * sysdeps/unix/bsd/sequent: Likewise.
- * sysdeps/unix/bsd/sony: Likewise.
- * sysdeps/unix/bsd/ultrix4: Likewise.
- * sysdeps/unix/sysv/aix: Likewise.
- * sysdeps/unix/sysv/hpux: Likewise.
- * sysdeps/unix/sysv/irix4: Likewise.
- * sysdeps/unix/sysv/isc2.2: Likewise.
- * sysdeps/unix/sysv/minix: Likewise.
- * sysdeps/unix/sysv/sco3.2.4: Likewise.
- * sysdeps/unix/sysv/sco3.2: Likewise.
- * sysdeps/unix/sysv/sysv4: Likewise.
-
- * configure.in (ASM_LINE_SEP): Move this setting to ...
- * sysdeps/hppa/configure.in: ... here, new file.
- * sysdeps/hppa/configure: New generated file.
- * configure: Regenerated.
-
-2004-10-25 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/sh/dl-machine.h: Include sysdep.h.
- (ELF_MACHINE_RUNTIME_TRAMPOLINE): Add CFI directives.
- (elf_machine_runtime_setup): Add always_inline attribute.
- (_dl_start_user): Pass the correct environ.
- (elf_machine_rela): Replace static inline by auto inline, add
- always_inline attribute.
- (elf_machine_rela_relative): Likewise.
- (elf_machine_lazy_rel): Likewise.
-
-2004-10-24 Ulrich Drepper <drepper@redhat.com>
-
- * nis/nis_call.c: Pretty printing. Minor cleanups.
- * nis/nis_addmember.c (nis_addmember): Add assert to check buffer
- bounds.
-
- * resolv/nss_dns/dns-host.c: Avoid using PLTs.
- * include/libc-symbols.h: Define hidden attribute macros for
- libnss_nisplus and libutil.
- * include/utmp.h: Add libutil_hidden_proto for login_tty.
- * login/login_tty.c: Add libutil_hidden_def.
- * nis/nisplus-parser.h: Add libnss_nisplus_hidden_proto for parsers.
- * nis/nss_nisplus/nisplus-parser.c: Add libnss_nisplus_hidden_def.
- * include/pty.h: New file.
- * include/rpcsvc/yp.h: New file.
- * include/rpcsvc/ypclnt.h: New file.
- * include/rpcsvc/ypupd.h: New file.
- * include/libc-symbols.h: Define hidden attribute macros for libnsl.
- * include/rpcsvc/nislib.h: Use libnsl_hidden_proto for various
- functions.
- * nis/nis_add.c: Add libnsl_hidden_def. Minor optimizations.
- * nis/nis_call.c: Likewise.
- * nis/nis_clone_obj.c: Likewise.
- * nis/nis_defaults.c: Likewise.
- * nis/nis_domain_of_r.c: Likewise.
- * nis/nis_error.c: Likewise.
- * nis/nis_file.c: Likewise.
- * nis/nis_free.c: Likewise.
- * nis/nis_local_names.c: Likewise.
- * nis/nis_lookup.c: Likewise.
- * nis/nis_modify.c: Likewise.
- * nis/nis_print.c: Likewise.
- * nis/nis_remove.c: Likewise.
- * nis/nis_subr.c: Likewise.
- * nis/nis_table.c: Likewise.
- * nis/nis_util.c: Likewise.
- * nis/yp_xdr.c: Likewise.
- * nis/ypclnt.c: Likewise.
- * nis/ypupdate_xdr.c: Likewise.
-
- * resolv/res_send.c (send_dg): Cope with failures.
-
- * include/libc-symbols.h: Define hidden attribute macros for
- libnss_files.
- * include/netdb.h: Use libnss_files_hidden_proto for the parsers
- defined in libnss_files, not libc_hidden_proto.
- * include/netinet/ether.h: Likewise.
- * include/rpc/netdb.h: Likewise.
- * nss/nss_files/files-parse.c: Use hidden_def in parser definitions
- instead of libc_hidden_def.
- * nss/nss_files/files-netgrp.c: Add libnss_files_hidden_def to
- _nss_netgroup_parseline definition.
-
-2004-10-23 Roland McGrath <roland@frob.com>
-
- * sysdeps/mach/hurd/i386/tls.h (_hurd_tls_init): Don't return early
- after an RPC succeeds.
-
- * sysdeps/vax, sysdeps/unix/bsd/vax: Directories and all files removed.
- These are now in the ports repository.
- * sysdeps/tahoe, sysdeps/unix/bsd/tahoe: Likewise.
- * sysdeps/cris, sysdeps/unix/sysv/linux/cris: Likewise.
- * sysdeps/am29k, sysdeps/i860, sysdeps/i960, sysdeps/m88k: Likewise.
- * sysdeps/standalone, sysdeps/z8000: Likewise.
-
-2004-10-23 Ulrich Drepper <drepper@redhat.com>
-
- * resolv/res_send.c (send_dg): Combine write and read to socket
- into one loop.
-
-2004-10-22 Roland McGrath <roland@frob.com>
-
- * Makefile (%.bz2, %.gz): Move these pattern rules ...
- * Makerules: ... to here.
-
-2001-10-31 Alexandre Oliva <aoliva@redhat.com>
-
- * elf/elf.h: Add R_MN10300_* relocation numbers.
-
-2004-10-22 Paul Eggert <eggert@cs.ucla.edu>
-
- [BZ #471] Fix imported from gnulib.
- * time/mktime.c (leapyear, ydhms_tm_diff): Year is of type
- long int, not int, to avoid problems when tm_year == INT_MAX
- and tm_mon > 12.
- (__mktime_internal): Compute year using long int arithmetic,
- not int arithmetic, to avoid problems on hosts where time_t
- and long are 64 bits but int is 32.
-
- [BZ #468] Import a fix from gnulib.
- * time/mktime.c [! DEBUG]: Do not include <string.h>.
- It's needed only if DEBUG is nonzero.
-
- [BZ #470] Import fix from gnulib.
- * time/mktime.c [!_LIBC] (__mktime_internal): Define to
- mktime_internal, to avoid clashes with any __mktime_internal
- function defined in the standard library.
-
- [BZ #469] Imported from gnulib.
- * time/mktime.c (__isleap): Remove; all uses replaced by:
- (leapyear): New function, which avoids overflow by not adding
- 1900 to year before testing whether it is a leap year.
-
- [BZ #472] Imported from gnulib.
- * time/mktime.c (Local Variables): Remove -DHAVE_TIME_R_POSIX;
- no longer used.
-
-2004-10-22 Ulrich Drepper <drepper@redhat.com>
-
- * resolv/res_send.c: Remove compatibility code which is unused in
- glibc and probably bitrotten.
-
- * debug/execinfo.h: Remove __THROW from backtrace prototype.
-
-2004-10-22 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/i386/Makefile (CFLAGS-backtrace.c): Add -fexceptions.
- * sysdeps/i386/backtrace.c: Include <bits/libc-lock.h>, <dlfcn.h>,
- <stdlib.h> and <unwind.h>. Remove <bp-checks.h> include.
- (struct trace_arg): New type.
- (unwind_backtrace, unwind_getip, unwind_getcfa, unwind_getgr): New
- fn pointers resp. macros.
- (init, backtrace_helper): New functions.
- (__backtrace): Rewritten to use _Unwind_Backtrace first and fall
- back to frame pointer walking.
-
-2004-10-22 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/Versions: Things are still in flux, it
- seems. Undo last additions.
-
-2004-10-21 Ulrich Drepper <drepper@redhat.com>
-
- * posix/execvp.c (execvp): Also ignore ENODEV and ETIMEDOUT errno
- values.
-
-2004-10-20 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/readonly-area.c (__readonly_area): If /proc
- is not mounted, return 1.
-
-2004-10-20 Roland McGrath <roland@redhat.com>
-
- * Makeconfig ($(common-objpfx)shlib-versions.v.i): Check also
- $(config-sysdirs) for shlib-versions files.
-
- * Makeconfig ($(common-objpfx)soversions.i): Replace shell loop with
- use of ...
- * scripts/soversions.awk: ... this new file. Collect lib info and
- match any DEFAULT line before emitting anything, so DEFAULT can come
- later in the concatenation of shlib-versions files.
-
- * manual/errno.texi (Error Codes): Add ENOKEY, EKEYEXPIRED,
- EKEYREVOKED, EKEYREJECTED.
- * sysdeps/unix/sysv/linux/Versions (libc: GLIBC_2.3.4): New errlist.
- * sysdeps/gnu/errlist.c: Regenerated.
-
- * sysdeps/gnu/errlist-compat.awk: Don't bail if Versions gives a count
- higher than ERR_MAX reports. Instead, emit a #define ERR_MAX.
- * sysdeps/gnu/Makefile ($(objpfx)errlist-compat.h): New target.
- (generated): Add errlist-compat.h.
- * sysdeps/gnu/errlist.awk: Make output #include <errlist-compat.h> to
- define ERR_MAX and use that for table size.
-
-2004-10-20 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/syscalls.list: Add entries for setaltroot,
- key_add, key_request, and keyctl syscalls.
- * sysdeps/unix/sysv/linux/Versions: Export them.
-
-2004-10-19 Roland McGrath <roland@frob.com>
-
- * sysdeps/mach/readonly-area.c: New file.
-
-2004-10-19 Ulrich Drepper <drepper@redhat.com>
-
- * elf/Versions [ld, GLIBC_PRIVATE]: Add _dl_debug_state.
- * elf/dl-debug.c (_dl_debug_state): Add rtld_hidden_def.
- * sysdeps/generic/ldsodefs.h (_dl_debug_state): Don't mark as
- hidden but use rtld_hidden_proto.
-
-2004-10-19 Alfred M. Szmidt <ams@gnu.org>
-
- * sysdeps/generic/readonly-area.c (__readonly_str): Renamed to ...
- (__readonly_area): ... this.
-
-2004-10-18 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/generic/strcpy_chk.c (__strcpy_chk): Speed up by checking
- destlen only every 4 bytes.
-
-2004-10-19 Ulrich Drepper <drepper@redhat.com>
-
- * nss/getent.c (hosts_keys): Let inet_pton decide whether the
- string is an address or not.
-
-2004-10-19 Jakub Jelinek <jakub@redhat.com>
-
- * elf/dl-addr.c (_dl_addr): Don't look at STT_TLS symbols.
- Use DL_SYMBOL_ADDRESS to set dli_saddr.
-
- * debug/Makefile (catchsegv): Prefix $LIB with a backslash.
-
-2004-10-19 Ulrich Drepper <drepper@redhat.com>
-
- * debug/Makefile ($(objpfx)catchsegv): To support multilib
- platforms, use $LIB in path to slibdir.
-
-2004-10-19 Jakub Jelinek <jakub@redhat.com>
-
- * debug/catchsegv.sh: Update copyright year.
- Use mktemp to create segv_output file.
-
-2004-10-19 Jakub Jelinek <jakub@redhat.com>
-
- * include/features.h (__USE_FORTIFY_LEVEL): Enable even with
- Red Hat gcc4 4.0.0 and above.
-
-2004-10-18 Jakub Jelinek <jakub@redhat.com>
-
- * elf/dl-libc.c (__libc_dlsym_private, __libc_register_dl_open_hook):
- New functions.
- (__libc_dlopen_mode): Call __libc_register_dl_open_hook and
- __libc_register_dlfcn_hook.
- * dlfcn/Makefile (routines, elide-routines.os): Set.
- Add rules to build and test tststatic2.
- * dlfcn/tststatic2.c: New test.
- * dlfcn/modstatic2.c: New test module.
- * dlfcn/dladdr.c: Call _dlfcn_hook from libdl.so if not NULL.
- Define __ prefixed routine in libc.a and in libdl.a just call it.
- * dlfcn/dladdr1.c: Likewise.
- * dlfcn/dlclose.c: Likewise.
- * dlfcn/dlerror.c: Likewise.
- * dlfcn/dlinfo.c: Likewise.
- * dlfcn/dlmopen.c: Likewise.
- * dlfcn/dlopen.c: Likewise.
- * dlfcn/dlopenold.c: Likewise.
- * dlfcn/dlsym.c: Likewise.
- * dlfcn/dlvsym.c: Likewise.
- * dlfcn/sdladdr.c: New file.
- * dlfcn/sdladdr1.c: New file.
- * dlfcn/sdlclose.c: New file.
- * dlfcn/sdlerror.c: New file.
- * dlfcn/sdlinfo.c: New file.
- * dlfcn/sdlopen.c: New file.
- * dlfcn/sdlsym.c: New file.
- * dlfcn/sdlvsym.c: New file.
- * dlfcn/Versions (libdl): Export _dlfcn_hook@GLIBC_PRIVATE.
- * include/dlfcn.h (DL_CALLER_DECL, DL_CALLER, RETURN_ADDRESS): Define.
- (struct dlfcn_hook): New type.
- (_dlfcn_hook): New extern decl.
- (__dlopen, __dlclose, __dlsym, __dlerror, __dladdr, __dladdr1,
- __dlinfo, __dlmopen, __libc_dlsym_private,
- __libc_register_dl_open_hook, __libc_register_dlfcn_hook): New
- prototypes.
- (__dlvsym): Use DL_CALLER_DECL.
- * include/libc-symbols.h: Define libdl_hidden_proto and friends.
-
- * malloc/arena.c (_dl_open_hook): Extern decl.
- (ptmalloc_init): Don't call _dl_addr when dlopened from statically
- linked programs but don't use brk for them either.
-
-2004-10-18 Roland McGrath <roland@redhat.com>
-
- * dlfcn/bug-dlsym1.c (main): Remove bogus setenv call.
-
-2004-10-18 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-open.c (dl_open_worker): Avoid dereferencing map in
- statically linked code if none might be found.
-
-2004-10-06 Maciej W. Rozycki <macro@mips.com>
-
- * sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
- (__SYSCALL_CLOBBERS): Add "memory".
- * sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h
- (__SYSCALL_CLOBBERS): Likewise.
- * sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h
- (__SYSCALL_CLOBBERS): Likewise.
-
-2004-10-17 Ulrich Drepper <drepper@redhat.com>
-
- * include/libc-symbols.h: Define libresolv_hidden_proto and friends.
- * include/resolv.h: Add libresolv_hidden_proto for symbols defined,
- used, and exported in libresolv.
- * resolv/base64.c: Add libresolv_hidden_def.
- * resolv/gethnamaddr.c: Likewise.
- * resolv/ns_name.c: Likewise.
- * resolv/ns_netint.c: Likewise.
- * resolv/res_comp.c: Likewise.
- * resolv/res_data.c: Likewise.
- * resolv/res_debug.c: Likewise.
- * resolv/res_mkquery.c: Likewise.
- * resolv/res_query.c: Likewise.
- * resolv/res_send.c: Likewise.
-
-2004-10-15 Jakub Jelinek <jakub@redhat.com>
-
- * elf/dl-minimal.c (__chk_fail): New. Add rtld_hidden_def.
- * sysdeps/unix/sysv/linux/readonly-area.c: New file.
- * sysdeps/i386/i686/memmove.S (__memmove_chk): Add checking
- routine.
- * sysdeps/i386/i686/memcpy.S (__memcpy_chk): Likewise.
- * sysdeps/i386/i686/mempcpy.S (__mempcpy_chk): Likewise.
- * sysdeps/i386/i686/memset.S (__memset_chk): Likewise.
- * sysdeps/i386/i686/memmove-chk.S: New file.
- * sysdeps/i386/i686/memcpy-chk.S: Likewise.
- * sysdeps/i386/i686/mempcpy-chk.S: Likewise.
- * sysdeps/i386/i686/memset-chk.S: Likewise.
- * sysdeps/generic/strcat-chk.c (__strcat_chk): Don't __chk_fail
- if exactly fitting into buffer.
- * sysdeps/generic/strncat-chk.c (__strncat_chk): Likewise.
- * sysdeps/generic/readonly-area.c: New file.
- * sysdeps/generic/strncpy-chk.c (__strncpy_chk): Only test
- destlen once.
- * sysdeps/x86_64/memset.S (__memset_chk): Add checking routine.
- * sysdeps/x86_64/memcpy.S (__memcpy_chk): Likewise.
- * sysdeps/x86_64/mempcpy.S (__memcpy_chk): Define to __mempcpy_chk.
- * sysdeps/x86_64/memcpy-chk.S: New file.
- * sysdeps/x86_64/mempcpy-chk.S: Likewise.
- * sysdeps/x86_64/memset-chk.S: Likewise.
- * sysdeps/x86_64/strcpy-chk.S: Likewise.
- * sysdeps/x86_64/stpcpy-chk.S: Likewise.
- * argp/argp-xinl.c (__OPTIMIZE__): Define to 1 instead of nothing.
- * argp/argp-fs-xinl.c (__OPTIMIZE__): Likewise.
- * debug/tst-chk1.c: New test.
- * debug/tst-chk2.c: Likewise.
- * debug/tst-chk3.c: Likewise.
- * debug/test-strcpy_chk.c: Likewise.
- * debug/test-stpcpy_chk.c: Likewise.
- * debug/vsprintf_chk.c (__vsprintf_chk): If flags > 0, request
- _IO_FLAGS2_CHECK_PERCENT_N. Add libc_hidden_def.
- * debug/Makefile (routines): Add printf_chk, fprintf_chk, vprintf_chk,
- vfprintf_chk, gets_chk and readonly-area.
- (CFLAGS-*_chk.c): Set.
- (tests): Add tst-chk1, tst-chk2, tst-chk3, test-strcpy_chk and
- test-stpcpy_chk.
- * debug/vprintf_chk.c: New file.
- * debug/printf_chk.c: Likewise.
- * debug/vfprintf_chk.c: Likewise.
- * debug/fprintf_chk.c: Likewise.
- * debug/gets_chk.c: Likewise.
- * debug/chk_fail.c (__chk_fail): Add libc_hidden_def.
- * debug/snprintf_chk.c (__snprintf_chk): Fix order of arguments
- passed to __vsnprintf_chk.
- * debug/Versions (libc): Export __printf_chk, __fprintf_chk,
- __vprintf_chk, __vfprintf_chk and __gets_chk @GLIBC_2.3.4.
- * debug/vsnprintf_chk.c (__vsnprintf_chk): Don't call
- __vsnprintf, instead create a temporary file with
- _IO_strn_jumps jumptable. If flags > 0, request
- _IO_FLAGS2_CHECK_PERCENT_N. Add libc_hidden_def.
- * libio/Makefile (headers): Add bits/stdio2.h.
- * libio/stdio.h: Include <bits/stdio2.h> if __USE_FORTIFY_LEVEL.
- (sprintf, snprintf, vsprintf, vsnprintf): Remove defines.
- * libio/strfile.h (_IO_strnfile): New type.
- (_IO_strn_jumps): New extern.
- * libio/vsnprintf.c (_IO_strnfile): Remove.
- (_IO_strn_jumps): Remove static.
- * libio/bits/stdio2.h: New file.
- * libio/vswprintf.c (_IO_strnfile): Rename type to...
- (_IO_wstrnfile): ...this. Adjust all uses.
- * libio/libio.h (_IO_FLAGS2_CHECK_PERCENT_N): Define.
- * stdio-common/vfprintf.c (STR_LEN): Define.
- (vfprintf): Add readonly_format variable.
- Handle _IO_FLAGS2_CHECK_PERCENT_N.
- (buffered_vfprintf): Copy _flags2.
- * include/stdio.h (__sprintf_chk, __snprintf_chk, __vsprintf_chk,
- __vsnprintf_chk, __printf_chk, __fprintf_chk, __vprintf_chk,
- __vfprintf_chk): New prototypes.
- (__vsprintf_chk, __vsnprintf_chk): Add libc_hidden_proto.
- * include/string.h (__memcpy_chk, __memmove_chk, __mempcpy_chk,
- __memset_chk, __strcpy_chk, __stpcpy_chk, __strncpy_chk, __strcat_chk,
- __strncat_chk): New prototypes.
- * include/bits/string3.h: New file.
- * include/sys/cdefs.h (__chk_fail): Add libc_hidden_proto
- and rtld_hidden_proto.
- * string/Makefile (headers): Add bits/string3.h.
- * string/bits/string3.h (bcopy, bzero): New defines.
- (memset, memcpy, memmove, strcpy, strncpy, strcat, strncat): Change
- macros so that inlines are used only if unknown destination size
- or side-effects in destination argument.
- (mempcpy, stpcpy): Likewise. Protect with #ifdef __USE_GNU.
-
-2004-09-16 Ulrich Drepper <drepper@redhat.com>
-
- * debug/Makefile (routines): Add *_chk.
- * debug/Versions (libc): Export __chk_fail, __memcpy_chk,
- __memmove_chk, __mempcpy_chk, __memset_chk, __stpcpy_chk,
- __strcat_chk, __strcpy_chk, __strncat_chk, __strncpy_chk,
- __sprintf_chk, __vsprintf_chk, __snprintf_chk, __vsnprintf_chk
- @GLIBC_2.3.4.
- * debug/chk_fail.c: New file.
- * debug/snprintf_chk.c: Likewise.
- * debug/sprintf_chk.c: Likewise.
- * debug/vsnprintf_chk.c: Likewise.
- * debug/vsprintf_chk.c: Likewise.
- * include/features.h (_FORTIFY_SOURCE): Document, handle.
- (__USE_FORTIFY_LEVEL): Define.
- (__GNUC_PREREQ): Move to earlier location.
- * include/sys/cdefs.h (__chk_fail): New prototype.
- * libio/bits/stdio.h (sprintf, vsprintf, snprintf, vsnprintf):
- Define if __USE_FORTIFY_LEVEL.
- * misc/sys/cdefs.h (__bos, __bos0): Define.
- * string/string.h: Include <bits/string3.h> if __USE_FORTIFY_LEVEL.
- * string/bits/string3.h: New header.
- * sysdeps/generic/memcpy_chk.c: New file.
- * sysdeps/generic/memmove_chk.c: Likewise.
- * sysdeps/generic/mempcpy_chk.c: Likewise.
- * sysdeps/generic/memset_chk.c: Likewise.
- * sysdeps/generic/stpcpy_chk.c: Likewise.
- * sysdeps/generic/strcat_chk.c: Likewise.
- * sysdeps/generic/strcpy_chk.c: Likewise.
- * sysdeps/generic/strncat_chk.c: Likewise.
- * sysdeps/generic/strncpy_chk.c: Likewise.
-
-2004-10-17 Roland McGrath <roland@frob.com>
-
- * manual/memory.texi (Page Lock Functions): Typo fix.
- Reported by Carlos Maziero <maziero@ppgia.pucpr.br>
-
-2004-10-16 Alfred M. Szmidt <ams@kemisten.nu>
-
- * sysdeps/mach/hurd/Makefile (link-libc-static): Use
- `$(static-gnulib)' instead of `$(gnulib)'.
-
-2004-10-17 Ulrich Drepper <drepper@redhat.com>
-
- * sunrpc/rpc_clntout.c: Avoid including rcsid into binary.
- * sunrpc/rpc_cout.c: Likewise.
- * sunrpc/rpc_hout.c: Likewise.
- * sunrpc/rpc_main.c: Likewise.
- * sunrpc/rpc_parse.c: Likewise.
- * sunrpc/rpc_sample.c: Likewise.
- * sunrpc/rpc_scan.c: Likewise.
- * sunrpc/rpc_svcout.c: Likewise.
- * sunrpc/rpc_tblout.c: Likewise.
- * sunrpc/rpc_util.c: Likewise.
-
-2004-10-15 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/i386/sysdep.h (PUSHARGS_6, DOARGS_6,
- POPARGS_6, _PUSHARGS_6, _DOARGS_6, _POPARGS_6): Define.
- * sysdeps/unix/sysv/linux/i386/syscall.S (syscall): Handle 6 argument
- syscalls.
-
-2004-10-15 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd.h (_PATH_NSCD_PASSWD_DB): Move to /var/db.
- (_PATH_NSCD_GROUP_DB): Likewise.
- (_PATH_NSCD_HOSTS_DB): Likewise.
- (_PATH_NSCD_XYZ_DB_TMP): New #define, point to /var/run.
- * nscd/connections.c (nscd_init): Non-persistent database files
- are created with the _PATH_NSCD_XYZ_DB_TMP path.
- * nscd/nscd.init: Create /var/db/nscd if necessary.
-
-2004-10-15 Richard Henderson <rth@redhat.com>
-
- * sysdeps/unix/sysv/linux/alpha/register-dump.h: New file.
- * sysdeps/unix/sysv/linux/alpha/sigcontextinfo.h (SIGCONTEXT): Add
- _code argument, pass sigcontext by pointer.
- (SIGCONTEXT_EXTRA_ARGS): Likewise.
- (GET_PC, GET_FRAME, GET_STACK): Expect ctx as pointer.
-
-2004-10-14 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/dl-machine.h (elf_machine_rela,
- elf_machine_rela_relative, elf_machine_lazy_rel): Mark auto
- instead of static.
-
- * sysdeps/unix/sysv/linux/adjtime.c (ADJTIME): Use prototype
- style definition.
- * sysdeps/unix/sysv/linux/alpha/adjtime.c (ADJTIME): If
- __ASSUME_TIMEVAL64, define __adjtime directly rather than
- via strong_alias.
-
-2004-10-14 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/Makefile: When using compilers without -fpie support, also
- link with -lselinux if necessary.
- Patch by Arkadiusz Miskiewicz <arekm@pld-linux.org>.
-
- * nscd/connections.c (nscd_init): Remove file if not persistent
- and not shared. Patch by Jerome Borsboom <j.borsboom@erasmusmc.nl>.
-
-2004-10-14 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/i386/setresuid.c: Handle
- defined __NR_setresuid32 && !defined __NR_setresuid.
- * sysdeps/unix/sysv/linux/i386/setresgid.c: Handle
- defined __NR_setresgid32 && !defined __NR_setresgid.
-
- * sysdeps/sparc/fpu/bits/mathinline.h (__signbitf, __signbit,
- __signbitl, sqrtf, sqrt, sqrtl, fdim, fdimf): Use __NTH macro.
-
- * sysdeps/generic/errno-loc.c: Don't #undef errno
- if RTLD_PRIVATE_ERRNO.
- * include/errno.h (__errno_location): If RTLD_PRIVATE_ERRNO,
- add attribute_hidden.
-
- * dlfcn/dlinfo.c (dlinfo_doit): Replace iteration over GL(dl_loaded)
- chain with iteration over all namespaces' _ns_loaded chains.
- * sysdeps/powerpc/powerpc32/dl-machine.c (__elf_preferred_address):
- Likewise.
- * sysdeps/mips/dl-machine.h (elf_machine_runtime_link_map): Likewise.
-
- * elf/rtld.c (_dl_start): Fix one last dl_loaded.
- * elf/dl-load.c (_dl_map_object_from_fd): Avoid definition of
- label when it is not needed.
- * elf/dl-close.c (_dl_close): Typo: & -> &&.
-
-2004-10-12 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/generic/segfault.c: Include alloca.h and stdint.h.
- Don't include frame.h.
- (CURRENT_STACK_FRAME, INNER_THAN, ADVANCE_STACK_FRAME): Remove.
- (catch_segfault): Use backtrace function.
-
- * sysdeps/unix/sysv/linux/ia64/bits/sigcontext.h: Fix comment.
- * sysdeps/unix/sysv/linux/ia64/register-dump.h: New file.
- * sysdeps/unix/sysv/linux/ia64/sigcontextinfo.h (GET_PC): Return sc_ip
- field.
-
-2004-10-13 Ulrich Drepper <drepper@redhat.com>
-
- Add support for namespaces in the dynamic linker.
- * dlfcn/Makefile (libdl-routines): Add dlmopen.
- * dlfcn/Versions [libdl, GLIBC_2.3.4]: Add dlmopen.
- * dlfcn/dlfcn.h: Define Lmid_t, LM_ID_BASE, and LM_ID_NEWLM.
- Declare dlmopen. Document RTLD_DI_LMID.
- * dlfcn/dlinfo.c: Handle RTLD_DI_LMID.
- * dlfcn/dlmopen.c: New file.
- * dlfcn/dlopen.c: Pass new parameter to _dl_open.
- * dlfcn/dlopenold.c: Likewise.
- * elf/dl-addr.c: Adjust for removal of GL(dl_loaded).
- * elf/dl-caller.c: Likewise.
- * elf/dl-close.c: Likewise.
- * elf/dl-conflict.c: Likewise.
- * elf/dl-debug.c: Likewise.
- * elf/dl-lookup.c: Likewise.
- * elf/dl-sym.c: Likewise.
- * elf/dl-version.c: Likewise.
- * elf/do-lookup.h: Likewise.
- * elf/rtld.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/dl-librecon.h: Likewise.
- * elf/dl-deps.c: Likewise. Add new parameter to _dl_map_object.
- * elf/dl-fini.c: Call destructors in all namespaces.
- * elf/dl-iteratephdr.c: Compute total nloaded. Adjust for removal of
- GL(dl_loaded).
- * elf/dl-libc.c: Pass new parameter to _dl_open. Adjust for removal
- of GL(dl_loaded).
- * elf/dl-load.c (_dl_map_object_from_fd): Don't load ld.so a second
- time. Reuse the one from the main namespace in all others.
- Pass new parameter to _dl_new_object.
- Adjust for removal of GL(dl_loaded).
- * elf/dl-object.c: Take new parameter. Use it to initialize l_ns.
- Adjust for removal of GL(dl_loaded).
- * elf/dl-open.c (_dl_open): Take new parameter.
- Adjust for removal of GL(dl_loaded).
- * elf/dl-support.c: Replace global _dl_loaded etc variables with
- _dl_ns variable.
- * include/dlfcn.h: Adjust prototype of _dl_open.
- Define __LM_ID_CALLER.
- * include/link.h: Add l_real, l_ns, and l_direct_opencount elements.
- * sysdeps/generic/dl-tls.c: Bump TLS_STATIC_SURPLUS. Since libc is
- using TLS we need memory appropriate to the number of namespaces.
- * sysdeps/generic/ldsodefs.h (struct rtld_global): Replace _dl_loaded,
- _dl_nloaded, _dl_global_scope, _dl_main_searchlist, and
- _dl_global_scope_alloc with _dl_ns element. Define DL_NNS.
- Adjust prototypes of _dl_map_object and member in rtld_global_ro.
- * malloc/malloc.c: Include <dlfcn.h>.
- * malloc/arena.c (ptmalloc_init): If libc is not in primary namespace,
- never use brk.
- * elf/Makefile: Add rules to build and run tst-dlmopen1 and
- tst-dlmopen2.
- * elf/tst-dlmopen1.c: New file.
- * elf/tst-dlmopen1mod.c: New file.
- * elf/tst-dlmopen2.c: New file.
-
- * elf/dl-close.c: Improve reference counting by tracking direct loads.
- * elf/dl-lookup.c (add_dependency): Likewise.
- * elf/dl-open.c (dl_open_worker): Likewise.
- * elf/rtld.c (dl_main): Likewise.
-
-2004-09-09 GOTO Masanori <gotom@debian.or.jp>
-
- [BZ #77]
- * elf/dl-close.c: Count down l_opencount to check not only for
- l_reldeps, but also l_initfini.
-
-2004-10-13 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-close.c (_dl_close): Update bug reporting instructions.
-
-2004-10-11 Ulrich Drepper <drepper@redhat.com>
-
- * timezone/asia: Update from tzdata2004e.
- * timezone/southamerica: Likewise.
- * timezone/private.h: Update from tzcode2004e.
- * timezone/zdump.c: Likewise.
-
- * stdio-common/vfscanf.c: Add support for reading localized
- digits. Patch mainly by Hamed Malek <hamed@bamdad.org>.
-
- * resolv/res_init.c (res_thread_freeres): Reset _res.options.
- [BZ #434]
-
- * resolv/res_send.c (send_dg): Use nonblocking sockets. Add
- appropriate poll/select calls and restart operation if necessary.
- Also handle EINTR.
-
- * elf/tst-dlopenrpath.c (do_test): Enable code which was disabled
- for debugging.
-
- * elf/dl-sym.c (do_sym): Avoid using global variable.
-
- * elf/dl-addr.c (_dl_addr): Really use match everywhere.
-
-2004-10-09 Andreas Schwab <schwab@suse.de>
-
- * sysdeps/m68k/memcopy.h (WORD_COPY_BWD): Remove use of cast as
- lvalue.
-
- * sysdeps/m68k/fpu/bits/mathinline.h: Remove __THROW from inline
- definitions.
-
-2004-10-07 Andreas Schwab <schwab@suse.de>
-
- * misc/sys/uio.h: Change __vector to __iovec to avoid clash with
- altivec.
-
-2004-10-06 Alan Modra <amodra@bigpond.net.au>
-
- * gmon/Makefile (CFLAGS-mcount.c): Move before inclusion of "Rules".
- * sysdeps/powerpc/powerpc64/Makefile (CFLAGS-mcount.c): Add
- -msoft-float.
- * sysdeps/powerpc/powerpc64/sysdep.h (SAVE_ARG, REST_ARG): New macros.
- (CALL_MCOUNT): Replace with a gas macro implementation.
- (EALIGN): Delete PROF version.
- * sysdeps/powerpc/powerpc64/__longjmp-common.S: Invoke CALL_MCOUNT.
- * sysdeps/powerpc/powerpc64/bsd-_setjmp.S: Likewise.
- * sysdeps/powerpc/powerpc64/bsd-setjmp.S: Likewise.
- * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise.
- * sysdeps/powerpc/powerpc64/memcpy.S: Likewise.
- * sysdeps/powerpc/powerpc64/memset.S: Likewise.
- * sysdeps/powerpc/powerpc64/stpcpy.S: Likewise.
- * sysdeps/powerpc/powerpc64/strchr.S: Likewise.
- * sysdeps/powerpc/powerpc64/strcmp.S: Likewise.
- * sysdeps/powerpc/powerpc64/strcpy.S: Likewise.
- * sysdeps/powerpc/powerpc64/strlen.S: Likewise.
- * sysdeps/powerpc/powerpc64/strncmp.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_ceil.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_copysign.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_floor.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_floorf.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_llrint.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_llrintf.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_llround.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_llroundf.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_rint.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_rintf.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_round.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_roundf.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_trunc.S: Likewise.
- * sysdeps/powerpc/powerpc64/fpu/s_truncf.S: Likewise.
-
- * sysdeps/powerpc/powerpc64/setjmp-common.S: Add extra entry point
- past _mcount call.
- * sysdeps/powerpc/powerpc64/bsd-_setjmp.S: Use it.
- * sysdeps/powerpc/powerpc64/bsd-setjmp.S: Likewise.
-
-2004-10-06 Ulrich Drepper <drepper@redhat.com>
-
- * resolv/res_mkquery.c (res_nmkquery): Reject randombits value if
- low 16 bits are zero.
-
-2004-10-06 Jakub Jelinek <jakub@redhat.com>
-
- * posix/tst-getaddrinfo2.c: Include stdlib.h and string.h.
- (do_test): Use %p instead of 0x%08X to print a pointer.
-
- * malloc/malloc.c: Include stdio-common/_itoa.h.
-
-2004-10-05 Ulrich Drepper <drepper@redhat.com>
-
- * elf/rtld.c (dl_main): Use _dl_debug_printf instead of _dl_printf
- for prelink message.
-
-2004-10-05 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h: Include dl-sysdep.h.
- * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h: Likewise.
-
-2004-10-05 Ulrich Drepper <drepper@redhat.com>
-
- * grp/initgroups.c: Remove duplicate group IDs.
- * grp/compat-initgroups.c: Likewise.
- * nscd/initgrcache.c: Likewise.
-
-2004-10-05 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/x86_64/sysconf.c (__sysconf): Return 200112L
- for _SC_CPUTIME or _SC_THREAD_CPUTIME.
-
- * nscd/Makefile (nscd-cflags): Set to -DIS_IN_nscd=1, plus
- -fpie if building PIE.
- (CFLAGS-*.c): Use it.
-
- * nscd/Makefile (relro-LDFLAGS): Add -Wl,-z,now if have-z-relro.
- ($(objpfx)nscd): Add $(relro-LDFLAGS).
-
- * sysdeps/unix/sysv/linux/i386/sysconf.c: Include hp-timing.h.
- (__sysconf): Return -1 for _SC_CPUTIME or _SC_THREAD_CPUTIME if
- !HP_TIMING_AVAIL.
-
-2004-10-05 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/Makefile (distribute): Remove TODO.
- * nscd/TODO: Removed.
-
-2004-10-04 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/gai.c: Define __no_netlink_support if NEED_NETLINK is
- defined and __ASSUME_NETLINK_SUPPORT is zero.
- * sysdeps/unix/sysv/linux/Makefile (CFLAGS-gai.c): Add
- -DNEED_NETLINK.
-
- * malloc/mtrace.pl: Avoid calling location unless it is needed for
- output. Patch by Edward Bishop <binutils@gmail.com>.
-
- * nscd/Makefile (CFLAGS-gai.c): Add -fpie.
-
-2004-10-04 H.J. Lu <hongjiu.lu@intel.com>
-
- * sysdeps/unix/sysv/linux/ia64/clock_getcpuclockid.c
- (clock_getcpuclockid): Add missing retval.
-
- * sysdeps/unix/sysv/linux/ia64/sysconf.c (linux_sysconf): Fix a typo.
-
-2004-10-04 Roland McGrath <roland@redhat.com>
-
- * include/errno.h [RTLD_PRIVATE_ERRNO] (errno): Rename the real symbol
- to rtld_errno.
- * sysdeps/generic/errno.c [RTLD_PRIVATE_ERRNO] (rtld_errno): Define it,
- and don't define any other errno names.
- * sysdeps/unix/alpha/sysdep.h [RTLD_PRIVATE_ERRNO]: Use rtld_errno in
- place of errno.
- * sysdeps/unix/i386/sysdep.S: Likewise.
- * sysdeps/unix/sysv/linux/i386/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/ia64/sysdep.S: Likewise.
- * sysdeps/unix/sysv/linux/m68k/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.S: Likewise.
- * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.S: Likewise.
- * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/sh/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
- * sysdeps/unix/x86_64/sysdep.S: Likewise.
-
- * sysdeps/generic/errno.c [! USE___THREAD] (errno): Use `nocommon'
- attribute instead of `section'.
-
-2004-10-04 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/ia64/sysconf.c: New file.
- * sysdeps/unix/sysv/linux/ia64/Dist: Add has_cpuclock.c.
- * sysdeps/unix/sysv/linux/ia64/clock_getcpuclockid.c: Move actual
- testing code to...
- * sysdeps/unix/sysv/linux/ia64/has_cpuclock.c: ...here. New file.
- * sysdeps/unix/sysv/linux/i386/sysconf.c: Add dynamic check for
- _SC_CPUTIME and _SC_THREAD_CPUTIME.
-
- * nscd/connections.c (start_threads): Use sysconf in case
- _POSIX_CLOCK_SELECTION or _POSIX_MONOTONIC_CLOCK is not greater than zero.
-
- * nscd/Makefile (nscd-modules): Add gai.
- * nscd/gai.c: New file.
- * nscd/nscd.c: Remove getaddrinfo stub definition.
-
- * assert/assert.h: Give up on using __builtin_expect.
-
- * elf/rtld.c (dl_main): Only skip => output in ldd mode if both
- strings are identical.
-
-2004-03-18 Jakub Jelinek <jakub@redhat.com>
-
- * malloc/arena.c (aligned_heap_area): New variable.
- (new_heap): If aligned_heap_area != NULL, attempt to use that
- first. If HEAP_MAX_SIZE << 1 area is already HEAP_MAX_SIZE bytes
- aligned, remember the second half in aligned_heap_area.
- (delete_heap): Clear aligned_heap_area if deleting the area right
- before aligned_heap_area.
-
-2004-10-03 Juerg Billeter <j@bitron.ch>
-
- * nscd/nscd_initgroups.c (__nscd_getgrouplist): Return -1 if nscd
- can't be used. [BZ #424]
-
-2004-10-03 Ulrich Drepper <drepper@redhat.com>
-
- Dynamically create new threads if necessary.
- * nscd/connections.c (fd_ready): If no thread available for processing
- the request, create a new one unless the limit is reached.
- (start_threads): Check errors from pthread_create.
- * nscd/nscd.h: Declare max_nthreads.
- * nscd/nscd_conf.c: Parse max-nthreads entry.
- * nscd/nscd.conf: Add max-threads entry.
- * nscd/nscd_stat.c: Print current and maximum number of threads.
-
- Implement paranoia mode.
- * nscd/connections.c (nscd_init): Mark database and socket descriptors
- as close on exec.
- (restart): New function.
- (restart_p): New function.
- (nscd_run): Add missing decrement of nready in case readylist is
- empty.
- (main_loop_poll): Call restart_p and restart.
- (main_loop_epoll): Likewise.
- (begin_drop_privileges): Save original UID and GID.
- * nscd/nscd.c: Define new variables paranoia, restart_time,
- restart_interval, oldcwd, old_gid, old_uid.
- (main): Disable paranoia mode if we are not forking.
- (check_pid): When re-execing, the PID file contains the same PID as
- the current process. Do not fail in this case.
- * nscd/nscd.conf: Add paranoia and restart-interval entries.
- * nscd/nscd.h: Define RESTART_INTERVAL. Declare new variables.
- * nscd/nscd_conf.c: Parse paranoia and restart-interval configurations.
- * nscd/nscd_stat.c: Print paranoia and restart-interval values.
-
- * nscd/connections.c: Implement alternative loop for main thread
- which uses epoll.
- * sysdeps/unix/sysv/linux/Makefile [subdir=nscd]
- (CFLAGS-connections.c): Add -DHAVE_EPOLL.
-
-2004-10-02 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/Makefile (CFLAGS-initgrcache.c): Add to CFLAGS-* variables,
- don't replace old content.
-
- * nscd/connections.c: Rewrite handling of incoming connections. All
- are handled by one thread which then hands off the descriptors for the
- real work to the worker threads.
- * nscd/Makefile: Link nscd with librt.
-
- * nscd/selinux.c: Pretty printing.
-
- * nscd/dbg_log.c (dbg_log): Don't add unnecessary newline to
- output. Let syslog do the formatting if debug_level == 0.
-
- * nscd/nscd_helper.c (get_mapping): No need to check timestamp if
- nscd_certainly_running is nonzero.
-
-2004-10-02 Simon Josefsson <jas@extundo.com>
-
- [BZ #420]
- * sysdeps/generic/memmem.c [!_LIBC]: Define __builtin_expect, to
- make the file usable inside gnulib.
-
-2004-10-01 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (public_vALLOc): Add missing use of hooks.
- (public_pVALLOc): Likewise.
-
- * nscd/nscd_initgroups.c (__nscd_getgrouplist): Always add the
- group the caller provided unless there is a real problem.
-
- * posix/bug-glob1.c (prepare): Fix creation of symlink.
-
-2004-09-30 Ulrich Drepper <drepper@redhat.com>
-
- * posix/Makefile: Add rules to build and run bug-glob1.
- * posix/bug-glob1.c: New file.
-
- * iconv/iconv_prog.c (main): Print progress information to stderr.
-
- * nscd/nscd.c (termination_handler): Reset timestamp so that
- clients immediately stop using the database.
-
- * nscd/nscd-client.h (__nscd_get_map_ref): Drop volatile from last
- parameter.
- (__nscd_drop_map_ref): Change second parameter to be a reference to
- a variable. Update variable when cycle count changed.
- * nscd/nscd_helper.c (__nscd_get_map_ref): Remove volatile here, too.
- * nscd/nscd_getai.c: Correctly use __nscd_drop_map_ref. Reinitialize
- all variables and avoid memory leak in case of retries.
- * nscd/nscd_getgr_r.c: Likewise.
- * nscd/nscd_gethst_r.c: Likewise.
- * nscd/nscd_getpw_r.c: Likewise.
- * nscd/nscd_initgroups.c: Likewise.
-
- * nscd/nscd.h: Add declaration of addinitgroups and
- readdinitgroups.
-
-2004-09-30 Andreas Jaeger <aj@suse.de>
-
- * nscd/Makefile (CFLAGS-nscd_initgroups.c): Set to -fpie.
- (CFLAGS-initgrcache.c): Set to -fexceptions.
-
-2004-09-29 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/generic/glob.c (glob_in_dir): Don't blindly trust
- readdir results; for symlinks or files of unknown type check using
- stat whether the file exists.
-
- * posix/tst-gnuglob.c (find_file): Handle leading "./". Fix
- recognition of files.
-
-2004-09-29 Jakub Jelinek <jakub@redhat.com>
-
- * time/tzfile.c (tzfile_mtime): New variable.
- (__tzfile_read): Reread the file if mtime is different.
-
-2004-09-28 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/alpha/fpu/bits/mathinline.h (__fdimf, __fdim, fdimf, fdim):
- Handle +inf/+inf.
- * sysdeps/powerpc/fpu/bits/mathinline.h (fdim, fdimf): Likewise.
- * sysdeps/sparc/fpu/bits/mathinline.h (fdim, fdimf): Likewise.
-
-2004-09-29 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd_gethst_r.c (nscd_gethst_r): Use correct constant for
- testing result of __nscd_get_map_ref.
-
-2004-09-29 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/i386/fpu/s_fdim.S (__fdim): Handle +inf/+inf.
- * sysdeps/i386/fpu/s_fdimf.S (__fdimf): Likewise.
- * sysdeps/i386/fpu/s_fdiml.S (__fdiml): Likewise.
-
-2004-09-29 Ulrich Drepper <drepper@redhat.com>
-
- * grp/initgroups.c: Move compat_call implementation...
- * grp/compat-initgroups.c: ...to here. New file.
- * grp/Makefile (distribute): Add compat-initgroups.c.
- (CFLAGS-initgroups.c): Add -DUSE_NSCD=1.
- * nscd/initgrcache.c: New file.
- * nscd/nscd_initgroups.c: New file.
- * nscd/Makefile (routines): Add nscd_initgroups.
- (nscd-modules): Add initgrcache.
- * nscd/cache.c (prune_cache): Add support for INITGROUPS entries.
- * nscd/connections.c: Handle INITGROUPS requests.
- * nscd/nscd-client.h: Define INITGROUPS, initgr_response_header.
- Add initgrdata element to struct datahead. Fix typo in comment.
- * nscd/nscd_proto.h: Declare __nscd_getgrouplist. Fix parameter
- type in __nscd_getgrgid_r.
- * nscd/selinux.c (perms): Add INITGROUPS entry.
-
- * nscd/nscd_getai.c: No need to include <sys/mman.h>.
-
- * sunrpc/get_myaddr.c (get_myaddress): Account for interfaces without
- assigned addresses.
- * sunrpc/pmap_clnt.c (__get_myaddress): Likewise.
- * sunrpc/pmap_rmt.c (getbroadcastnets): Likewise.
- * sunrpc/clnt_udp.c (is_network_up): Likewise.
-
- * nscd/nscd.c: Define getaddrinfo hidden so that it is never found
- outside.
-
- * sysdeps/unix/sysv/linux/ia64/bits/siginfo.h (si_segvflags):
- Renamed from si_flags due to conflict with si_flags from <elf.h>.
-
-2004-09-28 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd_getai.c: Use NO_MAPPING instead of MAP_FAILED for test
- of failing __nscd_get_map_ref. Fix a few typos.
-
- * sysdeps/unix/sysv/linux/sigwait.c (do_sigwait): Make sure
- SIGSETXID is not blocked.
- * sysdeps/unix/sysv/linux/sigwaitinfo.c (do_sigwaitinfo): Likewise.
- * sysdeps/unix/sysv/linux/sigtimedwait.c (do_sigtimedwait): Likewise.
- * sysdeps/unix/sysv/linux/sigprocmask.c (__sigprocmask): Likewise.
- * sysdeps/generic/sigfillset.c (sigfillset): Don't set SIGSETXID.
-
- * sunrpc/get_myaddr.c (get_myaddress): Fix test for failing
- getifaddrs call.
- * sunrpc/pmap_clnt.c (__get_myaddress): Likewise.
- * sunrpc/pmap_rmt.c (getbroadcastnets): Likewise.
- * sunrpc/Makefile (xtests): Add tst-getmyaddr.
- * sunrpc/tst-getmyaddr.c: New file.
-
- * malloc/arena.c (ptmalloc_init): Allow MALLOC_CHECK_==0 to
- disable all checking.
-
- * sysdeps/unix/sysv/linux/ia64/bits/siginfo.h (si_flags): Add
- support to get this value.
-
-2004-09-28 Jakub Jelinek <jakub@redhat.com>
-
- * io/utime.h (utime): Allow second argument to be NULL.
-
-2004-09-28 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): If NAME is a numerical IP
- address and AI_CANONNAME is set, return copy of NAME as ai_canonname.
-
-2004-09-27 Andreas Jaeger <aj@suse.de>
-
- * sysdeps/i386/dl-machine.h (elf_machine_rela_relative): Replace
- static inline by auto inline, add always_inline attribute.
- (elf_machine_rel): Likewise.
- (elf_machine_rela): Likewise.
- (elf_machine_lazy_rel): Likewise.
- (elf_machine_lazy_rela): Likewise.
- (elf_machine_rel_relative): Likewise.
-
-2004-09-27 Ulrich Drepper <drepper@redhat.com>
-
- * sunrpc/xdr_intXX_t.c: Add xdr_quad_t and xdr_u_quad_t aliases.
- * sunrpc/rpc/xdr.h: Declare xdr_quad_t and xdr_u_quad_t.
- * sunrpc/rpc_parse.c (get_type): Use "quad_t" for TOK_HYPER.
- Otherwise isvectordef will loop infinitely if typedef hyper int64_t
- is seen.
- (unsigned_dec): Use "u_quad_t" for similar reasons.
- * sunrpc/Versions: Export xdr_quad_t and xdr_u_quad_t.
-
-2004-09-27 Roland McGrath <roland@redhat.com>
-
- * sysdeps/generic/bits/waitstatus.h (__WIFSIGNALED): Simplify bit
- twiddling in last change.
-
- * posix/tst-waitid.c (do_test): Add tests for waitpid with WCONTINUED.
-
- [BZ #409]
- * posix/sys/wait.h [__WIFCONTINUED] (WIFCONTINUED): New macro.
- * stdlib/stdlib.h [__WIFCONTINUED] (WIFCONTINUED): New macro.
- * sysdeps/generic/bits/waitstatus.h (__W_CONTINUED): New macro.
- [WCONTINUED] (__WIFCONTINUED): New macro.
- (__WIFSIGNALED): Rewritten to exclude __W_CONTINUED value, and have no
- branches.
-
- * sysdeps/unix/sysv/linux/waitid.c (do_waitid): Add fifth argument to
- all three syscall uses, not just one!
-
-2004-09-26 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/alphaev6/memcpy.S: Mark .prologue.
- * sysdeps/unix/alpha/sysdep.h (LEAF, ENTRY): Align entry points
- to 16 byte boundaries.
-
-2004-09-26 Ulrich Drepper <drepper@redhat.com>
-
- * elf/readlib.c (process_file): Before complaining about too-short
- file, check that it could potentially be an ELF file. Also complain about
- empty files. [BZ #151].
-
- * scripts/test-installation.pl: Fix ld.so recognition for new
- LD_TRACE_LOADED_OBJECTS output format.
- Patch by <jsberg04+computing.glibc@ftml.net> [BZ #407].
-
- * elf/dl-support.c (_dl_non_dynamic_init): Fix cleaning of
- environment. [BZ #384]
-
- * sunrpc/clnt_udp.c (is_network_up): Use getifaddrs instead of ioctl.
- * sunrpc/get_myaddr.c (get_myaddress): Likewise.
- * sunrpc/pmap_clnt.c (__get_myaddress): Likewise.
- * sunrpc/pmap_rmt.c (getbroadcastnets): Likewise. Change interface
- to avoid buffer overrun and remove now useless parameters.
- (clnt_broadcast): Adjust caller. [BZ #381].
-
- * sysdeps/generic/s_fdim.c: Handle +inf/+inf
- * sysdeps/generic/s_fdimf.c: Likewise.
- * sysdeps/generic/s_fdiml.c: Likewise.
- * sysdeps/i386/i686/fpu/s_fdim.S: Likewise.
- * sysdeps/i386/i686/fpu/s_fdimf.S: Likewise.
- * sysdeps/i386/i686/fpu/s_fdiml.S: Likewise.
- * sysdeps/powerpc/fpu/s_fdim.c: Likewise.
- * sysdeps/powerpc/fpu/s_fdimf.c: Likewise.
- * sysdeps/x86_64/fpu/s_fdiml.S: Likewise.
- * math/libm-test.inc (fdim_test): Add test case. [BZ #376].
-
- * sysdeps/generic/bits/types.h: Fix __SQUAD_TYPE and __UQUAD_TYPE
- for compilers without __GLIBC_HAVE_LONG_LONG. [BZ #362]
-
- * sysdeps/posix/getaddrinfo.c (getaddrinfo): Remove incorrect
- requirement on socktype and protocol.
- (gaih_inet): If numeric port number is given, return records for all
- possible socket types.
- * posix/tst-getaddrinfo2.c: New file.
- * posix/Makefile (tests): Add tst-getaddrinfo2. [BZ #358]
-
-2004-09-25 Ulrich Drepper <drepper@redhat.com>
-
- * locale/loadlocale.c (_nl_intern_locale_data): Recognize LC_CTYPE
- data where _nl_value_type_LC_CTYPE does not contain the type
- information. Add range checks.
- Reported by John Lumby <johnlumby@hotmail.com> [BZ #356].
-
- * libio/vasprintf.c (_IO_vasprintf): Fix condition to decide
- whether to realloc or not.
- Reported by Pavel Kankovsky <peak@argo.troja.mff.cuni.cz> [BZ #346].
-
- * intl/dcigettext.c (DCIGETTEXT): Protect tfind/tsearch calls.
- * intl/dcigettext.c (_nl_find_msg): Call _nl_load_domain also if
- decided < 0.
- * intl/finddomain.c (_nl_find_domain): Likewise.
- * intl/loadmsgcat.c (_nl_load_domain): Set decided to 1 only once we
- are done. First set to -1 to signal initialization is ongoing.
- Protect against concurrent callers with recursive lock.
- * intl/finddomain.c (_nl_find_domain): Protect calls to
- _nl_make_l10nflist. [BZ #322]
-
- * sysdeps/posix/getaddrinfo.c (getaddrinfo): If determination of
- source address fails, initialize source_addr_len field so that
- duplicate address recognition does not copy junk.
-
-2004-09-25 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/i386/setuid.c (__setuid): Remove second
- result declaration.
-
-2004-09-22 Andreas Schwab <schwab@suse.de>
-
- * sysdeps/unix/sysv/linux/ia64/sysdep.h: Adjust whitespace.
-
-2004-09-24 Ulrich Drepper <drepper@redhat.com>
-
- * misc/daemon.c (daemon): Don't succeed if /dev/null cannot be
- opened.
-
- * nis/ypclnt.c (do_ypcall): Add one missing unlock. Simplify the
- code a bit.
-
- * misc/daemon.c (daemon): Define errno in case /dev/null is not
- the correct device.
-
- * nis/ypclnt.c (yp_bind_file): Optimize a bit. Minimal cleanups.
-
-2004-09-23 Andreas Jaeger <aj@suse.de>
-
- * locale/weight.h (findidx): Remove static, it's not supported
- anymore with GCC 4.0 in a block scope.
- * locale/weightwc.h (findidx): Likewise.
- * posix/regcomp.c (seek_collating_symbol_entry): Likewise.
- (lookup_collation_sequence_value): Likewise.
- (build_range_exp): Likewise.
- (build_collating_symbol): Likewise.
- * iconv/iconvconfig.c (write_output): Likewise.
- * elf/do-rel.h (elf_dynamic_do_rel): Likewise.
-
- * sysdeps/x86_64/dl-machine.h (elf_machine_rela_relative): Remove
- static, add always_inline attribute.
- (elf_machine_rela): Likewise.
- (elf_machine_lazy_rel): Likewise.
-
- * elf/dynamic-link.h (elf_get_dynamic_info): Make static dependent
- on !RESOLVE so that it's not defined in local scope.
-
-2004-09-23 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/unix/sysv/linux/sh/sysdep.h (INTERNAL_SYSCALL_NCS): Define.
-
-2004-09-23 Thorsten Kukuk <kukuk@suse.de>
-
- * sysdeps/unix/sysv/linux/sys/mount.h: Sync MS_RMT_MASK flag
- and BLK* ioctls with linux kernel headers.
-
-2004-09-23 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/generic/bits/dlfcn.h: Add RTLD_DEEPBIND.
- * elf/dl-object.c (_dl_new_object): Add new parameter mode. If mode
- has RTLD_DEEPBIND set add local searchlist before global scope.
- * sysdeps/generic/ldsodefs.h (_dl_new_object): Adjust prototype.
- * elf/rtld.c: Adjust callers of _dl_new_object.
- * elf/dl-load.c: Likewise.
- (_dl_map_object_from_fd): If RTLD_DEEPBIND is used, don't do anything
- for DF_SYMBOLIC.
- * elf/dl-open.c (dl_open_writer): Pass RTLD_DEEPBIND flag on to
- _dl_map_object_deps.
- * elf/tst-deep1.c: New file.
- * elf/tst-deep1mod1.c: New file.
- * elf/tst-deep1mod2.c: New file.
- * elf/tst-deep1mod3.c: New file.
- * elf/Makefile: Add rules to build and run new tests.
-
- * elf/dl-deps.c: Pretty printing.
-
-2004-09-23 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/alpha/sysdep.h (inline_syscall[0-6]): Change name
- argument to numbers from syscall names.
- (INLINE_SYSCALL1): Pass __NR_##name to inline_syscall##nr.
- (INTERNAL_SYSCALL_NCS): Renamed from...
- (INTERNAL_SYSCALL_1): ... this. Use INTERNAL_SYSCALL_NCS.
- * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h
- (INTERNAL_SYSCALL_NCS): Define.
- * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
- (INTERNAL_SYSCALL_NCS): Likewise.
- * sysdeps/unix/sysv/linux/sparc/sysdep.h (inline_syscall[0-6]):
- Change name argument to numbers from syscall names.
- (INLINE_SYSCALL, INTERNAL_SYSCALL): Adjust.
- (INTERNAL_SYSCALL_NCS): Define.
-
-2004-09-22 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.c (malloc_printerr): Use syslog if writev failed.
-
- * string/string.h: Add __nonnull annotations.
- * stdlib/stdlib.h: Likewise.
-
-2004-09-20 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/alpha/sysdep.h (inline_syscall[0-6]): Change name
- argument to numbers from syscall names.
- (INLINE_SYSCALL1): Pass __NR_##name to inline_syscall##nr.
- (INTERNAL_SYSCALL_NCS): Renamed from...
- (INTERNAL_SYSCALL_1): ... this. Use INTERNAL_SYSCALL_NCS.
- * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h
- (INTERNAL_SYSCALL_NCS): Define.
- * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
- (INTERNAL_SYSCALL_NCS): Likewise.
- * sysdeps/unix/sysv/linux/sparc/sysdep.h (inline_syscall[0-6]):
- Change name argument to numbers from syscall names.
- (INLINE_SYSCALL, INTERNAL_SYSCALL): Adjust.
- (INTERNAL_SYSCALL_NCS): Define.
-
-2004-09-20 H.J. Lu <hongjiu.lu@intel.com>
-
- * sysdeps/unix/sysv/linux/ia64/sysdep.h (DO_INLINE_SYSCALL):
- Renamed to ...
- (DO_INLINE_SYSCALL_NCS): This.
- (DO_INLINE_SYSCALL): New.
- (INLINE_SYSCALL): Updated.
- (INTERNAL_SYSCALL_NCS): Updated.
-
-2004-09-21 Ulrich Drepper <drepper@redhat.com>
-
- * elf/sprof.c (load_shobj): Add support for reading symbol table
- from debuginfo file.
-
- * elf/ldd.bash.in: Fix syntax errors.
-
-2004-09-20 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/dl-execstack.c
- (_dl_make_stack_executable): Remove some duplication.
-
- * nscd/nscd.c (options): Mark S option as hidden.
- (parse_opt): When S option is used, print warning message.
- * nscd/grpcache.c (addgrbyX): Don't handle secure mode.
- * nscd/hstcache.c (addhstbyX): Don't handle secure mode.
- * nscd/aicache.c (addhstaiX): Don't handle secure mode.
- * nscd/pwdcache.c (addpwbyX): Don't handle secure mode.
-
-2004-09-20 Roland McGrath <roland@frob.com>
-
- * elf/dl-load.c (__stack_prot): Only use PROT_GROWSUP/PROT_GROWSDOWN
- in initializer #if defined.
-
-2004-09-18 Paul Eggert <eggert@cs.ucla.edu>
-
- [BZ #391]
- * stdlib/getsubopt.c: Merge fixes from gnulib.
- (__strchrnul) [!_LIBC]: Define and include "strchrnul.c".
- (getsubopt): Use prototypes, not K&R style.
- Fix bug: memcmp(A,B,N) was being invoked on a memory block B
- whose size might be smaller than N. Use strncmp to avoid the bug.
-
-2004-09-20 Ulrich Drepper <drepper@redhat.com>
-
- * configure.in: If selinux has not explicitly been requested, don't
- comment on it missing.
-
- * elf/dl-load.c: Define __stack_prot.
- * sysdeps/unix/sysv/linux/dl-execstack.c: Don't define
- __stack_prot here, just declare it.
-
-2004-09-20 Jakub Jelinek <jakub@redhat.com>
-
- * configure.in (libc_cv_z_relro): Only set to yes if linker script
- contains DATA_SEGMENT_RELRO_END.
-
-2004-09-20 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-load.c (_dl_map_object_from_fd): Add some error checking.
- Reorder code slightly.
- * elf/rtld.c (dl_main): No need to check whether l_info[DT_HASH]
- is non-null, _dl_setup_hash will do that.
-
-2004-09-19 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/setegid.c [HAVE_PTR__NPTL_SETXID]: Call
- callback to set IDs in all other threads as well.
- * sysdeps/unix/sysv/linux/seteuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setegid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/seteuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setgid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setreuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setregid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setresuid.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/setresgid.c: Likewise.
- * sysdeps/unix/sysv/linux/setuid.c: New file.
- * sysdeps/unix/sysv/linux/setgid.c: New file.
- * sysdeps/unix/sysv/linux/setreuid.c: New file.
- * sysdeps/unix/sysv/linux/setregid.c: New file.
- * sysdeps/unix/sysv/linux/setresuid.c: New file.
- * sysdeps/unix/sysv/linux/setresgid.c: New file.
- * sysdeps/unix/sysv/linux/i386/sysdep.h: Define INTERNAL_SYSCALL_NCS.
- * sysdeps/unix/sysv/linux/ia64/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
- * sysdeps/unix/sysv/linux/sparc/sparc32/setegid.c: Use x86 version.
- * sysdeps/unix/sysv/linux/sparc/sparc32/seteuid.c: Likewise.
- * sysdeps/unix/sysv/linux/sparc/sparc32/setresgid.c: New file.
- * sysdeps/unix/sysv/linux/sparc/sparc32/setresuid.c: New file.
- * sysdeps/unix/sysv/linux/sparc/sparc32/syscalls.list: Remove setresgid
- and setresuid.
- * nscd/aicache.c: Use pthread_seteuid_np instead of seteuid.
- * nscd/grpcache.c: Likewise.
- * nscd/hstcache.c: Likewise.
- * nscd/pwdcache.c: Likewise.
-
- * resolv/res_mkquery.c (res_nmkquery): Fix typo.
-
-2004-09-18 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/dl-execstack.c: Get protection flag
- from memory.
- * elf/dl-load.c (_dl_map_object_from_fd): Add PROT_EXEC flag to
- __stack_flags.
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Do not use
- gethostbyname3_r if we are not looking for the canonical name.
-
- * resolv/res_mkquery.c (res_nmkquery): Randomize request ID every
- time.
-
-2004-09-18 Roland McGrath <roland@redhat.com>
-
- * sysdeps/unix/sysv/linux/waitid.c (do_waitid): Pass fifth argument to
- system call.
-
-2004-09-17 Ulrich Drepper <drepper@redhat.com>
-
- * include/link.h (struct link_map): Add l_used element.
- * sysdeps/generic/ldsodefs.h: Define DL_DEBUG_UNUSED.
- * elf/rtld.c (process_dl_debug): Recognize unused.
- (dl_main): When unused debug flag is set check for unused direct
- dependencies.
- When printing dependencies and SONAME starts with /, omit the SONAME =>
- part.
- * elf/dl-lookup.c (_dl_lookup_symbol_x): Mark object in which the
- symbol has been found as used.
- * elf/ldd.bash.in: Add -u option.
-
-2004-09-18 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/nscd_setup_thread.c (setup_thread):
- Do nothing if __NR_set_tid_address is not defined. [BZ #390]
-
-2004-09-17 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c: Use gethostbyname3_r NSS function
- in case it is available.
-
-2004-09-17 Jakub Jelinek <jakub@redhat.com>
-
- * nscd/nscd.c (parse_opt): Write arg string instead of (void *)
- key to the socket.
-
-2004-09-17 Ulrich Drepper <drepper@redhat.com>
-
- * misc/sys/cdefs.h: Define __nonnull using nonnull function attribute
- for gcc 3.3 and higher.
- * io/fcntl.h: Add __nonnull where appropriate.
- * io/ftw.h: Likewise.
- * io/utime.h: Likewise.
- * io/sys/poll.h: Likewise.
- * io/sys/sendfile.h: Likewise.
- * io/sys/stat.h: Likewise.
- * io/sys/statfs.h: Likewise.
- * io/sys/statvfs.h: Likewise.
- * posix/unistd.h: Likewise.
- * catgets/nl_types.h: Likewise.
- * crypt/crypt.h: Likewise.
- * debug/execinfo.h: Likewise.
-
-2004-09-16 Ulrich Drepper <drepper@redhat.com>
-
- * posix/glob.h: Remove cruft to make header usable outside glibc.
- The maintenance headache is too big.
-
- * configure.in: Add test for required SELinux features.
- * config.make.in: Add have-selinux entry.
- * config.h.in: Add HAVE_SELINUX entry.
- * nscd/Makefile (nscd-modules): Add selinux.
- (CFLAGS-selinux.c): Add -fpie.
- Define selinux-LIBS and use in link line.
- * nscd/connections.c (handle_request): Check access SELinux permissions
- before processing request.
- * nscd/nscd.c (main): Initialize selinux_enabled and stop avc thread.
- * nscd/nscd_stat.c: Transmit and print AVC statistics.
- * nscd/selinux.c: New file.
- * nscd/selinux.h: New file.
- Patch mostly by Matthew Rickard <mjricka@epoch.ncsc.mil>.
-
-2004-09-16 Jakub Jelinek <jakub@redhat.com>
-
- * nscd/nscd_helper.c (__nscd_unmap, get_mapping): Use __munmap
- instead of munmap.
-
- * nscd/Makefile (CFLAGS-aicache.c): Set to -fpie.
-
-2004-09-16 Thorsten Kukuk <kukuk@suse.de>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Check
- __nss_not_use_nscd_hosts variable if nscd should be used or not.
-
-2004-09-16 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd_proto.h: Define NSS_NSCD_RETRY.
- Add __nscd_getai prototype.
- * nss/getXXbyYY_r.c: Remove definition of NSS_NSCD_RETRY.
- * nscd/nscd-client.h: Remove __nscd_getai prototype.
- * nscd/nscd_getai.c: Include nscd_proto.h.
-
- * elf/ldd.bash.in: Add support for SELinux environments.
- Patch by Stephen Smalley <sds@epoch.ncsc.mil>.
-
-2004-09-16 Roland McGrath <roland@redhat.com>
-
- * configure.in (--with-headers): Let argument contain a : separated
- list of directories to use, not just one.
- * configure: Regenerated.
-
-2004-09-15 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/fpu/libm-test-ulps: Update.
- * scripts/data/c++-types-alpha-linux-gnu.data: New file.
-
-2004-09-15 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/aicache.c: Prefer using gethostbyname3_r NSS callback to also
- get ttl and canonical name. Use these two values.
- * resolv/Versions: Export _nss_dns_gethostbyname3_r from libnss_dns.
- * resolv/nss_dns/dns-host.c (getanswer_r): Take two new parameters.
- If nonnull fill with TTL and pointer to canonical name respectively.
- (_nss_dns_gethostbyaddr_r): Pass NULL in new parameters of getanswer_r.
- (_nss_dns_gethostbyname2_r): Just wrapper around
- _nss_dns_gethostbyname3_r.
- (_nss_dns_gethostbyname3_r): Renamed from _nss_dns_gethostbyname2_r.
- Take two new parameters which are passed to getanswer_r.
-
- * nscd/Makefile (routines): Add nscd_getai.
- (nscd-modules): Add aicache.
- * nscd/aicache.c: New file.
- * nscd/nscd_getai.c: New file.
- * nscd/cache.c (prune_cache): Handle GETAI request type.
- * nscd/connections.c: Add GETAI support in request handling.
- * nscd/nscd-client.h (request_type): Add GETAI.
- Define ai_response_header and struct nscd_ai_result types.
- (struct datahead): Add aidata field.
- Declare __nscd_getai.
- * nscd/nscd.c: Add getaddrinfo definition to catch problems.
- * nscd/nscd.h: Declare addhstai and readdhstai.
-
- * sysdeps/posix/getaddrinfo.c: Add support for using cached results.
- * posix/Makefile (CFLAGS-getaddrinfo.c): Add -DUSE_NSCD.
-
- * nscd/nscd-client.h (struct datahead): Use uint8_t instead of bool.
-
-2004-09-14 Ulrich Drepper <drepper@redhat.com>
-
- * misc/sys/cdefs.h: Remove debugging text from __P and __PMT.
-
-2004-09-13 Ulrich Drepper <drepper@redhat.com>
-
- * misc/sys/cdefs.h: Restore old definition of __P.
- * csu/munch.awk: Remove uses of __P and __PMT.
- * gmon/gmon.c: Likewise.
- * hesiod/hesiod.h: Likewise.
- * include/stdio.h: Likewise.
- * io/fts.c: Likewise.
- * libio/genops.c: Likewise.
- * libio/libioP.h: Likewise.
- * libio/fileops.c: Likewise.
- * libio/iolibio.h: Likewise.
- * libio/libio.h: Likewise.
- * libio/memstream.c: Likewise.
- * libio/oldfileops.c: Likewise.
- * libio/oldstdfiles.c: Likewise.
- * libio/iopopen.c: Likewise.
- * libio/vsnprintf.c: Likewise.
- * libio/vswprintf.c: Likewise.
- * libio/wgenops.c: Likewise.
- * libio/oldiopopen.c: Likewise.
- * locale/programs/xmalloc.c: Likewise.
- * locale/programs/xstrdup.c: Likewise.
- * malloc/mtrace.c: Likewise.
- * misc/getttyent.c: Likewise.
- * misc/getusershell.c: Likewise.
- * nis/rpcsvc/ypupd.h: Likewise.
- * posix/fnmatch.h: Likewise.
- * posix/glob.h: Likewise.
- * resolv/arpa/nameser.h: Likewise.
- * resolv/gethnamaddr.c: Likewise.
- * resolv/resolv.h: Likewise.
- * resolv/inet_net_ntop.c: Likewise.
- * resolv/inet_net_pton.c: Likewise.
- * resolv/res_init.c: Likewise.
- * resolv/nss_dns/dns-network.c: Likewise.
- * stdio-common/vfprintf.c: Likewise.
- * stdio-common/reg-printf.c: Likewise.
- * sysdeps/generic/chflags.c: Likewise.
- * sysdeps/generic/fchflags.c: Likewise.
- * sysdeps/generic/glob.c: Likewise.
- * sysdeps/generic/printf_fphex.c: Likewise.
- * sysdeps/generic/memcmp.c: Likewise.
- * sysdeps/generic/memcopy.h: Likewise.
- * sysdeps/generic/morecore.c: Likewise.
- * sysdeps/generic/sstk.c: Likewise.
- * sysdeps/posix/sigvec.c: Likewise.
- * sysdeps/posix/ttyname_r.c: Likewise.
- * sysdeps/standalone/arm/bits/errno.h: Likewise.
- * sysdeps/standalone/standalone.h: Likewise.
- * sysdeps/standalone/i386/force_cpu386/brdinit.c: Likewise.
- * sysdeps/standalone/i386/force_cpu386/_exit.c: Likewise.
- * sysdeps/unix/arm/start.c: Likewise.
- * sysdeps/unix/bsd/sigaction.c: Likewise.
- * sysdeps/unix/bsd/sun/m68k/sigtramp.c: Likewise.
- * sysdeps/unix/bsd/sun/sparc/sigtramp.c: Likewise.
- * sysdeps/unix/bsd/sun/sunos4/wait4.c: Likewise.
- * sysdeps/unix/bsd/ultrix4/mips/sigvec.c: Likewise.
- * sysdeps/unix/bsd/ultrix4/sysconf.c: Likewise.
- * sysdeps/unix/sparc/start.c: Likewise.
- * sysdeps/unix/sysv/getdents.c: Likewise.
- * sysdeps/unix/sysv/irix4/fpathconf.c: Likewise.
- * sysdeps/unix/sysv/irix4/getgroups.c: Likewise.
- * sysdeps/unix/sysv/irix4/getpriority.c: Likewise.
- * sysdeps/unix/sysv/irix4/getrusage.c: Likewise.
- * sysdeps/unix/sysv/irix4/pathconf.c: Likewise.
- * sysdeps/unix/sysv/irix4/setgroups.c: Likewise.
- * sysdeps/unix/sysv/irix4/sigtramp.c: Likewise.
- * sysdeps/unix/sysv/irix4/start.c: Likewise.
- * sysdeps/unix/sysv/irix4/sysconf.c: Likewise.
- * sysdeps/unix/sysv/sco3.2.4/__setpgid.c: Likewise.
- * sysdeps/unix/sysv/sco3.2.4/getgroups.c: Likewise.
- * sysdeps/unix/sysv/sysv4/__getpgid.c: Likewise.
- * sysdeps/unix/sysv/sysv4/__setpgid.c: Likewise.
- * sysdeps/unix/sysv/sysv4/getpgid.c: Likewise.
- * sysdeps/unix/sysv/sysv4/setpgid.c: Likewise.
- * sysdeps/unix/sysv/sysv4/sethostname.c: Likewise.
- * sysdeps/unix/sysv/sysv4/setsid.c: Likewise.
- * sysdeps/unix/sysv/sysv4/sysconf.c: Likewise.
- * sysdeps/unix/sysv/sysv4/waitpid.c: Likewise.
- * sysdeps/unix/sysv/sysv4/solaris2/getdents.c: Likewise.
- * time/tzset.c: Likewise.
- * time/strftime_l.c: Likewise.
- * time/strptime_l.c: Likewise.
- * crypt/md5.h: Likewise.
+ (__novec_swapcontext, __swapcontext): Likewise.
-2004-09-13 Andreas Jaeger <aj@suse.de>
+2005-02-07 Ulrich Drepper <drepper@redhat.com>
- * configure.in: Support GCC 4.x.
- * configure: Regenerated.
+ * sysdeps/unix/sysv/linux/i386/sysdep.h (check_consistency): Define.
-2004-09-13 Thorsten Kukuk <kukuk@suse.de>
+2005-02-07 Jakub Jelinek <jakub@redhat.com>
- * nscd/nscd_stat.c: Don't access dbs[cnt].head for disabled services.
+ [BZ #741]
+ * nscd/nscd.c (termination_handler): Avoid segfault if some database
+ is not enabled.
- * nscd/nscd.init: Fix path to socket.
+ [BZ #741]
+ * nscd/nscd_getai.c (__nscd_getai): If ai_resp->found == -1, set
+ __nss_not_use_nscd_hosts and return -1.
+ * nscd/nscd_initgroups.c (__nscd_getgrouplist): If
+ initgr_resp->found == -1, set __nss_not_use_nscd_group and return -1.
+ Avoid leaking sockets.
-2004-09-12 Ulrich Drepper <drepper@redhat.com>
+2005-02-07 Jakub Jelinek <jakub@redhat.com>
- * nscd/nscd_helper.c (get_mapping): Correctly check cmsg length.
- Avoid file descriptor leak in case of size mismatch.
+ [BZ #742]
+ * nscd/nscd.init (reload): Print Reloading nscd: before and a newline
+ after the status string printed by killproc.
- * nscd/nscd-client.h: Fix database structure layout for biarch.
- * nscd/mem.c (gc): Add casts to avoid warnings.
+2005-02-02 Alfred M. Szmidt <ams@gnu.org>
- * nss/getent.c: Don't preconstruct help message. Do it only when
- needed.
+ [BZ #671]
+ * sysdeps/generic/syslog.c (send_flags) [!send_flags]: Define it.
- * locale/programs/locale.c: Simplify help message printing.
+2005-01-28 Martin Schwidefsky <schwidefsky@de.ibm.com>
-2004-09-12 Roland McGrath <roland@frob.com>
+ [BZ #743]
+ * sysdeps/s390/bits/string.h (strlen, strncpy, strcat, strncat,
+ strncat, memchr, strcmp): Add missing memory clobber.
- * sysdeps/mach/hurd/i386/init-first.c (init1) [! SHARED]: Add decls
- missing in last change.
+2005-01-27 Jakub Jelinek <jakub@redhat.com>
-2004-09-11 Thorsten Kukuk <kukuk@suse.de>
+ * stdlib/tst-fmtmsg.c: Include stdlib.h. [BZ #731]
+ * stdio-common/tst-fmemopen2.c: Include string.h. [BZ #730]
+ * posix/execvp.c: Include stdbool.h.
- * nis/nss_compat/compat-grp.c: Check that buflen is greater than zero
- before writing data into the buffer with negative offset.
- * nis/nss_compat/compat-initgroups.c: Likewise.
- * nis/nss_compat/compat-pwd.c: Likewise.
- * nis/nss_compat/compat-spwd.c: Likewise.
+2005-01-26 Ulrich Drepper <drepper@redhat.com>
-2004-09-12 Ulrich Drepper <drepper@redhat.com>
+ [BZ #671]
+ * sysdeps/unix/sysv/linux/kernel-features.h: Found reference to
+ MSG_NOSIGNAL being in 2.2 kernels.
- * misc/syslog.c (vsyslog): Fix copying of PID in case of
- out-of-memory situation. [BZ #365].
+2005-01-26 Jakub Jelinek <jakub@redhat.com>
- * sysdeps/alpha/fpu/bits/mathinline.h: Use __NTH instead of
- __THROW in inline function definitions.
+ [BZ #737]
+ * sysdeps/unix/sysv/linux/i386/sysdep.h
+ (SYSCALL_ERROR_HANDLER_TLS_STORE): Remove unnecessary 0 imm.
- * posix/spawn.h [__USE_GNU]: Define POSIX_SPAWN_USEVFORK.
- * posix/spawnattr_setflags.c: Check whether any unknown bit is set
- in FLAGS parameter and fail if this is the case.
- * sysdeps/posix/spawni.c: Use vfork if POSIX_SPAWN_USEVFORK flag is
- set.
+ [BZ #693]
+ * posix/regex_internal.h (DUMMY_CONSTRAINT): Rename to...
+ (WORD_DELIM_CONSTRAINT): ...this.
+ (NOT_WORD_DELIM_CONSTRAINT): Define.
+ (re_context_type): Add INSIDE_NOTWORD and NOT_WORD_DELIM,
+ change WORD_DELIM to use WORD_DELIM_CONSTRAINT.
+ * posix/regcomp.c (peek_token): For \B create NOT_WORD_DELIM
+ anchor instead of INSIDE_WORD.
+ (parse_expression): Handle NOT_WORD_DELIM constraint.
+ * posix/bug-regex19.c (tests): Adjust tests that relied on \B
+ being inside word instead of not word delim.
+ * posix/tst-rxspencer.c (mb_frob_pattern): Don't frob escaped
+ characters.
+ * posix/rxspencer/tests: Add some new tests.
- * nscd/pwdcache.c (cache_addpw): Sync also negative results to disk.
- * nscd/grpcache.c (cache_addgr): Likewise.
- * nscd/hstcache.c (cache_addhst): Likewise.
+2005-01-25 Roland McGrath <roland@redhat.com>
-2004-09-11 Roland McGrath <roland@frob.com>
+ [BZ #671]
+ * sysdeps/generic/syslog.c [NO_SIGPIPE]: Protect sigpipe_handler decl.
- * sysdeps/mach/hurd/i386/init-first.c (init1) [! SHARED]:
- Set _dl_phdr and _dl_phnum.
- (init1): When bootstrap task, bail early and never examine *D.
+2005-01-23 Roland McGrath <roland@redhat.com>
-2004-09-11 Alfred M. Szmidt <ams@kemisten.nu>
+ [BZ #737]
+ * sysdeps/i386/Makefile (defines): If -mno-tls-direct-seg-refs appears
+ in $(CFLAGS), add -DNO_TLS_DIRECT_SEG_REFS.
+ * sysdeps/unix/sysv/linux/i386/sysdep.h [USE___THREAD]
+ (SYSCALL_ERROR_HANDLER) [NO_TLS_DIRECT_SEG_REFS]: Load thread pointer
+ from %gs:0 and add to that value, rather than direct %gs:OFFSET access.
+ * sysdeps/unix/i386/sysdep.S [NO_TLS_DIRECT_SEG_REFS]: Likewise.
- * sysdeps/mach/hurd/i386/tls.h (__i386_set_gdt) [!HAVE_I386_SET_GDT]:
- Cast THR, SEL and DESC to `void'.
+2005-01-25 Jakub Jelinek <jakub@redhat.com>
-2004-09-11 Ulrich Drepper <drepper@redhat.com>
+ [BZ #731]
+ * stdlib/fmtmsg.c (addseverity): Remove new_string variable.
+ (free_mem): Don't free string.
+ * stdlib/tst-fmtmsg.c: Include string.h.
+ (main): Add some more tests.
- * nscd/connections.c (nscd_run): Call setup_thread only for enabled
- databases.
+2005-01-25 Andreas Schwab <schwab@suse.de>
- * sysdeps/unix/bsd/bsd4.4/bits/socket.h: Use __NTH for __cmsg_nxthdr.
-
-2004-09-10 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd.c (pagesize_m1): New variable.
- (main): Initialize it.
- * nscd/nscd.h: Declare pagesize_m1.
- * nscd/hstcache.c: Pass correctly aligned address to msync.
- * nscd/grpcache.c: Likewise.
- * nscd/pwdcache.c: Likewise.
-
-2004-09-10 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S: Decrement
- __nwaiters. If pthread_cond_destroy has been called and this is
- the last waiter, signal pthread_cond_destroy caller and avoid
- using the pthread_cond_t structure after unlock.
- * sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S: Likewise.
-
-2004-09-10 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/kernel-features.h: Don't define
- __ASSUME_CLONE_STOPPED.
-
-2004-09-10 Jakub Jelinek <jakub@redhat.com>
-
- * misc/sys/cdefs.h (__REDIRECT_NTH): Change order of __THROW and
- __asm__ for C++. [BZ #377]
-
-2004-09-10 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd_stat.c: Improve output by also printing .shared and
- .persistent.
-
- * nscd/connections.c: Allow cache sharing to be really disabled.
-
-2004-09-10 Jakub Jelinek <jakub@redhat.com>
-
- * malloc/malloc.c (_int_free): Only do arena boundary check for
- contiguous arenas.
-
-2004-09-10 Kazuhiro Inaoka <inaoka.kazuhiro@renesas.com>
-
- * stdlib/longlong.h [__M32R__] (add_ssaaaa, sub_ddmmss): Fix broken
- instruction operands.
- * elf/elf.h: Add R_M32R_* relocs.
-
-2004-09-09 Ulrich Drepper <drepper@redhat.com>
-
- * misc/sys/cdefs.h: Despite what the gcc manual says, gcc 3.2
- seems not to support the nothrow attribute. Use it only for gcc
- 3.3 and higher.
-
- * malloc/hooks.c (top_check): Print top chunk corruption as normal
- error message.
-
- * malloc/malloc.c (malloc_printerr): Don't make informational
- message look like error message.
-
-2004-09-09 Andreas Jaeger <aj@suse.de>
-
- * nscd/Makefile (CFLAGS-nscd_setup_thread.c): Set to -fpie.
-
-2004-09-08 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/bits/socket.h (__SCM_CONNECT): Removed.
-
- * malloc/malloc.c (_int_free): Add inexpensive double free and
- memory corruption tests.
- (malloc_printf_nc): Renamed to malloc_printerr. Second parameter
- is no format string anymore. Don't use stdio. Adjust all callers.
- * malloc/hooks.c: Adjust malloc_printf_nc callers.
-
-2004-09-08 Roland McGrath <roland@redhat.com>
-
- * malloc/mcheck.c: Don't use __P.
- Use prototype definitions for static functions.
-
-2004-09-08 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/ia64/fpu/bits/mathinline.h: Use __NTH instead of __THROW.
-
-2004-09-08 Ulrich Drepper <drepper@redhat.com>
- Jakub Jelinek <jakub@redhat.com>
-
- * nscd/nscd-client.h: Add a few #includes.
-
- * nscd/Makefile (nscd-modules): Add nscd_setup_thread.
- * nscd/connections.c (nscd_run): Call setup_thread for maintenance
- threads.
- * nscd/nscd-client.h (struct database_pers_head): Add
- nscd_certainly_running field.
- * nscd/nscd.h: Declare setup_thread.
- * nscd/nscd_helper.c (__nscd_get_map_ref): Avoid the time test if
- nscd_certainly_running is nonzero.
- * sysdeps/generic/nscd_setup_thread.c: New file.
- * sysdeps/unix/sysv/linux/nscd_setup_thread.c: New file.
-
-2004-09-08 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/connections.c: Implement r/o sharing of nscd's cache with client
- processes via shared memory.
- * nscd/nscd-client.h: Likewise.
- * nscd/nscd.h: Likewise.
- * nscd/nscd_conf.c: Likewise.
- * nscd/nscd_getgr_r.c: Likewise.
- * nscd/nscd_getpw_r.c: Likewise.
- * nscd/nscd_gethst_r.c: Likewise.
- * nscd/nscd.conf: Add new config parameters.
- * nscd/Makefile (aux): Add nscd_helper.
- * nscd/nscd_helper.c: New file.
- * nscd/mem.c (gc): Indicate beginning and end of the gc cycle.
-
- * nscd/hstcache.c: Simplify a lot. We cache only the request itself,
- no derived information.
- * nscd/connections.c (nscd_init): Fix bug in testing size of the persistent.
-
- * nis/Makefile (aux): Add nis_hash.
- * nis/nis_hash.c: New file. Split out from nis_util.c.
- * nis/nis_util.c: Move __nis_hash code in separate file.
-
- * csu/tst-atomic.c: Improve atomic_increment_val test which would
- not have found a ppc bug.
-
- * sysdeps/s390/fpu/bits/mathinline.h: Remove unnecessary includes.
-
- * malloc/arena.c: Remove __MALLOC_P uses.
- * malloc/malloc.c: Likewise.
-
- * malloc/mtrace.c: Remove __P uses.
- * malloc/mcheck-init.c: Likewise.
-
-2004-09-07 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/powerpc/powerpc64/configure.in: New file.
- * config.h.in (USE_PPC64_OVERLAPPING_OPD): Add.
- * configure.in (HAVE_ASM_GLOBAL_DOT_NAME): Remove.
- * sysdeps/powerpc/powerpc64/sysdep.h: Formatting.
- (OPD_ENT, BODY_LABEL, ENTRY_1, ENTRY_2, END_2, DOT_PREFIX,
- BODY_PREFIX): Define.
- (ENTRY, DOT_LABEL, END, TRACEBACK, END_GEN_TB, EALIGN): Support
- HAVE_ASM_GLOBAL_DOT_NAME or no dot symbols,
- USE_PPC64_OVERLAPPING_OPD or never overlapping .opd entries.
- * sysdeps/powerpc/powerpc64/dl-machine.h: Include sysdep.h.
- (TRAMPOLINE_TEMPLATE, RTLD_START): Use the new sysdep.h macros.
-
-2004-09-07 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/malloc.h: Don't define __THROW if it is already defined.
-
- * sysdeps/powerpc/bits/atomic.h (atomic_increment): Define.
- (atomic_decrement): Define.
-
- * sysdeps/powerpc/bits/atomic.h: Implement atomic_increment_val and
- atomic_decrement_val.
- * sysdeps/powerpc/powerpc32/bits/atomic.h: Likewise.
- * sysdeps/powerpc/powerpc64/bits/atomic.h: Likewise.
-
- * csu/tst-atomic.c (do_test): Add tests of atomic_increment_val
- and atomic_decrement_val.
-
- * include/atomic.h: Define atomic_increment_val, atomic_decrement_val,
- and atomic_delay if not already defined.
- * sysdeps/i386/i486/bits/atomic.h: Define atomic_delay.
- * sysdeps/x86_64/bits/atomic.h: Likewise.
-
- * misc/sys/cdefs.h (__NTH): New macro.
- (__THROW): Define using nothrow attribute for C code and gcc >= 3.2.
- (__REDIRECT_NTH): New macro.
- * argp/argp.h: Use __NTH and __REDIRECT_NTH where necessary.
- * ctype/ctype.h: Likewise.
- * dirent/dirent.h: Likewise.
- * io/fcntl.h: Likewise.
- * io/sys/sendfile.h: Likewise.
- * io/sys/stat.h: Likewise.
- * io/sys/statfs.h: Likewise.
- * io/sys/statvfs.h: Likewise.
- * libio/bits/stdio.h: Likewise.
- * misc/sys/mman.h: Likewise.
- * posix/unistd.h: Likewise.
- * resource/sys/resource.h: Likewise.
- * rt/aio.h: Likewise.
- * signal/signal.h: Likewise.
- * stdlib/stdlib.h: Likewise.
- * string/argz.h: Likewise.
- * string/string.h: Likewise.
- * sysdeps/generic/inttypes.h: Likewise.
- * sysdeps/i386/fpu/bits/mathinline.h: Likewise.
- * sysdeps/powerpc/fpu/bits/mathinline.h: Likewise.
- * sysdeps/s390/fpu/bits/mathinline.h: Likewise.
- * sysdeps/x86_64/fpu/bits/mathinline.h: Likewise.
- * sysdeps/unix/sysv/linux/bits/socket.h: Likewise.
- * sysdeps/unix/sysv/linux/bits/sys/sysmacros.h: Likewise.
- * wcsmbs/wchar.h: Likewise.
-
- * sysdeps/generic/glob.c: Use __PMT instead of __P where appropriate.
-
- * resolv/gethnamaddr.c (getanswer): Remove __P use in variable
- definition.
-
- * io/sys/poll.h: Remove __THROW from poll prototype, it's a
- cancellation point.
-
- * io/fts.c (fts_open): Remove uses of __P.
-
- * include/stdlib.h: No need to use __THROW in this header.
-
-2004-09-06 Roland McGrath <roland@frob.com>
-
- * sysdeps/mach/hurd/dl-sysdep.c (__writev): Don't use assert on FD
- validity, since __assert_fail gets to here anyway. Just fail.
-
-2004-09-06 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Fix problem with
- AF_UNSPEC lookup with AI_CANONNAME of name which has only IPv6
- addresses.
-
-2004-09-05 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/fpu/fraiseexcpt.c: Remove file.
- * sysdeps/unix/sysv/linux/kernel-features.h
- (__ASSUME_IEEE_RAISE_EXCEPTION): New.
- * sysdeps/unix/sysv/linux/alpha/fraiseexcpt.c: New file.
- * sysdeps/unix/sysv/linux/alpha/kernel_sysinfo.h: New file.
- * sysdeps/unix/sysv/linux/alpha/ieee_get_fp_control.S: Use it.
- * sysdeps/unix/sysv/linux/alpha/ieee_set_fp_control.S: Likewise.
-
-2004-09-05 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/div.S: Save and restore FPCR around fp operations.
- * sysdeps/alpha/divl.S, sysdeps/alpha/divq.S, sysdeps/alpha/divqu.S,
- sysdeps/alpha/ldiv.S, sysdeps/alpha/reml.S, sysdeps/alpha/remq.S,
- sysdeps/alpha/remqu.S: Likewise.
- * sysdeps/alpha/div_libc.h (FRAME): Increase to 64.
-
-2004-09-05 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/cache.c (cache_add): Correctly log GETHOSTBYADDR and
- GETHOSTBYADDRv6 requests.
-
-2004-09-04 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/pwdcache.c (cache_addpw): Use correct key length in
- cache_add calls.
- * nscd/grpcache.c (cache_addgr): Likewise.
-
-2004-09-03 Alfred M. Szmidt <ams@kemisten.nu>
-
- * sysdeps/mach/hurd/i386/tls.h (THREAD_DTV): Changed type of _DTV
- to `dtv_t *'.
-
-2004-09-03 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd.c (parse_opt): Use writev instead of two writes for
- invalidate command.
-
-2004-09-02 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/connections.c (nscd_run): Check early for invalid request types.
-
-2004-09-02 Roland McGrath <roland@frob.com>
-
- * sysdeps/mach/hurd/i386/tls.h (TLS_INIT_TP_EXPENSIVE): New macro.
- (INSTALL_NEW_DTV, THREAD_DTV): Rewritten to fetch the right word.
- (THREAD_SELF): New macro.
-
-2004-09-02 Steven Munroe <sjmunroe@us.ibm.com>
-
- [BZ #357]
- * stdlib/tst-setcontext.c (test_stack): Added test for stack clobber.
- (main): Call test_stack.
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S
- (__getcontext): Push stack frame then save parms in local frame.
- Improve instruction scheduling.
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S
- (__swapcontext): Likewise.
-
-2004-09-01 Andreas Schwab <schwab@suse.de>
-
- * sysdeps/unix/sysv/linux/ia64/sys/ucontext.h [g++ >= 3.5]: Use
- __builtin_offsetof.
-
-2004-09-01 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #361]
- * posix/fnmatch_loop.c (FCT): For backslash between brackets, branch
- to normal_bracket after fetching the next character.
- * posix/tst-fnmatch.input: Add 25 new tests.
- Reported by Markus Oberhumer <markus@oberhumer.com>.
-
-2004-09-01 Ulrich Drepper <drepper@redhat.com>
-
- * elf/rtld.c (dl_main): First check existence of ld.so.preload
- with access.
-
-2004-09-01 Roland McGrath <roland@redhat.com>
-
- * sysdeps/unix/sysv/linux/bits/waitflags.h
- (WSTOPPED, WEXITED, WCONTINUED, WNOWAIT): New macros.
- * sysdeps/unix/sysv/linux/kernel-features.h (__ASSUME_WAITID_SYSCALL):
- New macro.
- * sysdeps/unix/sysv/linux/waitid.c: New file. Use new syscall when
- available, or fall back to the waitpid-based generic code.
-
-2004-08-14 Alfred M. Szmidt <ams@kemisten.nu>
-
- * sysdeps/mach/hurd/i386/init-first.c (_hurd_stack_setup): Let gcc
- clobber the `ebp' register.
- * sysdeps/mach/hurd/i386/Makefile (CFLAGS-init-first.c): Removed.
- Reverts change from 2004-05-07 by Jeroen Dekkers.
-
- * sysdeps/mach/hurd/i386/init-first.c (init): Changed the type of
- NEWSP from `void *' to `int *'. Changed all casts accordingly.
-
-2004-08-31 Jakub Jelinek <jakub@redhat.com>
-
- * wcsmbs/wcsmbsload.c (__wcsmbs_getfct): Move attribute_hidden
- before return type.
- * locale/localename.c (__current_locale_name): Likewise.
-
-2004-08-31 Jakub Jelinek <jakub@redhat.com>
-
- * elf/ldconfig.c (parse_conf): Add prefix argument, prepend it
- before arguments to add_dir and pass to parse_conf_include.
- (parse_conf_include): Add prefix argument, pass it down to
- parse_conf.
- (main): Call arch_startup. Adjust parse_conf caller.
- Call add_arch_dirs.
- * sysdeps/generic/dl-cache.h (arch_startup, add_arch_dirs): Define.
- * sysdeps/unix/sysv/linux/i386/dl-cache.h: New file.
- * sysdeps/unix/sysv/linux/ia64/dl-cache.h (EMUL_HACK, arch_startup,
- add_arch_dirs): Define.
- * sysdeps/unix/sysv/linux/ia64/ldd-rewrite.sed: Prepend
- /emul/ia32-linux before the 32-bit ld.so pathname.
- * sysdeps/unix/sysv/linux/ia64/dl-procinfo.c: New file.
- * sysdeps/unix/sysv/linux/ia64/dl-procinfo.h: New file.
-
-2004-08-30 Roland McGrath <roland@frob.com>
-
- * scripts/extract-abilist.awk: If `lastversion' variable defined, omit
- later sets from output.
- * Makerules (check-abi): Pass option to set that with value of
- LIB-abi-frozen variable if one is set.
-
- * abilist/libcidn.abilist: New file (empty).
-
-2004-08-30 Jakub Jelinek <jakub@redhat.com>
-
- * posix/bits/posix1_lim.h (_POSIX_CHILD_MAX, _POSIX_OPEN_MAX): If
- not __USE_XOPEN2K, use the Unix98 mandated values.
-
-2004-08-27 Roland McGrath <roland@redhat.com>
-
- * configure.in (usetls): Default to yes.
- * configure: Regenerated.
-
-2004-08-26 Roland McGrath <roland@redhat.com>
-
- * configure.in (add_ons_automatic): New variable, set to yes or no
- indicating --enable-add-ons with no explicit list.
- (running add-on fragments): Allow a fragment to modify $libc_add_on
- and have that affect its place in the list of add-ons to use.
- * configure: Regenerated.
-
-2004-08-26 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/cache.c: Major rewrite. The data is now optionally kept in
- a mmaped memory region which is automatically mirrored on disk.
- This implements persistent data storage. The memory handling
- needed to be completely revamped, it now uses a garbage collection
- mechanism instead of malloc.
- * nscd/connections.c: Likewise.
- * nscd/nscd.c: Likewise.
- * nscd/nscd.h: Likewise.
- * nscd/nscd_conf.c: Likewise.
- * nscd/nscd_stat.c: Likewise.
- * nscd/grpcache.c: Likewise.
- * nscd/hstcache.c: Likewise.
- * nscd/pwdcache.c: Likewise.
- * nscd/Makefile: Add rules to build mem.c.
- * nscd/mem.c: New file.
- * nscd/nscd.conf: Describe new configuration options.
-
-2004-08-26 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/unix/sysv/linux/mips/pread.c: Include sgidefs.h only if
- NO_SGIDEFS_H isn't defined.
- * sysdeps/unix/sysv/linux/mips/pwrite.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pread64.c: Likewise.
-
- * sysdeps/unix/sysv/linux/sh/pread.c: Define NO_SGIDEFS_H and
- _MIPS_SIM.
- * sysdeps/unix/sysv/linux/sh/pwrite.c: Likewise.
- * sysdeps/unix/sysv/linux/sh/pread64.c: Likewise.
- * sysdeps/unix/sysv/linux/sh/pwrite64.c: Likewise.
-
-2004-08-26 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/connections.c (nscd_run): atomic_increment was missing.
-
- * sysdeps/gnu/Makefile (libdl-sysdep_routines): Don't add eval.
- Patch by Greg Schafer.
-
-2004-08-25 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/elf/start.S (_start): Use $15 as frame unwind
- instead of $31. Zero $15.
- * sysdeps/unix/sysv/linux/alpha/clone.S (thread_start): Likewise.
-
-2004-08-25 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/powerpc/powerpc64/bits/atomic.h
- (__arch_compare_and_exchange_bool_32_acq): Fix case where oldval
- is negative.
- (__arch_compare_and_exchange_bool_32_rel): Likewise.
-
- * nscd/connections.c: Make socket nonblocking so that threads
- don't get stuck on accept. Fix locking.
-
- * nscd/grpcache.c (cache_addgr): Use copy of original key in hash
- entry with alternative key.
- * nscd/pwdcache.c (cache_addpw): Likewise.
-
-2004-08-25 Richard Sandiford <rsandifo@redhat.com>
-
- * sysdeps/mips/dl-machine.h (_dl_start_user): Don't set
- __libc_stack_end.
-
-2004-08-23 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/gnu/netinet/udp.h: Cosmetic changes. Remove unnecessary
- __BEGIN_DECLS/__END_DECLS.
-
-2004-08-23 Andreas Jaeger <aj@suse.de>
-
- [BZ #341]
- * sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (O_NOATIME): Define.
- * sysdeps/unix/sysv/linux/arm/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/cris/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/hppa/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/i386/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/ia64/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/m68k/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/s390/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/sh/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (O_NOATIME): Likewise.
- * sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h (O_NOATIME): Likewise.
-
-2004-08-21 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/hooks.c (DEFAULT_CHECK_ACTION): Moved to malloc.c.
- (check_action): Likewise.
- When printing error messages, use malloc_printf_nc now instead of
- fiddling with the streams cancellation flag in every place.
- * malloc/malloc.c (DEFAULT_CHECK_ACTION): New definition. Change
- default to 3.
- (check_action): New variable.
- (unlink): Print error message and eventually terminate in case list
- is corrupted.
- (malloc_printf_nc): New function. Use it in _int_free.
- Change proposed by Arjan van de Ven.
-
- * dlfcn/Makefile: Don't build eval.c anymore.
-
-2004-08-20 Roland McGrath <roland@frob.com>
-
- * csu/Makefile ($(objpfx)version-info.h): Use printf in place
- of echo -e for POSIX.2 portability.
- Reported by Paul Jarc <prj@po.cwru.edu>.
-
-2004-08-19 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Minor optimizations in
- list generation.
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Don't use
- getcanonname_r function if AI_CANONNAME flag is not set in
- request.
-
- * nis/nss_compat/compat-initgroups.c (getgrent_next_nss):
- Initialize mysize with limits only if latter is >= 0. Use mysize
- in malloc call.
-
-2004-08-19 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Cast canon to (char *)
- to avoid warning.
-
- * resolv/nss_dns/dns-canon.c (_nss_dns_getcanonname_r): Initialize
- status to NSS_STATUS_UNAVAIL.
-
-2004-08-19 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/powerpc/powerpc64/configure.in: New file.
- * sysdeps/powerpc/powerpc64/configure: Rebuilt.
- * config.h.in (USE_PPC64_OVERLAPPING_OPD): Add.
- * configure.in (HAVE_ASM_GLOBAL_DOT_NAME): Remove.
- * configure: Rebuilt.
- * sysdeps/powerpc/powerpc64/sysdep.h: Formatting.
- (OPD_ENT, BODY_LABEL, ENTRY_1, ENTRY_2, END_2, DOT_PREFIX,
- BODY_PREFIX): Define.
- (ENTRY, DOT_LABEL, END, TRACEBACK, END_GEN_TB, EALIGN): Support
- HAVE_ASM_GLOBAL_DOT_NAME or no dot symbols,
- USE_PPC64_OVERLAPPING_OPD or never overlapping .opd entries.
- * sysdeps/powerpc/powerpc64/dl-machine.h: Include sysdep.h.
- (TRAMPOLINE_TEMPLATE, RTLD_START): Use the new sysdep.h macros.
-
-2004-08-19 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Use h->h_name in the
- canonname lookup since it has the FQDN even if the original NAME
- value has not.
-
-2004-08-18 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Store NAME parameter
- pointer in new variable ORIG_NAME and use this pointer when
- determination of canonical name failed, not the possibly IDN
- translated value of NAME.
-
-2004-08-17 Ulrich Drepper <drepper@redhat.com>
-
- * resolv/resolv.h (RES_DEFAULT): Add RES_NOIP6DOTINT.
- * resolv/res_init.c (res_setoptions): Recognize ip6-dotint option.
- Reset RES_NOIP6DOTINT flag in this case.
-
- * sysdeps/posix/getaddrinfo.c: Fix memory handling of
- ai_canonname.
-
-2004-08-16 Ulrich Drepper <drepper@redhat.com>
-
- * resolv/nss_dns/dns-canon.c (_nss_dns_getcanonname_r): Don't use
- CNAME records, we better follow the chain of CNAME records which
- can be accomplished with A/AAAA lookups.
-
-2004-08-15 Roland McGrath <roland@redhat.com>
-
- * sysdeps/unix/sysv/linux/bits/resource.h (enum __rusage_who):
- Remove __RUSAGE_BOTH constant and RUSAGE_BOTH macro.
- * sysdeps/unix/sysv/linux/alpha/bits/resource.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/resource.h: Likewise.
- * sysdeps/unix/sysv/linux/sparc/bits/resource.h: Likewise.
-
-2004-08-15 Roland McGrath <roland@frob.com>
-
- * sysdeps/i386/i686/Makefile (elide-routines.os): Append hp-timing to
- this, not ...
- (static-only-routines): ... this.
- * sysdeps/ia64/Makefile: Likewise.
- * sysdeps/sparc/sparc32/sparcv9/Makefile: Likewise.
- * sysdeps/sparc/sparc64/Makefile: Likewise.
- * sysdeps/x86_64/Makefile: Likewise.
- * sysdeps/i386/i686/hp-timing.c: Revert copyright terms change.
- * sysdeps/ia64/hp-timing.c: Likewise.
- * sysdeps/sparc/sparc32/sparcv9/hp-timing.c: Likewise.
- * sysdeps/sparc/sparc64/hp-timing.c: Likewise.
-
- * csu/elf-init.c: Update copyright terms including special exception
- for these trivial files, which are statically linked into executables
- that use dynamic linking for the significant library code.
- * io/fstat.c: Likewise.
- * io/fstat64.c: Likewise.
- * io/lstat.c: Likewise.
- * io/lstat64.c: Likewise.
- * io/stat.c: Likewise.
- * io/stat64.c: Likewise.
- * stdlib/atexit.c: Likewise.
- * sysdeps/alpha/elf/initfini.c: Likewise.
- * sysdeps/alpha/elf/start.S: Likewise.
- * sysdeps/arm/elf/start.S: Likewise.
- * sysdeps/cris/elf/start.S: Likewise.
- * sysdeps/generic/initfini.c: Likewise.
- * sysdeps/generic/mknod.c: Likewise.
- * sysdeps/hppa/elf/initfini.c: Likewise.
- * sysdeps/hppa/elf/start.S: Likewise.
- * sysdeps/i386/elf/start.S: Likewise.
- * sysdeps/i386/i686/hp-timing.c: Likewise.
- * sysdeps/ia64/elf/initfini.c: Likewise.
- * sysdeps/ia64/elf/start.S: Likewise.
- * sysdeps/ia64/hp-timing.c: Likewise.
- * sysdeps/m68k/elf/start.S: Likewise.
- * sysdeps/mach/start.c: Likewise.
- * sysdeps/mips/elf/start.S: Likewise.
- * sysdeps/powerpc/powerpc32/elf/start.S: Likewise.
- * sysdeps/powerpc/powerpc64/elf/start.S: Likewise.
- * sysdeps/s390/s390-32/elf/start.S: Likewise.
- * sysdeps/s390/s390-32/initfini.c: Likewise.
- * sysdeps/s390/s390-64/elf/start.S: Likewise.
- * sysdeps/s390/s390-64/initfini.c: Likewise.
- * sysdeps/sh/elf/initfini.c: Likewise.
- * sysdeps/sh/elf/start.S: Likewise.
- * sysdeps/sparc/sparc32/elf/start.S: Likewise.
- * sysdeps/sparc/sparc32/sparcv9/hp-timing.c: Likewise.
- * sysdeps/sparc/sparc64/elf/start.S: Likewise.
- * sysdeps/sparc/sparc64/hp-timing.c: Likewise.
- * sysdeps/standalone/i386/start.S: Likewise.
- * sysdeps/standalone/i960/start.S: Likewise.
- * sysdeps/standalone/m68k/m68020/start.S: Likewise.
- * sysdeps/unix/arm/start.c: Likewise.
- * sysdeps/unix/bsd/osf/alpha/start.S: Likewise.
- * sysdeps/unix/bsd/ultrix4/mips/start.S: Likewise.
- * sysdeps/unix/sparc/start.c: Likewise.
- * sysdeps/unix/start.c: Likewise.
- * sysdeps/unix/sysv/aix/start.s: Likewise.
- * sysdeps/unix/sysv/irix4/start.c: Likewise.
- * sysdeps/x86_64/elf/initfini.c: Likewise.
- * sysdeps/x86_64/elf/start.S: Likewise.
-
-2004-08-15 Roland McGrath <roland@redhat.com>
-
- [BZ #227]
- * sysdeps/unix/sysv/linux/kernel-features.h
- (__ASSUME_BRK_PAGE_ROUNDED): New macro.
- * sysdeps/unix/sysv/linux/dl-sysdep.c (frob_brk)
- [! __ASSUME_BRK_PAGE_ROUNDED]: Adjust the break up if it falls within
- the partial page after the dynamic linker's own data segment.
-
-2004-08-15 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Optimize generation of
- v4-mapped addresses a bit.
- (gethosts): Move alloca out of macro, so that it is done only once.
-
- * sysdeps/posix/getaddrinfo.c (gaih_addrtuple): Change type of
- addr to avoid casts.
- (gethosts): Removed.
- (gethosts2): Renamed to gethosts. Make it usable for family !=
- AF_UNSPEC. Fix AI_V4MAPPED.
- (gaih_inet): Remove use of old gethosts. Always use what used to be
- gethosts2. If entry is found, try to use the same NSS module's
- getcanonname_r function. Use gethostbyaddr for AI_CANONNAME only
- if getcanonname_r was not available. Fix filtering of AI_V4MAPPED
- addresses. Numerous cleanups.
- * resolv/nss_dns/dns-canon.c: New file.
- * resolv/Makefile (libnss_dns-routines): Add dns-canon.
- * resolv/Versions (libnss_dns): Add _nss_dns_getcanonname_r.
-
- * elf/Makefile: Add rules to build and run tst-dlopenrpath.
- * elf/tst-dlopenrpath.c: New file.
- * elf/tst-dlopenrpathmod.c: New file.
-
- * intl/tst-gettext.sh: Adjust for change of de.po file to UTF-8.
- * intl/tst-gettext.c: Likewise.
-
- * nss/getent.c (ahosts_keys_int): Correctly print IPv6 addresses.
-
- * nss/getent.c: Allow queries for getaddrinfo with AF_INET and
- AF_INET6.
-
-2004-08-14 Ulrich Drepper <drepper@redhat.com>
-
- * po/de.po: Update from translation team.
-
-2004-08-14 Roland McGrath <roland@frob.com>
-
- * sysdeps/mach/configure: Regenerated.
- * sysdeps/mach/hurd/configure: Regenerated.
-
-2004-08-14 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (getaddrinfo): If RFC3484 sorting is
- performed, make sure it is still the first entry after sorting
- that has the ai_canonname information.
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Really set ai_canonname
- only in one entry.
-
-2004-08-13 Daniel Jacobowitz <dan@debian.org>
-
- * scripts/output-format.sed: Handle default case of three-argument
- OUTPUT_FORMAT.
-
- * sysdeps/arm/machine-gmon.h (mcount_internal): Mark as
- __attribute_used__.
-
-2004-08-13 Ulrich Drepper <drepper@redhat.com>
-
- * nss/getent.c (ahosts_keys): ai_canonname is NULL for all but the
- first returned entry. Print name only if not NULL.
-
- * nis/nss_nis/nis-netgrp.c: Remove locking by using data in struct
- __netgrent object passed in instead of global variables.
- Optimize.
- * nis/nss_nisplus/nisplus-netgrp.c: Remove locking by using data
- in struct __netgrent object passed in instead of global variables.
- * inet/netgroup.h (struct __netgrent): Add service_user field.
- Move cursor in anonymous union, add new field location to that
- union.
- * inet/getnetgrent_r.c: Extensive rewrite to really enable
- concurrent use of set/get/endnetgrent and innetgr.
- Reported by Chuck Simmons.
-
- * inet/netgroup.h (struct name_list): Replace name pointer with
- zero-sized array.
- * inet/getnetgrent_r.c: Adjust code for change in name_list
- layout. Numerous strdup and free calls removed.
-
- * elf/sprof.c (read_symbols): When comparing aliases, prefer
- non-hidden over hidden symbols and strong over weak symbols
- if both don't start with '_'.
-
- * malloc/malloc.c: Use strong_alias instead of weak_alias wherever
- possible.
-
-2004-08-12 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/bits/resource.h: Define non-standard
- RUSAGE_ enums as __RUSAGE_ and adjust macros accordingly.
- * sysdeps/unix/sysv/linux/alpha/bits/resource.h: Likewise.
- * sysdeps/unix/sysv/linux/sparc/bits/resource.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/resource.h: Likewise.
- Define non-standard RLIMIT_ enums as __RLIMIT_ and adjust macros
- accordingly.
-
-2004-08-11 Andreas Schwab <schwab@suse.de>
-
- * resolv/res_libc.c: Move definition of __res_initstamp ...
- * resolv/res_init.c: ... here.
-
-2004-08-10 GOTO Masanori <gotom@debian.or.jp>
-
- * locale/C-time.c: Change default ERA value from NULL to "".
- * locale/tst-C-locale.c: Add test case for ERA keywords.
-
-2004-08-12 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/bits/resource.h: Define non-standard
- RLIMIT_ enums as __RLIMIT_ and adjust macros accordingly.
- * sysdeps/unix/sysv/linux/alpha/bits/resource.h: Likewise.
- * sysdeps/unix/sysv/linux/sparc/bits/resource.h: Likewise.
-
-2004-08-12 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/bits/resource.h (RLIMIT_SIGPENDING,
- RLIMIT_MSGQUEUE): Add.
- (RLIMIT_NLIMITS, RLIM_NLIMITS): Adjust.
- * sysdeps/unix/sysv/linux/alpha/bits/resource.h (RLIMIT_SIGPENDING,
- RLIMIT_MSGQUEUE, RLIMIT_NLIMITS): Add.
- (RLIM_NLIMITS): Adjust.
- * sysdeps/unix/sysv/linux/sparc/bits/resource.h (RLIMIT_SIGPENDING,
- RLIMIT_MSGQUEUE, RLIMIT_NLIMITS): Add.
- (RLIM_NLIMITS): Adjust.
- * sysdeps/unix/sysv/linux/mips/bits/resource.h (RLIMIT_SIGPENDING,
- RLIMIT_MSGQUEUE, RLIMIT_NLIMITS): Add.
- (RLIM_NLIMITS): Adjust.
-
-2004-08-12 Jakub Jelinek <jakub@redhat.com>
-
- * resolv/res_query.c (__libc_res_nsearch): Protect the debugging
- printf with #ifdef DEBUG and RES_DEBUG check.
-
- * sysdeps/unix/sysv/linux/bits/shm.h: Move __END_DECLS after
- __USE_MISC #endif.
- * sysdeps/generic/bits/shm.h: Add __BEGIN_DECLS for __getpagesize
- declaration.
- * sysdeps/gnu/bits/shm.h: Likewise.
- * sysdeps/unix/sysv/linux/alpha/bits/shm.h: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/bits/shm.h: Likewise.
- * sysdeps/unix/sysv/linux/s390/bits/shm.h: Likewise.
- * sysdeps/unix/sysv/linux/sparc/bits/shm.h: Likewise.
- * sysdeps/unix/sysv/linux/x86_64/bits/shm.h: Likewise.
-
-2004-08-12 Ulrich Drepper <drepper@redhat.com>
-
- * po/ca.po: Update from translation team.
-
-2004-08-11 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/bits/shm.h: Add __BEGIN_DECLS for
- __getpagesize declaration.
-
-2004-08-11 Roland McGrath <roland@redhat.com>
-
- * configure.in (libc_cv_cpp_asm_debuginfo): Add missing braces around
- commands inside &&.
- Reported by Andreas Schwab <schwab@suse.de>.
- * configure: Regenerated.
-
- * posix/tst-waitid.c (do_test): Ignore SIGCHLD before cleanup SIGKILL.
-
-2004-08-11 Ulrich Drepper <drepper@redhat.com>
-
- * time/tzset.c (tzset_internal): Add new parameter which is
- nonzero if called through tzset. Use TZDEFAULT name including
- name comparison if the new parameter is zero. This means implicit
- tzset calls will not cause files to be opened and read by tzfile.c
- all the time.
-
-2004-08-11 Jakub Jelinek <jakub@redhat.com>
-
- * rt/tst-timer2.c (do_test): If timer_create fails, just continue.
- * rt/tst-timer4.c (do_test): If one of the timer_create calls fails,
- return 1 immediately.
-
- * time/tzfile.c (__tzfile_read): Free transitions only if it will
- not be reused.
-
- * sysdeps/ieee754/dbl-64/mpa.c: Include <sys/param.h>.
- * sysdeps/ieee754/dbl-64/mpa.h (MAX, MIN): Macros removed.
-
- * stdio-common/tst-popen.c: Include <string.h>.
-
- * resolv/res_send.c (__libc_res_nsend): Only define TMPBUF #if DEBUG.
-
- * sysdeps/pthread/aio_misc.c (handle_fildes_io): Remove noreturn
- attribute. Return NULL instead of calling pthread_exit at the end.
-
-2004-08-11 Roland McGrath <roland@redhat.com>
-
- * iconvdata/testdata/ISO-2022-JP-3: Regenerated.
-
-2004-07-23 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #284]
- * include/features.h (_POSIX_SOURCE, _POSIX_C_SOURCE): Define
- if _XOPEN_SOURCE >= 500 even if __STRICT_ANSI__ is defined.
-
-2004-08-10 Alfred M. Szmidt <ams@kemisten.nu>
-
- * sysdeps/generic/bits/in.h (struct ip_mreq): Remove definition.
-
-2004-08-10 Jakub Jelinek <jakub@redhat.com>
-
- * libio/bits/stdio.h (fread_unlocked): Cast 0 to (size_t).
- (fwrite_unlocked): When checking if size * n is <= 8, cast each
- argument to size_t individually. Cast n to (void) instead of
- (size_t), surround with (), return (size_t) 0 if one of n or size
- is 0. [BZ #316]
- * stdio-common/Makefile (tests): Add tst-unlockedio.
- * stdio-common/tst-unlockedio.c: New test.
-
-2004-08-09 Roland McGrath <roland@frob.com>
-
- * manual/install.texi (Supported Configurations): Replace bug-glibc
- mention with web URL.
- * INSTALL: Regenerated.
- * locale/iso-3166.def: Likewise, in comment.
- * locale/iso-4217.def: Likewise.
- * locale/iso-639.def: Likewise.
- * posix/cpio.h: Remove bug reporting comment.
-
-2004-08-09 Ulrich Drepper <drepper@redhat.com>
-
- * libio/bits/stdio.h (fread_unlocked): Add a couple of (size_t)
- casts to handle funny calls with floating point argument values
- and signed values correctly and without warning.
- (fwrite_unlocked): Likewise. [BZ #309]
-
- * malloc/memusage.c (me): Use creat64, not creat.
- * malloc/memusagestat.c: Fix handling of very large sizes. [BZ #285]
- Patch by Guy Maor <guymaor@yahoo.com>.
-
- * elf/ldconfig.c (options): Mark parameter option names as
- translatable. [BZ #253] Patch by Jakub Bogusz <qboosh@pld-linux.org>.
-
- * iconv/gconv_charset.h (strip): Also allow comma which is what is
- used to separate options. [BZ #194]
-
-2004-08-09 Roland McGrath <roland@redhat.com>
-
- * FAQ.in: Refer to web pages instead of bug-glibc.
- * FAQ: Regenerated.
-
- * time/strptime_l.c: #include <stdbool.h>, `bool' used in last change.
-
-2004-08-09 Ulrich Drepper <drepper@redhat.com>
-
- * time/tzset.c (tzset_internal): If TZ is not set do not compare
- old and new tz value since it might be /etc/localtime in both
- cases although the file changed. [BZ #154]
- Patch by Christian Franke <franke@computer.org>.
-
- * time/tzfile.c (__tzfile_read): Determine dev/ino of file.
- Compare with values of previously opened file. Don't do anything
- if they match.
-
-2004-08-08 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-load.c (_dl_map_object): If __RTLD_CALLMAP flag is set,
- reset loader before the actual loading.
- * elf/dl-open.c (dl_open_worker): If file name contains no path
- element determine map of caller. Pass caller map in this case to
- _dl_map_object. Set __RTLD_CALLMAP in mode.
- * include/dlfcn.h (__RTLD_CALLMAP): Define. [BZ #116]
- Patch by Greg Wolodkin <greg@mathworks.com>.
-
- * misc/syslog.c (openlog_internal): Always try both UDP and TCP.
- [BZ #108] Patch mainly by Bjorn Andersson <bjorn@iki.fi>.
-
- * configure.in: Also recognize i786. [BZ #106]
- Patch by <pluto@pld-linux.org>.
-
- * resolv/res_query.c (__libc_res_nsearch): Correctly test whether
- name contains any dots. [BZ #95]
-
- * resolv/res_send.c: Compiling with DEBUG defined works again.
- * resolv/gethnamaddr.c (dprintf): Renamed to Dprintf. Adjust all
- callers.
-
- * resolv/tst-leaks.c (TIMEOUT): Define so that if no server is
- available the process is not killed. [BZ #41]
-
- * intl/tst-gettext.c (main): Improve some messages. [BZ #33]
-
- * time/strptime_l.c (__strptime_internal): Fix handling of %Ey.
- [BZ #28]
-
- * po/sv.po: Update from translation team.
-
-2004-08-07 Ulrich Drepper <drepper@redhat.com>
-
- * inet/netinet/in.h: Add more const to the setipv4sourcefilter,
- getsourcefilter, and setsourcefilter parameter list.
- * sysdeps/generic/setipv4sourcefilter.c: Likewise.
- * sysdeps/generic/getsourcefilter.c: Likewise.
- * sysdeps/generic/setsourcefilter.c: Likewise.
- * sysdeps/unix/sysv/linux/setipv4sourcefilter.c: Likewise.
- * sysdeps/unix/sysv/linux/getsourcefilter.c: Likewise.
- * sysdeps/unix/sysv/linux/setsourcefilter.c: Likewise.
-
- * po/tr.po: Update from translation team.
-
-2004-08-06 Ulrich Drepper <drepper@redhat.com>
-
- * iconvdata/jisx0213.h (jisx0213_added_in_2004_p): Fix typo.
- Reported by Paolo Bonzini.
-
-2004-08-06 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/ia64/dl-machine.h (elf_machine_fixup_plt): Add
- always_inline.
- * sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_runtime_setup,
- elf_machine_fixup_plt, elf_machine_plt_conflict): Likewise.
-
- * sysdeps/unix/sysv/linux/netatalk/at.h: Include bits/sockaddr.h
- before including linux/atalk.h.
-
- * resolv/res_libc.c: Include atomic.h.
-
- * intl/finddomain.c (free_mem): Rename to...
- (_nl_finddomain_subfreeres): ... this. Add
- __libc_freeres_fn_section.
- * intl/loadmsgcat.c (_nl_unload_domain): Add
- __libc_freeres_fn_section.
- * intl/gettextP.h (_nl_unload_domain): Move into #ifdef _LIBC.
- Add attribute_hidden.
- (_nl_finddomain_subfreeres): New prototype.
- * iconv/gconv_db.c (free_mem): Call _nl_finddomain_subfreeres.
-
-2004-07-30 Guido Guenther <agx@sigxcpu.org>
-
- * nss/getent.c (passwd_keys): Use strtoul instead of isdigit to
- test if the key is numeric or not.
- (group_keys): Likewise.
-
-2004-08-05 Ulrich Drepper <drepper@redhat.com>
-
- * inet/netinet/in.h: Define struct ip_msfilter, IP_MSFILTER_SIZE,
- struct group_filter, and GROUP_FILTER_SIZE.
- * include/sys/socket.h: Declare __getsockopt.
- * sysdeps/unix/sysv/linux/setipv4sourcefilter.c: New file.
- * sysdeps/unix/sysv/linux/getipv4sourcefilter.c: New file.
- * sysdeps/unix/sysv/linux/setsourcefilter.c: New file.
- * sysdeps/unix/sysv/linux/getsourcefilter.c: New file. [BZ #211]
-
- * po/ko.po: Update from translation team.
-
-2004-08-04 Jakub Jelinek <jakub@redhat.com>
-
- * hesiod/hesiod.c (__hesiod_res_get): Use calloc instead of malloc +
- memset.
- (__hesiod_res_set): Free nsaddrs.
-
- * include/resolv.h (__res_maybe_init): Add prototype.
- * resolv/resolv.h (struct __res_state): Add _u._ext.initstamp field.
- * resolv/Versions (libc): Add __res_maybe_init@@GLIBC_PRIVATE.
- * resolv/res_libc.c (__res_initstamp, lock): New variables.
- (res_init): Increase __res_initstamp.
- (__res_maybe_init): New function.
- * resolv/res_init.c (__res_vinit): Initialize _u._ext.initstamp.
- * hesiod/hesiod.c (__hesiod_res_get): Use __res_maybe_init instead
- of RES_INIT check and {res_ninit,__res_ninit,res_init} call.
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Likewise.
- * resolv/nss_dns/dns-host.c (_nss_dns_gethostbyname2_r,
- _nss_dns_gethostbyaddr_r): Likewise.
- * resolv/nss_dns/dns-network.c (_nss_dns_getnetbyname_r,
- _nss_dns_getnetbyaddr_r): Likewise.
- * resolv/gethnamaddr.c (gethostbyname, gethostbyname2,
- gethostbyaddr): Likewise.
- * resolv/res_data.c (fp_nquery, res_mkquery, res_mkupdate,
- res_isourserver, res_sendsigned, res_update, res_search,
- res_querydomain): Likewise.
- * nss/getXXbyYY_r.c (INTERNAL (REENTRANT_NAME)): Likewise.
- * nss/digits_dots.c (__nss_hostname_digits_dots): Likewise.
- * nss/getnssent_r.c (__nss_setent, __nss_endent, __nss_getent_r):
- Likewise.
-
-2004-08-05 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Set ai_family for
- V4-mapped IPv6 addresses and req->ai_family==AF_INET.
- Reported by A. Guru <a.guru@sympatico.ca>.
-
- * po/sv.po: Update from translation team.
- * po/sk.po: Likewise.
- * po/pl.po: Likewise.
-
-2004-08-04 Jakub Jelinek <jakub@redhat.com>
- Ulrich Drepper <drepper@redhat.com>
-
- * wcsmbs/mbsrtowcs_l.c (__mbsrtowcs_l): Don't read more input
- characters than necessary.
-
-2004-08-05 Ulrich Drepper <drepper@redhat.com>
-
- * wcsmbs/Makefile (tests): Add tst-mbsrtowcs.
- * wcsmbs/tst-mbsrtowcs.c: New file.
-
- * po/fr.po: Update from translation team.
-
-2004-08-04 Jakub Jelinek <jakub@redhat.com>
-
- * resolv/inet_pton.c (inet_pton4): Disallow octal numbers. Reported
- by A. Guru <a.guru@sympatico.ca>. [BZ #295]
-
-2004-08-05 Ulrich Drepper <drepper@redhat.com>
-
- * po/nl.po: Update from translation team.
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): Recognize all the IPv4
- numeric address formats inet_addr knows.
- (getaddrinfo): Allow AI_NUMERICSERV flag.
- If neither IPv4 nor IPv6 interface is present we cannot make any
- decision for AI_ADDRCONFIG. Fail if AI_NUMERICSERV is set and the
- string is not just a number. Remove useless freeaddrinfo call.
- * resolv/netdb.h (AI_NUMERICSERV): Define.
- Based on a patch by a.guru@sympatico.ca.
-
-2004-08-04 Jakub Jelinek <jakub@redhat.com>
-
- * stdlib/strfmon_l.c (__vstrfmon_l): Memset whole info structure
- instead of trying to initialize some, but not all, fields one by
- one.
- * stdio-common/printf_size.c (printf_size): Initialize fb_info
- structure with *info instead of trying to initialize some, but not
- all, fields from it.
-
- * nscd/connections.c (handle_request): Check if req->type is in
- LASTDBREQ .. LASTREQ range instead of req.
-
- * locale/programs/linereader.c (lr_create): Initialize
- lr->return_widestr to 0.
-
- * elf/dl-close.c (free_slotinfo): Add __libc_freeres_fn_section.
- (free_mem): Call free_slotinfo just once.
-
- * stdio-common/tst-fmemopen.c (main): Check for MAP_FAILED instead
- of NULL.
-
- * locale/localeinfo.h (_nl_locale_subfreeres): New prototype.
- * locale/setlocale.c (free_category): Add __libc_freeres_fn_section.
- (free_mem): Rename to _nl_locale_subfreeres.
- * iconv/gconv_db.c: Include locale/localeinfo.h.
- (free_derivation, free_modules_db): Add __libc_freeres_fn_section.
- (free_mem): Call _nl_locale_subfreeres.
- * iconv/gconv_dl.c (do_release_all): Add __libc_freeres_fn_section.
-
-2004-08-04 Roland McGrath <roland@frob.com>
-
- * Makeconfig ($(common-objpfx)config.status):
- Fix typo: $(add_ons) -> $(add-ons).
- (Makeconfig-add-on): New variable. When doing $(sysdep-makeconfigs)
- include, use black magic to get it set to an add-on's name during the
- include of the add-on's Makeconfig.
-
- * configure.in: Use variable name `libc_add_on' when sourcing add-on
- configure fragments, so they can refer to this.
- * configure: Regenerated.
-
-2004-08-04 Roland McGrath <roland@redhat.com>
-
- * posix/tst-waitid.c (test_child): Sleep a second before stopping.
- (do_test): Bump sleep to three seconds.
- (sigchld, check_sigchld): New functions.
- (do_test): Handle SIGCHLD and check for getting the right details.
-
- * posix/tst-waitid.c (do_test): Kill the child process when bailing
- out early on some failure.
- [WCONTINUED]: Test WCONTINUED functionality.
-
-2004-08-03 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/connections.c (handle_request): Print more descriptive
- message for invalid request types.
-
-2004-08-02 Jakub Jelinek <jakub@redhat.com>
-
- * iconvdata/ibm932.c (BODY): Avoid binary search for ch >= 0xffff.
- Always treat high as highest number in range + 1.
- * iconvdata/ibm943.c (BODY): Likewise.
-
-2004-07-31 Bruno Haible <bruno@clisp.org>
-
- * iconvdata/JISX0213.TXT: Updated to JISX0213 plane 1 version 2004.
- * iconvdata/jisx0213.c (__jisx0213_to_ucs_main,
- __jisx0213_to_ucs_pagestart, __jisx0213_from_ucs_level1,
- __jisx0213_from_ucs_level2): Regenerated.
- * iconvdata/jisx0213.h (jisx0213_added_in_2004_p): New function.
- * iconvdata/iso-2022-jp-3.c (JISX0213_1_2000_set): Renamed from
- JISX0213_1_set.
- (JISX0213_1_2004_set): New enum value.
- (BODY for FROM_LOOP): Treat ESC $ ( Q like ESC $ ( O.
- (BODY for TO_LOOP): For JISX 0213 plane 1 characters, emit ESC $ ( O
- when possible, ESC $ ( Q when needed.
- * iconvdata/testdata/EUC-JISX0213: Add the 10 new characters.
- * iconvdata/testdata/EUC-JISX0213..UTF8: Update.
- * iconvdata/testdata/SHIFT_JISX0213: Add the 10 new characters.
- * iconvdata/testdata/SHIFT_JISX0213..UTF8: Update.
- * iconvdata/testdata/ISO-2022-JP-3: Add the 10 new JISX0213 characters.
- * iconvdata/testdata/ISO-2022-JP-3..UTF8: Update.
-
-2004-07-22 Bruno Haible <bruno@clisp.org>
-
- * iconvdata/gconv-modules (ISO-8859-7): Add alias ISO_8859-7:2003.
-
-2004-07-29 David S. Miller <davem@redhat.com>
-
- * sysdeps/sparc/sparc64/sparcv9b/memcpy.S (memcpy): Optimize
- better for smaller than 256 byte copies. Also, use only one
- unrolled loop instead of two for the large copy case.
-
-2004-07-30 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/divq.S: Save t3 before it gets clobbered.
- * sysdeps/alpha/remq.S: Likewise.
- * sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the
- new division algorithms in divl.S and divq.S respectively.
-
-2004-07-28 GOTO Masanori <gotom@debian.or.jp>
-
- * timezone/asia: Update from tzdata2004b.
+ [BZ #736]
+ * timezone/asia: Update from tzdata2005c.
* timezone/backward: Likewise.
* timezone/europe: Likewise.
- * timezone/iso3166.tab: Likewise.
* timezone/leapseconds: Likewise.
* timezone/northamerica: Likewise.
* timezone/southamerica: Likewise.
- * timezone/zone.tab: Likewise.
- * timezone/private.h: Update from tzcode2004b.
+ [BZ #736]
+ * timezone/private.h: Update from tzcode2005c.
+ * timezone/tzfile.h: Likewise.
+ * timezone/zdump.c: Likewise.
* timezone/zic.c: Likewise.
-2004-07-27 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/grpcache.c (cache_addgr): If necessary, add entry also
- under the name the user provided.
- * nscd/pwdcache.c (cache_addpw): Likewise.
-
-2004-07-26 Roland McGrath <roland@redhat.com>
-
- * sysdeps/posix/waitid.c [WEXITED]: Clear WEXITED bit in OPTIONS for
- call to __waitpid.
-
-2004-07-26 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/hstcache.c (cache_addhst): Fix two scenarios which lead to
- memory leaks.
-
- * sysdeps/unix/sysv/linux/ifreq.c (__ifreq): Assign pointer for
- new buffer at the right time.
- Reported by Jakub Bogusz <qboosh@pld-linux.org>.
-
-2004-07-25 Ulrich Drepper <drepper@redhat.com>
-
- * inet/Versions [libc, GLIBC_2.3.4]: Add getipv4sourcefilter,
- getsourcefilter, setipv4sourcefilter, and setsourcefilter.
- * inet/Makefile (routines): Likewise.
- * inet/netinet/in.h: Add prototypes for getipv4sourcefilter,
- getsourcefilter, setipv4sourcefilter, and setsourcefilter.
- * sysdeps/generic/getipv4sourcefilter.c: New file.
- * sysdeps/generic/setipv4sourcefilter.c: New file.
- * sysdeps/generic/getsourcefilter.c: New file.
- * sysdeps/generic/setsourcefilter.c: New file.
-
-2004-07-17 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/powerpc/powerpc64/memcpy.S: Improve instruction scheduling
- for POWER4 machines.
-
-2004-07-21 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #274]
- * stdlib/strtod_l.c (INTERNAL (__STRTOF)): Fix used >=
- BITS_PER_MP_LIMB shifting up.
- * stdlib/tst-strtod.c (main): Add new tests.
-
-2004-07-23 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #282]
- * libio/iopopen.c (_IO_new_popen): Use _IO_init instead of
- _IO_no_init. Remove wd from struct locked_FILE.
- (_IO_wproc_jumps): Remove.
- Reported by Andrew Josey <a.josey@opengroup.org>.
- * stdio-common/Makefile (tests): Add tst-popen.
- * stdio-common/tst-popen.c: New test.
-
-2004-07-23 Ulrich Drepper <drepper@redhat.com>
-
- * posix/bits/posix1_lim.h: Fix values for _POSIX_CHILD_MAX and
- _POSIX_OPEN_MAX. Add _POSIX_HOST_NAME_MAX, _POSIX_SYMLINK_MAX,
- _POSIX_SYMLOOP_MAX, and _POSIX_RE_DUP_MAX.
- Reported by Andrew Josey.
-
- * include/features.h: Document _POSIX_C_SOURCE == 200112L.
-
- * grp/grp.h: Define gid_t if __USE_XOPEN2K is defined.
- * pwd/pwd.h: Define uid_t and gid_t if __USE_XOPEN2K is defined.
- * io/sys/stat.h: Define dev_t, gid_t, ino_t, mode_t, nlink_t,
- off_t, time_t, and uid_t if __USE_XOPEN2K is defined.
- * signal/signal.h: Define pid_t if __USE_XOPEN2K is defined.
- * posix/unistd.h: Define gid_t, off_t, pid_t, uid_t, and
- useconds_t if __USE_XOPEN2K is defined.
- * io/utime.h: Define time_t if __USE_XOPEN2K is defined.
- * libio/stdio.h: Declare fseeko and ftello if __USE_XOPEN2K is defined.
-
-2004-07-19 Thorsten Kukuk <kukuk@suse.de>
-
- * nis/nss_compat/compat-initgroups.c (getgrent_next_nss): Don't
- allocate memory for large temporary variables with alloca.
-
-2004-07-22 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S: Compatibility
- code must have version GLIBC_2.0. Patch by Dwayne McConnell.
-
- * nscd/nscd_getgr_r.c (nscd_getgr_r): Avoid read call with NULL
- pointer and zero length.
-
-2004-07-21 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/cache.c (prune_cache): Print correct list when debugging.
-
-2004-07-21 Jakub Jelinek <jakub@redhat.com>
-
- * resolv/res_libc.c (res_init): If RES_INIT is set and
- _res.nscount > 0, call __res_nclose and free nsaddrs.
- * resolv/Makefile: Add rules to build and run tst-leaks2.
- * resolv/tst-leaks2.c: New test.
+2005-01-24 Ulrich Drepper <drepper@redhat.com>
-2004-07-21 Ulrich Drepper <drepper@redhat.com>
+ [BZ #671]
+ * misc/syslog.c: Moved to...
+ * sysdeps/generic/syslog.c: ...here.
+ [NO_SIGPIPE]: Don't install SIGPIPE handler.
+ * sysdeps/unix/sysv/linux/syslog.c: New file.
+ * sysdeps/unix/sysv/linux/kernel-features.h: Define
+ __ASSUME_MSG_NOSIGNAL.
- * intl/libintl.h: Don't define macros for C++.
- Patch by Goto Masanori.
+2005-01-22 Ulrich Drepper <drepper@redhat.com>
-2004-07-22 GOTO Masanori <gotom@debian.or.jp>
+ * posix/Makefile: Use CFLAGS-*.os instead of CFLAGS-*.c for frame
+ pointer option.
+ * stdlib/Makefile (CFLAGS-system.os): Use this instead of
+ CFLAGS-system.c for frame pointer option.
- [BZ #276]
- * include/arpa/inet.h: Change inet_aton type from in_addr_t to int.
- * inet/arpa/inet.h: Likewise.
- * resolv/inet_addr.c: Likewise.
+2005-01-21 Jakub Jelinek <jakub@redhat.com>
-2004-07-21 Alexandre Oliva <aoliva@redhat.com>
+ [BZ #735]
+ * elf/Makefile: Add rules to build and run tst-align2.
+ * elf/tst-align2.c: New test.
+ * elf/tst-alignmod2.c: New file.
+ * sysdeps/powerpc/tst-stack-align.h: New file.
+ * sysdeps/i386/dl-machine.h (RTLD_START): Align stack and clear frame
+ pointer before calling _dl_init.
+ * sysdeps/x86_64/dl-machine.h (RTLD_START): Likewise.
- * sysdeps/unix/sysv/linux/mips/mips64/syscalls.list: Add semtimedop.
+2005-01-20 Ulrich Drepper <drepper@redhat.com>
-2004-07-20 Roland McGrath <roland@redhat.com>
+ * posix/execl.c: Do not allocate potentially large buffers on the
+ stack.
+ * posix/execle.c: Likewise.
+ * posix/execlp.c: Likewise.
+ * posix/execvp.c: Likewise.
+ (script_execute): Removed.
+ (allocate_scripts_argv): New function. Called at most once to
+ allocate memory, not every time a script is run. Adjust caller.
- * configure.in (add_ons): Substitute this. Move $add_ons handling
- after AC_CANONICAL_HOST and default setting of $machine et al.
- Don't set $subdirs from $add_ons.
- Instead, source add-on/configure fragments early on.
- (base_machine): If it's already set, don't set it based on $machine.
- (libc_config_ok): New variable, set to no. If an add-on fragment sets
- it to yes, skip the tuple sanity check as if --enable-hacker-mode.
- (sysnames): Try appending add-on names after machine as well.
- * config.make.in (add-ons): Set from @add_ons@ instead of @subdirs@.
- * Makeconfig ($(common-objpfx)config.status): Also depend on configure
- files in $(add_ons) dirs.
+ * sysdeps/generic/wordexp.c (exec_comm): Add a few
+ TEMP_FAILURE_RETRY. Reorganize code to avoid multiple calls to
+ exec_comm_child.
+ (exec_comm_child): Can now be inlined.
- * sysdeps/unix/sysv/linux/bits/in.h (struct ip_mreq): Remove
- definition, now in netinet/in.h proper.
+ * posix/Makefile: Add -fomit-frame-pointer for a few more files.
+ * stdlib/Makefile: Likewise.
-2004-07-20 Alexandre Oliva <aoliva@redhat.com>
+2005-01-19 Roland McGrath <roland@redhat.com>
- * sysdeps/unix/sysv/linux/mips/Makefile ($(objpfx)syscall-%.h):
- Sort by syscalls. Make sure we get headers such as sgidefs.h from
- the build tree before just-installed ones.
+ [BZ #681]
+ * sunrpc/openchild.c (_openchild): Use NULL instead of 0 for trailing
+ argument to execlp.
+ Reported by Marcus Meissner <meissner@suse.de>.
- * sysdeps/mips/atomicity.h: Use standard names for ABI macros,
- include sgidefs.h where appropriate.
- * sysdeps/mips/dl-machine.h: Likewise.
- * sysdeps/mips/machine-gmon.h: Likewise.
- * sysdeps/mips/bits/setjmp.h: Likewise.
- * sysdeps/mips/fpu/bits/mathdef.h: Likewise.
- * sysdeps/mips/mips64/__longjmp.c: Likewise.
- * sysdeps/mips/mips64/setjmp_aux.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/kernel_stat.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/pread.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pread64.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/ptrace.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pwrite.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/pwrite64.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/sigaction.c: Likewise.
- * sysdeps/unix/sysv/linux/mips/sigcontextinfo.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/fcntl.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/sigcontext.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/stat.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/procfs.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/ptrace.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/tas.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/ucontext.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/sys/user.h: Likewise.
- * sysdeps/unix/sysv/linux/mips/Makefile ($(objpfx)syscall-%.h):
- Likewise.
- * sysdeps/unix/sysv/linux/mips/configure.in (asm-unistd.h):
- Likewise.
+2005-01-19 Jakub Jelinek <jakub@redhat.com>
- * sysdeps/mips/dl-machine.h (__dl_runtime_resolve): Update to use
- _dl_lookup_symbol_x.
- (elf_machine_runtime_link_map): Don't INTUSE _dl_signal_error.
+ * hurd/sigunwind.c (_hurdsig_longjmp_from_handler): Fix a typo
+ in assert.
+ * iconv/strtab.c (strtabfinalize): Likewise.
-2004-07-20 Ulrich Drepper <drepper@redhat.com>
+ [BZ #730]
+ * libio/iofopncook.c (_IO_cookie_seekoff): Add prototype.
- * inet/netinet/in.h: Define struct ip_mreq and struct
- ip_mreq_source.
- Define struct group_req and struct group_source_req.
- * sysdeps/unix/sysv/linux/bits/in.h: Define IP_UNBLOCK_SOURCE,
- IP_BLOCK_SOURCE, IP_ADD_SOURCE_MEMBERSHIP,
- IP_DROP_SOURCE_MEMBERSHIP, IP_MSFILTER, MCAST_JOIN_GROUP,
- MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_LEAVE_GROUP,
- MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, and
- MCAST_MSFILTER.
- Define MCAST_INCLUDE and MCAST_EXCLUDE.
+2005-01-17 Roland McGrath <roland@redhat.com>
- * iconvdata/gconv-modules: Add alias for IBM874 [BZ #244].
+ [BZ #745]
+ * nscd/Makefile (LDLIBS-nscd): New variable.
+ ($(objpfx)nscd): Use that instead of selinux-LIBS.
-2004-07-19 Jakub Jelinek <jakub@redhat.com>
+ [BZ #745]
+ * Makeconfig (link-extra-libs): Define just as $(LDLIBS-$(@F)).
+ (link-extra-libs-static): Define to $(link-extra-libs).
+ (link-extra-libs-bounded): Likewise.
- [BZ #258]
- * math/libm-test.inc (max_value, min_value): New variables.
- (initialize): Initialize them.
- (pow_test): Add a couple of new tests.
- * sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid
- exception if |y| >= 1U<<31.
- * sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid
- exception if |y| >= 1L<<63.
- * sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
- If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN.
- * sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise.
+2005-01-16 GOTO Masanori <gotom@debian.or.jp>
-2004-07-18 Ulrich Drepper <drepper@redhat.com>
+ [BZ #734]
+ * sysdeps/unix/rewinddir.c: Reset filepos.
+ * dirent/tst-seekdir.c: Check telldir value after calling rewinddir.
- * nscd/pwdcache.c (cache_addpw): Optimize case of unsuccessful
- lookup a bit.
- * nscd/grpcache.c (cache_addgr): Likewise.
- * nscd/hstcache.c (cache_addhst): Likewise.
+2005-01-14 Ulrich Drepper <drepper@redhat.com>
-2004-07-10 GOTO Masanori <gotom@debian.or.jp>
+ [BZ #731]
+ * stdlib/fmtmsg.c (internal_addseverity): Remove incorrect free call.
+ * stdlib/tst-fmtmsg.c (main): Add another addseverity test.
- * sysdeps/s390/s390-32/elf/start.S: Remove symbol _fp_hw.
- * sysdeps/s390/s390-64/elf/start.S: Likewise.
+2005-01-12 Ulrich Drepper <drepper@redhat.com>
-2004-04-16 Andreas Schwab <schwab@suse.de>
-
- * sysdeps/ia64/bits/atomic.h: Cast first argument of
- __sync_bool_compare_and_swap_si correctly to void*.
-
-2004-07-17 Ulrich Drepper <drepper@redhat.com>
-
- * iconv/iconv_prog.c (print_known_names): Make machine-readable
- output even less cluttered.
-
-2004-07-16 Steven Munroe <sjmunroe@us.ibm.com>
-
- [BZ #269]
- * setjmp/Makefile (tests): Add bug269-setjmp.
- * setjmp/bug269-setjmp.c: New file.
- * sysdeps/powerpc/powerpc64/__longjmp-common.S [SHARED && !IS_IN_rtld]:
- Store R2 from jmpbuf in callers TOC save area.
- * sysdeps/powerpc/powerpc64/bsd-_setjmp.S [SHARED && !IS_IN_rtld]:
- Store R2 in TOC save area.
- * sysdeps/powerpc/powerpc64/setjmp-common.S [SHARED && !IS_IN_rtld]:
- Copy TOC save area from previous frame as R2 (TOC) in jmpbuf.
-
-2004-07-16 Jakub Jelinek <jakub@redhat.com>
-
- * locale/newlocale.c: Include bits/libc-lock.h.
- (__libc_setlocale_lock): Extern decl.
- (__newlocale): Use it.
- Reported by Ulrich Weigand <Ulrich.Weigand@de.ibm.com>.
-
-2004-07-15 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/fcntl.c (__fcntl_nocancel): Remove
- static inline __attribute ((always_inline)). Don't define if
- NO_CANCELLATION.
- (__libc_fcntl): Use INLINE_SYSCALL directly instead of
- __fcntl_nocancel.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/fcntl.c (__fcntl_nocancel):
- Remove static inline __attribute ((always_inline)). Don't define
- if NO_CANCELLATION.
- (__libc_fcntl): Use INLINE_SYSCALL directly instead of
- __fcntl_nocancel.
- * sysdeps/unix/sysv/linux/i386/fcntl.c (__fcntl_nocancel): Define to
- __libc_fcntl if NO_CANCELLATION and __ASSUME_FCNTL64 == 0.
- Don't define at all if NO_CANCELLATION and __ASSUME_FCNTL64 > 0.
- (__libc_fcntl): Don't define if __fcntl_nocancel is a macro.
-
- [BZ #262]
- * sysdeps/i386/elf/start.S (_start): Use @GOT instead of @GOTOFF
- for main.
- * elf/Makefile: Add rules to build and run tst-pie1.
- * elf/tst-pie1.c: New test.
- * elf/tst-piemod1.c: New file.
-
-2004-07-14 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #266]
- * manual/string.texi (l64a): Note that the static buffer is 7 bytes
- long. Rewrite example code so that it takes into account l64a output
- shorter than 6 characters.
- Reported by Julian Graham <julian.graham@aya.yale.edu>.
-
-2004-07-14 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/sh/dl-machine.h: Don't reset _dl_starting_up here.
- (elf_machine_rela_relative): Remove unused variable.
-
-2004-07-12 Paul Eggert <eggert@cs.ucla.edu>
-
- [BZ #263]
- * sysdeps/unix/sysv/linux/getloadavg.c (getloadavg): Don't store
- outside the buffer if the read returns 0. __strtod_l can't set
- endp to NULL, so remove a test for that case.
-
-2004-07-12 Roland McGrath <roland@redhat.com>
-
- * manual/signal.texi (Interrupted Primitives): Make clear that
- TEMP_FAILURE_RETRY evaluates its expression as long int and compares
- it to -1 to define "failure".
-
-2004-07-10 Ulrich Drepper <drepper@redhat.com>
-
- * elf/ldconfig.c: Define PROCINFO_CLASS as static before including
- ldsodefs.h.
- * sysdeps/generic/ldsodefs.h: Only define PROCINFO_CLASS if it is not
- already defined.
- * sysdeps/i386/dl-procinfo.c: Define PROCINFO_CALLS only if not
- already defined.
-
- * elf/rtld.c (print_statistics): Mark with noinline attribute.
- * sysdeps/i386/dl-machine.h (elf_machine_rel): Mark with always_inline
- attribute.
- (elf_machine_rel_relative): Likewise.
-
- * include/string.h: Add libc_hidden_proto for __strtok_r and
- __strsep_g.
- * sysdeps/generic/strsep.c: Add libc_hidden_def.
- * sysdeps/generic/strtok_r.c: Likewise.
- * sysdeps/i386/strtok_r.S: Add alias for internal symbol.
- * sysdeps/i386/i686/strtok_r.S: Likewise.
- * sysdeps/x86_64/strtok_r.S: Likewise.
-
-2004-07-09 Alexandre Oliva <aoliva@redhat.com>
-
- * inet/getnetgrent_r.c (internal_setnetgrent): Make it hidden
- instead of internal-linkage, such that we can alias to it.
- (internal_endnetgrent, internal_getnetgrent_r): Likewise.
-
-2004-07-09 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/fcntl.c (__fcntl_nocancel): Move attribute
- to the front for gcc 3.5+.
- * sysdeps/unix/sysv/linux/i386/fcntl.c: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/fcntl.c: Likewise.
- * include/fcntl.h: Don't declare __fcntl_nocancel here if
- NO_CANCELLATION is defined.
-
-2004-07-07 Jakub Jelinek <jakub@redhat.com>
-
- * elf/dl-fini.c (_dl_fini): Add nloaded variable, load
- GL(dl_nloaded) into it while still in critical section.
-
-2004-07-06 Thorsten Kukuk <kukuk@suse.de>
-
- * nss/getent.c (print_group, print_passwd): Use %lu format for
- `unsigned long int' values.
-
- * sysdeps/unix/sysv/linux/i386/fcntl.c (__fcntl_nocancel): Fix syntax
- error typos in goto statements.
-
-2004-07-07 Roland McGrath <roland@frob.com>
-
- * Makefile (dist-separate): New variable.
- (glibc-%.tar rule): Make separate tar files for add-ons listed there.
- Depend on their configure files.
- (dist-do-separate-dirs): New canned sequence to do that.
- (dist-separate-libidn, dist-separate-linuxthreads): New variables.
- (dist): Depend on add-on tar files based on $(dist-separate).
- (dist-version): New variable, default to $(version).
- (tag-for-dist, dist): Use that in place of $(version) in deps.
-
-2004-07-07 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-fini.c (_dl_fini): Move the unlock of the ld.so lock
- before the loop running the destructors.
-
-2004-05-18 Andreas Schwab <schwab@suse.de>
-
- * elf/dl-load.c (_dl_map_object_from_fd): Use the end address of
- the first segment for mprotect, not l_text_end.
-
-2004-07-05 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-profile.c (_dl_start_profile): Compact error handling.
- Remove hashfraction variable. Make kcount and kcountsize local
- variables.
-
- * elf/dl-init.c: Don't define and use _dl_starting_up if
- HAVE_INLINED_SYSCALLS is defined and the variable is not used.
- * elf/dl-support.c: Likewise.
- * elf/rtld.c: Likewise.
- * elf/dl-misc.c (_dl_debug_vdprintf): Use writev syscall directly
- if HAVE_INLINED_SYSCALLS is defined.
- * sysdeps/powerpc/powerpc64/dl-machine.h: Don't reset _dl_starting_up
- here.
- * sysdeps/powerpc/powerpc32/dl-start.S: Likewise.
- * sysdeps/unix/sysv/linux/configure.in: Define HAVE_INLINED_SYSCALLS.
- * config.h.in: Add entry for HAVE_INLINED_SYSCALLS.
-
- * sysdeps/posix/profil.c: If compiled for ld.so, omit code which
- is needed to stop profiling.
- * elf/dl-open.c (dl_open_worker): If a newly opened object is to be
- profiled make sure it cannot be unloaded.
-
- * sysdeps/unix/sysv/linux/dl-origin.c: Inline readlink syscall.
-
- * sysdeps/unix/sysv/linux/fcntl.c: If compiled without cancellation
- support, make sure the helper function is inlined.
- * sysdeps/unix/sysv/linux/pread.c: Likewise.
- * sysdeps/unix/sysv/linux/pwrite.c: Likewise.
- * sysdeps/unix/sysv/linux/i386/fcntl.c: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/fcntl.c: Likewise.
-
-2004-07-05 Jakub Jelinek <jakub@redhat.com>
-
- * include/string.h (ffs): Add libc_hidden_builtin_proto.
- * sysdeps/rs6000/ffs.c (ffs): Add libc_hidden_builtin_def.
- * sysdeps/alpha/alphaev67/ffs.S (ffs): Likewise.
- * sysdeps/alpha/ffs.S (ffs): Likewise.
- * sysdeps/s390/ffs.c (ffs): Likewise.
- * sysdeps/powerpc/ffs.c (ffs): Likewise.
- * sysdeps/i386/ffs.c (ffs): Likewise.
- * sysdeps/i386/i686/ffs.c (ffs): Likewise.
- * sysdeps/m68k/ffs.c (ffs): Likewise.
- * sysdeps/generic/ffs.c (ffs): Likewise.
- * sysdeps/m88k/ffs.c (ffs): Likewise.
- * sysdeps/am29k/ffs.c (ffs): Likewise.
- * sysdeps/i960/ffs.c (ffs): Likewise.
- * sysdeps/x86_64/ffs.c (ffs): Likewise.
-
- * Makerules (check-abi): Use diff -p -U 0 instead of diff -pu0.
-
- * sysdeps/powerpc/novmx-longjmp.c (__libc_longjmp,
- __libc_siglongjmp): Remove symbol_version.
- * sysdeps/powerpc/longjmp.c (__libc_longjmp, __libc_siglongjmp):
- Export @@GLIBC_PRIVATE, not @@GLIBC_2.3.4.
- * sysdeps/powerpc/sigjmp.c (__sigjmp_save): Use strong_alias
- unconditionally.
- * sysdeps/powerpc/novmx-sigjmp.c (__sigjmp_save): Remove.
- * sysdeps/powerpc/powerpc32/__longjmp.S (__longjmp): Use
- strong_alias instead of default_symbol_version, remove
- symbol_version.
- * sysdeps/powerpc/powerpc64/__longjmp.S (__longjmp): Likewise.
- * sysdeps/powerpc/powerpc32/bsd-setjmp.S (__novmx__setjmp): Change
- into strong_alias to __novmxsetjmp.
- (__vmx__setjmp): Similarly with __vmxsetjmp.
- (__setjmp): Make it strong_alias to __vmx__setjmp, remove
- default_symbol_version and symbol_version.
- * sysdeps/powerpc/powerpc64/bsd-setjmp.S (__novmx__setjmp): Change
- into strong_alias to __novmxsetjmp.
- (__vmx__setjmp): Similarly with __vmxsetjmp.
- (__setjmp): Make it strong_alias to __vmx__setjmp, remove
- default_symbol_version and symbol_version.
-
- * nscd/nscd_getgr_r.c: Include stdio-common/_itoa.h.
-
-2004-07-05 Ulrich Drepper <drepper@redhat.com>
-
- * elf/rtld.c (dl_main): Mark dyn_temp with attribute_relro.
-
-2004-07-04 Matthew Reppert <arashi@kai.vm.bytemark.co.uk>
-
- * sysdeps/unix/sysv/linux/i386/glob64.c (glob64): Use libc_hidden_ver
- instead of libc_hidden_def.
-
-2004-07-01 Roland McGrath <roland@redhat.com>
-
- * aclocal.m4: Add provide for AC_CONFIG_AUX_DIR_DEFAULT.
-
- * Makeconfig ($(common-objpfx)shlib-versions.v.i): Move top-level
- $(..)shlib-versions file to last in deps list. This lets add-ons give
- more-specific matches that override defaults in the top-level file.
-
- * sysdeps/unix/sysv/linux/configure.in: If arch_minimum_kernel was
- already set, don't set it or libc_cv_gcc_unwind_find_fde.
- * sysdeps/unix/sysv/linux/configure: Regenerated.
-
-2004-07-01 Martin Schwidefsky <schwidefsky@de.ibm.com>
-
- * sysdeps/s390/fpu/bits/mathinline.h [__LIBC_INTERNAL_MATH_INLINES]
- (__ieee754_sqrt): Define as __MATH_INLINE using sqdbr instruction.
- (__ieee754_sqrtf): Define as __MATH_INLINE using sqebr instruction.
- * sysdeps/s390/fpu/e_sqrt.c: New file.
- * sysdeps/s390/fpu/e_sqrtf.c: New file.
- * sysdeps/s390/Implies: New file.
- * sysdeps/s390/s390-32/Implies: Remove ieee754, move
- ieee754/dbl-64 and ieee754/flt-32 to s390/Implies.
- * sysdeps/s390/s390-64/Implies: Likewise.
-
-2004-06-30 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.S: Include tls.h.
- * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.S: Likewise.
-
- * sysdeps/unix/sysv/linux/s390/s390-32/clone.S (thread_start):
- DO_CALL (exit, 1) instead of branching to _exit.
- * sysdeps/unix/sysv/linux/s390/s390-64/clone.S (thread_start):
- Likewise.
-
- * sysdeps/s390/fpu/bits/mathinline.h: New file.
-
- * include/glob.h (glob64): Add libc_hidden_proto.
- * sysdeps/generic/glob64.c (glob64): Add libc_hidden_def.
- * sysdeps/gnu/glob64.c (glob64): Likewise.
- * sysdeps/unix/sysv/linux/i386/glob64.c (glob64): Likewise.
-
-2004-06-11 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/i386/fpu/libm-test-ulps: Update for GCC 3.4.
- * sysdeps/s390/fpu/libm-test-ulps: Likewise.
-
-2004-06-11 Dwayne Grant McConnell <dgm69@us.ibm.com>
-
- * sysdeps/powerpc/fpu/s_lround.c: Removed.
- * sysdeps/powerpc/fpu/s_lroundf.c: Removed.
- * sysdeps/powerpc/powerpc32/fpu/s_ceilf.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_ceil.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_floorf.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_floor.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_lrint.c: Removed.
- * sysdeps/powerpc/powerpc32/fpu/s_lrint.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_lroundf.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_lround.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_rintf.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_rint.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_roundf.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_round.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_truncf.S: New file.
- * sysdeps/powerpc/powerpc32/fpu/s_trunc.S: New file.
-
-2004-06-30 Ulrich Drepper <drepper@redhat.com>
-
- * include/net/if.h: Handle if_nameindex and if_freenameindex with
- libc_hidden_proto.
- * sysdeps/unix/sysv/linux/netlinkaccess.h: New file.
- * sysdeps/unix/sysv/linux/ifaddrs.c: Export netlink handling functions.
- * sysdeps/unix/sysv/linux/if_index.c (if_nameindex): Implement using
- netlink if possible. Fall back on ioctl method if necessary.
- * sysdeps/unix/sysv/linux/Dist: Add netlinkaccess.h.
-
- * include/unistd.h: Declare __truncate.
- * sysdeps/generic/truncate.c: Also define __truncate.
- * sysdeps/mach/hurd/truncate.c: Likewise.
- * sysdeps/unix/common/syscalls.list: Likewise.
- * sysdeps/unix/sysv/linux/truncate64.c: Use __truncate, not truncate.
-
-2004-06-29 Ulrich Drepper <drepper@redhat.com>
-
- * stdio-common/printf-parsemb.c (__parse_one_specmb): Initialize
- info.extra. Patch by Marcus Meissner.
-
-2004-06-29 Kaz Kojima <kkojima@rr.iij4u.or.jp>
-
- * sysdeps/unix/sysv/linux/sh/sysdep.h (SYSCALL_ERROR_HANDLER):
- Fix branch offset for a PLT entry.
-
-2004-06-20 Jim Meyering <jim@meyering.net>
-
- * malloc/obstack.h (obstack_base): Cast to `void *', to align with
- documentation.
-
-2004-06-28 Ulrich Drepper <drepper@redhat.com>
-
- * inet/bug-if1.c (do_test): Simply use 0 as invalid index.
- Workaround for [BZ #232].
-
- * sysdeps/unix/sysv/linux/if_index.c (if_nameindex): Use extend_alloca.
-
-2004-06-28 GOTO Masanori <gotom@debian.or.jp>
-
- * iconv/gconv_simple.c: Use get16/put16 for user given buffer
- in ucs2/ucs2reverse when unaligned memory access is attempted.
- * iconv/tst-iconv5.c: New file.
- * iconv/Makefile (tests): Add tst-iconv5.
-
-2004-06-28 Jakub Jelinek <jakub@redhat.com>
-
- * inet/bug-if1.c: Include <string.h>.
-
-2004-06-19 Roland McGrath <roland@redhat.com>
-
- * sysdeps/posix/waitid.c (do_waitid) [DO_WAITID]: Define function
- under this macro name instead.
- [NO_DO_WAITID]: Don't define it at all.
- (do_waitid) [WNOWAIT, WEXITED]: If these POSIX.1 waitid flag bits are
- defined, then return ENOTSUP for combinations of selection bits other
- than WEXITED and WEXITED|WSTOPPED, which this version cannot support.
-
- * posix/tst-waitid.c: New file.
- * posix/Makefile (tests): Add it.
-
-2004-06-28 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/alpha/sysdep.h (inline_syscall6): Fix a typo.
-
- [BZ #231]
- * sysdeps/unix/alpha/sysdep.S (__syscall_error): Avoid !samegp
- relocation in librt.so.
-
- [BZ #230]
- * sysdeps/alpha/dl-machine.h (_dl_start_user): Use ldah/ldl to load
- _dl_skip_args. Patch by Jakub Bogusz <qboosh@pld-linux.org>.
-
-2004-06-27 Ulrich Drepper <drepper@redhat.com>
-
- * elf/ldconfig.c (add_dir): Take chroot into account.
- Based on changes by HJ Lu and Hideki Iwamoto.
-
- * nscd/connections.c (invalidate_cache): If the host cache has to
- be invalidated, re-read resolv.conf.
-
- * resolv/resolv.h (RES_NOIP6DOTINT): Define.
- * resolv/res_init.c (res_setoptions): Handle no-ip6-dotint option.
- * resolv/gethnamaddr.c (gethostbyaddr): Don't lookup with .ip6.int
- if RES_NOIP6DOTINT flag is set.
- * resolv/nss_dns/dns-host.c (_nss_dns_gethostbyaddr_r): Likewise.
-
-2004-06-25 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/powerpc/fcntl.c: Move to...
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/fcntl.c: ... here.
- * sysdeps/unix/sysv/linux/powerpc/lockf64.c: Move to...
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/lockf64.c: ... here.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/fcntl.c: New file.
-
- * sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h (F_GETLK, F_SETLK,
- F_SETLKW): Fix values for -m32 -D_FILE_OFFSET_BITS=64.
-
-2004-06-21 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #231]
- * sysdeps/unix/alpha/Makefile: New file.
- * sysdeps/unix/alpha/rt-sysdep.S: New file.
- Reported by Jakub Bogusz <qboosh@pld-linux.org>.
-
-2004-06-18 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/posix/getaddrinfo.c (gaih_inet): For AI_CANONNAME,
- determine the canonical name only for the first returned entry.
-
- * inet/bug-if1.c: New file.
- * inet/Makefile (tests): Add bug-if1.
-
-2004-06-18 Roland McGrath <roland@frob.com>
-
- * Makerules (compile-mkdep-flags): Add -MT $@.
-
- * Makefile (dist, tag-for-dist): New targets.
- (files-for-dist, tag-of-stem): New variables.
- (glibc-%.tar glibc-linuxthreads-%.tar): New pattern rule.
- Make tar files using cvs export.
- (%.bz2, %.gz, tag-%): New pattern rules.
- (distribute): Variable removed.
- (+subdir_targets): Remove distinfo targets.
- (echo_subdirs, echo-distinfo, parent_echo-distinfo): Targets removed.
- (rpm/%): Pattern rule removed.
- * rpm/Makefile, rpm/rpmrc, rpm/template: Ancient cruft files removed.
- * Rules (subdir_echo-headers, subdir_echo-distinfo, subdir_dist):
- Targets removed.
- * Makerules (dist, subdir_distinfo): Targets removed.
- ($(objpfx)distinfo): Depend on $(distribute).
- * Make-dist: File removed.
-
- * NEWS: Fix a typo. Update bug-reporting instructions.
-
-2004-06-17 Thorsten Kukuk <kukuk@suse.de>
-
- * sysdeps/unix/sysv/linux/sched_setaffinity.c
- (__sched_setaffinity_new): Set errno to EINVAL and return -1 if
- cpuset is wrongly set.
-
-2004-06-15 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/powerpc/powerpc64/memcpy.S: Improve instruction scheduling
- for POWER4 machines.
-
-2004-06-14 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #218]
- * sunrpc/pmap_prot2.c (xdr_pmaplist): When freeing, remember pml_next
- in a local variable, point rp to that local variable afterwards.
-
-2004-06-11 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/ieee754/dbl-64/e_sqrt.c (__ieee754_sqrt): Handle special
- cases properly.
-
-2004-06-17 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/if_index.c (if_indextoname): Correct
- error value for unknown interface. [BZ #198]
-
-2004-06-13 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/i386/bits/string.h (memcpy): Add () around arguments.
- (memchr, __memrchr, strlen, strcmp, strncmp, __strchr_g, __strchr_c,
- __strchrnul_g, __strchrnul_c, strspn, strcspn, strpbrk): Add memory
- the asm uses as its input, either of size __n where __n is known or
- 0xfffffff.
- (strstr): Add "memory" clobber.
-
-2004-06-14 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #219]
- * nss/nsswitch.c (free_mem): Don't try to close a library handle
- if the handle is invalid. Patch by David Kimdon <kimdon@esrf.fr>.
-
-2004-06-11 Dmitry V. Levin <ldv@altlinux.org>
-
- [BZ #217]
- * debug/xtrace.sh: Fix typo in error diagnostics.
-
-2004-06-14 Andreas Schwab <schwab@suse.de>
-
- * stdio-common/psignal.c (psignal): Don't use BUF when asprintf
- failed.
-
-2004-06-15 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S: Fix pasto
- that clobbers r19. Fix pasto that overflowed sigcontext.v_reserve.
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S: Fix pasto
- that clobbers r19.
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S: Fix pasto
- that clobbers r19. Fix pasto that overflowed sigcontext.v_reserve.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/getcontext.S:
- Fix setting of sigcontext.v_regs. Fix pasto that clobbers r19.
- Fix pasto that overflowed sigcontext.v_reserve.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S:
- Fix pasto that clobbers r19.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S: Fix setting
- of sigcontext.v_regs. Fix pasto that clobbers r19. Fix pasto that
- overflowed sigcontext.v_reserve.
-
-2004-05-04 H.J. Lu <hongjiu.lu@intel.com>
-
- [BZ #150]
- * sysdeps/generic/strtol_l.c (DEF): Use ".gnu.linkonce.r."
- instead of ".gnu.linkonce.ro." as the prefix for linkonce
- read-only section name.
-
-2004-06-14 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/div_libc.h (_ITOFS): Use "sp" not "$sp".
- (_ITOFT, _FTOIT, _ITOFT2): Likewise.
-
-2004-06-08 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/s390/s390-32/backtrace.c (init): Guard with #ifdef SHARED.
- (unwind_backtrace, unwind_getip): Define #ifndef SHARED.
- (__backtrace): Don't call init #ifndef SHARED.
- * sysdeps/s390/s390-64/backtrace.c (init): Guard with #ifdef SHARED.
- (unwind_backtrace, unwind_getip): Define #ifndef SHARED.
- (__backtrace): Don't call init #ifndef SHARED.
- * sysdeps/ia64/backtrace.c (init): Guard with #ifdef SHARED.
- (unwind_backtrace, unwind_getip): Define #ifndef SHARED.
- (__backtrace): Don't call init #ifndef SHARED.
-
-2004-06-11 Roland McGrath <roland@redhat.com>
-
- * sysdeps/arm/strlen.S [__ARMEB__]: Compute correctly for big-endian.
- From Krzysztof Halasa <khc@pm.waw.pl>.
-
-2004-06-10 Jakub Jelinek <jakub@redhat.com>
-
- * elf/tls-macros.h [__s390x__] (TLS_LD, TLS_GD): Clobber also r14.
-
-2004-06-08 Jakub Jelinek <jakub@redhat.com>
-
- [BZ #199]
- * crypt/md5-crypt.c (__md5_crypt): Only update buflen if realloc
- succeeds. Reported by Miles Ohlrich <miles@cray.com>.
-
- * elf/chroot_canon.c (chroot_canon): Avoid segfault if first malloc
- fails. Avoid memory leak if realloc fails.
-
-2004-06-09 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/generic/setenv.c (setenv): Return -1/EINVAL if name is
- NULL, "" or contains '=' character in it. Reported by
- Michael T Kerrisk <mtk-lists@gmx.net>.
- * stdlib/tst-environ.c: Include errno.h.
- (main): Add tests for these arguments to setenv/unsetenv.
-
-2004-06-07 Roland McGrath <roland@frob.com>
-
- * NEWS: Update bug reporting instructions. Fix some typos.
-
-2004-06-05 Ulrich Drepper <drepper@redhat.com>
-
- * stdio-common/_itoa.h: Don't expand _itoa inline for libc.
- * stdio-common/_itoa.c: Add _itoa implementation.
-
- * nscd/nscd_gethst_r.c (__nscd_open_socket): Change implementation
- to also send request. Add parameter to allow this.
- Change callers.
- * nscd/nscd_getgr_r.c: Change __nscd_open_socket caller.
- * nscd/nscd_getpw_r.c: Likewise.
- * nscd/nscd-client.h: Change __nscd_open_socket prototype.
-
-2004-06-05 Andreas Jaeger <aj@suse.de>
-
- * sysdeps/unix/sysv/linux/x86_64/makecontext.c (__makecontext):
- Handle more than 6 args correctly. Based on a patch by Masahide
- Washizawa <washi@jp.ibm.com>.
-
-2004-06-04 Ulrich Drepper <drepper@redhat.com>
-
- * nscd/nscd_gethst_r.c (nscd_gethst_r): Use __nss_not_use_nscd_hosts
- in all places, not __nss_not_use_nscd_group.
- Reported by Philippe Gregoire.
-
-2004-06-03 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/i386/i486/bits/atomic.h: Optimize a bit.
-
-2004-05-07 Dmitry V. Levin <ldv@altlinux.org>
-
- * argp/argp-help.c (__argp_error, __argp_failure): Check result
- of __asprintf call and don't use string if it failed.
- * stdio-common/psignal.c (psignal): Likewise.
- * locale/programs/localedef.c (more_help): Likewise.
- * resolv/res_hconf.c (arg_service_list, arg_trimdomain_list,
- arg_bool, parse_line): Check result of __asprintf calls and
- don't use string if they failed.
- * sunrpc/svc_simple.c (registerrpc, universal): Likewise.
- * elf/ldconfig.c (parse_conf_include): Check result of __asprintf
- call and exit if it failed.
-
-2004-05-10 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/posix/sysconf.c (__sysconf) <cases _SC_REALTIME_SIGNALS,
- _SC_PRIORITY_SCHEDULING, _SC_TIMERS, _SC_ASYNCHRONOUS_IO,
- _SC_PRIORITIZED_IO, _SC_SYNCHRONIZED_IO, _SC_FSYNC, _SC_MAPPED_FILES,
- _SC_MEMLOCK, _SC_MEMLOCK_RANGE, _SC_MEMORY_PROTECTION,
- _SC_MESSAGE_PASSING, _SC_SEMAPHORES, _SC_SHARED_MEMORY_OBJECTS,
- _SC_THREADS, _SC_THREAD_SAFE_FUNCTIONS, _SC_THREAD_ATTR_STACKADDR,
- _SC_THREAD_ATTR_STACKSIZE, _SC_THREAD_PRIORITY_SCHEDULING,
- _SC_THREAD_PRIO_INHERIT, _SC_THREAD_PRIO_PROTECT,
- _SC_THREAD_PROCESS_SHARED>: Return _POSIX_* value instead of 1.
- * sysdeps/unix/sysv/linux/sysconf.c (__sysconf)
- <case _SC_MONOTONIC_CLOCK>: Return _POSIX_VERSION instead of 1.
-
-2004-05-07 Jeroen Dekkers <jeroen@dekkers.cx>
-
- * sysdeps/mach/hurd/i386/Makefile (CFLAGS-init-first.c): Add
- -momit-leaf-frame-pointer.
-
- * inet/test-ifaddrs.c (addr_string): Surround AF_PACKET case with
- #ifdef AF_PACKET.
-
- * sysdeps/mach/hurd/getcwd.c
- (_hurd_canonicalize_directory_name_intern): Only realloc when
- size is <= 0.
-
- * sysdeps/mach/hurd/mmap.c (__mmap): Fail when addr or offset
- isn't page aligned.
-
- * sysdeps/mach/hurd/spawni.c (EXPAND_DTABLE): Set dtablesize to
- new size.
-
- * sysdeps/mach/hurd/Versions (GLIBC_PRIVATE): Add __libc_read,
- __libc_write and __libc_lseek64.
-
-2004-05-29 Roland McGrath <roland@redhat.com>
-
- * elf/Makefile (shared-only-routines): Add dl-caller.
-
-2004-05-28 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/powerpc/configure.in: New file.
-
-2004-05-28 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Add .machine
- "altivec" to enable VMX instructions.
- * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise.
- * sysdeps/powerpc/powerpc64/__longjmp-common.S: Likewise.
- * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/getcontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S: Likewise.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S: Likewise.
-
-2004-05-27 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/i386/fpu/bits/mathinline.h (__expm1_code): Define using
- __builtin_expm1l for GCC 3.5+.
- (__expl): Define using __builtin_expl for GCC 3.4+.
- (exp, expf, expl): Don't define for GCC 3.4+.
- (tan, tanf, tanl): Don't define for GCC 3.5+.
- (__atan2l): Define using __builtin_atan2l for GCC 3.4+.
- (atan2, atan2f, atan2l): Don't define for GCC 3.4+ or !__FAST_MATH__.
- (fmod, fmodf, fmodl): Don't define for GCC 3.5+ or !__FAST_MATH__.
- (fabsf, fabsl): Only provide if __USE_MISC or __USE_ISOC99.
- (sin, sinf, sinl, cos, cosf, cosl, log, logf, logl): Don't define
- for GCC 3.4+.
- (log10, log10f, log10l, asin, asinf, asinl, acos, acosf, acosl):
- Don't define for GCC 3.5+.
- (atan, atanf, atanl): Don't define for GCC 3.4+ or !__FAST_MATH__.
- (log1p, log1pf, log1pl, logb, logbf, logbl, log2, log2f, log2l): Don't
- define for GCC 3.5+.
- (drem, dremf, dreml): Don't define for GCC 3.5+ or !__FAST_MATH__.
- * sysdeps/sparc/fpu/bits/mathinline.h (sqrt, sqrtf, sqrtl): Don't
- define for GCC 3.2+.
-
-2004-05-27 Jakub Jelinek <jakub@redhat.com>
-
- * string/bits/string2.h (__bzero): Define even for GCC 3.0+.
- * sysdeps/alpha/stpcpy.S (stpcpy): Add libc_hidden_builtin_def.
- * sysdeps/alpha/alphaev67/stpcpy.S (stpcpy): Likewise.
- * sysdeps/powerpc/powerpc32/stpcpy.S (stpcpy): Likewise.
- * sysdeps/powerpc/powerpc64/stpcpy.S (stpcpy): Likewise.
- * sysdeps/sparc/sparc32/stpcpy.S (stpcpy): Likewise.
- * sysdeps/sparc/sparc64/stpcpy.S (stpcpy): Likewise.
- * sysdeps/i386/stpcpy.S (stpcpy): Likewise.
- * sysdeps/i386/i586/stpcpy.S (stpcpy): Likewise.
- * sysdeps/generic/stpcpy.c (stpcpy): Likewise.
- * sysdeps/x86_64/stpcpy.S (stpcpy): Likewise.
- * sysdeps/i386/i586/memcpy.S (memcpy): Remove
- libc_hidden_builtin_def if MEMPCPY_P.
- * sysdeps/x86_64/memcpy.S (memcpy): Likewise.
- * sysdeps/i386/i686/mempcpy.S (mempcpy): Add libc_hidden_builtin_def.
- * sysdeps/i386/i586/mempcpy.S (mempcpy): Likewise.
- * sysdeps/generic/mempcpy.c (mempcpy): Likewise.
- * sysdeps/x86_64/mempcpy.S (mempcpy): Likewise.
-
-2004-05-26 Roland McGrath <roland@frob.com>
-
- * sysdeps/generic/bits/in.h
- (IMPLINK_IP, IMPLINK_LOWEXPER, IMPLINK_HIGHEXPER): Macros removed.
- These are long obsolete in BSD systems where they originated.
- * conform/data/netinet/in.h-data: Remove `allow IMPLINK_*'
-
-2004-05-26 Jakub Jelinek <jakub@redhat.com>
-
- * include/string.h (mempcpy, stpcpy): Add libc_hidden_builtin_proto.
- * string/bits/string2.h (memset): Disable macro for GCC 3.0+.
- (__mempcpy): Use __builtin_mempcpy for GCC 3.4+.
- (strchr): For GCC 3.2+, only use __rawmemchr if second argument is
- constant '\0' and first argument is not constant.
- (__stpcpy): Use __builtin_stpcpy for GCC 3.4+.
- (strncpy): Remove #ifdef _USE_STRING_ARCH_mempcpy variant.
- For GCC 3.2+ use __builtin_strncpy.
- (strncat): For GCC 3.2+ use __builtin_strncat.
- (strcmp): For GCC 3.2+ use __builtin_strcmp if both arguments are
- constant.
- (strcspn, strspn, strpbrk): For GCC 3.2+, use builtin function
- if both arguments are constant.
-
-2004-05-26 Ulrich Drepper <drepper@redhat.com>
-
- * nss/nss_files/files-hosts.c: Fix condition for looking up IPv4
- mapped addresses in gethostbyaddr.
-
-2004-05-25 Ulrich Drepper <drepper@redhat.com>
-
- * nss/digits_dots.c (__nss_hostname_digits_dots): Remove typep and
- flags parameter, convert afp to simple int parameter. Adjust code.
- typep was never != NULL and flags therefore also unused. *afp is
- never modified.
- * nss/nsswitch.h: Adjust __nss_hostname_digits_dots prototype.
- * nss/getXXbyYY.c: Remove HAVE_TYPE handling. Adjust af parameter
- handling for __nss_hostname_digits_dots calls.
- * nss/getXXbyYY_r.c: Likewise.
-
- * elf/dl-load.c (_dl_map_object_from_fd): Map DSOs with MAP_DENYWRITE.
-
-2004-05-25 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/powerpc/fpu/Makefile: Make ld.so a dependency of libm.so.
- * sysdeps/powerpc/fpu/bits/mathinline.h [__LIBC_INTERNAL_MATH_INLINES]
- (__ieee754_sqrt): Define as __MATH_INLINE using fsqrt instruction.
- (__ieee754_sqrtf): Define as __MATH_INLINE using fsqrts instruction.
- * sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Moved
- implementation from w_sqrt.c.
- * sysdeps/powerpc/fpu/e_sqrtf.c (__slow_ieee754_sqrtf): Moved
- implementation from w_sqrtf.c.
- * sysdeps/powerpc/fpu/w_sqrt.c (__sqrt): Wrapper implementation
- using inline __ieee754_sqrt().
- * sysdeps/powerpc/fpu/w_sqrtf.c (__sqrtf): Wrapper implementation
- using inline __ieee754_sqrtf().
- * sysdeps/powerpc/powerpc32/sysdep.h [__ASSEMBLER__]: Include
- <sysdeps/powerpc/sysdep.h> independent of __ASSEMBLER__.
- * sysdeps/powerpc/sysdep.h [__ASSEMBLER__] (PPC_FEATURE_*): Define
- PPC_FEATURE_* independent of __ASSEMBLER__.
-
-2004-05-25 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/pthread/aio_notify.c: Use <> instead of "" for aio_misc.h
- include.
- (aio_start_notify_thread): Define if not defined.
- (notify_func_wrapper): Use it.
- * sysdeps/pthread/aio_misc.c: Use <> instead of "" for aio_misc.h
- include.
- (aio_create_helper_thread): Define if not defined.
- (__aio_create_helper_thread): New function.
- (__aio_enqueue_request): Use aio_create_helper_thread.
-
- * nis/ypclnt.c (ypall_data, ypall_foreach): Remove.
- (struct ypresp_all_data): New type.
- (__xdr_ypresp_all): Change second argument to
- struct ypresp_all_data *. Replace ypall_foreach and
- ypall_data with objp->foreach and objp->data.
- (yp_all): Remove status variable, add data. Replace
- all uses of status with data.status. Initialize data.foreach
- and data.data instead of ypall_foreach and ypall_data.
-
-2004-05-24 Jakub Jelinek <jakub@redhat.com>
-
- * elf/dl-lookup.c (add_dependency): Set DF_1_NODELETE bit
- in l_flags_1, not in l_flags.
-
-2004-04-10 Robert Millan <robertmh@gnu.org>
-
- * sysdeps/unix/sysv/linux/bits/in.h: Cosmetic fixes to get in sync
- with sysdeps/generic/bits/in.h.
-
-2004-05-25 Roland McGrath <roland@frob.com>
-
- * sysdeps/generic/unwind-dw2-fde-glibc.c: Change copyright terms from
- GCC GPL to standard glibc LGPL.
-
-2004-05-24 Ulrich Drepper <drepper@redhat.com>
-
- * manual/string.texi (Copying and Concatenation): Fixed second
- concat example.
- Reported by Fabian Pietsch <fabian@zzznowman.dyndns.org>.
-
-2004-05-23 Ulrich Drepper <drepper@redhat.com>
-
- * malloc/obstack.c: Don't allow linking with _obstack.
-
-2004-05-23 Andreas Schwab <schwab@suse.de>
-
- * sysdeps/m68k/m68020/bits/atomic.h: Use "+m" constraint instead
- of separate "m" constraints.
-
-2004-05-15 Chris Demetriou <cgd@broadcom.com>
-
- * sysdeps/mips/dl-machine.h (ELF_DL_FRAME_SIZE)
- (ELF_DL_SAVE_ARG_REGS, ELF_DL_RESTORE_ARG_REGS): For the N32
- and 64 ABIs, save and restore regs $10 and $11 (a6 and a7).
-
-2004-05-20 Paul Eggert <eggert@cs.ucla.edu>
-
- * malloc/obstack.c (_obstack) [!defined _LIBC]: Remove; not used.
- Add comment explaining why libc still defines it.
-
-2004-05-19 Paul Eggert <eggert@cs.ucla.edu>
-
- * malloc/obstack.h (_obstack_free, obstack_1grow, obstack_1grow_fast,
- obstack_alignment_mask, obstack_alloc, obstack_base,
- obstack_blank, obstack_blank_fast, obstack_chunk_size,
- obstack_copy, obstack_copy0, obstack_finish, obstack_grow,
- obstack_grow0, obstack_init, obstack_int_grow,
- obstack_int_grow_fast, obstack_make_room, obstack_memory_used,
- obstack_next_free, obstack_object_size, obstack_ptr_grow,
- obstack_ptr_grow_fast, obstack_room): Remove declarations of
- nonexistent functions.
-
-2004-05-18 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/powerpc/powerpc64/sysdep.h [__ASSEMBLER__]: Include
- <sysdeps/powerpc/sysdep.h> independent of __ASSEMBLER__.
- [PIC]: Redundant for powerpc64, removed.
- (ENTRY): Generate size and alignment for opd entry.
- (EALIGN): Generate size and alignment for opd entry.
- (END): Use DOT_LABEL in ASM_SIZE_DIRECTIVE.
- (END_GEN_TB): Use DOT_LABEL in ASM_SIZE_DIRECTIVE.
-
-2004-05-19 Ulrich Drepper <drepper@redhat.com>
-
- * misc/regexp.h: Say the functions have been withdrawn.
-
- * wcsmbs/tst-wcpncpy.c: Add more tests.
-
-2004-05-18 Roland McGrath <roland@frob.com>
-
- * sysdeps/generic/unwind.h: Change copyright terms from GCC GPL to
- standard glibc LGPL.
- * sysdeps/generic/unwind-dw2.c: Likewise.
- * sysdeps/generic/unwind-dw2-fde.c: Likewise.
- * sysdeps/generic/unwind-dw2-fde.h: Likewise.
- * sysdeps/generic/unwind-pe.h: Likewise.
-
-2004-05-15 Petter Reinholdtsen <pere@hungry.com>
-
- * locale/iso-3166.def: Remove YUGOSLAVIA and insert "SERBIA AND
- MONTENEGRO" which have taken over the code 819. Patch from
- Danilo Segan. [BZ #40]
-
-2004-05-15 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h
- (SYSCALL_ERROR_HANDLER): Rename __sparc.get_pic.l7 to
- __sparc_get_pic_l7.
-
-2004-05-15 Joseph S. Myers <jsm@polyomino.org.uk>
-
- * catgets/gencat.c: Update bug reporting instructions.
+ [BZ #728]
+ * posix/getconf.c: Update copyright year.
+ * nss/getent.c: Likewise.
+ * nscd/nscd_nischeck.c: Likewise.
+ * iconv/iconvconfig.c: Likewise.
+ * iconv/iconv_prog.c: Likewise.
+ * elf/ldconfig.c: Likewise.
+ * catgets/gencat.c: Likewise.
* csu/version.c: Likewise.
- * debug/catchsegv.sh: Likewise.
- * debug/pcprofiledump.c: Likewise.
- * debug/xtrace.sh: Likewise.
* elf/ldd.bash.in: Likewise.
- * iconv/iconv_prog.c: Likewise.
- * iconv/iconvconfig.c: Likewise.
+ * elf/sprof.c (print_version): Likewise.
* locale/programs/locale.c: Likewise.
* locale/programs/localedef.c: Likewise.
- * login/programs/pt_chown.c: Likewise.
+ * nscd/nscd.c (print_version): Likewise.
+ * debug/xtrace.sh: Likewise.
* malloc/memusage.sh: Likewise.
- * malloc/memusagestat.c: Likewise.
* malloc/mtrace.pl: Likewise.
- * manual/crypt.texi: Likewise.
- * manual/install.texi: Likewise.
- * nss/makedb.c: Likewise.
-
-2004-05-14 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_rela): Only
- CHECK_STATIC_TLS if sym != NULL.
- * sysdeps/sh/dl-machine.h (elf_machine_rela): Likewise.
- * sysdeps/i386/dl-machine.h (elf_machine_rela): Likewise.
-
-2004-05-12 Andreas Schwab <schwab@suse.de>
-
- * posix/regex_internal.c (build_wcs_buffer): Also set pstr->mbs
- when translating.
-
-2004-05-13 H.J. Lu <hongjiu.lu@intel.com>
-
- * Rules (xtests): Depend on tests.
-
-2004-05-13 Jakub Jelinek <jakub@redhat.com>
-
- * libio/genops.c (_IO_default_xsputn): Avoid one overflow per char if
- count is negative, yet write_ptr < write_end.
- (_IO_default_xsgetn): Avoid one underflow per char if count is
- negative, yet read_ptr < read_end.
-
-2004-05-12 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/unix/sysv/linux/powerpc/bits/termios.h (XTABS): Define XTABS
- equal to TAB3.
-
-2004-05-12 Ulrich Drepper <drepper@redhat.com>
-
- * elf/dl-lookup.c (_dl_lookup_symbol_x): Correct _dl_signal_cerror
- call.
-
-2004-05-10 Roland McGrath <roland@frob.com>
-
- * hurd/hurdlookup.c (__hurd_file_name_split): Return ENOENT for "".
- (__hurd_directory_name_split): Likewise.
-
-2004-05-10 Ulrich Drepper <drepper@redhat.com>
-
- * po/fr.po: Update from translation team.
-
-2004-05-10 Andreas Jaeger <aj@suse.de>
-
- * sysdeps/unix/sysv/linux/x86_64/makecontext.c (__makecontext):
- Correctly compute alignment.
- Patch by Michael Matz <matz@suse.de>.
-
-2004-05-09 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/generic/sigpause.c: Prevent sigpause prototype.
- * sysdeps/posix/sigpause.c: Likewise.
- * signal/signal.h: Don't define sigpause macro unless needed.
-
-2004-05-08 Jakub Jelinek <jakub@redhat.com>
-
- * configure.in (libc_cv_libgcc_s_suffix): New check.
- (libc_cv_as_needed): Use -lgcc_s$libc_cv_libgcc_s_suffix.
- * config.make.in (libgcc_s_suffix): Set.
- * Makeconfig (libgcc_eh): Use -lgcc_s$(libgcc_s_suffix).
-
-2004-05-08 Ulrich Drepper <drepper@redhat.com>
-
- * signal/signal.h: Use BSD sigpause only if BSD behavior is preferred.
-
-2004-04-29 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h (LOADARGS_1,
- LOADARGS_2, LOADARGS_3, LOADARGS_4, LOADARGS_5, LOADARGS_6):
- Load argument values into temporary variables.
- * sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h (LOADARGS_1,
- LOADARGS_2, LOADARGS_3, LOADARGS_4, LOADARGS_5, LOADARGS_6):
- Likewise.
-
-2004-05-07 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/Makefile: mq_send.c and mq_receive.c
- need to be compiled with exceptions.
-
-2004-05-06 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/ifreq.c (__ifreq): Fix memory handling.
- * sysdeps/generic/ifreq.c (__ifreq): Fix memory handling.
-
- * resolv/res_hconf.c (_res_hconf_reorder_addrs): Make clear that
- realloc cannot fail.
-
- * nss/nss_files/files-netgrp.c (EXPAND): Free buffer which cannot
- be expanded.
-
- * nis/nis_table.c: Clean up memory handling.
- * nis/nis_subr.c (nis_getnames): Clean up memory handling.
- * nis/nis_removemember.c (nis_removemember): Add comment
- explaining use of realloc.
-
- * math/tgmath.h (fabs): Use __TGMATH_UNARY_REAL_IMAG_RET_REAL.
- (carg): Likewise.
- Patch by Lev S Bishop <lev.bishop@yale.edu>.
-
- * math/bug-tgmath1.c (main): Test fabs and carg as well.
-
-2004-05-06 Richard Henderson <rth@redhat.com>
-
- * elf/elf.h (AT_L1I_CACHESHAPE, AT_L1D_CACHESHAPE,
- AT_L2_CACHESHAPE, AT_L3_CACHESHAPE): New.
- * sysdeps/unix/sysv/linux/alpha/Versions: Export
- __libc_alpha_cache_shape as a private symbol.
- * sysdeps/unix/sysv/linux/alpha/dl-sysdep.c: New file.
- * sysdeps/unix/sysv/linux/alpha/sysconf.c: New file.
-
-2004-05-06 Richard Henderson <rth@redhat.com>
-
- * sysdeps/alpha/div_libc.h (_ITOFS, _ITOFT, _FTOIT, _ITOFT2): New.
- * sysdeps/alpha/divl.S: Use them.
- * sysdeps/alpha/divq.S: Likewise.
- * sysdeps/alpha/divqu.S: Likewise.
- * sysdeps/alpha/reml.S: Likewise.
- * sysdeps/alpha/remq.S: Likewise.
- * sysdeps/alpha/remqu.S: Likewise.
-
-2004-05-06 Ulrich Drepper <drepper@redhat.com>
-
- * math/tgmath.h (__TGMATH_UNARY_REAL_IMAG_RET_REAL): Define.
- (cimag): Use it.
- (creal): Likewise.
- * math/Makefile (tests): Add bug-tgmath1.
- * math/bug-tgmath1.c: New file.
-
-2004-05-05 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/alpha/atomicity.h: Remove.
- * sysdeps/generic/atomicity.h: Remove.
-
-2004-05-05 H.J. Lu <hongjiu.lu@intel.com>
-
- * sysdeps/i386/i486/bits/string.h (strpbrk): Cast return to
- char *.
-
-2004-04-22 David Mosberger <davidm@hpl.hp.com>
-
- * sysdeps/unix/sysv/linux/ia64/Makefile (librt-routines): Mention
- rt-sysdep.
- * sysdeps/unix/sysv/linux/ia64/rt-sysdep.S: New file.
-
- * sysdeps/ia64/strcat.c: New file.
- * sysdeps/ia64/strcat.S: Delete.
-
-2004-05-04 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/sysv/linux/ia64/sysdep.S (USE___THREAD): Remove
- defined.
-
-2004-04-23 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/ia64/fpu/libm_support.h (__libm_error_support): Add
- libc_hidden_proto. Define to __GI___libm_error_support for
- assembly going into libc.so.
- * sysdeps/ia64/fpu/libc_libm_error.c (__libm_error_support): Add
- libc_hidden_def.
-
- * include/libc-symbols.h (HIDDEN_BUILTIN_JUMPTARGET): Define.
- * sysdeps/ia64/bcopy.S (bcopy): Use it for jump to memmove.
-
- * sysdeps/unix/sysv/linux/ia64/sysdep.S (__syscall_error): Access
- gprel errno if RTLD_PRIVATE_ERRNO or __thread __libc_errno/errno
- if USE___THREAD.
-
-2004-05-03 Jakub Jelinek <jakub@redhat.com>
-
- * sysdeps/unix/bsd/bits/fcntl.h (F_SETOWN, F_GETOWN): Define if
- __USE_BSD or __USE_UNIX98.
- * sysdeps/unix/bsd/ultrix4/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
- * sysdeps/unix/bsd/bsd4.4/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
- * sysdeps/unix/bsd/sun/sunos4/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/common/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
- * sysdeps/unix/sysv/aix/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
- * sysdeps/unix/sysv/irix4/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
- * sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/s390/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/cris/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/mips/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/sh/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/i386/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/m68k/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/ia64/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/arm/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/hppa/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h (F_SETOWN, F_GETOWN):
- Likewise.
- * sysdeps/generic/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
- * sysdeps/mach/hurd/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
- * io/sys/stat.h (S_ISSOCK, S_IFSOCK): Likewise.
-
-2004-05-01 Jakub Jelinek <jakub@redhat.com>
-
- * posix/Versions (libc): Remove __libc_wait, __libc_waitpid,
- __libc_pause, __libc_nanosleep, __libc_pread, __libc_pread64,
- __libc_pwrite64, __waitid and __pselect @@GLIBC_PRIVATE.
- * stdlib/Versions (libc): Remove __on_exit@@GLIBC_PRIVATE.
- * sysdeps/unix/sysv/linux/Versions (libc): Remove
- __libc_sigaction@@GLIBC_PRIVATE.
- * sysdeps/unix/sysv/linux/x86_64/Versions (libc): Remove
- __modify_ldt@@GLIBC_PRIVATE.
- * socket/Versions (libc): Remove __libc_accept, __libc_send,
- __libc_recvfrom, __libc_recvmsg, __libc_sendmsg, __libc_recv,
- __libc_sendto and __libc_connect @@GLIBC_PRIVATE.
- * stdio-common/Versions (libc): Remove
- _itoa_upper_digits@@GLIBC_PRIVATE.
- * resolv/Versions (libresolv): Remove __ns_samename@@GLIBC_PRIVATE.
- * misc/Versions (libc): Remove __libc_fsync, __libc_msync,
- __libc_readv and __libc_writev @@GLIBC_PRIVATE.
- * termios/Versions (libc): Remove __libc_tcdrain@@GLIBC_PRIVATE.
- * io/Versions (libc): Remove __libc_open, __libc_close, __libc_read,
- __libc_write, __libc_lseek, __libc_fcntl, __libc_open64 and
- __libc_lseek64 @@GLIBC_PRIVATE.
-
-2004-04-30 Jakub Jelinek <jakub@redhat.com>
-
- * elf/dl-load.c (open_verify): Move e_phentsize check after e_type
- check.
-
-2004-04-29 Steven Munroe <sjmunroe@us.ibm.com>
-
- * sysdeps/powerpc/powerpc64/fpu/s_ceil.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_floor.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_floorf.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_llrint.c: Removed.
- * sysdeps/powerpc/powerpc64/fpu/s_llrint.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_llrintf.c: Removed.
- * sysdeps/powerpc/powerpc64/fpu/s_llrintf.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_llround.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_llroundf.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_lround.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_lroundf.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_rint.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_rintf.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_round.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_roundf.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_trunc.S: New file.
- * sysdeps/powerpc/powerpc64/fpu/s_truncf.S: New file.
-
- * math/test-misc.c [LDBL_MANT_DIG == 106](main): Correct LDBL_MAX
- mantissa for AIX long double format.
- * misc/qefgcvt.c [LDBL_MANT_DIG == 106] (NDIGIT_MAX): Define for AIX
- long double format.
- * misc/qefgcvt_r.c [LDBL_MANT_DIG == 106] (NDIGIT_MAX): Likewise.
- * stdlib/fpioconst.c [!__NO_LONG_DOUBLE_MATH && __LDBL_MAX_EXP__>1024]
- (_fpioconst_pow10): AIX long double format has same exponent range as
- double.
- * stdlib/fpioconst.h [!__NO_LONG_DOUBLE_MATH && __LDBL_MAX_EXP__>1024]
- (LDBL_MAX_10_EXP_LOG): AIX long double format has same exponent range
- as double.
-
-2004-04-23 Art Haas <ahaas@airmail.net>
-
- * sysdeps/unix/sysv/linux/kernel-features.h: Add 'defined'.
-
-2004-04-28 Carlos O'Donell <carlos@baldric.uwo.ca>
-
- * sysdeps/unix/sysv/linux/mq_getattr.c: Include <stddef.h>.
- * sysdeps/unix/sysv/linux/mq_notify.c: Likewise.
- * sysdeps/unix/sysv/linux/mq_open.c: Likewise.
- * sysdeps/unix/sysv/linux/mq_receive.c: Likewise.
- * sysdeps/unix/sysv/linux/mq_send.c: Likewise.
-
-2004-04-29 Philip Blundell <pb@nexus.co.uk>
-
- * sysdeps/arm/dl-machine.h (RTLD_START): Avoid unnecessary GOT
- entries.
-
-2004-04-29 Jakub Jelinek <jakub@redhat.com>
-
- * manual/resource.texi (sched_setaffinity, sched_getaffinity): Fix
- prototypes and description [BZ #131].
-
- * string/bits/string2.h (strpbrk): Cast NULL to char * [BZ #130].
- Patch by Ed Catmur <ed@catmur.co.uk>.
- * string/tst-inlcall.c (main): Add test for strpbrk.
-
- [BZ #140]
- * sysdeps/unix/sysv/linux/sys/sysctl.h: Remove linux/compiler.h
- include.
- (_LINUX_KERNEL_H, _LINUX_TYPES_H, _LINUX_LIST_H): Only define if not
- yet defined, #undef back after including linux/sysctl.h if defined
- here.
- (__LINUX_COMPILER_H, __user): Define if not yet defined, #undef
- back after including linux/sysctl.h if defined here.
-
- * sysdeps/sparc/sparc64/soft-fp/qp_qtoi.c (_Qp_qtoi): Use %f31
- for single precision register, add it to __asm clobbers [BZ #139].
- * sysdeps/sparc/sparc64/soft-fp/qp_qtoui.c (_Qp_qtoui): Use %f31
- for single precision register, add it to __asm clobbers.
- * sysdeps/sparc/sparc64/soft-fp/qp_qtoux.c (_Qp_qtoux): Use fqtox
- instead of fqtoi in QP_HANDLE_EXCEPTIONS.
- * sysdeps/sparc/sparc64/soft-fp/qp_qtox.c (_Qp_qtox): Likewise.
- Reported by M. H. VanLeeuwen <vanl@megsinet.net>.
-
-2004-04-23 Andreas Schwab <schwab@suse.de>
-
- * sysdeps/unix/sysv/linux/m68k/register-dump.h: Use
- __attribute_used__.
-
-2004-04-22 Philip Blundell <philb@gnu.org>
-
- * sysdeps/arm/dl-machine.h (elf_machine_rela): Don't use INTUSE
- when calling _dl_signal_error.
- (elf_machine_rel): Likewise.
-
-2004-04-21 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/ia64/setjmp.S: Define _GI___sigsetjmp
- and use it internally instead of __sigsetjmp.
- * sysdeps/ia64/fpu/s_frexpf.c (frexpf): Use _GI___libm_frexp_4f.
- * sysdeps/ia64/fpu/s_frexpl.c (frexpl): Use _GI___libm_frexp_4l.
- * sysdeps/ia64/fpu/libm_frexp4.S: Define _GI___libm_frexp_4.
- * sysdeps/ia64/fpu/libm_frexp4f.S: Define _GI___libm_frexp_4f.
- * sysdeps/ia64/fpu/libm_frexp4l.S: Define _GI___libm_frexp_4l.
- * sysdeps/ia64/fpu/s_frexp.c (frexp): Use _GI___libm_frexp_4.
- * sysdeps/ia64/fpu/libm_support.h: Declare _GI___libm_frexp_4,
- _GI___libm_frexp_4f, _GI___libm_frexp_4l.
- * sysdeps/ia64/fpu/bits/mathinline.h: New file.
- * sysdeps/unix/sysv/linux/ia64/__start_context.S: Use
- HIDDEN_JUMPTARGET for exit call.
- * sysdeps/unix/sysv/linux/ia64/clone2.S: Use HIDDEN_JUMPTARGET for
- _exit call.
- * sysdeps/ia64/bcopy.S: Use HIDDEN_JUMPTARGET for memmove call.
- * sysdeps/ia64/strcat.S: Use HIDDEN_JUMPTARGET for strlen and
- strcpy calls.
-
-2004-04-21 Jakub Jelinek <jakub@redhat.com>
-
- * posix/tst-chmod.c (do_test): Fix a typo.
+ * debug/catchsegv.sh: Likewise.
- * elf/lateglobal.c (main): Fix error checks.
- Patch by Stephen Clarke <stephen.clarke@st.com>.
+2005-01-11 Thorsten Kukuk <kukuk@suse.de>
- * manual/ctype.texi (isblank, iswblank): Mark as ISO functions,
- mention they have been added in ISO C99.
- Reported by Ben Pfaff <blp@cs.stanford.edu>.
+ [BZ #733]
+ * sunrpc/svc_tcp.c (svctcp_create): Call listen with SOMAXCONN
+ as backlog.
+ * sunrpc/svc_unix.c (svcunix_create): Likewise.
-2004-03-31 H.J. Lu <hongjiu.lu@intel.com>
+2005-01-10 Jakub Jelinek <jakub@redhat.com>
- * sysdeps/ieee754/ldbl-128/bits/huge_vall.h: Fix typo.
+ [BZ #732]
+ * time/tst-strptime.c (day_tests): Add 2 new tests.
+ (test_tm, main): Issue an error instead of segfaulting if
+ strptime returns NULL.
-2004-04-20 Jakub Jelinek <jakub@redhat.com>
+2005-01-12 GOTO Masanori <gotom@debian.or.jp>
- * sysdeps/unix/sysv/linux/shm_open.c (shm_unlink): Change EPERM into
- EACCES.
+ [BZ #650]
+ * malloc/malloc.h: Always include features.h.
-2004-04-20 Jakub Jelinek <jakub@redhat.com>
+2005-01-10 Ulrich Drepper <drepper@redhat.com>
- * stdio-common/vfscanf.c (_IO_vfscanf): Revert last %% whitespace
- handling change.
- * stdio-common/tst-sscanf.c (int_tests): Adjust.
+ * sysdeps/generic/dl-tls.c (_dl_next_tls_modid): Fix assertion and
+ recognition of last entry.
- * nis/nss-nis.c: Include stdlib.h.
+2005-01-09 Andreas Jaeger <aj@suse.de>
- * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_rela): Shut up a
+ [BZ #732]
+ * time/strptime_l.c (__strptime_internal): Add braces to avoid
warning.
- * sysdeps/sparc/sparc64/memcmp.S (memcmp): Remove BP_SYM () from
- libc_hidden_builtin_def.
-
-2004-04-20 Jim Meyering <jim@meyering.net>
-
- * misc/error.c (error_tail): Don't leak upon realloc failure.
-
-2004-04-20 Martin Schwidefsky <schwidefsky@de.ibm.com>
-
- * sysdeps/unix/sysv/linux/dl-execstack.c (_dl_make_stack_executable):
- Use RETURN_ADDRESS instead of __builtin_return_address.
-
-2004-04-19 Ulrich Drepper <drepper@redhat.com>
-
- * sysdeps/unix/sysv/linux/mq_unlink.c: Rewrite to produce more
- compact code.
-
-2004-04-20 Jakub Jelinek <jakub@redhat.com>
-
- * stdio-common/vfscanf.c (_IO_vfscanf): When skipping whitespace,
- do input_error () instead of conv_error () and don't look at errno.
- Don't eat any whitespace before %% if skip_space == 0.
- * stdio-common/tst-sscanf.c (int_tests): New array.
- (main): Run int_tests.
-
-See ChangeLog.14 for earlier changes.
+2005-01-08 Jakub Jelinek <jakub@redhat.com>
+
+ * elf/Makefile (generated): Add tst-pie1{,.out,.o}.
+
+2005-01-06 Roland McGrath <roland@redhat.com>
+
+ [BZ #633]
+ * sysdeps/unix/sysv/linux/futimes.c (__futimes): Catch errno values
+ indicating file-name lookup errors, and return ENOSYS or EBADF instead.
+
+2005-01-06 Ulrich Drepper <drepper@redhat.com>
+
+ * posix/unistd.h: Declare ftruncate for POSIX 2003. [BZ #640]
+
+2004-12-22 Steven Munroe <sjmunroe@us.ibm.com>
+
+ [BZ #602]
+ * math/libm-test.inc (rint_test_tonearest): New test.
+ (rint_test_towardzero): New test.
+ (rint_test_downward): New test.
+ (rint_test_upward): New test.
+ * sysdeps/powerpc/powerpc32/fpu/s_ceil.S: Fix -0.0 case.
+ Remove redundant const values.
+ * sysdeps/powerpc/powerpc32/fpu/s_ceilf.S: Fix -0.0 case.
+ Remove redundant const values. Use float const.
+ * sysdeps/powerpc/powerpc32/fpu/s_floor.S: Fix -0.0 case.
+ * sysdeps/powerpc/powerpc32/fpu/s_floorf.S: Fix -0.0 case.
+ Use float const.
+ * sysdeps/powerpc/powerpc32/fpu/s_rint.S: Fix -0.0 case.
+ * sysdeps/powerpc/powerpc32/fpu/s_rintf.S: Fix -0.0 case.
+ Use float const.
+ * sysdeps/powerpc/powerpc32/fpu/s_round.S: Fix -0.0 case.
+ Remove redundant const values.
+ * sysdeps/powerpc/powerpc32/fpu/s_roundf.S: Fix -0.0 case.
+ Remove redundant const values. Use float const.
+ * sysdeps/powerpc/powerpc32/fpu/s_trunc.S: Fix -0.0 case.
+ Remove redundant const values.
+ * sysdeps/powerpc/powerpc32/fpu/s_truncf.S: Fix -0.0 case.
+ Remove redundant const values. Use float const.
+ * sysdeps/powerpc/powerpc64/fpu/s_ceil.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case. Remove redundant const values.
+ * sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case. Remove redundant const values.
+ Use float const.
+ * sysdeps/powerpc/powerpc64/fpu/s_floor.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case.
+ * sysdeps/powerpc/powerpc64/fpu/s_floorf.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case. Use float const.
+ * sysdeps/powerpc/powerpc64/fpu/s_rint.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case.
+ * sysdeps/powerpc/powerpc64/fpu/s_rintf.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case. Use float const.
+ * sysdeps/powerpc/powerpc64/fpu/s_round.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case. Remove redundant const values.
+ * sysdeps/powerpc/powerpc64/fpu/s_roundf.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case. Remove redundant const values.
+ Use float const.
+ * sysdeps/powerpc/powerpc64/fpu/s_trunc.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case.
+ * sysdeps/powerpc/powerpc64/fpu/s_truncf.S: Use EALIGN for Quadword
+ alignment. Fix -0.0 case. Remove redundant const values.
+ Use float const.
+
+2004-12-29 Jakub Jelinek <jakub@redhat.com>
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/libm_support.h (__libm_error_support): Use
+ libc_hidden_proto instead of HIDDEN_PROTO.
+ * sysdeps/ia64/fpu/libm-symbols.h (HIDDEN_PROTO): Remove.
+ (__libm_error_support): If ASSEMBLER and in libc, define to
+ HIDDEN_JUMPTARGET(__libm_error_support).
+
+2004-12-28 David Mosberger <davidm@hpl.hp.com>
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/Makefile (duplicated-routines): New macro.
+ (sysdep_routines): Replace libm_ldexp{,f,l} and libm_scalbn{,f,l}
+ with $(duplicated-routines).
+ (libm-sysdep_routines): Likewise, but substitute "s_" prefix for
+ "m_" prefix.
+
+2004-12-27 David Mosberger <davidm@hpl.hp.com>
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/libm-symbols.h: Add include of <sysdep.h> and
+ undefine "ret" macro. Add __libm_error_support hidden definitions.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/e_lgamma_r.c: Remove CVS-id comment. Add
+ missing portion of copyright statement.
+ * sysdeps/ia64/fpu/e_lgammaf_r.c: Likewise.
+ * sysdeps/ia64/fpu/e_lgammal_r.c: Likewise.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/w_lgamma.c: Remove CVS-id comment. Add
+ missing portion of copyright statement.
+ (__ieee754_lgamma): Rename from lgamma(). Make lgamma() a weak alias.
+ (__ieee754_gamma): Likewise.
+ * sysdeps/ia64/fpu/w_lgammaf.c: Likewise.
+ * sysdeps/ia64/fpu/w_lgammal.c: Likewise.
+
+2004-12-09 H. J. Lu <hjl@lucon.org>
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/s_nextafterl.c: Remove.
+ * sysdeps/ia64/fpu/s_nexttoward.c: Likewise.
+ * sysdeps/ia64/fpu/s_nexttowardf.c: Likewise.
+ * sysdeps/ia64/fpu/e_atan2l.S: Remove (duplicate of e_atan2l.c).
+ * sysdeps/ia64/fpu/e_expl.S: Likewise.
+ * sysdeps/ia64/fpu/e_logl.c: Remove (conflicts with e_logl.S).
+
+2004-11-18 David Mosberger <davidm@hpl.hp.com>
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/README: New file.
+ * sysdeps/ia64/fpu/gen_import_file_list: New file.
+ * sysdeps/ia64/fpu/import_check: Likewise.
+ * sysdeps/ia64/fpu/import_diffs: Likewise.
+ * sysdeps/ia64/fpu/import_file.awk: Likewise.
+ * sysdeps/ia64/fpu/import_intel_libm: Likewise.
+ * sysdeps/ia64/fpu/libm-symbols.h: Likewise.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/e_acos.S: Update from Intel libm v2.1+.
+ * sysdeps/ia64/fpu/e_acosf.S: Likewise.
+ * sysdeps/ia64/fpu/e_acosl.S: Likewise.
+ * sysdeps/ia64/fpu/e_asin.S: Likewise.
+ * sysdeps/ia64/fpu/e_asinf.S: Likewise.
+ * sysdeps/ia64/fpu/e_asinl.S: Likewise.
+ * sysdeps/ia64/fpu/e_atan2.S: Likewise.
+ * sysdeps/ia64/fpu/e_atan2f.S: Likewise.
+ * sysdeps/ia64/fpu/e_cosh.S: Likewise.
+ * sysdeps/ia64/fpu/e_coshf.S: Likewise.
+ * sysdeps/ia64/fpu/e_coshl.S: Likewise.
+ * sysdeps/ia64/fpu/e_exp.S: Likewise.
+ * sysdeps/ia64/fpu/e_expf.S: Likewise.
+ * sysdeps/ia64/fpu/e_fmod.S: Likewise.
+ * sysdeps/ia64/fpu/e_fmodf.S: Likewise.
+ * sysdeps/ia64/fpu/e_fmodl.S: Likewise.
+ * sysdeps/ia64/fpu/e_hypot.S: Likewise.
+ * sysdeps/ia64/fpu/e_hypotf.S: Likewise.
+ * sysdeps/ia64/fpu/e_hypotl.S: Likewise.
+ * sysdeps/ia64/fpu/e_log.S: Likewise.
+ * sysdeps/ia64/fpu/e_log2.S: Likewise.
+ * sysdeps/ia64/fpu/e_log2f.S: Likewise.
+ * sysdeps/ia64/fpu/e_log2l.S: Likewise.
+ * sysdeps/ia64/fpu/e_logf.S: Likewise.
+ * sysdeps/ia64/fpu/e_pow.S: Likewise.
+ * sysdeps/ia64/fpu/e_powf.S: Likewise.
+ * sysdeps/ia64/fpu/e_powl.S: Likewise.
+ * sysdeps/ia64/fpu/e_remainder.S: Likewise.
+ * sysdeps/ia64/fpu/e_remainderf.S: Likewise.
+ * sysdeps/ia64/fpu/e_remainderl.S: Likewise.
+ * sysdeps/ia64/fpu/e_scalb.S: Likewise.
+ * sysdeps/ia64/fpu/e_scalbf.S: Likewise.
+ * sysdeps/ia64/fpu/e_scalbl.S: Likewise.
+ * sysdeps/ia64/fpu/e_sinh.S: Likewise.
+ * sysdeps/ia64/fpu/e_sinhf.S: Likewise.
+ * sysdeps/ia64/fpu/e_sinhl.S: Likewise.
+ * sysdeps/ia64/fpu/e_sqrt.S: Likewise.
+ * sysdeps/ia64/fpu/e_sqrtf.S: Likewise.
+ * sysdeps/ia64/fpu/e_sqrtl.S: Likewise.
+ * sysdeps/ia64/fpu/libm_error.c: Likewise.
+ * sysdeps/ia64/fpu/libm_reduce.c: Likewise.
+ * sysdeps/ia64/fpu/libm_support.h: Likewise.
+ * sysdeps/ia64/fpu/s_atan.S: Likewise.
+ * sysdeps/ia64/fpu/s_atanf.S: Likewise.
+ * sysdeps/ia64/fpu/s_atanl.S: Likewise.
+ * sysdeps/ia64/fpu/s_cbrt.S: Likewise.
+ * sysdeps/ia64/fpu/s_cbrtf.S: Likewise.
+ * sysdeps/ia64/fpu/s_cbrtl.S: Likewise.
+ * sysdeps/ia64/fpu/s_ceil.S: Likewise.
+ * sysdeps/ia64/fpu/s_ceilf.S: Likewise.
+ * sysdeps/ia64/fpu/s_ceill.S: Likewise.
+ * sysdeps/ia64/fpu/s_cos.S: Likewise.
+ * sysdeps/ia64/fpu/s_cosf.S: Likewise.
+ * sysdeps/ia64/fpu/s_cosl.S: Likewise.
+ * sysdeps/ia64/fpu/s_expm1.S: Likewise.
+ * sysdeps/ia64/fpu/s_expm1f.S: Likewise.
+ * sysdeps/ia64/fpu/s_expm1l.S: Likewise.
+ * sysdeps/ia64/fpu/s_fabs.S: Likewise.
+ * sysdeps/ia64/fpu/s_fabsf.S: Likewise.
+ * sysdeps/ia64/fpu/s_fabsl.S: Likewise.
+ * sysdeps/ia64/fpu/s_floor.S: Likewise.
+ * sysdeps/ia64/fpu/s_floorf.S: Likewise.
+ * sysdeps/ia64/fpu/s_floorl.S: Likewise.
+ * sysdeps/ia64/fpu/s_frexp.c: Likewise.
+ * sysdeps/ia64/fpu/s_frexpf.c: Likewise.
+ * sysdeps/ia64/fpu/s_frexpl.c: Likewise.
+ * sysdeps/ia64/fpu/s_ilogb.S: Likewise.
+ * sysdeps/ia64/fpu/s_ilogbf.S: Likewise.
+ * sysdeps/ia64/fpu/s_ilogbl.S: Likewise.
+ * sysdeps/ia64/fpu/s_log1p.S: Likewise.
+ * sysdeps/ia64/fpu/s_log1pf.S: Likewise.
+ * sysdeps/ia64/fpu/s_log1pl.S: Likewise.
+ * sysdeps/ia64/fpu/s_logb.S: Likewise.
+ * sysdeps/ia64/fpu/s_logbf.S: Likewise.
+ * sysdeps/ia64/fpu/s_logbl.S: Likewise.
+ * sysdeps/ia64/fpu/s_modf.S: Likewise.
+ * sysdeps/ia64/fpu/s_modff.S: Likewise.
+ * sysdeps/ia64/fpu/s_modfl.S: Likewise.
+ * sysdeps/ia64/fpu/s_nearbyint.S: Likewise.
+ * sysdeps/ia64/fpu/s_nearbyintf.S: Likewise.
+ * sysdeps/ia64/fpu/s_nearbyintl.S: Likewise.
+ * sysdeps/ia64/fpu/s_rint.S: Likewise.
+ * sysdeps/ia64/fpu/s_rintf.S: Likewise.
+ * sysdeps/ia64/fpu/s_rintl.S: Likewise.
+ * sysdeps/ia64/fpu/s_round.S: Likewise.
+ * sysdeps/ia64/fpu/s_roundf.S: Likewise.
+ * sysdeps/ia64/fpu/s_roundl.S: Likewise.
+ * sysdeps/ia64/fpu/s_significand.S: Likewise.
+ * sysdeps/ia64/fpu/s_significandf.S: Likewise.
+ * sysdeps/ia64/fpu/s_significandl.S: Likewise.
+ * sysdeps/ia64/fpu/s_tan.S: Likewise.
+ * sysdeps/ia64/fpu/s_tanf.S: Likewise.
+ * sysdeps/ia64/fpu/s_tanl.S: Likewise.
+ * sysdeps/ia64/fpu/s_trunc.S: Likewise.
+ * sysdeps/ia64/fpu/s_truncf.S: Likewise.
+ * sysdeps/ia64/fpu/s_truncl.S: Likewise.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/e_acosh.S: New file from Intel libm v2.1+.
+ * sysdeps/ia64/fpu/e_acoshf.S: Likewise.
+ * sysdeps/ia64/fpu/e_acoshl.S: Likewise.
+ * sysdeps/ia64/fpu/e_atanh.S: Likewise.
+ * sysdeps/ia64/fpu/e_atanhf.S: Likewise.
+ * sysdeps/ia64/fpu/e_atanhl.S: Likewise.
+ * sysdeps/ia64/fpu/e_exp10.S: Likewise.
+ * sysdeps/ia64/fpu/e_exp10f.S: Likewise.
+ * sysdeps/ia64/fpu/e_exp10l.S: Likewise.
+ * sysdeps/ia64/fpu/e_exp2.S: Likewise.
+ * sysdeps/ia64/fpu/e_exp2f.S: Likewise.
+ * sysdeps/ia64/fpu/e_exp2l.S: Likewise.
+ * sysdeps/ia64/fpu/e_lgamma_r.S: Likewise.
+ * sysdeps/ia64/fpu/e_lgammaf_r.S: Likewise.
+ * sysdeps/ia64/fpu/e_lgammal_r.S: Likewise.
+ * sysdeps/ia64/fpu/e_logl.S: Likewise.
+ * sysdeps/ia64/fpu/libm_frexp.S: Likewise.
+ * sysdeps/ia64/fpu/libm_frexpf.S: Likewise.
+ * sysdeps/ia64/fpu/libm_frexpl.S: Likewise.
+ * sysdeps/ia64/fpu/s_libm_ldexp.S: Likewise.
+ * sysdeps/ia64/fpu/s_libm_ldexpf.S: Likewise.
+ * sysdeps/ia64/fpu/s_libm_ldexpl.S: Likewise.
+ * sysdeps/ia64/fpu/s_libm_scalbn.S: Likewise.
+ * sysdeps/ia64/fpu/s_libm_scalbnf.S: Likewise.
+ * sysdeps/ia64/fpu/s_libm_scalbnl.S: Likewise.
+ * sysdeps/ia64/fpu/libm_lgamma.S: Likewise.
+ * sysdeps/ia64/fpu/libm_lgammaf.S: Likewise.
+ * sysdeps/ia64/fpu/libm_lgammal.S: Likewise.
+ * sysdeps/ia64/fpu/libm_sincos.S: Likewise.
+ * sysdeps/ia64/fpu/libm_sincos_large.S: Likewise.
+ * sysdeps/ia64/fpu/libm_sincosf.S: Likewise.
+ * sysdeps/ia64/fpu/libm_sincosl.S: Likewise.
+ * sysdeps/ia64/fpu/libm_scalblnf.S: Likewise.
+ * sysdeps/ia64/fpu/s_asinh.S: Likewise.
+ * sysdeps/ia64/fpu/s_asinhf.S: Likewise.
+ * sysdeps/ia64/fpu/s_asinhl.S: Likewise.
+ * sysdeps/ia64/fpu/s_erf.S: Likewise.
+ * sysdeps/ia64/fpu/s_erfc.S: Likewise.
+ * sysdeps/ia64/fpu/s_erfcf.S: Likewise.
+ * sysdeps/ia64/fpu/s_erfcl.S: Likewise.
+ * sysdeps/ia64/fpu/s_erff.S: Likewise.
+ * sysdeps/ia64/fpu/s_erfl.S: Likewise.
+ * sysdeps/ia64/fpu/s_fdim.S: Likewise.
+ * sysdeps/ia64/fpu/s_fdimf.S: Likewise.
+ * sysdeps/ia64/fpu/s_fdiml.S: Likewise.
+ * sysdeps/ia64/fpu/s_fma.S: Likewise.
+ * sysdeps/ia64/fpu/s_fmaf.S: Likewise.
+ * sysdeps/ia64/fpu/s_fmal.S: Likewise.
+ * sysdeps/ia64/fpu/s_fmax.S: Likewise.
+ * sysdeps/ia64/fpu/s_fmaxf.S: Likewise.
+ * sysdeps/ia64/fpu/s_fmaxl.S: Likewise.
+ * sysdeps/ia64/fpu/s_ldexp.c: Likewise.
+ * sysdeps/ia64/fpu/s_ldexpf.c: Likewise.
+ * sysdeps/ia64/fpu/s_ldexpl.c: Likewise.
+ * sysdeps/ia64/fpu/s_nextafter.S: Likewise.
+ * sysdeps/ia64/fpu/s_nextafterf.S: Likewise.
+ * sysdeps/ia64/fpu/s_nextafterl.S: Likewise.
+ * sysdeps/ia64/fpu/s_nexttoward.S: Likewise.
+ * sysdeps/ia64/fpu/s_nexttowardf.S: Likewise.
+ * sysdeps/ia64/fpu/s_nexttowardl.S: Likewise.
+ * sysdeps/ia64/fpu/s_tanh.S: Likewise.
+ * sysdeps/ia64/fpu/s_tanhf.S: Likewise.
+ * sysdeps/ia64/fpu/s_tanhl.S: Likewise.
+ * sysdeps/ia64/fpu/s_scalblnf.c: Likewise.
+ * sysdeps/ia64/fpu/w_lgamma.c: Likewise.
+ * sysdeps/ia64/fpu/w_lgammaf.c: Likewise.
+ * sysdeps/ia64/fpu/w_lgammal.c: Likewise.
+ * sysdeps/ia64/fpu/w_tgamma.S: Likewise.
+ * sysdeps/ia64/fpu/w_tgammaf.S: Likewise.
+ * sysdeps/ia64/fpu/w_tgammal.S: Likewise.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/e_gamma_r.c: New empty dummy-file.
+ * sysdeps/ia64/fpu/e_gammaf_r.c: Likewise.
+ * sysdeps/ia64/fpu/e_gammal_r.c: Likewise.
+ * sysdeps/ia64/fpu/w_acosh.c: Likewise.
+ * sysdeps/ia64/fpu/w_acoshf.c: Likewise.
+ * sysdeps/ia64/fpu/w_acoshl.c: Likewise.
+ * sysdeps/ia64/fpu/w_atanh.c: Likewise.
+ * sysdeps/ia64/fpu/w_atanhf.c: Likewise.
+ * sysdeps/ia64/fpu/w_atanhl.c: Likewise.
+ * sysdeps/ia64/fpu/w_exp10.c: Likewise.
+ * sysdeps/ia64/fpu/w_exp10f.c: Likewise.
+ * sysdeps/ia64/fpu/w_exp10l.c: Likewise.
+ * sysdeps/ia64/fpu/w_exp2.c: Likewise.
+ * sysdeps/ia64/fpu/w_exp2f.c: Likewise.
+ * sysdeps/ia64/fpu/w_exp2l.c: Likewise.
+ * sysdeps/ia64/fpu/w_expl.c: Likewise.
+ * sysdeps/ia64/fpu/e_expl.S: Likewise.
+ * sysdeps/ia64/fpu/w_lgamma_r.c: Likewise.
+ * sysdeps/ia64/fpu/w_lgammaf_r.c: Likewise.
+ * sysdeps/ia64/fpu/w_lgammal_r.c: Likewise.
+ * sysdeps/ia64/fpu/w_log2.c: Likewise.
+ * sysdeps/ia64/fpu/w_log2f.c: Likewise.
+ * sysdeps/ia64/fpu/w_log2l.c: Likewise.
+ * sysdeps/ia64/fpu/w_sinh.c: Likewise.
+ * sysdeps/ia64/fpu/w_sinhf.c: Likewise.
+ * sysdeps/ia64/fpu/w_sinhl.c: Likewise.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/libm_atan2_reg.S: Remove.
+ * sysdeps/ia64/fpu/s_ldexp.S: Likewise.
+ * sysdeps/ia64/fpu/s_ldexpf.S: Likewise.
+ * sysdeps/ia64/fpu/s_ldexpl.S: Likewise.
+ * sysdeps/ia64/fpu/s_scalbn.S: Likewise.
+ * sysdeps/ia64/fpu/s_scalbnf.S: Likewise.
+ * sysdeps/ia64/fpu/s_scalbnl.S: Likewise.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/s_sincos.c: Make it an empty dummy-file.
+ * sysdeps/ia64/fpu/s_sincosf.c: Likewise.
+ * sysdeps/ia64/fpu/s_sincosl.c: Likewise.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/e_atan2l.S: Add "Not needed" comment.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/s_copysign.S: Add __libm_copysign{,f,l}
+ alias for use by libm_error.c.
+
+ [BZ #592]
+ * sysdeps/ia64/fpu/Makefile (libm-sysdep_routines): Remove
+ libm_atan2_reg, libm_tan, libm_frexp4{f,l}.
+ Mention s_erfc{,f,l}, libm_frexp{,f,l}, libm_ldexp{,f,l},
+ libm_sincos{,f,l}, libm_sincos_large, libm_lgamma{,f,l},
+ libm_scalbn{,f,l}, libm_scalblnf.
+ (sysdep_routines): Remove libm_frexp4{,f,l}.
+ Mention libm_frexp{,f,l}, libm_ldexp{,f,l}, and libm_scalbn{,f,l}.
+ (sysdep-CPPFLAGS): Add -include libm-symbols.h, -D__POSIX__,
+ -D_LIB_VERSIONIMF=_LIB_VERSION, -DSIZE_LONG_INT_64, and
+ -DSIZE_LONG_LONG_INT_64.
+
+2005-01-05 Steven Munroe <sjmunroe@us.ibm.com>
+
+ [BZ #729]
+ * elf/rtld.c (dl_main) [NEED_DL_SYSINFO_DSO]: Ensure l_map_end and
+ l_text_end are set for a VDSO with a single PT_LOAD entry.
+
+2005-01-05 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #730]
+ * libio/iofopncook.c (_IO_cookie_seekoff): Define. Mark offset as
+ invalid to disable optimizations in fileops which won't work here.
+ (_IO_cookie_jumps): Use it.
+ (_IO_old_cookie_jumps): Likewise.
+ * libio/fmemopen.c (fmemopen_seek): Result must be returned in *P,
+ not the return value.
+ * stdio-common/Makefile (tests): Add tst-fmemopen2.
+ * stdio-common/tst-fmemopen2.c: New file.
+
+2005-01-03 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #727]
+ * sysdeps/generic/libc-start.c [SHARED] (__libc_start_main): Don't
+ initialize __environ again.
+ * stdlib/Makefile: Add rules to build and run tst-putenv.
+ * stdlib/tst-putenv.c: New file.
+ * stdlib/tst-putenvmod.c: New file.
+
+2005-01-03 Andreas Jaeger <aj@suse.de>
+
+ * csu/Makefile (generated): Add start.os and start.ob.
+
+2004-12-29 Roland McGrath <roland@redhat.com>
+
+ [BZ #626]
+ * sysdeps/unix/alarm.c (alarm): Round return value to nearest rather
+ than always up; when nearest is zero, round up to one.
+
+2004-12-28 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #726]
+ * sysdeps/generic/dl-tls.c (__tls_get_addr): Fix typo.
+
+2004-12-27 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #744]
+ * include/signal.h: Define __sigemptyset.
+
+2004-12-21 Jakub Jelinek <jakub@redhat.com>
+
+ [BZ #723]
+ * sysdeps/unix/sysv/linux/i386/clone.S (__clone): Make sure %esp when
+ calling fn is 16 byte aligned.
+ * sysdeps/i386/tst-stack-align.h: New file.
+
+ [BZ #725]
+ * misc/efgcvt_r.c (FLOAT_MIN_10_EXP, FLOAT_MIN_10_NORM): Define.
+ (ecvt_r): Special case denormals.
+ * misc/qefgcvt_r.c (FLOAT_MIN_10_EXP, FLOAT_MIN_10_NORM): Define.
+ * misc/tst-efgcvt.c: Include float.h.
+ (ecvt_tests): Add 2 new tests.
+
+
+See ChangeLog.15 for earlier changes.
diff --git a/ChangeLog.15 b/ChangeLog.15
index 5d9b00d42a..055027a3ef 100644
--- a/ChangeLog.15
+++ b/ChangeLog.15
@@ -512,6 +512,14 @@
2004-11-26 Jakub Jelinek <jakub@redhat.com>
+ * posix/Makefile (generated): Add getconf.speclist.
+ ($(inst_libexecdir)/getconf): Use getconf.speclist instead of
+ getconf output.
+ ($(objpfx)getconf.speclist): New rule.
+ * posix/getconf.speclist.h: New file.
+
+2004-11-26 Jakub Jelinek <jakub@redhat.com>
+
* posix/Makefile (install-others): Add $(inst_libexecdir)/getconf.
(CFLAGS-sysconf.c): Add -D_GETCONF_DIR.
(CFLAGS-getconf.c): New.
@@ -1103,6 +1111,16 @@
* sysdeps/generic/tempname.c (__path_search): Add missing argument
TRY_TMPDIR.
+2004-11-02 Jakub Jelinek <jakub@redhat.com>
+
+ * include/features.h (__USE_FORTIFY_LEVEL): Also set for Red Hat
+ GCC 3.4.x-RH >= 3.4.2-8.
+ * libio/bits/features.h (printf, fprintf, vprintf, vfprintf): For
+ GCC 3.4.x-RH use __builtin___{,v}{,f}printf_chk instead of
+ __{,v}{,f}printf_chk.
+ * debug/tst-chk1.c (do_test): Deal with GCC 3.4.x-RH not
+ being able to recognize subobjects.
+
2004-10-31 Mariusz Mazur <mmazur@kernel.pl>
* sysdeps/unix/sysv/linux/alpha/setregid.c: New file.
@@ -1398,6 +1416,11 @@
* posix/execvp.c (execvp): Also ignore ENODEV and ETIMEDOUT errno
values.
+2004-10-20 Jakub Jelinek <jakub@redhat.com>
+
+ * sysdeps/unix/sysv/linux/readonly-area.c (__readonly_area): If /proc
+ is not mounted, return 1.
+
2004-10-20 Roland McGrath <roland@redhat.com>
* Makeconfig ($(common-objpfx)shlib-versions.v.i): Check also
@@ -1470,6 +1493,11 @@
* debug/catchsegv.sh: Update copyright year.
Use mktemp to create segv_output file.
+2004-10-19 Jakub Jelinek <jakub@redhat.com>
+
+ * include/features.h (__USE_FORTIFY_LEVEL): Enable even with
+ Red Hat gcc4 4.0.0 and above.
+
2004-10-18 Jakub Jelinek <jakub@redhat.com>
* elf/dl-libc.c (__libc_dlsym_private, __libc_register_dl_open_hook):
@@ -2453,6 +2481,22 @@
* string/string.h: Add __nonnull annotations.
* stdlib/stdlib.h: Likewise.
+2004-09-20 Jakub Jelinek <jakub@redhat.com>
+
+ * sysdeps/unix/alpha/sysdep.h (inline_syscall[0-6]): Change name
+ argument to numbers from syscall names.
+ (INLINE_SYSCALL1): Pass __NR_##name to inline_syscall##nr.
+ (INTERNAL_SYSCALL_NCS): Renamed from...
+ (INTERNAL_SYSCALL_1): ... this. Use INTERNAL_SYSCALL_NCS.
+ * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h
+ (INTERNAL_SYSCALL_NCS): Define.
+ * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
+ (INTERNAL_SYSCALL_NCS): Likewise.
+ * sysdeps/unix/sysv/linux/sparc/sysdep.h (inline_syscall[0-6]):
+ Change name argument to numbers from syscall names.
+ (INLINE_SYSCALL, INTERNAL_SYSCALL): Adjust.
+ (INTERNAL_SYSCALL_NCS): Define.
+
2004-09-20 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/unix/sysv/linux/ia64/sysdep.h (DO_INLINE_SYSCALL):
@@ -3182,6 +3226,23 @@
before return type.
* locale/localename.c (__current_locale_name): Likewise.
+2004-08-31 Jakub Jelinek <jakub@redhat.com>
+
+ * elf/ldconfig.c (parse_conf): Add prefix argument, prepend it
+ before arguments to add_dir and pass to parse_conf_include.
+ (parse_conf_include): Add prefix argument, pass it down to
+ parse_conf.
+ (main): Call arch_startup. Adjust parse_conf caller.
+ Call add_arch_dirs.
+ * sysdeps/generic/dl-cache.h (arch_startup, add_arch_dirs): Define.
+ * sysdeps/unix/sysv/linux/i386/dl-cache.h: New file.
+ * sysdeps/unix/sysv/linux/ia64/dl-cache.h (EMUL_HACK, arch_startup,
+ add_arch_dirs): Define.
+ * sysdeps/unix/sysv/linux/ia64/ldd-rewrite.sed: Prepend
+ /emul/ia32-linux before the 32-bit ld.so pathname.
+ * sysdeps/unix/sysv/linux/ia64/dl-procinfo.c: New file.
+ * sysdeps/unix/sysv/linux/ia64/dl-procinfo.h: New file.
+
2004-08-30 Roland McGrath <roland@frob.com>
* scripts/extract-abilist.awk: If `lastversion' variable defined, omit
@@ -3338,6 +3399,22 @@
* resolv/nss_dns/dns-canon.c (_nss_dns_getcanonname_r): Initialize
status to NSS_STATUS_UNAVAIL.
+2004-08-19 Jakub Jelinek <jakub@redhat.com>
+
+ * sysdeps/powerpc/powerpc64/configure.in: New file.
+ * sysdeps/powerpc/powerpc64/configure: Rebuilt.
+ * config.h.in (USE_PPC64_OVERLAPPING_OPD): Add.
+ * configure.in (HAVE_ASM_GLOBAL_DOT_NAME): Remove.
+ * configure: Rebuilt.
+ * sysdeps/powerpc/powerpc64/sysdep.h: Formatting.
+ (OPD_ENT, BODY_LABEL, ENTRY_1, ENTRY_2, END_2, DOT_PREFIX,
+ BODY_PREFIX): Define.
+ (ENTRY, DOT_LABEL, END, TRACEBACK, END_GEN_TB, EALIGN): Support
+ HAVE_ASM_GLOBAL_DOT_NAME or no dot symbols,
+ USE_PPC64_OVERLAPPING_OPD or never overlapping .opd entries.
+ * sysdeps/powerpc/powerpc64/dl-machine.h: Include sysdep.h.
+ (TRAMPOLINE_TEMPLATE, RTLD_START): Use the new sysdep.h macros.
+
2004-08-19 Ulrich Drepper <drepper@redhat.com>
* sysdeps/posix/getaddrinfo.c (gaih_inet): Use h->h_name in the
@@ -3642,6 +3719,12 @@
* iconvdata/testdata/ISO-2022-JP-3: Regenerated.
+2004-07-23 Jakub Jelinek <jakub@redhat.com>
+
+ [BZ #284]
+ * include/features.h (_POSIX_SOURCE, _POSIX_C_SOURCE): Define
+ if _XOPEN_SOURCE >= 500 even if __STRICT_ANSI__ is defined.
+
2004-08-10 Alfred M. Szmidt <ams@kemisten.nu>
* sysdeps/generic/bits/in.h (struct ip_mreq): Remove definition.
diff --git a/Makeconfig b/Makeconfig
index d267e5af21..1a9ad873e5 100644
--- a/Makeconfig
+++ b/Makeconfig
@@ -480,32 +480,21 @@ default-rpath = $(libdir)
endif
ifndef link-extra-libs
-ifeq (yes,$(build-shared))
-ifneq ($(common-objpfx),$(objpfx))
-link-extra-libs = $(foreach lib,$(LDLIBS-$(@F)),\
- $(wildcard $(common-objpfx)$(lib).so$($(notdir $(lib)).so-version) \
- $(objpfx)$(lib).so$($(notdir $(lib)).so-version)))
-else
-link-extra-libs = $(foreach lib,$(LDLIBS-$(@F)),$(common-objpfx)$(lib).so$($(notdir $(lib)).so-version))
-endif
-else
-link-extra-libs = $(foreach lib,$(LDLIBS-$(@F)),$(common-objpfx)$(lib).a)
-endif
+link-extra-libs = $(LDLIBS-$(@F))
+link-extra-libs-static = $(link-extra-libs)
+link-extra-libs-bounded = $(link-extra-libs)
endif
# The static libraries.
ifeq (yes,$(build-static))
link-libc-static = $(common-objpfx)libc.a $(static-gnulib) $(common-objpfx)libc.a
-link-extra-libs-static = $(foreach lib,$(LDLIBS-$(@F)),$(common-objpfx)$(lib).a)
else
ifeq (yes,$(build-shared))
# We can try to link the programs with lib*_pic.a...
link-libc-static = $(static-gnulib) $(common-objpfx)libc_pic.a
-link-extra-libs-static = $(link-extra-libs)
endif
endif
link-libc-bounded = $(common-objpfx)libc_b.a $(gnulib) $(common-objpfx)libc_b.a
-link-extra-libs-bounded = $(foreach lib,$(LDLIBS-$(@F:%-bp=%)),$(common-objpfx)$(lib)_b.a)
ifndef gnulib
ifneq ($(have-cc-with-libunwind),yes)
diff --git a/catgets/gencat.c b/catgets/gencat.c
index 2f6c81dc1d..fa482567b4 100644
--- a/catgets/gencat.c
+++ b/catgets/gencat.c
@@ -248,7 +248,7 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
diff --git a/csu/Makefile b/csu/Makefile
index fbbfe0050a..5267bff452 100644
--- a/csu/Makefile
+++ b/csu/Makefile
@@ -54,11 +54,13 @@ include ../Makeconfig
ifeq (yes,$(build-shared))
extra-objs += S$(start-installed-name)
install-lib += S$(start-installed-name)
+generated += start.os
endif
ifeq (yes,$(build-bounded))
extra-objs += b$(start-installed-name)
install-lib += b$(start-installed-name)
+generated += start.ob
endif
ifneq ($(start-installed-name),$(static-start-installed-name))
diff --git a/csu/version.c b/csu/version.c
index 1104fa9137..606246a6e1 100644
--- a/csu/version.c
+++ b/csu/version.c
@@ -25,7 +25,7 @@ static const char __libc_version[] = VERSION;
static const char banner[] =
"GNU C Library "RELEASE" release version "VERSION", by Roland McGrath et al.\n\
-Copyright (C) 2004 Free Software Foundation, Inc.\n\
+Copyright (C) 2005 Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions.\n\
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A\n\
PARTICULAR PURPOSE.\n\
diff --git a/debug/catchsegv.sh b/debug/catchsegv.sh
index 14556f712a..f7e79bce42 100755
--- a/debug/catchsegv.sh
+++ b/debug/catchsegv.sh
@@ -39,7 +39,7 @@ if test $# -eq 0; then
;;
--v | --ve | --ver | --vers | --versi | --versio | --version)
echo 'catchsegv (GNU libc) @VERSION@'
- echo 'Copyright (C) 2004 Free Software Foundation, Inc.
+ echo 'Copyright (C) 2005 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
Written by Ulrich Drepper.'
diff --git a/debug/xtrace.sh b/debug/xtrace.sh
index 4ce8888490..811d6ba9a6 100755
--- a/debug/xtrace.sh
+++ b/debug/xtrace.sh
@@ -1,5 +1,5 @@
#! @BASH@
-# Copyright (C) 1999, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Copyright (C) 1999, 2001-2004, 2005 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# Contributed by Ulrich Drepper <drepper@gnu.org>, 1999.
@@ -64,7 +64,7 @@ do_version() {
printf $"Copyright (C) %s Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-" "2004"
+" "2005"
printf $"Written by %s.
" "Ulrich Drepper"
exit 0
diff --git a/dirent/tst-seekdir.c b/dirent/tst-seekdir.c
index b833c30705..43808fecb5 100644
--- a/dirent/tst-seekdir.c
+++ b/dirent/tst-seekdir.c
@@ -11,8 +11,23 @@ main (int argc, char *argv[])
int i = 0;
int result = 0;
struct dirent *dp;
+ long int save0;
+ long int rewind;
dirp = opendir (".");
+ if (dirp == NULL)
+ {
+ printf ("opendir failed: %m\n");
+ return 1;
+ }
+
+ save0 = telldir (dirp);
+ if (save0 == -1)
+ {
+ printf ("telldir failed: %m\n");
+ result = 1;
+ }
+
for (dp = readdir (dirp); dp != NULL; dp = readdir (dirp))
{
/* save position 3 (after fourth entry) */
@@ -44,6 +59,19 @@ main (int argc, char *argv[])
for (dp = readdir (dirp); dp != NULL; dp = readdir (dirp))
printf ("%s\n", dp->d_name);
+ /* Check rewinddir */
+ rewinddir (dirp);
+ rewind = telldir (dirp);
+ if (rewind == -1)
+ {
+ printf ("telldir failed: %m\n");
+ result = 1;
+ }
+ else if (save0 != rewind)
+ {
+ printf ("rewinddir didn't reset directory stream\n");
+ result = 1;
+ }
closedir (dirp);
return result;
diff --git a/elf/Makefile b/elf/Makefile
index 028be25b2d..72a037f8e3 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -72,7 +72,7 @@ distribute := rtld-Rules \
tst-tlsmod1.c tst-tlsmod2.c tst-tlsmod3.c tst-tlsmod4.c \
tst-tlsmod5.c tst-tlsmod6.c tst-tlsmod7.c tst-tlsmod8.c \
tst-tlsmod9.c tst-tlsmod10.c tst-tlsmod11.c \
- tst-tlsmod12.c tst-tls10.h tst-alignmod.c \
+ tst-tlsmod12.c tst-tls10.h tst-alignmod.c tst-alignmod2.c \
circlemod1.c circlemod1a.c circlemod2.c circlemod2a.c \
circlemod3.c circlemod3a.c nodlopenmod2.c \
tls-macros.h \
@@ -153,7 +153,7 @@ tests += loadtest restest1 preloadtest loadfail multiload origtest resolvfail \
restest2 next dblload dblunload reldep5 reldep6 reldep7 reldep8 \
circleload1 tst-tls3 tst-tls4 tst-tls5 tst-tls6 tst-tls7 tst-tls8 \
tst-tls10 tst-tls11 tst-tls12 tst-tls13 tst-tls14 tst-align \
- $(tests-execstack-$(have-z-execstack)) tst-dlmodcount \
+ tst-align2 $(tests-execstack-$(have-z-execstack)) tst-dlmodcount \
tst-dlopenrpath tst-deep1 tst-dlmopen1 tst-dlmopen2 tst-dlmopen3
# reldep9
test-srcs = tst-pathopt
@@ -186,7 +186,8 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
circlemod3 circlemod3a \
reldep8mod1 reldep8mod2 reldep8mod3 \
reldep9mod1 reldep9mod2 reldep9mod3 \
- tst-alignmod $(modules-execstack-$(have-z-execstack)) \
+ tst-alignmod tst-alignmod2 \
+ $(modules-execstack-$(have-z-execstack)) \
tst-dlopenrpathmod tst-deep1mod1 tst-deep1mod2 tst-deep1mod3 \
tst-dlmopen1mod
ifeq (yes,$(have-initfini-array))
@@ -668,9 +669,12 @@ $(objpfx)tst-tls14: $(objpfx)tst-tlsmod14a.so $(libdl)
$(objpfx)tst-tls14.out:$(objpfx)tst-tlsmod14b.so
CFLAGS-tst-align.c = $(stack-align-test-flags)
+CFLAGS-tst-align2.c = $(stack-align-test-flags)
CFLAGS-tst-alignmod.c = $(stack-align-test-flags)
+CFLAGS-tst-alignmod2.c = $(stack-align-test-flags)
$(objpfx)tst-align: $(libdl)
$(objpfx)tst-align.out: $(objpfx)tst-alignmod.so
+$(objpfx)tst-align2: $(objpfx)tst-alignmod2.so
ifdef libdl
$(objpfx)tst-tls9-static: $(common-objpfx)dlfcn/libdl.a
@@ -732,6 +736,8 @@ $(objpfx)tst-pie1: $(objpfx)tst-pie1.o $(objpfx)tst-piemod1.so
-L$(subst :, -L,$(rpath-link)) -Wl,-rpath-link=$(rpath-link) \
-o $@ $(objpfx)tst-pie1.o $(objpfx)tst-piemod1.so \
$(common-objpfx)libc_nonshared.a
+
+generated += tst-pie1 tst-pie1.out tst-pie1.o
endif
check-textrel-CFLAGS = -O -Wall -D_XOPEN_SOURCE=600 -D_BSD_SOURCE
diff --git a/elf/dl-load.c b/elf/dl-load.c
index eb1a7919fb..ab1080e51f 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -36,6 +36,7 @@
#include <dl-osinfo.h>
#include <stackinfo.h>
#include <caller.h>
+#include <sysdep.h>
#include <dl-dst.h>
@@ -105,13 +106,11 @@ ELF_PREFERRED_ADDRESS_DATA;
int __stack_prot attribute_hidden attribute_relro
- = (PROT_READ|PROT_WRITE
#if _STACK_GROWS_DOWN && defined PROT_GROWSDOWN
- |PROT_GROWSDOWN
+ = PROT_GROWSDOWN;
#elif _STACK_GROWS_UP && defined PROT_GROWSUP
- |PROT_GROWSUP
+ = PROT_GROWSUP;
#endif
- );
/* Type for the buffer we put the ELF header and hopefully the program
@@ -1327,12 +1326,16 @@ cannot allocate TLS data structures for initial thread");
if (__builtin_expect (__check_caller (RETURN_ADDRESS (0),
allow_ldso|allow_libc) == 0,
0))
- __stack_prot |= PROT_EXEC;
+ __stack_prot |= PROT_READ|PROT_WRITE|PROT_EXEC;
__mprotect ((void *) p, s, PROT_READ);
}
else
#endif
- __stack_prot |= PROT_EXEC;
+ __stack_prot |= PROT_READ|PROT_WRITE|PROT_EXEC;
+
+#ifdef check_consistency
+ check_consistency ();
+#endif
errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
if (errval)
@@ -1788,7 +1791,12 @@ open_path (const char *name, size_t namelen, int preloaded,
must not be freed using the general free() in libc. */
if (sps->malloced)
free (sps->dirs);
- sps->dirs = (void *) -1;
+#ifdef HAVE_Z_RELRO
+ /* rtld_search_dirs is attribute_relro, therefore avoid writing
+ into it. */
+ if (sps != &rtld_search_dirs)
+#endif
+ sps->dirs = (void *) -1;
}
return -1;
diff --git a/elf/ldconfig.c b/elf/ldconfig.c
index f8504fb95e..d7075ccdc9 100644
--- a/elf/ldconfig.c
+++ b/elf/ldconfig.c
@@ -279,7 +279,7 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"),
"Andreas Jaeger");
}
diff --git a/elf/ldd.bash.in b/elf/ldd.bash.in
index 4d7c33c728..4587efd1cb 100644
--- a/elf/ldd.bash.in
+++ b/elf/ldd.bash.in
@@ -1,5 +1,5 @@
#! @BASH@
-# Copyright (C) 1996-2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Copyright (C) 1996-2004, 2005 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@@ -39,7 +39,7 @@ while test $# -gt 0; do
printf $"Copyright (C) %s Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-" "2004"
+" "2005"
printf $"Written by %s and %s.
" "Roland McGrath" "Ulrich Drepper"
exit 0
diff --git a/elf/rtld.c b/elf/rtld.c
index ee7291477f..55a2b8a9ea 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1412,9 +1412,9 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
{
if (! l->l_addr)
l->l_addr = ph->p_vaddr;
- else if (ph->p_vaddr + ph->p_memsz >= l->l_map_end)
+ if (ph->p_vaddr + ph->p_memsz >= l->l_map_end)
l->l_map_end = ph->p_vaddr + ph->p_memsz;
- else if ((ph->p_flags & PF_X)
+ if ((ph->p_flags & PF_X)
&& ph->p_vaddr + ph->p_memsz >= l->l_text_end)
l->l_text_end = ph->p_vaddr + ph->p_memsz;
}
diff --git a/elf/sprof.c b/elf/sprof.c
index afe3955ad8..686a003bb0 100644
--- a/elf/sprof.c
+++ b/elf/sprof.c
@@ -357,7 +357,7 @@ Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
"),
- "2004");
+ "2005");
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
diff --git a/fedora/branch.mk b/fedora/branch.mk
index 20986645ab..702d5f0c23 100644
--- a/fedora/branch.mk
+++ b/fedora/branch.mk
@@ -1,5 +1,7 @@
# This file is updated automatically by Makefile.
-glibc-branch := fedora
-glibc-base := HEAD
-fedora-sync-date := 2004-12-19 23:31 UTC
-fedora-sync-tag := fedora-glibc-20041219T2331
+glibc-branch := fedora-2_3
+glibc-base := glibc-2_3-branch
+DIST_BRANCH := FC-3
+COLLECTION := dist-fc3-updates-candidate
+fedora-2_3-sync-date := 2005-02-16 12:56 UTC
+fedora-2_3-sync-tag := fedora-glibc-2_3-20050216T1256
diff --git a/fedora/glibc.spec.in b/fedora/glibc.spec.in
index 1a9a960012..34659db32e 100644
--- a/fedora/glibc.spec.in
+++ b/fedora/glibc.spec.in
@@ -1,4 +1,4 @@
-%define glibcrelease 2
+%define glibcrelease 3.fc3
%define auxarches i586 i686 athlon sparcv9 alphaev6
%define prelinkarches noarch
%define nptlarches i386 i686 athlon x86_64 ia64 s390 s390x sparcv9 ppc ppc64
@@ -12,10 +12,10 @@ Version: %{glibcversion}
Release: %{glibcrelease}
Copyright: LGPL
Group: System Environment/Libraries
-%define glibcsrcdir %{name}-%{glibcdate}
+%define glibcsrcdir %{glibcname}-%{glibcdate}
Source0: %{glibcsrcdir}.tar.bz2
-Source1: %{name}-fedora-%{glibcdate}.tar.bz2
-Patch0: %{name}-fedora.patch
+Source1: %{glibcname}-fedora-%{glibcdate}.tar.bz2
+Patch0: %{glibcname}-fedora.patch
Patch1: %{name}-nptl-check.patch
Patch2: %{name}-ppc-assume.patch
Patch3: %{name}-ia64-lib64.patch
@@ -1270,6 +1270,47 @@ rm -f *.filelist*
%endif
%changelog
+* Wed Feb 16 2005 Roland McGrath <roland@redhat.com> 2.3.4-3.fc3
+- update from CVS
+ - fix initstate{,_r} (BZ#710)
+ - fix segfault if chrooted app attempts to dlopen a library
+ and no standard library directory exists at all (#147067, #144303, BZ#738)
+ - fix initgroups when nscd is running, but has group caching disabled
+ (#146588, BZ#741)
+ - fix pthread_key_{create,destroy} in LinuxThreads when pthread_create
+ has not been called yet (#146710, BZ#739)
+ - fix ppc64 swapcontext and setcontext (#146736, BZ#700)
+ - service nscd cosmetic fixes (#146776, BZ#742)
+ - fix s390{,x} string.h (BZ#743)
+ - fix IA-32 and x86-64 stack alignment in DSO constructors (#145689, BZ#735)
+ - fix zdump -v segfaults on x86-64 (#146210, BZ#736)
+ - update IA-64 libm from Intel v2.1 (#142494, BZ#592)
+ - avoid calling sigaction (SIGPIPE, ...) inside syslog
+ (#146021, IT#56686, BZ#671)
+ - declare ftruncate for POSIX 2003 (BZ#640)
+ - fix errno values for futimes (BZ#633)
+ - unconditionally include <features.h> in malloc.h (BZ#650)
+ - change regex \B handling to match old GNU regex as well as perl/grep's dfa
+ (from empty string inside of word to empty string not at a word boundary,
+ BZ#693)
+ - slightly optimize i686 TLS accesses, use direct TLS %gs access in sem_*
+ and allow building -mno-tls-direct-seg-refs glibc that is free of
+ direct TLS %gs access with negative offsets (BZ#737)
+ - fix addseverity (BZ#731)
+ - fix fmemopen (BZ#730)
+ - fix rewinddir (BZ#734)
+ - increase svc{tcp,unix}_create listen backlog (BZ#733)
+ - fix vDSO l_map_end/l_text_end computation (BZ#729)
+ - fix IA-32 stack alignment for LinuxThreads thread functions
+ and functions passed to clone(2) directly (BZ#723)
+ - fix ecvt{,_r} on denormals (#143279, BZ#725)
+ - fix __tls_get_addr typo (BZ#726)
+ - fix rounding in IA-64 alarm (#143710, BZ#626)
+ - don't reinitialize __environ in __libc_start_main, so that
+ effects of setenv/putenv done in DSO initializers are preserved
+ (#144037, IT#57403, BZ#727)
+ - fix ppc/ppc64 rint and other rounding functions (#144931, BZ#602)
+
* Mon Dec 19 2004 Jakub Jelinek <jakub@redhat.com> 2.3.4-2
- work around rpm bug some more, this time by copying
iconvconfig to iconvconfig.%%{_target_cpu}.
@@ -1574,7 +1615,7 @@ rm -f *.filelist*
- update from CVS
- fix BZ #151, #362, #381, #407
- fdim fix for +inf/+inf (BZ #376)
-
+
* Sun Sep 26 2004 Jakub Jelinek <jakub@redhat.com> 2.3.3-58
- update from CVS
- vasprintf fix (BZ #346)
diff --git a/hurd/sigunwind.c b/hurd/sigunwind.c
index 450a385a2a..e2791eab83 100644
--- a/hurd/sigunwind.c
+++ b/hurd/sigunwind.c
@@ -111,7 +111,7 @@ _hurdsig_longjmp_from_handler (void *data, jmp_buf env, int val)
link = (void *) &scp[1];
assert (! link->resource.next && ! link->resource.prevp);
assert (link->thread.next == ss->active_resources);
- assert (link->thread.prevp = &ss->active_resources);
+ assert (link->thread.prevp == &ss->active_resources);
if (link->thread.next)
link->thread.next->thread.prevp = &link->thread.next;
ss->active_resources = link;
diff --git a/iconv/Makefile b/iconv/Makefile
index fe0c453e7b..40c7cbcdd3 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -78,3 +78,15 @@ $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
$(objpfx)iconv_prog: $(iconv_prog-modules:%=$(objpfx)%.o)
$(objpfx)iconvconfig: $(iconvconfig-modules:%=$(objpfx)%.o)
+
+ifneq ($(cross-compiling),yes)
+xtests: test-iconvconfig
+endif
+
+.PHONY: test-iconvconfig
+test-iconvconfig: /dev/null $(objpfx)iconvconfig
+ tmp=$(objpfx)gconv-modules.cache.$$$$; \
+ rm -f $$tmp; \
+ $(make-test-out) --output=$$tmp --nostdlib $(inst_gconvdir) && \
+ cmp $$tmp $(inst_gconvdir)/gconv-modules.cache && \
+ rm -f $$tmp
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 86852857a3..020cc8bc2b 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -424,7 +424,7 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
diff --git a/iconv/iconvconfig.c b/iconv/iconvconfig.c
index 0ed210b794..18ff3a53b1 100644
--- a/iconv/iconvconfig.c
+++ b/iconv/iconvconfig.c
@@ -397,7 +397,7 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
diff --git a/iconv/strtab.c b/iconv/strtab.c
index e6feb2cf24..32c00041f3 100644
--- a/iconv/strtab.c
+++ b/iconv/strtab.c
@@ -326,7 +326,7 @@ strtabfinalize (struct Strtab *st, size_t *size)
copylen = 1;
copystrings (st->root, &endp, &copylen);
assert (copylen == st->total + 1);
- assert (endp = retval + st->total + 1);
+ assert (endp == retval + st->total + 1);
*size = copylen;
return retval;
diff --git a/include/signal.h b/include/signal.h
index 104ea8f83a..dc1e0a12e5 100644
--- a/include/signal.h
+++ b/include/signal.h
@@ -48,6 +48,9 @@ extern int __sigpause (int sig_or_mask, int is_sig);
extern int __default_sigpause (int mask);
extern int __xpg_sigpause (int sig);
+/* Simplified sigemptyset() implementation without the parameter checking. */
+#undef __sigemptyset
+#define __sigemptyset(ss) (memset (ss, '\0', sizeof (sigset_t)), 0)
/* Allocate real-time signal with highest/lowest available priority. */
diff --git a/libio/fmemopen.c b/libio/fmemopen.c
index ab6ffdd678..c22cba1ec4 100644
--- a/libio/fmemopen.c
+++ b/libio/fmemopen.c
@@ -27,8 +27,6 @@
* but couldn't find it in libio. The following snippet of code is an
* attempt to implement what glibc's documentation describes.
*
- * No, it isn't really tested yet. :-)
- *
*
*
* I already see some potential problems:
@@ -166,7 +164,7 @@ fmemopen_seek (void *cookie, _IO_off64_t *p, int w)
break;
case SEEK_END:
- np = c->size - *p;
+ np = c->maxpos - *p;
break;
default:
@@ -176,9 +174,9 @@ fmemopen_seek (void *cookie, _IO_off64_t *p, int w)
if (np < 0 || (size_t) np > c->size)
return -1;
- c->pos = np;
+ *p = c->pos = np;
- return np;
+ return 0;
}
@@ -203,6 +201,13 @@ fmemopen (void *buf, size_t len, const char *mode)
cookie_io_functions_t iof;
fmemopen_cookie_t *c;
+ if (len == 0)
+ {
+ einval:
+ __set_errno (EINVAL);
+ return NULL;
+ }
+
c = (fmemopen_cookie_t *) malloc (sizeof (fmemopen_cookie_t));
if (c == NULL)
return NULL;
@@ -220,7 +225,12 @@ fmemopen (void *buf, size_t len, const char *mode)
c->buffer[0] = '\0';
}
else
- c->buffer = buf;
+ {
+ if ((uintptr_t) len > -(uintptr_t) buf)
+ goto einval;
+
+ c->buffer = buf;
+ }
c->size = len;
diff --git a/libio/iofopncook.c b/libio/iofopncook.c
index 321eb67b8d..6f720b43ef 100644
--- a/libio/iofopncook.c
+++ b/libio/iofopncook.c
@@ -36,6 +36,8 @@ static _IO_ssize_t _IO_cookie_read (register _IO_FILE* fp, void* buf,
static _IO_ssize_t _IO_cookie_write (register _IO_FILE* fp,
const void* buf, _IO_ssize_t size);
static _IO_off64_t _IO_cookie_seek (_IO_FILE *fp, _IO_off64_t offset, int dir);
+static _IO_off64_t _IO_cookie_seekoff (_IO_FILE *fp, _IO_off64_t offset,
+ int dir, int mode);
static int _IO_cookie_close (_IO_FILE* fp);
static _IO_ssize_t
@@ -94,6 +96,20 @@ _IO_cookie_close (fp)
}
+static _IO_off64_t
+_IO_cookie_seekoff (fp, offset, dir, mode)
+ _IO_FILE *fp;
+ _IO_off64_t offset;
+ int dir;
+ int mode;
+{
+ /* We must force the fileops code to always use seek to determine
+ the position. */
+ fp->_offset = _IO_pos_BAD;
+ return INTUSE(_IO_file_seekoff) (fp, offset, dir, mode);
+}
+
+
static const struct _IO_jump_t _IO_cookie_jumps = {
JUMP_INIT_DUMMY,
JUMP_INIT(finish, INTUSE(_IO_file_finish)),
@@ -103,7 +119,7 @@ static const struct _IO_jump_t _IO_cookie_jumps = {
JUMP_INIT(pbackfail, INTUSE(_IO_default_pbackfail)),
JUMP_INIT(xsputn, INTUSE(_IO_file_xsputn)),
JUMP_INIT(xsgetn, INTUSE(_IO_default_xsgetn)),
- JUMP_INIT(seekoff, INTUSE(_IO_file_seekoff)),
+ JUMP_INIT(seekoff, _IO_cookie_seekoff),
JUMP_INIT(seekpos, _IO_default_seekpos),
JUMP_INIT(setbuf, INTUSE(_IO_file_setbuf)),
JUMP_INIT(sync, INTUSE(_IO_file_sync)),
@@ -223,7 +239,7 @@ static const struct _IO_jump_t _IO_old_cookie_jumps = {
JUMP_INIT(pbackfail, INTUSE(_IO_default_pbackfail)),
JUMP_INIT(xsputn, INTUSE(_IO_file_xsputn)),
JUMP_INIT(xsgetn, INTUSE(_IO_default_xsgetn)),
- JUMP_INIT(seekoff, INTUSE(_IO_file_seekoff)),
+ JUMP_INIT(seekoff, _IO_cookie_seekoff),
JUMP_INIT(seekpos, _IO_default_seekpos),
JUMP_INIT(setbuf, INTUSE(_IO_file_setbuf)),
JUMP_INIT(sync, INTUSE(_IO_file_sync)),
diff --git a/linuxthreads/ChangeLog b/linuxthreads/ChangeLog
index 9577130e10..4c80a83534 100644
--- a/linuxthreads/ChangeLog
+++ b/linuxthreads/ChangeLog
@@ -1,3 +1,25 @@
+2005-02-09 Daniel Jacobowitz <dan@codesourcery.com>
+
+ [BZ #740]
+ * descr.h (__pthread_find_self, thread_self): Mark as pure
+ instead of const.
+
+2005-02-07 Jakub Jelinek <jakub@redhat.com>
+
+ [BZ #739]
+ * specific.c (pthread_key_delete): If pthread_create has not been
+ called yet, clear p_specific for the current thread.
+ * Makefile (tests): Add tst-tsd1.
+ * tst-tsd1.c: New test.
+
+2004-12-21 Jakub Jelinek <jakub@redhat.com>
+
+ [BZ #723]
+ * Makefile (tests): Add tst-align.
+ * tst-align.c: New test.
+ * sysdeps/i386/Makefile (CFLAGS-tst-align.c): Add
+ -mpreferred-stack-boundary=4.
+
2004-12-12 Ulrich Drepper <drepper@redhat.com>
* internals.h: Include <stdbool.h> to match includes used in nptl.
diff --git a/linuxthreads/Makefile b/linuxthreads/Makefile
index f4c9f2a916..451ac071f0 100644
--- a/linuxthreads/Makefile
+++ b/linuxthreads/Makefile
@@ -111,7 +111,7 @@ tests = ex1 ex2 ex3 ex4 ex5 ex6 ex7 ex8 ex9 $(librt-tests) ex12 ex13 joinrace \
ex17 ex18 tst-cancel tst-context bug-sleep \
tst-cancel1 tst-cancel2 tst-cancel3 tst-cancel4 tst-cancel5 \
tst-cancel6 tst-cancel7 tst-cancel8 tst-popen tst-popen2 tst-attr1 \
- tst-stack1
+ tst-stack1 tst-align tst-tsd1
test-srcs = tst-signal
# These tests are linked with libc before libpthread
tests-reverse += tst-cancel5
diff --git a/linuxthreads/descr.h b/linuxthreads/descr.h
index bea8b912f7..2b1e49a0ba 100644
--- a/linuxthreads/descr.h
+++ b/linuxthreads/descr.h
@@ -239,9 +239,9 @@ extern int __pthread_nonstandard_stacks;
/* Recover thread descriptor for the current thread */
-extern pthread_descr __pthread_find_self (void) __attribute__ ((const));
+extern pthread_descr __pthread_find_self (void) __attribute__ ((pure));
-static inline pthread_descr thread_self (void) __attribute__ ((const));
+static inline pthread_descr thread_self (void) __attribute__ ((pure));
static inline pthread_descr thread_self (void)
{
#ifdef THREAD_SELF
diff --git a/linuxthreads/specific.c b/linuxthreads/specific.c
index f54fabaeb9..92eec3d99a 100644
--- a/linuxthreads/specific.c
+++ b/linuxthreads/specific.c
@@ -104,15 +104,16 @@ int pthread_key_delete(pthread_key_t key)
that if the key is reallocated later by pthread_key_create, its
associated values will be NULL in all threads.
- Do nothing if no threads have been created yet. */
+ If no threads have been created yet, clear it just in the
+ current thread. */
+ struct pthread_key_delete_helper_args args;
+ args.idx1st = key / PTHREAD_KEY_2NDLEVEL_SIZE;
+ args.idx2nd = key % PTHREAD_KEY_2NDLEVEL_SIZE;
if (__pthread_manager_request != -1)
{
- struct pthread_key_delete_helper_args args;
struct pthread_request request;
- args.idx1st = key / PTHREAD_KEY_2NDLEVEL_SIZE;
- args.idx2nd = key % PTHREAD_KEY_2NDLEVEL_SIZE;
args.self = 0;
request.req_thread = self;
@@ -124,6 +125,11 @@ int pthread_key_delete(pthread_key_t key)
(char *) &request, sizeof(request)));
suspend(self);
}
+ else
+ {
+ if (self->p_specific[args.idx1st] != NULL)
+ self->p_specific[args.idx1st][args.idx2nd] = NULL;
+ }
pthread_mutex_unlock(&pthread_keys_mutex);
return 0;
diff --git a/linuxthreads/sysdeps/i386/Makefile b/linuxthreads/sysdeps/i386/Makefile
index 45183d1cd3..418fa5c6ef 100644
--- a/linuxthreads/sysdeps/i386/Makefile
+++ b/linuxthreads/sysdeps/i386/Makefile
@@ -15,6 +15,7 @@ CFLAGS-pthread.c += -fno-omit-frame-pointer -mpreferred-stack-boundary=4
CFLAGS-ptlongjmp.c += -fno-omit-frame-pointer
CFLAGS-semaphore.c += -fno-omit-frame-pointer
CFLAGS-sighandler.c += -fno-omit-frame-pointer -mpreferred-stack-boundary=4
+CFLAGS-tst-align.c += -mpreferred-stack-boundary=4
endif
ifeq ($(subdir),csu)
diff --git a/locale/programs/locale.c b/locale/programs/locale.c
index adf9944578..8c51d2579a 100644
--- a/locale/programs/locale.c
+++ b/locale/programs/locale.c
@@ -277,7 +277,7 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
diff --git a/locale/programs/localedef.c b/locale/programs/localedef.c
index 28cb7b316e..af7d488e7d 100644
--- a/locale/programs/localedef.c
+++ b/locale/programs/localedef.c
@@ -389,7 +389,7 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index 42ce0637ae..8c8616e7cf 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,7 @@
+2004-12-19 Roland McGrath <roland@frob.com>
+
+ * gen-unicode-ctype.c (output_tables): Fix email address in output.
+
2004-10-02 Petter Reinholdtsen <pere@hungry.com>
[BZ #82]
diff --git a/localedata/gen-unicode-ctype.c b/localedata/gen-unicode-ctype.c
index a9c51b3f48..849f272ed5 100644
--- a/localedata/gen-unicode-ctype.c
+++ b/localedata/gen-unicode-ctype.c
@@ -638,7 +638,7 @@ output_tables (const char *filename, const char *version)
fprintf (stream, "source \"UnicodeData.txt, PropList.txt\"\n");
fprintf (stream, "address \"\"\n");
fprintf (stream, "contact \"\"\n");
- fprintf (stream, "email \"bug-glibc@gnu.org\"\n");
+ fprintf (stream, "email \"bug-glibc-locales@gnu.org\"\n");
fprintf (stream, "tel \"\"\n");
fprintf (stream, "fax \"\"\n");
fprintf (stream, "language \"\"\n");
diff --git a/malloc/malloc.h b/malloc/malloc.h
index 753539e7b0..d0fd967597 100644
--- a/malloc/malloc.h
+++ b/malloc/malloc.h
@@ -20,9 +20,7 @@
#ifndef _MALLOC_H
#define _MALLOC_H 1
-#ifdef _LIBC
#include <features.h>
-#endif
/*
$Id$
diff --git a/malloc/memusage.sh b/malloc/memusage.sh
index be8f755a20..b2e08c6039 100755
--- a/malloc/memusage.sh
+++ b/malloc/memusage.sh
@@ -1,5 +1,5 @@
#! @BASH@
-# Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc.
+# Copyright (C) 1999-2004, 2005 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# Contributed by Ulrich Drepper <drepper@gnu.org>, 1999.
@@ -71,7 +71,7 @@ do_version() {
printf $"Copyright (C) %s Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-" "2004"
+" "2005"
printf $"Written by %s.
" "Ulrich Drepper"
exit 0
diff --git a/malloc/mtrace.pl b/malloc/mtrace.pl
index 1640fa652d..0036f33f59 100644
--- a/malloc/mtrace.pl
+++ b/malloc/mtrace.pl
@@ -45,7 +45,7 @@ arglist: while (@ARGV) {
$ARGV[0] eq "--vers" || $ARGV[0] eq "--versi" ||
$ARGV[0] eq "--versio" || $ARGV[0] eq "--version") {
print "mtrace (GNU $PACKAGE) $VERSION\n";
- print "Copyright (C) 2004 Free Software Foundation, Inc.\n";
+ print "Copyright (C) 2005 Free Software Foundation, Inc.\n";
print "This is free software; see the source for copying conditions. There is NO\n";
print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
print "Written by Ulrich Drepper <drepper\@gnu.org>\n";
diff --git a/math/libm-test.inc b/math/libm-test.inc
index 60711fabdc..be05222629 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -3795,6 +3795,114 @@ rint_test (void)
}
static void
+rint_test_tonearest (void)
+{
+ int save_round_mode;
+ START (rint_tonearest);
+
+ save_round_mode = fegetround();
+
+ if (!fesetround (FE_TONEAREST))
+ {
+ TEST_f_f (rint, 2.0, 2.0);
+ TEST_f_f (rint, 1.5, 2.0);
+ TEST_f_f (rint, 1.0, 1.0);
+ TEST_f_f (rint, 0.5, 0.0);
+ TEST_f_f (rint, 0.0, 0.0);
+ TEST_f_f (rint, minus_zero, minus_zero);
+ TEST_f_f (rint, -0.5, -0.0);
+ TEST_f_f (rint, -1.0, -1.0);
+ TEST_f_f (rint, -1.5, -2.0);
+ TEST_f_f (rint, -2.0, -2.0);
+ }
+
+ fesetround(save_round_mode);
+
+ END (rint_tonearest);
+}
+
+static void
+rint_test_towardzero (void)
+{
+ int save_round_mode;
+ START (rint_towardzero);
+
+ save_round_mode = fegetround();
+
+ if (!fesetround (FE_TOWARDZERO))
+ {
+ TEST_f_f (rint, 2.0, 2.0);
+ TEST_f_f (rint, 1.5, 1.0);
+ TEST_f_f (rint, 1.0, 1.0);
+ TEST_f_f (rint, 0.5, 0.0);
+ TEST_f_f (rint, 0.0, 0.0);
+ TEST_f_f (rint, minus_zero, minus_zero);
+ TEST_f_f (rint, -0.5, -0.0);
+ TEST_f_f (rint, -1.0, -1.0);
+ TEST_f_f (rint, -1.5, -1.0);
+ TEST_f_f (rint, -2.0, -2.0);
+ }
+
+ fesetround(save_round_mode);
+
+ END (rint_towardzero);
+}
+
+static void
+rint_test_downward (void)
+{
+ int save_round_mode;
+ START (rint_downward);
+
+ save_round_mode = fegetround();
+
+ if (!fesetround (FE_DOWNWARD))
+ {
+ TEST_f_f (rint, 2.0, 2.0);
+ TEST_f_f (rint, 1.5, 1.0);
+ TEST_f_f (rint, 1.0, 1.0);
+ TEST_f_f (rint, 0.5, 0.0);
+ TEST_f_f (rint, 0.0, 0.0);
+ TEST_f_f (rint, minus_zero, minus_zero);
+ TEST_f_f (rint, -0.5, -1.0);
+ TEST_f_f (rint, -1.0, -1.0);
+ TEST_f_f (rint, -1.5, -2.0);
+ TEST_f_f (rint, -2.0, -2.0);
+ }
+
+ fesetround(save_round_mode);
+
+ END (rint_downward);
+}
+
+static void
+rint_test_upward (void)
+{
+ int save_round_mode;
+ START (rint_upward);
+
+ save_round_mode = fegetround();
+
+ if (!fesetround (FE_UPWARD))
+ {
+ TEST_f_f (rint, 2.0, 2.0);
+ TEST_f_f (rint, 1.5, 2.0);
+ TEST_f_f (rint, 1.0, 1.0);
+ TEST_f_f (rint, 0.5, 1.0);
+ TEST_f_f (rint, 0.0, 0.0);
+ TEST_f_f (rint, minus_zero, minus_zero);
+ TEST_f_f (rint, -0.5, -0.0);
+ TEST_f_f (rint, -1.0, -1.0);
+ TEST_f_f (rint, -1.5, -1.0);
+ TEST_f_f (rint, -2.0, -2.0);
+ }
+
+ fesetround(save_round_mode);
+
+ END (rint_upward);
+}
+
+static void
round_test (void)
{
START (round);
@@ -4557,6 +4665,10 @@ main (int argc, char **argv)
floor_test ();
nearbyint_test ();
rint_test ();
+ rint_test_tonearest ();
+ rint_test_towardzero ();
+ rint_test_downward ();
+ rint_test_upward ();
lrint_test ();
llrint_test ();
round_test ();
diff --git a/misc/efgcvt_r.c b/misc/efgcvt_r.c
index ac2a5c45bf..28bf170c81 100644
--- a/misc/efgcvt_r.c
+++ b/misc/efgcvt_r.c
@@ -31,6 +31,7 @@
# define FUNC_PREFIX
# define FLOAT_FMT_FLAG
# define FLOAT_NAME_EXT
+# define FLOAT_MIN_10_EXP DBL_MIN_10_EXP
# if DBL_MANT_DIG == 53
# define NDIGIT_MAX 17
# elif DBL_MANT_DIG == 24
@@ -43,6 +44,17 @@
# error "NDIGIT_MAX must be precomputed"
# define NDIGIT_MAX (lrint (ceil (M_LN2 / M_LN10 * DBL_MANT_DIG + 1.0)))
# endif
+# if DBL_MIN_10_EXP == -37
+# define FLOAT_MIN_10_NORM 1.0e-37
+# elif DBL_MIN_10_EXP == -307
+# define FLOAT_MIN_10_NORM 1.0e-307
+# elif DBL_MIN_10_EXP == -4931
+# define FLOAT_MIN_10_NORM 1.0e-4931
+# else
+/* libc can't depend on libm. */
+# error "FLOAT_MIN_10_NORM must be precomputed"
+# define FLOAT_MIN_10_NORM exp10 (DBL_MIN_10_EXP)
+# endif
#endif
#define APPEND(a, b) APPEND2 (a, b)
@@ -171,6 +183,17 @@ APPEND (FUNC_PREFIX, ecvt_r) (value, ndigit, decpt, sign, buf, len)
d = -value;
else
d = value;
+ /* For denormalized numbers the d < 1.0 case below won't work,
+ as f can overflow to +Inf. */
+ if (d < FLOAT_MIN_10_NORM)
+ {
+ value /= FLOAT_MIN_10_NORM;
+ if (value < 0.0)
+ d = -value;
+ else
+ d = value;
+ exponent += FLOAT_MIN_10_EXP;
+ }
if (d < 1.0)
{
do
diff --git a/misc/qefgcvt_r.c b/misc/qefgcvt_r.c
index 66cc049ec8..d5b2a799b3 100644
--- a/misc/qefgcvt_r.c
+++ b/misc/qefgcvt_r.c
@@ -24,6 +24,7 @@
#define FUNC_PREFIX q
#define FLOAT_FMT_FLAG "L"
#define FLOAT_NAME_EXT l
+#define FLOAT_MIN_10_EXP LDBL_MIN_10_EXP
#if LDBL_MANT_DIG == 64
# define NDIGIT_MAX 21
#elif LDBL_MANT_DIG == 53
@@ -40,5 +41,16 @@
# error "NDIGIT_MAX must be precomputed"
# define NDIGIT_MAX (lrint (ceil (M_LN2 / M_LN10 * LDBL_MANT_DIG + 1.0)))
#endif
+#if LDBL_MIN_10_EXP == -37
+# define FLOAT_MIN_10_NORM 1.0e-37L
+#elif LDBL_MIN_10_EXP == -307
+# define FLOAT_MIN_10_NORM 1.0e-307L
+#elif LDBL_MIN_10_EXP == -4931
+# define FLOAT_MIN_10_NORM 1.0e-4931L
+#else
+/* libc can't depend on libm. */
+# error "FLOAT_MIN_10_NORM must be precomputed"
+# define FLOAT_MIN_10_NORM exp10l (LDBL_MIN_10_EXP)
+#endif
#include "efgcvt_r.c"
diff --git a/misc/syslog.c b/misc/syslog.c
deleted file mode 100644
index 6916356da7..0000000000
--- a/misc/syslog.c
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- * Copyright (c) 1983, 1988, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)syslog.c 8.4 (Berkeley) 3/18/94";
-#endif /* LIBC_SCCS and not lint */
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/syslog.h>
-#include <sys/uio.h>
-#include <netdb.h>
-
-#include <errno.h>
-#include <fcntl.h>
-#include <paths.h>
-#include <stdio.h>
-#include <stdio_ext.h>
-#include <string.h>
-#include <time.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <bits/libc-lock.h>
-#include <signal.h>
-#include <locale.h>
-
-#if __STDC__
-#include <stdarg.h>
-#else
-#include <varargs.h>
-#endif
-
-#include <libio/iolibio.h>
-#define ftell(s) INTUSE(_IO_ftell) (s)
-
-static int LogType = SOCK_DGRAM; /* type of socket connection */
-static int LogFile = -1; /* fd for log */
-static int connected; /* have done connect */
-static int LogStat; /* status bits, set by openlog() */
-static const char *LogTag; /* string to tag the entry with */
-static int LogFacility = LOG_USER; /* default facility code */
-static int LogMask = 0xff; /* mask of priorities to be logged */
-extern char *__progname; /* Program name, from crt0. */
-
-/* Define the lock. */
-__libc_lock_define_initialized (static, syslog_lock)
-
-static void openlog_internal(const char *, int, int) internal_function;
-static void closelog_internal(void);
-static void sigpipe_handler (int);
-
-
-struct cleanup_arg
-{
- void *buf;
- struct sigaction *oldaction;
-};
-
-static void
-cancel_handler (void *ptr)
-{
- /* Restore the old signal handler. */
- struct cleanup_arg *clarg = (struct cleanup_arg *) ptr;
-
- if (clarg != NULL && clarg->oldaction != NULL)
- __sigaction (SIGPIPE, clarg->oldaction, NULL);
-
- /* Free the lock. */
- __libc_lock_unlock (syslog_lock);
-}
-
-
-/*
- * syslog, vsyslog --
- * print message on log file; output is intended for syslogd(8).
- */
-void
-#if __STDC__
-syslog(int pri, const char *fmt, ...)
-#else
-syslog(pri, fmt, va_alist)
- int pri;
- char *fmt;
- va_dcl
-#endif
-{
- va_list ap;
-
-#if __STDC__
- va_start(ap, fmt);
-#else
- va_start(ap);
-#endif
- vsyslog(pri, fmt, ap);
- va_end(ap);
-}
-libc_hidden_def (syslog)
-
-void
-vsyslog(pri, fmt, ap)
- int pri;
- register const char *fmt;
- va_list ap;
-{
- struct tm now_tm;
- time_t now;
- int fd;
- FILE *f;
- char *buf = 0;
- size_t bufsize = 0;
- size_t prioff, msgoff;
- struct sigaction action, oldaction;
- int sigpipe;
- int saved_errno = errno;
- char failbuf[3 * sizeof (pid_t) + sizeof "out of memory []"];
-
-#define INTERNALLOG LOG_ERR|LOG_CONS|LOG_PERROR|LOG_PID
- /* Check for invalid bits. */
- if (pri & ~(LOG_PRIMASK|LOG_FACMASK)) {
- syslog(INTERNALLOG,
- "syslog: unknown facility/priority: %x", pri);
- pri &= LOG_PRIMASK|LOG_FACMASK;
- }
-
- /* Check priority against setlogmask values. */
- if ((LOG_MASK (LOG_PRI (pri)) & LogMask) == 0)
- return;
-
- /* Set default facility if none specified. */
- if ((pri & LOG_FACMASK) == 0)
- pri |= LogFacility;
-
- /* Build the message in a memory-buffer stream. */
- f = open_memstream (&buf, &bufsize);
- if (f == NULL)
- {
- /* We cannot get a stream. There is not much we can do but
- emitting an error messages. */
- char numbuf[3 * sizeof (pid_t)];
- char *nump;
- char *endp = __stpcpy (failbuf, "out of memory [");
- pid_t pid = __getpid ();
-
- nump = numbuf + sizeof (numbuf);
- /* The PID can never be zero. */
- do
- *--nump = '0' + pid % 10;
- while ((pid /= 10) != 0);
-
- endp = __mempcpy (endp, nump, (numbuf + sizeof (numbuf)) - nump);
- *endp++ = ']';
- *endp = '\0';
- buf = failbuf;
- bufsize = endp - failbuf;
- msgoff = 0;
- }
- else
- {
- __fsetlocking (f, FSETLOCKING_BYCALLER);
- prioff = fprintf (f, "<%d>", pri);
- (void) time (&now);
- f->_IO_write_ptr += __strftime_l (f->_IO_write_ptr,
- f->_IO_write_end
- - f->_IO_write_ptr,
- "%h %e %T ",
- __localtime_r (&now, &now_tm),
- &_nl_C_locobj);
- msgoff = ftell (f);
- if (LogTag == NULL)
- LogTag = __progname;
- if (LogTag != NULL)
- fputs_unlocked (LogTag, f);
- if (LogStat & LOG_PID)
- fprintf (f, "[%d]", (int) __getpid ());
- if (LogTag != NULL)
- {
- putc_unlocked (':', f);
- putc_unlocked (' ', f);
- }
-
- /* Restore errno for %m format. */
- __set_errno (saved_errno);
-
- /* We have the header. Print the user's format into the
- buffer. */
- vfprintf (f, fmt, ap);
-
- /* Close the memory stream; this will finalize the data
- into a malloc'd buffer in BUF. */
- fclose (f);
- }
-
- /* Output to stderr if requested. */
- if (LogStat & LOG_PERROR) {
- struct iovec iov[2];
- register struct iovec *v = iov;
-
- v->iov_base = buf + msgoff;
- v->iov_len = bufsize - msgoff;
- /* Append a newline if necessary. */
- if (buf[bufsize - 1] != '\n')
- {
- ++v;
- v->iov_base = (char *) "\n";
- v->iov_len = 1;
- }
-
- __libc_cleanup_push (free, buf == failbuf ? NULL : buf);
-
- /* writev is a cancellation point. */
- (void)__writev(STDERR_FILENO, iov, v - iov + 1);
-
- __libc_cleanup_pop (0);
- }
-
- /* Prepare for multiple users. We have to take care: open and
- write are cancellation points. */
- struct cleanup_arg clarg;
- clarg.buf = buf;
- clarg.oldaction = NULL;
- __libc_cleanup_push (cancel_handler, &clarg);
- __libc_lock_lock (syslog_lock);
-
- /* Prepare for a broken connection. */
- memset (&action, 0, sizeof (action));
- action.sa_handler = sigpipe_handler;
- sigemptyset (&action.sa_mask);
- sigpipe = __sigaction (SIGPIPE, &action, &oldaction);
- if (sigpipe == 0)
- clarg.oldaction = &oldaction;
-
- /* Get connected, output the message to the local logger. */
- if (!connected)
- openlog_internal(LogTag, LogStat | LOG_NDELAY, 0);
-
- /* If we have a SOCK_STREAM connection, also send ASCII NUL as
- a record terminator. */
- if (LogType == SOCK_STREAM)
- ++bufsize;
-
- if (!connected || __send(LogFile, buf, bufsize, 0) < 0)
- {
- if (connected)
- {
- /* Try to reopen the syslog connection. Maybe it went
- down. */
- closelog_internal ();
- openlog_internal(LogTag, LogStat | LOG_NDELAY, 0);
- }
-
- if (!connected || __send(LogFile, buf, bufsize, 0) < 0)
- {
- closelog_internal (); /* attempt re-open next time */
- /*
- * Output the message to the console; don't worry
- * about blocking, if console blocks everything will.
- * Make sure the error reported is the one from the
- * syslogd failure.
- */
- if (LogStat & LOG_CONS &&
- (fd = __open(_PATH_CONSOLE, O_WRONLY|O_NOCTTY, 0)) >= 0)
- {
- dprintf (fd, "%s\r\n", buf + msgoff);
- (void)__close(fd);
- }
- }
- }
-
- if (sigpipe == 0)
- __sigaction (SIGPIPE, &oldaction, (struct sigaction *) NULL);
-
- /* End of critical section. */
- __libc_cleanup_pop (0);
- __libc_lock_unlock (syslog_lock);
-
- if (buf != failbuf)
- free (buf);
-}
-libc_hidden_def (vsyslog)
-
-static struct sockaddr SyslogAddr; /* AF_UNIX address of local logger */
-
-
-static void
-internal_function
-openlog_internal(const char *ident, int logstat, int logfac)
-{
- if (ident != NULL)
- LogTag = ident;
- LogStat = logstat;
- if (logfac != 0 && (logfac &~ LOG_FACMASK) == 0)
- LogFacility = logfac;
-
- int retry = 0;
- while (retry < 2) {
- if (LogFile == -1) {
- SyslogAddr.sa_family = AF_UNIX;
- (void)strncpy(SyslogAddr.sa_data, _PATH_LOG,
- sizeof(SyslogAddr.sa_data));
- if (LogStat & LOG_NDELAY) {
- if ((LogFile = __socket(AF_UNIX, LogType, 0))
- == -1)
- return;
- (void)__fcntl(LogFile, F_SETFD, 1);
- }
- }
- if (LogFile != -1 && !connected)
- {
- int old_errno = errno;
- if (__connect(LogFile, &SyslogAddr, sizeof(SyslogAddr))
- == -1)
- {
- int saved_errno = errno;
- int fd = LogFile;
- LogFile = -1;
- (void)__close(fd);
- __set_errno (old_errno);
- if (saved_errno == EPROTOTYPE)
- {
- /* retry with the other type: */
- LogType = (LogType == SOCK_DGRAM
- ? SOCK_STREAM : SOCK_DGRAM);
- ++retry;
- continue;
- }
- } else
- connected = 1;
- }
- break;
- }
-}
-
-void
-openlog (const char *ident, int logstat, int logfac)
-{
- /* Protect against multiple users and cancellation. */
- __libc_cleanup_push (cancel_handler, NULL);
- __libc_lock_lock (syslog_lock);
-
- openlog_internal (ident, logstat, logfac);
-
- __libc_cleanup_pop (1);
-}
-
-static void
-sigpipe_handler (int signo)
-{
- closelog_internal ();
-}
-
-static void
-closelog_internal()
-{
- if (!connected)
- return;
-
- __close (LogFile);
- LogFile = -1;
- connected = 0;
-}
-
-void
-closelog ()
-{
- /* Protect against multiple users and cancellation. */
- __libc_cleanup_push (cancel_handler, NULL);
- __libc_lock_lock (syslog_lock);
-
- closelog_internal ();
- LogTag = NULL;
- LogType = SOCK_DGRAM; /* this is the default */
-
- /* Free the lock. */
- __libc_cleanup_pop (1);
-}
-
-/* setlogmask -- set the log mask level */
-int
-setlogmask(pmask)
- int pmask;
-{
- int omask;
-
- omask = LogMask;
- if (pmask != 0)
- LogMask = pmask;
- return (omask);
-}
diff --git a/misc/tst-efgcvt.c b/misc/tst-efgcvt.c
index 91e5cf929e..8a31e0280a 100644
--- a/misc/tst-efgcvt.c
+++ b/misc/tst-efgcvt.c
@@ -20,6 +20,7 @@
# define _GNU_SOURCE 1
#endif
+#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@@ -59,6 +60,10 @@ static testcase ecvt_tests[] =
{ 123.01, -4, 3, "" },
{ 126.71, -4, 3, "" },
{ 0.0, 4, 1, "0000" },
+#if DBL_MANT_DIG == 53
+ { 0x1p-1074, 3, -323, "494" },
+ { -0x1p-1074, 3, -323, "494" },
+#endif
/* -1.0 is end marker. */
{ -1.0, 0, 0, "" }
};
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index de90f2a66d..806f077ffb 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,41 @@
+2005-01-26 Jakub Jelinek <jakub@redhat.com>
+
+ [BZ #737]
+ * sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S (__new_sem_trywait):
+ Use direct %gs segment access or, if NO_TLS_DIRECT_SEG_REFS,
+ at least gotntpoff relocation and addition.
+ * sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S (sem_timedwait):
+ Likewise.
+ * sysdeps/unix/sysv/linux/i386/i486/sem_post.S (__new_sem_post):
+ Likewise.
+ * sysdeps/unix/sysv/linux/i386/i486/sem_wait.S (__new_sem_wait):
+ Likewise.
+
+2004-12-27 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #744]
+ * init.c (__pthread_initialize_minimal_internal): Use __sigemptyset.
+
+2004-12-21 Jakub Jelinek <jakub@redhat.com>
+
+ [BZ #723]
+ * sysdeps/i386/tls.h (CALL_THREAD_FCT): Maintain 16 byte alignment of
+ %esp.
+ * Makefile (tests): Add tst-align2.
+ * tst-align2.c: New test.
+ * sysdeps/i386/Makefile (CFLAGS-tst-align{,2}.c): Add
+ -mpreferred-stack-boundary=4.
+
+2004-12-21 Jakub Jelinek <jakub@redhat.com>
+
+ [BZ #723]
+ * sysdeps/i386/tls.h (CALL_THREAD_FCT): Maintain 16 byte alignment of
+ %esp.
+ * Makefile (tests): Add tst-align2.
+ * tst-align2.c: New test.
+ * sysdeps/i386/Makefile (CFLAGS-tst-align{,2}.c): Add
+ -mpreferred-stack-boundary=4.
+
2004-12-18 Roland McGrath <roland@redhat.com>
* sysdeps/unix/sysv/linux/powerpc/powerpc64/bits/local_lim.h:
diff --git a/nptl/Makefile b/nptl/Makefile
index d42f356131..8d18946e6f 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -205,7 +205,7 @@ tests = tst-attr1 tst-attr2 tst-attr3 \
tst-sem1 tst-sem2 tst-sem3 tst-sem4 tst-sem5 tst-sem6 tst-sem7 \
tst-sem8 tst-sem9 \
tst-barrier1 tst-barrier2 tst-barrier3 tst-barrier4 \
- tst-align \
+ tst-align tst-align2 \
tst-basic1 tst-basic2 tst-basic3 tst-basic4 tst-basic5 tst-basic6 \
tst-kill1 tst-kill2 tst-kill3 tst-kill4 tst-kill5 tst-kill6 \
tst-raise1 \
diff --git a/nptl/init.c b/nptl/init.c
index 3751e6be77..86745af8d1 100644
--- a/nptl/init.c
+++ b/nptl/init.c
@@ -262,7 +262,7 @@ __pthread_initialize_minimal_internal (void)
struct sigaction sa;
sa.sa_sigaction = sigcancel_handler;
sa.sa_flags = SA_SIGINFO;
- sigemptyset (&sa.sa_mask);
+ __sigemptyset (&sa.sa_mask);
(void) __libc_sigaction (SIGCANCEL, &sa, NULL);
diff --git a/nptl/sysdeps/i386/Makefile b/nptl/sysdeps/i386/Makefile
index 693fb0569f..2f0d88f303 100644
--- a/nptl/sysdeps/i386/Makefile
+++ b/nptl/sysdeps/i386/Makefile
@@ -22,4 +22,6 @@ endif
ifeq ($(subdir),nptl)
CFLAGS-pthread_create.c += -mpreferred-stack-boundary=4
+CFLAGS-tst-align.c += -mpreferred-stack-boundary=4
+CFLAGS-tst-align2.c += -mpreferred-stack-boundary=4
endif
diff --git a/nptl/sysdeps/i386/tls.h b/nptl/sysdeps/i386/tls.h
index 18b038f93e..945a4c71d6 100644
--- a/nptl/sysdeps/i386/tls.h
+++ b/nptl/sysdeps/i386/tls.h
@@ -397,9 +397,12 @@ union user_desc_init
#define CALL_THREAD_FCT(descr) \
({ void *__res; \
int __ignore1, __ignore2; \
- asm volatile ("pushl %%gs:%P4\n\t" \
+ asm volatile ("pushl %%eax\n\t" \
+ "pushl %%eax\n\t" \
+ "pushl %%eax\n\t" \
+ "pushl %%gs:%P4\n\t" \
"call *%%gs:%P3\n\t" \
- "addl $4, %%esp" \
+ "addl $16, %%esp" \
: "=a" (__res), "=c" (__ignore1), "=d" (__ignore2) \
: "i" (offsetof (struct pthread, start_routine)), \
"i" (offsetof (struct pthread, arg))); \
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
index 3d67329bd1..0219fea141 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
@@ -65,9 +65,14 @@ __new_sem_post:
#endif
addl $_GLOBAL_OFFSET_TABLE_, %ebx
#if USE___THREAD
- movl %gs:0, %edx
- subl errno@gottpoff(%ebx), %edx
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ movl errno@gotntpoff(%ebx), %edx
+ addl %gs:0, %edx
movl $EINVAL, (%edx)
+# else
+ movl errno@gotntpoff(%ebx), %edx
+ movl $EINVAL, %gs:(%edx)
+# endif
#else
call __errno_location@plt
movl $EINVAL, (%eax)
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
index 5b24476936..4a451dc29d 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
@@ -158,9 +158,14 @@ sem_timedwait:
#endif
addl $_GLOBAL_OFFSET_TABLE_, %ebx
#if USE___THREAD
- movl %gs:0, %edx
- subl errno@gottpoff(%ebx), %edx
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ movl errno@gotntpoff(%ebx), %edx
+ addl %gs:0, %edx
movl %esi, (%edx)
+# else
+ movl errno@gotntpoff(%ebx), %edx
+ movl %esi, %gs:(%edx)
+# endif
#else
call __errno_location@plt
movl %esi, (%eax)
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
index a7c405d95f..e6106f806f 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
@@ -55,9 +55,14 @@ __new_sem_trywait:
#endif
addl $_GLOBAL_OFFSET_TABLE_, %ecx
#if USE___THREAD
- movl %gs:0, %edx
- subl errno@gottpoff(%ecx), %edx
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ movl errno@gotntpoff(%ecx), %edx
+ addl %gs:0, %edx
movl $EAGAIN, (%edx)
+# else
+ movl errno@gotntpoff(%ecx), %edx
+ movl $EAGAIN, %gs:(%edx)
+# endif
#else
call __errno_location@plt
movl $EAGAIN, (%eax)
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
index b7674dc3ba..76947719ad 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
@@ -102,9 +102,14 @@ __new_sem_wait:
#endif
addl $_GLOBAL_OFFSET_TABLE_, %ebx
#if USE___THREAD
- movl %gs:0, %edx
- subl errno@gottpoff(%ebx), %edx
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ movl errno@gotntpoff(%ebx), %edx
+ addl %gs:0, %edx
movl %esi, (%edx)
+# else
+ movl errno@gotntpoff(%ebx), %edx
+ movl %esi, %gs:(%edx)
+# endif
#else
call __errno_location@plt
movl %esi, (%eax)
diff --git a/nscd/Makefile b/nscd/Makefile
index 70a35198c2..23feca4273 100644
--- a/nscd/Makefile
+++ b/nscd/Makefile
@@ -56,6 +56,8 @@ nscd-modules += selinux
selinux-LIBS := -lselinux
endif
+LDLIBS-nscd = $(selinux-LIBS)
+
distribute := nscd.h nscd-client.h dbg_log.h \
$(addsuffix .c, $(filter-out xmalloc, $(nscd-modules))) \
nscd_nischeck.c nscd.conf nscd.init nscd_proto.h \
@@ -110,7 +112,7 @@ $(objpfx)nscd: $(addprefix $(objpfx),$(nscd-modules:=.o))
$(extra-B-$(@F:lib%.so=%).so) $(load-map-file) \
$(LDFLAGS) $(LDFLAGS-$(@F)) \
-L$(subst :, -L,$(rpath-link)) -Wl,-rpath-link=$(rpath-link) \
- -o $@ $^ $(selinux-LIBS) $(common-objpfx)libc_nonshared.a
+ -o $@ $^ $(LDLIBS-nscd) $(common-objpfx)libc_nonshared.a
endif
# This makes sure -DNOT_IN_libc is passed for all these modules.
diff --git a/nscd/nscd.c b/nscd/nscd.c
index 5cca127f91..dec2020b8e 100644
--- a/nscd/nscd.c
+++ b/nscd/nscd.c
@@ -403,7 +403,7 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"),
"Thorsten Kukuk and Ulrich Drepper");
}
@@ -450,6 +450,9 @@ termination_handler (int signum)
/* Synchronize memory. */
for (int cnt = 0; cnt < lastdb; ++cnt)
{
+ if (!dbs[cnt].enabled)
+ continue;
+
/* Make sure nobody keeps using the database. */
dbs[cnt].head->timestamp = 0;
diff --git a/nscd/nscd.init b/nscd/nscd.init
index d5c1cb9ae3..23e20c3a82 100644
--- a/nscd/nscd.init
+++ b/nscd/nscd.init
@@ -88,9 +88,9 @@ case "$1" in
RETVAL=$?
;;
status)
- status nscd
+ status nscd
RETVAL=$?
- ;;
+ ;;
restart)
restart
RETVAL=$?
@@ -100,9 +100,11 @@ case "$1" in
RETVAL=$?
;;
reload)
- killproc /usr/sbin/nscd -HUP
+ echo -n $"Reloading $prog: "
+ killproc /usr/sbin/nscd -HUP
RETVAL=$?
- ;;
+ echo
+ ;;
*)
echo $"Usage: $0 {start|stop|status|restart|reload|condrestart}"
RETVAL=1
diff --git a/nscd/nscd_getai.c b/nscd/nscd_getai.c
index 24b374b0dc..cfb3fe2701 100644
--- a/nscd/nscd_getai.c
+++ b/nscd/nscd_getai.c
@@ -78,7 +78,7 @@ __nscd_getai (const char *key, struct nscd_ai_result **result, int *h_errnop)
sizeof (ai_resp_mem));
if (sock == -1)
{
- /* nscd not running or wrong version or hosts caching disabled. */
+ /* nscd not running or wrong version. */
__nss_not_use_nscd_hosts = 1;
goto out;
}
@@ -151,6 +151,13 @@ __nscd_getai (const char *key, struct nscd_ai_result **result, int *h_errnop)
}
else
{
+ if (__builtin_expect (ai_resp->found == -1, 0))
+ {
+ /* The daemon does not cache this database. */
+ __nss_not_use_nscd_hosts = 1;
+ goto out_close;
+ }
+
/* Store the error number. */
*h_errnop = ai_resp->error;
diff --git a/nscd/nscd_initgroups.c b/nscd/nscd_initgroups.c
index 2ea9e7f862..a25f1fbcd7 100644
--- a/nscd/nscd_initgroups.c
+++ b/nscd/nscd_initgroups.c
@@ -75,7 +75,7 @@ __nscd_getgrouplist (const char *user, gid_t group, long int *size,
sizeof (initgr_resp_mem));
if (sock == -1)
{
- /* nscd not running or wrong version or hosts caching disabled. */
+ /* nscd not running or wrong version. */
__nss_not_use_nscd_group = 1;
goto out;
}
@@ -101,7 +101,7 @@ __nscd_getgrouplist (const char *user, gid_t group, long int *size,
(initgr_resp->ngrps + 1) * sizeof (gid_t));
if (newp == NULL)
/* We cannot increase the buffer size. */
- goto out;
+ goto out_close;
*groupsp = newp;
*size = initgr_resp->ngrps + 1;
@@ -125,6 +125,13 @@ __nscd_getgrouplist (const char *user, gid_t group, long int *size,
}
else
{
+ if (__builtin_expect (initgr_resp->found == -1, 0))
+ {
+ /* The daemon does not cache this database. */
+ __nss_not_use_nscd_group = 1;
+ goto out_close;
+ }
+
/* No group found yet. */
retval = 0;
@@ -143,6 +150,7 @@ __nscd_getgrouplist (const char *user, gid_t group, long int *size,
(*groupsp)[retval++] = group;
}
+ out_close:
if (sock != -1)
close_not_cancel_no_status (sock);
out:
diff --git a/nscd/nscd_nischeck.c b/nscd/nscd_nischeck.c
index a6817cf79e..6266b9d23f 100644
--- a/nscd/nscd_nischeck.c
+++ b/nscd/nscd_nischeck.c
@@ -91,6 +91,6 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"), "Thorsten Kukuk");
}
diff --git a/nss/getent.c b/nss/getent.c
index c0a273241f..319322fa94 100644
--- a/nss/getent.c
+++ b/nss/getent.c
@@ -83,7 +83,7 @@ print_version (FILE *stream, struct argp_state *state)
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stream, gettext ("Written by %s.\n"), "Thorsten Kukuk");
}
diff --git a/posix/Makefile b/posix/Makefile
index 149283c65d..2a6f737165 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -140,16 +140,27 @@ CFLAGS-waitid.c = -fexceptions
CFLAGS-waitpid.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-getopt.c = -fexceptions
CFLAGS-wordexp.c = -fexceptions
+CFLAGS-wordexp.os = -fomit-frame-pointer
CFLAGS-sysconf.c = -fexceptions -DGETCONF_DIR='"$(libexecdir)/getconf"'
CFLAGS-pathconf.c = -fexceptions
CFLAGS-fpathconf.c = -fexceptions
CFLAGS-spawn.c = -fexceptions
+CFLAGS-spawn.os = -fomit-frame-pointer
CFLAGS-spawnp.c = -fexceptions
+CFLAGS-spawnp.os = -fomit-frame-pointer
CFLAGS-spawni.c = -fexceptions
+CFLAGS-spawni.os = -fomit-frame-pointer
CFLAGS-pause.c = -fexceptions
CFLAGS-glob.c = $(uses-callbacks) -fexceptions
CFLAGS-glob64.c = $(uses-callbacks) -fexceptions
CFLAGS-getconf.c = -DGETCONF_DIR='"$(libexecdir)/getconf"'
+CFLAGS-execve.os = -fomit-frame-pointer
+CFLAGS-fexecve.os = -fomit-frame-pointer
+CFLAGS-execv.os = -fomit-frame-pointer
+CFLAGS-execle.os = -fomit-frame-pointer
+CFLAGS-execl.os = -fomit-frame-pointer
+CFLAGS-execvp.os = -fomit-frame-pointer
+CFLAGS-execlp.os = -fomit-frame-pointer
tstgetopt-ARGS = -a -b -cfoobar --required foobar --optional=bazbug \
--none random --col --color --colour
diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c
index 4000b19b4d..3a6391cb01 100644
--- a/posix/bug-regex19.c
+++ b/posix/bug-regex19.c
@@ -170,22 +170,22 @@ static struct test_s
{ERE, "[^k]\\B[^k]", "kBk", 0, -1},
{ERE, "[^C]\\B[^C]", "CCCABA", 0, 3},
{ERE, "[^C]\\B[^C]", "CBC", 0, -1},
- {ERE, ".(\\b|\\B).", "=~AB", 0, 1},
+ {ERE, ".(\\b|\\B).", "=~AB", 0, 0},
{ERE, ".(\\b|\\B).", "A=C", 0, 0},
{ERE, ".(\\b|\\B).", "ABC", 0, 0},
- {ERE, ".(\\b|\\B).", "=~\\!", 0, -1},
- {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 1},
+ {ERE, ".(\\b|\\B).", "=~\\!", 0, 0},
+ {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 0},
{ERE, "[^k](\\b|\\B)[^k]", "A=C", 0, 0},
{ERE, "[^k](\\b|\\B)[^k]", "ABC", 0, 0},
- {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 3},
- {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, -1},
- {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, -1},
- {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 1},
+ {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 0},
+ {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, 0},
+ {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, 0},
+ {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 0},
{ERE, "[^C](\\b|\\B)[^C]", "A=C", 0, 0},
{ERE, "[^C](\\b|\\B)[^C]", "ABC", 0, 0},
- {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 3},
- {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, -1},
- {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, -1},
+ {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 0},
+ {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, 0},
+ {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, 0},
{ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
{ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1},
{ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1},
diff --git a/posix/execl.c b/posix/execl.c
index 62fd45db58..cffd714258 100644
--- a/posix/execl.c
+++ b/posix/execl.c
@@ -16,10 +16,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#include <alloca.h>
#include <unistd.h>
#include <stdarg.h>
#include <stddef.h>
+#include <stdlib.h>
#include <string.h>
#include <stackinfo.h>
@@ -33,46 +33,44 @@
int
execl (const char *path, const char *arg, ...)
{
- size_t argv_max = 1024;
- const char **argv = alloca (argv_max * sizeof (const char *));
- unsigned int i;
+#define INITIAL_ARGV_MAX 1024
+ size_t argv_max = INITIAL_ARGV_MAX;
+ const char *initial_argv[INITIAL_ARGV_MAX];
+ const char **argv = initial_argv;
va_list args;
argv[0] = arg;
va_start (args, arg);
- i = 0;
+ unsigned int i = 0;
while (argv[i++] != NULL)
{
if (i == argv_max)
{
- const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
-
-#ifndef _STACK_GROWS_UP
- if ((char *) nptr + argv_max == (char *) argv)
+ argv_max *= 2;
+ const char **nptr = realloc (argv == initial_argv ? NULL : argv,
+ argv_max * sizeof (const char *));
+ if (nptr == NULL)
{
- /* Stack grows down. */
- argv = (const char **) memcpy (nptr, argv,
- i * sizeof (const char *));
- argv_max += i;
+ if (argv != initial_argv)
+ free (argv);
+ return -1;
}
- else
-#endif
-#ifndef _STACK_GROWS_DOWN
- if ((char *) argv + i == (char *) nptr)
- /* Stack grows up. */
- argv_max += i;
- else
-#endif
- /* We have a hole in the stack. */
- argv = (const char **) memcpy (nptr, argv,
- i * sizeof (const char *));
+ if (argv == initial_argv)
+ /* We have to copy the already filled-in data ourselves. */
+ memcpy (nptr, argv, i * sizeof (const char *));
+
+ argv = nptr;
}
argv[i] = va_arg (args, const char *);
}
va_end (args);
- return __execve (path, (char *const *) argv, __environ);
+ int ret = __execve (path, (char *const *) argv, __environ);
+ if (argv != initial_argv)
+ free (argv);
+
+ return ret;
}
libc_hidden_def (execl)
diff --git a/posix/execle.c b/posix/execle.c
index 2199ebeb74..e956bb63cd 100644
--- a/posix/execle.c
+++ b/posix/execle.c
@@ -16,10 +16,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#include <alloca.h>
#include <unistd.h>
#include <stdarg.h>
#include <stddef.h>
+#include <stdlib.h>
#include <string.h>
#include <stackinfo.h>
@@ -29,48 +29,45 @@
int
execle (const char *path, const char *arg, ...)
{
- size_t argv_max = 1024;
- const char **argv = alloca (argv_max * sizeof (const char *));
- const char *const *envp;
- unsigned int i;
+#define INITIAL_ARGV_MAX 1024
+ size_t argv_max = INITIAL_ARGV_MAX;
+ const char *initial_argv[INITIAL_ARGV_MAX];
+ const char **argv = initial_argv;
va_list args;
argv[0] = arg;
va_start (args, arg);
- i = 0;
+ unsigned int i = 0;
while (argv[i++] != NULL)
{
if (i == argv_max)
{
- const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
-
-#ifndef _STACK_GROWS_UP
- if ((char *) nptr + argv_max == (char *) argv)
+ argv_max *= 2;
+ const char **nptr = realloc (argv == initial_argv ? NULL : argv,
+ argv_max * sizeof (const char *));
+ if (nptr == NULL)
{
- /* Stack grows down. */
- argv = (const char **) memcpy (nptr, argv,
- i * sizeof (const char *));
- argv_max += i;
+ if (argv != initial_argv)
+ free (argv);
+ return -1;
}
- else
-#endif
-#ifndef _STACK_GROWS_DOWN
- if ((char *) argv + i == (char *) nptr)
- /* Stack grows up. */
- argv_max += i;
- else
-#endif
- /* We have a hole in the stack. */
- argv = (const char **) memcpy (nptr, argv,
- i * sizeof (const char *));
+ if (argv == initial_argv)
+ /* We have to copy the already filled-in data ourselves. */
+ memcpy (nptr, argv, i * sizeof (const char *));
+
+ argv = nptr;
}
argv[i] = va_arg (args, const char *);
}
- envp = va_arg (args, const char *const *);
+ const char *const *envp = va_arg (args, const char *const *);
va_end (args);
- return __execve (path, (char *const *) argv, (char *const *) envp);
+ int ret = __execve (path, (char *const *) argv, (char *const *) envp);
+ if (argv != initial_argv)
+ free (argv);
+
+ return ret;
}
libc_hidden_def (execle)
diff --git a/posix/execlp.c b/posix/execlp.c
index ba8fc74c90..c530397aa0 100644
--- a/posix/execlp.c
+++ b/posix/execlp.c
@@ -16,10 +16,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#include <alloca.h>
#include <unistd.h>
#include <stdarg.h>
#include <stddef.h>
+#include <stdlib.h>
#include <string.h>
#include <stackinfo.h>
@@ -30,46 +30,44 @@
int
execlp (const char *file, const char *arg, ...)
{
- size_t argv_max = 1024;
- const char **argv = alloca (argv_max * sizeof (const char *));
- unsigned int i;
+#define INITIAL_ARGV_MAX 1024
+ size_t argv_max = INITIAL_ARGV_MAX;
+ const char *initial_argv[INITIAL_ARGV_MAX];
+ const char **argv = initial_argv;
va_list args;
argv[0] = arg;
va_start (args, arg);
- i = 0;
+ unsigned int i = 0;
while (argv[i++] != NULL)
{
if (i == argv_max)
{
- const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
-
-#ifndef _STACK_GROWS_UP
- if ((char *) nptr + argv_max == (char *) argv)
+ argv_max *= 2;
+ const char **nptr = realloc (argv == initial_argv ? NULL : argv,
+ argv_max * sizeof (const char *));
+ if (nptr == NULL)
{
- /* Stack grows down. */
- argv = (const char **) memcpy (nptr, argv,
- i * sizeof (const char *));
- argv_max += i;
+ if (argv != initial_argv)
+ free (argv);
+ return -1;
}
- else
-#endif
-#ifndef _STACK_GROWS_DOWN
- if ((char *) argv + i == (char *) nptr)
- /* Stack grows up. */
- argv_max += i;
- else
-#endif
- /* We have a hole in the stack. */
- argv = (const char **) memcpy (nptr, argv,
- i * sizeof (const char *));
+ if (argv == initial_argv)
+ /* We have to copy the already filled-in data ourselves. */
+ memcpy (nptr, argv, i * sizeof (const char *));
+
+ argv = nptr;
}
argv[i] = va_arg (args, const char *);
}
va_end (args);
- return execvp (file, (char *const *) argv);
+ int ret = execvp (file, (char *const *) argv);
+ if (argv != initial_argv)
+ free (argv);
+
+ return ret;
}
libc_hidden_def (execlp)
diff --git a/posix/execvp.c b/posix/execvp.c
index d6f60c02e7..a79d4a89c5 100644
--- a/posix/execvp.c
+++ b/posix/execvp.c
@@ -18,6 +18,7 @@
#include <unistd.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
@@ -26,9 +27,9 @@
/* The file is accessible but it is not an executable file. Invoke
the shell to interpret it as a script. */
-static void
+static char **
internal_function
-script_execute (const char *file, char *const argv[])
+allocate_scripts_argv (const char *file, char *const argv[])
{
/* Count the arguments. */
int argc = 0;
@@ -36,19 +37,19 @@ script_execute (const char *file, char *const argv[])
;
/* Construct an argument list for the shell. */
- {
- char *new_argv[argc + 1];
- new_argv[0] = (char *) _PATH_BSHELL;
- new_argv[1] = (char *) file;
- while (argc > 1)
- {
- new_argv[argc] = argv[argc - 1];
- --argc;
- }
-
- /* Execute the shell. */
- __execve (new_argv[0], new_argv, __environ);
- }
+ char **new_argv = (char **) malloc ((argc + 1) * sizeof (char *));
+ if (new_argv != NULL)
+ {
+ new_argv[0] = (char *) _PATH_BSHELL;
+ new_argv[1] = (char *) file;
+ while (argc > 1)
+ {
+ new_argv[argc] = argv[argc - 1];
+ --argc;
+ }
+ }
+
+ return new_argv;
}
@@ -66,42 +67,58 @@ execvp (file, argv)
return -1;
}
+ char **script_argv = NULL;
+
if (strchr (file, '/') != NULL)
{
/* Don't search when it contains a slash. */
__execve (file, argv, __environ);
if (errno == ENOEXEC)
- script_execute (file, argv);
+ {
+ script_argv = allocate_scripts_argv (file, argv);
+ if (script_argv != NULL)
+ {
+ __execve (script_argv[0], script_argv, __environ);
+
+ free (script_argv);
+ }
+ }
}
else
{
- int got_eacces = 0;
- char *path, *p, *name;
- size_t len;
- size_t pathlen;
-
- path = getenv ("PATH");
+ char *path = getenv ("PATH");
+ bool path_malloc = false;
if (path == NULL)
{
/* There is no `PATH' in the environment.
The default search path is the current directory
followed by the path `confstr' returns for `_CS_PATH'. */
- len = confstr (_CS_PATH, (char *) NULL, 0);
- path = (char *) __alloca (1 + len);
+ size_t len = confstr (_CS_PATH, (char *) NULL, 0);
+ path = (char *) malloc (1 + len);
+ if (path == NULL)
+ return -1;
path[0] = ':';
(void) confstr (_CS_PATH, path + 1, len);
+ path_malloc = true;
}
- len = strlen (file) + 1;
- pathlen = strlen (path);
- name = __alloca (pathlen + len + 1);
+ size_t len = strlen (file) + 1;
+ size_t pathlen = strlen (path);
+ char *name = malloc (pathlen + len + 1);
+ if (name == NULL)
+ {
+ if (path_malloc)
+ free (path);
+ return -1;
+ }
/* Copy the file name at the top. */
name = (char *) memcpy (name + pathlen + 1, file, len);
/* And add the slash. */
*--name = '/';
- p = path;
+ bool got_eacces = false;
+ char *p = path;
do
{
char *startp;
@@ -120,7 +137,21 @@ execvp (file, argv)
__execve (startp, argv, __environ);
if (errno == ENOEXEC)
- script_execute (startp, argv);
+ {
+ if (script_argv == NULL)
+ {
+ script_argv = allocate_scripts_argv (file, argv);
+ if (script_argv == NULL)
+ {
+ /* A possible EACCES error is not as important as
+ the ENOMEM. */
+ got_eacces = false;
+ break;
+ }
+ }
+
+ __execve (script_argv[0], script_argv, __environ);
+ }
switch (errno)
{
@@ -128,7 +159,7 @@ execvp (file, argv)
/* Record the we got a `Permission denied' error. If we end
up finding no executable we can use, we want to diagnose
that we did find one but were denied access. */
- got_eacces = 1;
+ got_eacces = true;
case ENOENT:
case ESTALE:
case ENOTDIR:
@@ -156,6 +187,11 @@ execvp (file, argv)
/* At least one failure was due to permissions, so report that
error. */
__set_errno (EACCES);
+
+ free (script_argv);
+ free (name);
+ if (path_malloc)
+ free (path);
}
/* Return the error from the last attempt (probably ENOENT). */
diff --git a/posix/getconf.c b/posix/getconf.c
index 4ce4f8e413..e21e3f5a72 100644
--- a/posix/getconf.c
+++ b/posix/getconf.c
@@ -964,7 +964,7 @@ main (int argc, char *argv[])
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2004");
+"), "2005");
fprintf (stderr, gettext ("Written by %s.\n"), "Roland McGrath");
return 0;
}
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 5de5bf725a..72bf187b14 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1859,7 +1859,7 @@ peek_token (token, input, syntax)
if (!(syntax & RE_NO_GNU_OPS))
{
token->type = ANCHOR;
- token->opr.ctx_type = INSIDE_WORD;
+ token->opr.ctx_type = NOT_WORD_DELIM;
}
break;
case 'w':
@@ -2349,15 +2349,25 @@ parse_expression (regexp, preg, token, syntax, nest, err)
break;
case ANCHOR:
if ((token->opr.ctx_type
- & (WORD_DELIM | INSIDE_WORD | WORD_FIRST | WORD_LAST))
+ & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
&& dfa->word_ops_used == 0)
init_word_char (dfa);
- if (token->opr.ctx_type == WORD_DELIM)
+ if (token->opr.ctx_type == WORD_DELIM
+ || token->opr.ctx_type == NOT_WORD_DELIM)
{
bin_tree_t *tree_first, *tree_last;
- token->opr.ctx_type = WORD_FIRST;
- tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
- token->opr.ctx_type = WORD_LAST;
+ if (token->opr.ctx_type == WORD_DELIM)
+ {
+ token->opr.ctx_type = WORD_FIRST;
+ tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ token->opr.ctx_type = WORD_LAST;
+ }
+ else
+ {
+ token->opr.ctx_type = INSIDE_WORD;
+ tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ token->opr.ctx_type = INSIDE_NOTWORD;
+ }
tree_last = re_dfa_add_tree_node (dfa, NULL, NULL, token);
token->type = OP_ALT;
tree = re_dfa_add_tree_node (dfa, tree_first, tree_last, token);
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 0ccd8d3665..18865a7266 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -143,18 +143,21 @@ static inline void bitset_mask (bitset dest, const bitset src);
#define NEXT_NEWLINE_CONSTRAINT 0x0020
#define PREV_BEGBUF_CONSTRAINT 0x0040
#define NEXT_ENDBUF_CONSTRAINT 0x0080
-#define DUMMY_CONSTRAINT 0x0100
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
typedef enum
{
INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+ INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
- WORD_DELIM = DUMMY_CONSTRAINT
+ WORD_DELIM = WORD_DELIM_CONSTRAINT,
+ NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
} re_context_type;
typedef struct
diff --git a/posix/rxspencer/tests b/posix/rxspencer/tests
index a724252d8c..a8b6e4baa8 100644
--- a/posix/rxspencer/tests
+++ b/posix/rxspencer/tests
@@ -526,3 +526,12 @@ a((b+|((c)*)))+d - abcd abcd c,c,c,c
(((\b))){0} - x @x -,-,-
a(((.*)))b((\2)){0}c - abc abc @bc,@bc,@bc,-,-
a(((.*)))b((\1)){0}c - axbc axbc x,x,x,-,-
+
+\b & SaT @aT
+\b & aT @aT
+a.*\b & abT ab
+\b & STSS
+\B & abc @bc
+\B & aSbTc
+\B & SaT @SaT
+\B & aSTSb @TSb
diff --git a/posix/tst-rxspencer.c b/posix/tst-rxspencer.c
index cb40421797..3febc01cb2 100644
--- a/posix/tst-rxspencer.c
+++ b/posix/tst-rxspencer.c
@@ -127,14 +127,15 @@ mb_frob_string (const char *str, const char *letters)
}
/* Like mb_frob_string, but don't replace anything between
- [: and :], [. and .] or [= and =]. */
+ [: and :], [. and .] or [= and =] or characters escaped
+ with a backslash. */
static char *
mb_frob_pattern (const char *str, const char *letters)
{
char *ret, *dst;
const char *src;
- int in_class = 0;
+ int in_class = 0, escaped = 0;
if (str == NULL)
return NULL;
@@ -144,7 +145,18 @@ mb_frob_pattern (const char *str, const char *letters)
return NULL;
for (src = str, dst = ret; *src; ++src)
- if (!in_class && strchr (letters, *src))
+ if (*src == '\\')
+ {
+ escaped ^= 1;
+ *dst++ = *src;
+ }
+ else if (escaped)
+ {
+ escaped = 0;
+ *dst++ = *src;
+ continue;
+ }
+ else if (!in_class && strchr (letters, *src))
dst = mb_replace (dst, *src);
else
{
diff --git a/posix/unistd.h b/posix/unistd.h
index 5d42169e82..bf66f7543a 100644
--- a/posix/unistd.h
+++ b/posix/unistd.h
@@ -884,6 +884,11 @@ extern void sync (void) __THROW;
extern int getpagesize (void) __THROW __attribute__ ((__const__));
+/* Return the maximum number of file descriptors
+ the current process could possibly have. */
+extern int getdtablesize (void) __THROW;
+
+
/* Truncate FILE to LENGTH bytes. */
# ifndef __USE_FILE_OFFSET64
extern int truncate (__const char *__file, __off_t __length)
@@ -902,6 +907,10 @@ extern int truncate64 (__const char *__file, __off64_t __length)
__THROW __nonnull ((1));
# endif
+#endif /* Use BSD || X/Open Unix. */
+
+#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED || defined __USE_XOPEN2K
+
/* Truncate the file FD is open on to LENGTH bytes. */
# ifndef __USE_FILE_OFFSET64
extern int ftruncate (int __fd, __off_t __length) __THROW;
@@ -917,12 +926,7 @@ extern int __REDIRECT_NTH (ftruncate, (int __fd, __off64_t __length),
extern int ftruncate64 (int __fd, __off64_t __length) __THROW;
# endif
-
-/* Return the maximum number of file descriptors
- the current process could possibly have. */
-extern int getdtablesize (void) __THROW;
-
-#endif /* Use BSD || X/Open Unix. */
+#endif /* Use BSD || X/Open Unix || POSIX 2003. */
#if defined __USE_MISC || defined __USE_XOPEN_EXTENDED
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 3a66f1d021..947ec8ab31 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -53,7 +53,7 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \
scanf11 scanf12 tst-tmpnam tst-cookie tst-obprintf tst-sscanf \
tst-swprintf tst-fseek tst-fmemopen test-vfprintf tst-gets \
tst-perror tst-sprintf tst-rndseek tst-fdopen tst-fphex bug14 bug15 \
- tst-popen tst-unlockedio
+ tst-popen tst-unlockedio tst-fmemopen2
test-srcs = tst-unbputc tst-printf
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 5f4675033e..24db960044 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -61,10 +61,15 @@ distribute := exit.h grouping.h abort-instr.h isomac.c tst-fmtmsg.sh \
test-srcs := tst-fmtmsg
tests := tst-strtol tst-strtod testmb testrand testsort testdiv \
test-canon test-canon2 tst-strtoll tst-environ \
- tst-xpg-basename tst-random tst-bsearch tst-limits \
- tst-rand48 bug-strtod tst-setcontext test-a64l tst-qsort \
- tst-system testmb2
+ tst-xpg-basename tst-random tst-random2 tst-bsearch \
+ tst-limits tst-rand48 bug-strtod tst-setcontext \
+ test-a64l tst-qsort tst-system testmb2
+include ../Makeconfig
+
+ifeq ($(build-shared),yes)
+tests += tst-putenv
+endif
# Several mpn functions from GNU MP are used by the strtod function.
mpn-routines := inlines add_n addmul_1 cmp divmod_1 divrem udiv_qrnnd \
@@ -75,18 +80,21 @@ routines := $(strip $(routines) $(mpn-routines)) \
dbl2mpn ldbl2mpn \
mpn2flt mpn2dbl mpn2ldbl
aux += fpioconst mp_clz_tab
-distribute := $(distribute) $(mpn-headers) gen-mpn-copy fpioconst.h
+distribute := $(distribute) $(mpn-headers) gen-mpn-copy fpioconst.h \
+ tst-putenvmod.c
+
+tests-extras += tst-putenvmod
+extra-objs += tst-putenvmod.os
-generated += isomac isomac.out
+generated += isomac isomac.out tst-putenvmod.so
CFLAGS-bsearch.c = $(uses-callbacks)
CFLAGS-msort.c = $(uses-callbacks)
CFLAGS-qsort.c = $(uses-callbacks)
CFLAGS-system.c = -fexceptions
+CFLAGS-system.os = -fomit-frame-pointer
CFLAGS-fmtmsg.c = -fexceptions
-include ../Makeconfig
-
ifneq (,$(filter %REENTRANT, $(defines)))
CFLAGS-strfmon.c = -D_IO_MTSAFE_IO
CFLAGS-strfmon_l.c = -D_IO_MTSAFE_IO
@@ -124,3 +132,9 @@ $(objpfx)isomac: isomac.c
$(objpfx)tst-fmtmsg.out: tst-fmtmsg.sh $(objpfx)tst-fmtmsg
$(SHELL) -e $< $(common-objpfx) '$(run-program-prefix)' $(common-objpfx)stdlib/
+
+$(objpfx)tst-putenv: $(objpfx)tst-putenvmod.so
+
+$(objpfx)tst-putenvmod.so: $(objpfx)tst-putenvmod.os
+ $(build-module)
+CFLAGS-tst-putenvmod.c = -DNOT_IN_libc=1
diff --git a/stdlib/fmtmsg.c b/stdlib/fmtmsg.c
index 2ab97b7d90..7505e4c78d 100644
--- a/stdlib/fmtmsg.c
+++ b/stdlib/fmtmsg.c
@@ -316,7 +316,7 @@ internal_addseverity (int severity, const char *string)
int result = MM_OK;
/* First see if there is already a record for the severity level. */
- for (runp = severity_list, lastp = NULL; runp != NULL; runp = runp-> next)
+ for (runp = severity_list, lastp = NULL; runp != NULL; runp = runp->next)
if (runp->severity == severity)
break;
else
@@ -324,9 +324,6 @@ internal_addseverity (int severity, const char *string)
if (runp != NULL)
{
- /* Release old string. */
- free ((char *) runp->string);
-
if (string != NULL)
/* Change the string. */
runp->string = string;
@@ -367,34 +364,17 @@ int
addseverity (int severity, const char *string)
{
int result;
- const char *new_string;
/* Prevent illegal SEVERITY values. */
if (severity <= MM_INFO)
return MM_NOTOK;
- if (string == NULL)
- /* We want to remove the severity class. */
- new_string = NULL;
- else
- {
- new_string = __strdup (string);
-
- if (new_string == NULL)
- /* Allocation failed or illegal value. */
- return MM_NOTOK;
- }
-
/* Protect the global data. */
__libc_lock_lock (lock);
/* Do the real work. */
result = internal_addseverity (severity, string);
- if (result != MM_OK)
- /* Free the allocated string. */
- free ((char *) new_string);
-
/* Release the lock. */
__libc_lock_unlock (lock);
@@ -411,7 +391,6 @@ libc_freeres_fn (free_mem)
{
/* This is data we have to release. */
struct severity_info *here = runp;
- free ((char *) runp->string);
runp = runp->next;
free (here);
}
diff --git a/stdlib/random_r.c b/stdlib/random_r.c
index 09677e6077..e955ea576a 100644
--- a/stdlib/random_r.c
+++ b/stdlib/random_r.c
@@ -240,10 +240,19 @@ __initstate_r (seed, arg_state, n, buf)
int degree;
int separation;
int32_t *state;
+ int old_type;
+ int32_t *old_state;
if (buf == NULL)
goto fail;
+ old_type = buf->rand_type;
+ old_state = buf->state;
+ if (old_type == TYPE_0)
+ old_state[-1] = TYPE_0;
+ else
+ old_state[-1] = (MAX_TYPES * (buf->rptr - old_state)) + old_type;
+
if (n >= BREAK_3)
type = n < BREAK_4 ? TYPE_3 : TYPE_4;
else if (n < BREAK_1)
diff --git a/stdlib/tst-fmtmsg.c b/stdlib/tst-fmtmsg.c
index d5369bda62..c3748d64d5 100644
--- a/stdlib/tst-fmtmsg.c
+++ b/stdlib/tst-fmtmsg.c
@@ -1,6 +1,8 @@
#include <fmtmsg.h>
#include <mcheck.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#define MM_TEST 10
@@ -12,11 +14,13 @@ main (void)
mtrace ();
- if (addseverity (MM_TEST, "TEST") != MM_OK)
+ char TEST[] = "ABCD";
+ if (addseverity (MM_TEST, TEST) != MM_OK)
{
puts ("addseverity failed");
result = 1;
}
+ strcpy (TEST, "TEST");
if (fmtmsg (MM_PRINT, "GLIBC:tst-fmtmsg", MM_HALT, "halt",
"should print message for MM_HALT", "GLIBC:tst-fmtmsg:1")
@@ -48,5 +52,31 @@ main (void)
!= MM_OK)
result = 1;
+ if (addseverity (MM_TEST, NULL) != MM_OK)
+ {
+ puts ("second addseverity failed");
+ result = 1;
+ }
+
+ if (addseverity (MM_TEST, NULL) != MM_NOTOK)
+ {
+ puts ("third addseverity unexpectedly succeeded");
+ result = 1;
+ }
+
+ char *p = strdup ("TEST2");
+ if (addseverity (MM_TEST, p) != MM_OK)
+ {
+ puts ("fourth addseverity failed");
+ result = 1;
+ }
+ if (addseverity (MM_TEST, "TEST3") != MM_OK)
+ {
+ puts ("fifth addseverity failed");
+ result = 1;
+ }
+
+ free (p);
+
return result;
}
diff --git a/sunrpc/openchild.c b/sunrpc/openchild.c
index 16f6a34e7a..29ddfa6b75 100644
--- a/sunrpc/openchild.c
+++ b/sunrpc/openchild.c
@@ -81,7 +81,7 @@ _openchild (const char *command, FILE ** fto, FILE ** ffrom)
for (i = _rpc_dtablesize () - 1; i >= 3; i--)
__close (i);
fflush (stderr);
- execlp (command, command, 0);
+ execlp (command, command, NULL);
perror ("exec");
_exit (~0);
diff --git a/sunrpc/svc_tcp.c b/sunrpc/svc_tcp.c
index 4decfa4fd0..873d39d52a 100644
--- a/sunrpc/svc_tcp.c
+++ b/sunrpc/svc_tcp.c
@@ -165,7 +165,7 @@ svctcp_create (int sock, u_int sendsize, u_int recvsize)
(void) __bind (sock, (struct sockaddr *) &addr, len);
}
if ((__getsockname (sock, (struct sockaddr *) &addr, &len) != 0) ||
- (__listen (sock, 2) != 0))
+ (__listen (sock, SOMAXCONN) != 0))
{
perror (_("svc_tcp.c - cannot getsockname or listen"));
if (madesock)
diff --git a/sunrpc/svc_unix.c b/sunrpc/svc_unix.c
index cfbc63866b..d95e884434 100644
--- a/sunrpc/svc_unix.c
+++ b/sunrpc/svc_unix.c
@@ -161,7 +161,7 @@ svcunix_create (int sock, u_int sendsize, u_int recvsize, char *path)
__bind (sock, (struct sockaddr *) &addr, len);
if (__getsockname (sock, (struct sockaddr *) &addr, &len) != 0
- || __listen (sock, 2) != 0)
+ || __listen (sock, SOMAXCONN) != 0)
{
perror (_("svc_unix.c - cannot getsockname or listen"));
if (madesock)
diff --git a/sysdeps/generic/dl-tls.c b/sysdeps/generic/dl-tls.c
index 3382e3493c..03a45c7c88 100644
--- a/sysdeps/generic/dl-tls.c
+++ b/sysdeps/generic/dl-tls.c
@@ -65,7 +65,10 @@ _dl_next_tls_modid (void)
/* Note that this branch will never be executed during program
start since there are no gaps at that time. Therefore it
does not matter that the dl_tls_dtv_slotinfo is not allocated
- yet when the function is called for the first times. */
+ yet when the function is called for the first time.
+
+ NB: the offset +1 is due to the fact that DTV[0] is used
+ for something else. */
result = GL(dl_tls_static_nelem) + 1;
/* If the following would not be true we mustn't have assumed
there is a gap. */
@@ -88,11 +91,11 @@ _dl_next_tls_modid (void)
}
while ((runp = runp->next) != NULL);
- if (result >= GL(dl_tls_max_dtv_idx))
+ if (result > GL(dl_tls_max_dtv_idx))
{
/* The new index must indeed be exactly one higher than the
previous high. */
- assert (result == GL(dl_tls_max_dtv_idx));
+ assert (result == GL(dl_tls_max_dtv_idx) + 1);
/* There is no gap anymore. */
GL(dl_tls_dtv_gaps) = false;
@@ -577,7 +580,7 @@ __tls_get_addr (GET_ADDR_ARGS)
{
size_t cnt;
- for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
+ for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
{
size_t gen = listp->slotinfo[cnt].gen;
struct link_map *map;
diff --git a/sysdeps/generic/libc-start.c b/sysdeps/generic/libc-start.c
index fc9df40996..0a8b0ce32a 100644
--- a/sysdeps/generic/libc-start.c
+++ b/sysdeps/generic/libc-start.c
@@ -106,9 +106,9 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
__libc_multiple_libcs = &_dl_starting_up && !_dl_starting_up;
+#ifndef SHARED
INIT_ARGV_and_ENVIRON;
-#ifndef SHARED
/* Store the lowest stack address. This is done in ld.so if this is
the code for the DSO. */
__libc_stack_end = stack_end;
diff --git a/sysdeps/generic/wordexp.c b/sysdeps/generic/wordexp.c
index 3e37d6449c..852e171767 100644
--- a/sysdeps/generic/wordexp.c
+++ b/sysdeps/generic/wordexp.c
@@ -810,7 +810,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
/* Function called by child process in exec_comm() */
static void
-internal_function
+internal_function __attribute__ ((always_inline))
exec_comm_child (char *comm, int *fildes, int showerr, int noexec)
{
const char *args[4] = { _PATH_BSHELL, "-c", comm, NULL };
@@ -868,13 +868,14 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
const char *ifs_white)
{
int fildes[2];
- int bufsize = 128;
+#define bufsize 128
int buflen;
int i;
int status = 0;
size_t maxnewlines = 0;
- char *buffer;
+ char buffer[bufsize];
pid_t pid;
+ int noexec = 0;
/* Don't fork() unless necessary */
if (!comm || !*comm)
@@ -884,32 +885,42 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
/* Bad */
return WRDE_NOSPACE;
+ again:
if ((pid = __fork ()) < 0)
{
/* Bad */
- __close (fildes[0]);
- __close (fildes[1]);
+ if (fildes[0] != -1)
+ __close (fildes[0]);
+ if (fildes[1] != -1)
+ __close (fildes[1]);
return WRDE_NOSPACE;
}
if (pid == 0)
- exec_comm_child (comm, fildes, flags & WRDE_SHOWERR, 0);
+ exec_comm_child (comm, fildes, noexec ? 0 : flags & WRDE_SHOWERR, noexec);
/* Parent */
+ /* If we are just testing the syntax, only wait. */
+ if (noexec)
+ return (TEMP_FAILURE_RETRY (__waitpid (pid, &status, 0)) == pid
+ && status != 0) ? WRDE_SYNTAX : 0;
+
__close (fildes[1]);
- buffer = __alloca (bufsize);
+ fildes[1] = -1;
if (!pwordexp)
/* Quoted - no field splitting */
{
while (1)
{
- if ((buflen = __read (fildes[0], buffer, bufsize)) < 1)
+ if ((buflen = TEMP_FAILURE_RETRY (__read (fildes[0], buffer,
+ bufsize))) < 1)
{
- if (__waitpid (pid, &status, WNOHANG) == 0)
+ if (TEMP_FAILURE_RETRY (__waitpid (pid, &status, WNOHANG)) == 0)
continue;
- if ((buflen = __read (fildes[0], buffer, bufsize)) < 1)
+ if ((buflen = TEMP_FAILURE_RETRY (__read (fildes[0], buffer,
+ bufsize))) < 1)
break;
}
@@ -933,11 +944,13 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
while (1)
{
- if ((buflen = __read (fildes[0], buffer, bufsize)) < 1)
+ if ((buflen = TEMP_FAILURE_RETRY (__read (fildes[0], buffer,
+ bufsize))) < 1)
{
- if (__waitpid (pid, &status, WNOHANG) == 0)
+ if (TEMP_FAILURE_RETRY (__waitpid (pid, &status, WNOHANG)) == 0)
continue;
- if ((buflen = __read (fildes[0], buffer, bufsize)) < 1)
+ if ((buflen = TEMP_FAILURE_RETRY (__read (fildes[0], buffer,
+ bufsize))) < 1)
break;
}
@@ -1053,31 +1066,20 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
}
__close (fildes[0]);
+ fildes[0] = -1;
/* Check for syntax error (re-execute but with "-n" flag) */
if (buflen < 1 && status != 0)
{
- if ((pid = __fork ()) < 0)
- {
- /* Bad */
- return WRDE_NOSPACE;
- }
-
- if (pid == 0)
- {
- fildes[0] = fildes[1] = -1;
- exec_comm_child (comm, fildes, 0, 1);
- }
-
- if (__waitpid (pid, &status, 0) == pid && status != 0)
- return WRDE_SYNTAX;
+ noexec = 1;
+ goto again;
}
return 0;
no_space:
__kill (pid, SIGKILL);
- __waitpid (pid, NULL, 0);
+ TEMP_FAILURE_RETRY (__waitpid (pid, NULL, 0));
__close (fildes[0]);
return WRDE_NOSPACE;
}
diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile
index 52faaa3109..ece94f3610 100644
--- a/sysdeps/i386/Makefile
+++ b/sysdeps/i386/Makefile
@@ -61,3 +61,7 @@ CFLAGS-dlopenold.c += -mpreferred-stack-boundary=4
CFLAGS-dlclose.c += -mpreferred-stack-boundary=4
CFLAGS-dlerror.c += -mpreferred-stack-boundary=4
endif
+
+ifneq (,$(filter -mno-tls-direct-seg-refs,$(CFLAGS)))
+defines += -DNO_TLS_DIRECT_SEG_REFS
+endif
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index c48d9d325e..af7e3f1bba 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -308,11 +308,21 @@ _dl_start_user:\n\
movl _rtld_local@GOTOFF(%ebx), %eax\n\
leal 8(%esp,%edx,4), %esi\n\
leal 4(%esp), %ecx\n\
+ movl %esp, %ebp\n\
+ # Make sure _dl_init is run with 16 byte aligned stack.\n\
+ andl $-16, %esp\n\
+ pushl %eax\n\
+ pushl %eax\n\
+ pushl %ebp\n\
pushl %esi\n\
+ # Clear %ebp, so that even constructors have terminated backchain.\n\
+ xorl %ebp, %ebp\n\
# Call the function to run the initializers.\n\
call _dl_init_internal@PLT\n\
# Pass our finalizer function to the user in %edx, as per ELF ABI.\n\
leal _dl_fini@GOTOFF(%ebx), %edx\n\
+ # Restore %esp _start expects.\n\
+ movl (%esp), %esp\n\
# Jump to the user's entry point.\n\
jmp *%edi\n\
.previous\n\
diff --git a/sysdeps/ia64/fpu/Makefile b/sysdeps/ia64/fpu/Makefile
index 6d1b0c1717..7ec30c43d3 100644
--- a/sysdeps/ia64/fpu/Makefile
+++ b/sysdeps/ia64/fpu/Makefile
@@ -1,8 +1,33 @@
ifeq ($(subdir),math)
-libm-sysdep_routines += libm_atan2_reg s_matherrf s_matherrl libm_reduce \
- libm_tan libm_error \
- libm_frexp4 libm_frexp4f libm_frexp4l
+#
+# Some files which need to go both into libc and libm have external
+# dependencies which need to be resolved differently for libc
+# vs. libm. For example, inside libc, __libm_error_support needs to
+# resolve to HIDDEN_JUMPTARGET(__libm_error_support) whereas within
+# libm it always resolves to __libm_error_support. Such files need to
+# be compiled twice. Fortunately, math/Makefile already has logic to
+# support this: if a file starts with "s_", make will automatically
+# generate a matching file whose name starts with "m_" which simply
+# includes the corresponding "s_" file.
+#
+duplicated-routines = s_libm_ldexp s_libm_ldexpf s_libm_ldexpl \
+ s_libm_scalbn s_libm_scalbnf s_libm_scalbnl
-sysdep_routines += libm_frexp4 libm_frexp4f libm_frexp4l libc_libm_error
-sysdep-CPPFLAGS += -DSIZE_INT_32
+libm-sysdep_routines += s_erfc s_erfcf s_erfcl \
+ s_matherrf s_matherrl libm_reduce \
+ libm_error \
+ libm_frexp libm_frexpf libm_frexpl \
+ libm_sincos libm_sincosf libm_sincosl \
+ libm_sincos_large \
+ libm_lgamma libm_lgammaf libm_lgammal \
+ libm_scalblnf \
+ $(duplicated-routines:s_%=m_%)
+
+sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \
+ $(duplicated-routines)
+
+sysdep-CPPFLAGS += -include libm-symbols.h \
+ -D__POSIX__ \
+ -D_LIB_VERSIONIMF=_LIB_VERSION \
+ -DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64
endif
diff --git a/sysdeps/ia64/fpu/e_acos.S b/sysdeps/ia64/fpu/e_acos.S
index 7e83811727..b515f01a1e 100644
--- a/sysdeps/ia64/fpu/e_acos.S
+++ b/sysdeps/ia64/fpu/e_acos.S
@@ -1,10 +1,10 @@
.file "acos.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003 Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,9 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// WARRANTY DISCLAIMER
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -37,838 +35,800 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
-// 2/02/00 Initial version
-// 8/17/00 New and much faster algorithm.
-// 8/30/00 Avoided bank conflicts on loads, shortened |x|=1 and x=0 paths,
+// 02/02/00 Initial version
+// 08/17/00 New and much faster algorithm.
+// 08/30/00 Avoided bank conflicts on loads, shortened |x|=1 and x=0 paths,
// fixed mfb split issue stalls.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/02/02 New and much faster algorithm II
+// 02/06/03 Reordered header: .section, .global, .proc, .align
// Description
//=========================================
-// The acos function computes the principle value of the arc sine of x.
+// The acos function computes the principal value of the arc cosine of x.
+// acos(0) returns Pi/2, acos(1) returns 0, acos(-1) returns Pi.
// A doman error occurs for arguments not in the range [-1,+1].
+//
+// The acos function returns the arc cosine in the range [0, Pi] radians.
+//
+// There are 8 paths:
+// 1. x = +/-0.0
+// Return acos(x) = Pi/2 + x
+//
+// 2. 0.0 < |x| < 0.625
+// Return acos(x) = Pi/2 - x - x^3 *PolA(x^2)
+// where PolA(x^2) = A3 + A5*x^2 + A7*x^4 +...+ A35*x^32
+//
+// 3. 0.625 <=|x| < 1.0
+// Return acos(x) = Pi/2 - asin(x) =
+// = Pi/2 - sign(x) * ( Pi/2 - sqrt(R) * PolB(R))
+// Where R = 1 - |x|,
+// PolB(R) = B0 + B1*R + B2*R^2 +...+B12*R^12
+//
+// sqrt(R) is approximated using the following sequence:
+// y0 = (1 + eps)/sqrt(R) - initial approximation by frsqrta,
+// |eps| < 2^(-8)
+// Then 3 iterations are used to refine the result:
+// H0 = 0.5*y0
+// S0 = R*y0
+//
+// d0 = 0.5 - H0*S0
+// H1 = H0 + d0*H0
+// S1 = S0 + d0*S0
+//
+// d1 = 0.5 - H1*S1
+// H2 = H1 + d1*H1
+// S2 = S1 + d1*S1
+//
+// d2 = 0.5 - H2*S2
+// S3 = S2 + d2*S2
+//
+// S3 approximates sqrt(R) with enough accuracy for this algorithm
+//
+// So, the result should be reconstructed as follows:
+// acos(x) = Pi/2 - sign(x) * (Pi/2 - S3*PolB(R))
+//
+// But for optimization purposes the reconstruction step is slightly
+// changed:
+// acos(x) = Cpi + sign(x)*PolB(R)*S2 - sign(x)*d2*S2*PolB(R)
+// where Cpi = 0 if x > 0 and Cpi = Pi if x < 0
+//
+// 4. |x| = 1.0
+// Return acos(1.0) = 0.0, acos(-1.0) = Pi
+//
+// 5. 1.0 < |x| <= +INF
+// A domain error occurs for arguments not in the range [-1,+1]
+//
+// 6. x = [S,Q]NaN
+// Return acos(x) = QNaN
+//
+// 7. x is denormal
+// Return acos(x) = Pi/2 - x,
+//
+// 8. x is unnormal
+// Normalize input in f8 and return to the very beginning of the function
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6, f7, f9 -> f15, f32 -> f64
-// The acos function returns the arc cosine in the range [0, +pi] radians.
-// acos(1) returns +0, acos(-1) returns pi, acos(0) returns pi/2.
-// acos(x) returns a Nan and raises the invalid exception for |x| >1
+// General registers used:
+// r3, r21 -> r31, r32 -> r38
-// The acos function is just like asin except that pi/2 is added at the end.
+// Predicate registers used:
+// p0, p6 -> p14
//
// Assembly macros
//=========================================
-
-#include "libm_support.h"
-
-// predicate registers
-//acos_pred_LEsqrt2by2 = p7
-//acos_pred_GTsqrt2by2 = p8
-
-// integer registers
-ASIN_Addr1 = r33
-ASIN_Addr2 = r34
-ASIN_FFFE = r35
-
-GR_SAVE_B0 = r36
-GR_SAVE_PFS = r37
-GR_SAVE_GP = r38
-
-GR_Parameter_X = r39
-GR_Parameter_Y = r40
-GR_Parameter_RESULT = r41
-GR_Parameter_Tag = r42
-
-// floating point registers
-acos_coeff_P1 = f32
-acos_coeff_P2 = f33
-acos_coeff_P3 = f34
-acos_coeff_P4 = f35
-
-acos_coeff_P5 = f36
-acos_coeff_P6 = f37
-acos_coeff_P7 = f38
-acos_coeff_P8 = f39
-acos_coeff_P9 = f40
-
-acos_coeff_P10 = f41
-acos_coeff_P11 = f42
-acos_coeff_P12 = f43
-acos_coeff_P13 = f44
-acos_coeff_P14 = f45
-
-acos_coeff_P15 = f46
-acos_coeff_P16 = f47
-acos_coeff_P17 = f48
-acos_coeff_P18 = f49
-acos_coeff_P19 = f50
-
-acos_coeff_P20 = f51
-acos_coeff_P21 = f52
-acos_const_sqrt2by2 = f53
-acos_const_piby2 = f54
-acos_abs_x = f55
-
-acos_tx = f56
-acos_tx2 = f57
-acos_tx3 = f58
-acos_tx4 = f59
-acos_tx8 = f60
-
-acos_tx11 = f61
-acos_1poly_p8 = f62
-acos_1poly_p19 = f63
-acos_1poly_p4 = f64
-acos_1poly_p15 = f65
-
-acos_1poly_p6 = f66
-acos_1poly_p17 = f67
-acos_1poly_p0 = f68
-acos_1poly_p11 = f69
-acos_1poly_p2 = f70
-
-acos_1poly_p13 = f71
-acos_series_tx = f72
-acos_t = f73
-acos_t2 = f74
-acos_t3 = f75
-
-acos_t4 = f76
-acos_t8 = f77
-acos_t11 = f78
-acos_poly_p8 = f79
-acos_poly_p19 = f80
-
-acos_poly_p4 = f81
-acos_poly_p15 = f82
-acos_poly_p6 = f83
-acos_poly_p17 = f84
-acos_poly_p0 = f85
-
-acos_poly_p11 = f86
-acos_poly_p2 = f87
-acos_poly_p13 = f88
-acos_series_t = f89
-acos_1by2 = f90
-
-acos_3by2 = f91
-acos_5by2 = f92
-acos_11by4 = f93
-acos_35by8 = f94
-acos_63by8 = f95
-
-acos_231by16 = f96
-acos_y0 = f97
-acos_H0 = f98
-acos_S0 = f99
-acos_d = f100
-
-acos_l1 = f101
-acos_d2 = f102
-acos_T0 = f103
-acos_d1 = f104
-acos_e0 = f105
-
-acos_l2 = f106
-acos_d3 = f107
-acos_T3 = f108
-acos_S1 = f109
-acos_e1 = f110
-
-acos_z = f111
-answer2 = f112
-acos_sgn_x = f113
-acos_429by16 = f114
-acos_18by4 = f115
-
-acos_3by4 = f116
-acos_l3 = f117
-acos_T6 = f118
-acos_const_add = f119
+// integer registers used
+// scratch
+rTblAddr = r3
+
+rPiBy2Ptr = r21
+rTmpPtr3 = r22
+rDenoBound = r23
+rOne = r24
+rAbsXBits = r25
+rHalf = r26
+r0625 = r27
+rSign = r28
+rXBits = r29
+rTmpPtr2 = r30
+rTmpPtr1 = r31
+
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+
+// scratch
+fXSqr = f6
+fXCube = f7
+fXQuadr = f9
+f1pX = f10
+f1mX = f11
+f1pXRcp = f12
+f1mXRcp = f13
+fH = f14
+fS = f15
+// stacked
+fA3 = f32
+fB1 = f32
+fA5 = f33
+fB2 = f33
+fA7 = f34
+fPiBy2 = f34
+fA9 = f35
+fA11 = f36
+fB10 = f35
+fB11 = f36
+fA13 = f37
+fA15 = f38
+fB4 = f37
+fB5 = f38
+fA17 = f39
+fA19 = f40
+fB6 = f39
+fB7 = f40
+fA21 = f41
+fA23 = f42
+fB3 = f41
+fB8 = f42
+fA25 = f43
+fA27 = f44
+fB9 = f43
+fB12 = f44
+fA29 = f45
+fA31 = f46
+fA33 = f47
+fA35 = f48
+fBaseP = f49
+fB0 = f50
+fSignedS = f51
+fD = f52
+fHalf = f53
+fR = f54
+fCloseTo1Pol = f55
+fSignX = f56
+fDenoBound = f57
+fNormX = f58
+fX8 = f59
+fRSqr = f60
+fRQuadr = f61
+fR8 = f62
+fX16 = f63
+fCpi = f64
// Data tables
//==============================================================
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
+RODATA
.align 16
-
-acos_coeff_1_table:
-ASM_TYPE_DIRECTIVE(acos_coeff_1_table,@object)
-data8 0xE4E7E0A423A21249 , 0x00003FF8 //P7
-data8 0xC2F7EE0200FCE2A5 , 0x0000C003 //P18
-data8 0xB745D7F6C65C20E0 , 0x00003FF9 //P5
-data8 0xF75E381A323D4D94 , 0x0000C002 //P16
-data8 0x8959C2629C1024C0 , 0x0000C002 //P20
-data8 0xAFF68E7D241292C5 , 0x00003FF8 //P9
-data8 0xB6DB6DB7260AC30D , 0x00003FFA //P3
-data8 0xD0417CE2B41CB7BF , 0x0000C000 //P14
-data8 0x81D570FEA724E3E4 , 0x0000BFFD //P12
-data8 0xAAAAAAAAAAAAC277 , 0x00003FFC //P1
-data8 0xF534912FF3E7B76F , 0x00003FFF //P21
-data8 0xc90fdaa22168c235 , 0x00003fff // pi/2
-data8 0x0000000000000000 , 0x00000000 // pad to avoid bank conflicts
-ASM_SIZE_DIRECTIVE(acos_coeff_1_table)
-
-
-acos_coeff_2_table:
-ASM_TYPE_DIRECTIVE(acos_coeff_2_table,@object)
-data8 0x8E26AF5F29B39A2A , 0x00003FF9 //P6
-data8 0xB4F118A4B1015470 , 0x00004003 //P17
-data8 0xF8E38E10C25990E0 , 0x00003FF9 //P4
-data8 0x80F50489AEF1CAC6 , 0x00004002 //P15
-data8 0x92728015172CFE1C , 0x00004003 //P19
-data8 0xBBC3D831D4595971 , 0x00003FF8 //P8
-data8 0x999999999952A5C3 , 0x00003FFB //P2
-data8 0x855576BE6F0975EC , 0x00003FFF //P13
-data8 0xF12420E778077D89 , 0x00003FFA //P11
-data8 0xB6590FF4D23DE003 , 0x00003FF3 //P10
-data8 0xb504f333f9de6484 , 0x00003ffe // sqrt(2)/2
-ASM_SIZE_DIRECTIVE(acos_coeff_2_table)
-
-
-.align 32
-.global acos
-ASM_TYPE_DIRECTIVE(acos,@function)
+LOCAL_OBJECT_START(acos_base_range_table)
+// Ai: Polynomial coefficients for the acos(x), |x| < .625000
+// Bi: Polynomial coefficients for the acos(x), |x| > .625000
+data8 0xBFDAAB56C01AE468 //A29
+data8 0x3FE1C470B76A5B2B //A31
+data8 0xBFDC5FF82A0C4205 //A33
+data8 0x3FC71FD88BFE93F0 //A35
+data8 0xB504F333F9DE6487, 0x00003FFF //B0
+data8 0xAAAAAAAAAAAAFC18, 0x00003FFC //A3
+data8 0x3F9F1C71BC4A7823 //A9
+data8 0x3F96E8BBAAB216B2 //A11
+data8 0x3F91C4CA1F9F8A98 //A13
+data8 0x3F8C9DDCEDEBE7A6 //A15
+data8 0x3F877784442B1516 //A17
+data8 0x3F859C0491802BA2 //A19
+data8 0x9999999998C88B8F, 0x00003FFB //A5
+data8 0x3F6BD7A9A660BF5E //A21
+data8 0x3F9FC1659340419D //A23
+data8 0xB6DB6DB798149BDF, 0x00003FFA //A7
+data8 0xBFB3EF18964D3ED3 //A25
+data8 0x3FCD285315542CF2 //A27
+data8 0xF15BEEEFF7D2966A, 0x00003FFB //B1
+data8 0x3EF0DDA376D10FB3 //B10
+data8 0xBEB83CAFE05EBAC9 //B11
+data8 0x3F65FFB67B513644 //B4
+data8 0x3F5032FBB86A4501 //B5
+data8 0x3F392162276C7CBA //B6
+data8 0x3F2435949FD98BDF //B7
+data8 0xD93923D7FA08341C, 0x00003FF9 //B2
+data8 0x3F802995B6D90BDB //B3
+data8 0x3F10DF86B341A63F //B8
+data8 0xC90FDAA22168C235, 0x00003FFF // Pi/2
+data8 0x3EFA3EBD6B0ECB9D //B9
+data8 0x3EDE18BA080E9098 //B12
+LOCAL_OBJECT_END(acos_base_range_table)
.section .text
-.proc acos
-.align 32
-
-
-acos:
-
-{ .mfi
- alloc r32 = ar.pfs,1,6,4,0
- fma.s1 acos_tx = f8,f8,f0
- addl ASIN_Addr2 = @ltoff(acos_coeff_2_table),gp
-}
-{ .mfi
- mov ASIN_FFFE = 0xFFFE
- fnma.s1 acos_t = f8,f8,f1
- addl ASIN_Addr1 = @ltoff(acos_coeff_1_table),gp
+GLOBAL_LIBM_ENTRY(acos)
+acos_unnormal_back:
+{ .mfi
+ getf.d rXBits = f8 // grab bits of input value
+ // set p12 = 1 if x is a NaN, denormal, or zero
+ fclass.m p12, p0 = f8, 0xcf
+ adds rSign = 1, r0
+}
+{ .mfi
+ addl rTblAddr = @ltoff(acos_base_range_table),gp
+ // 1 - x = 1 - |x| for positive x
+ fms.s1 f1mX = f1, f1, f8
+ addl rHalf = 0xFFFE, r0 // exponent of 1/2
}
;;
-
-
-{ .mfi
- setf.exp acos_1by2 = ASIN_FFFE
- fmerge.s acos_abs_x = f1,f8
- nop.i 999 ;;
-}
-
-
-{ .mmf
- ld8 ASIN_Addr1 = [ASIN_Addr1]
- ld8 ASIN_Addr2 = [ASIN_Addr2]
- fmerge.s acos_sgn_x = f8,f1
-}
-;;
-
-
-{ .mfi
- nop.m 999
- fcmp.lt.s1 p11,p12 = f8, f0
- nop.i 999 ;;
-}
-
-
-{ .mfi
- ldfe acos_coeff_P7 = [ASIN_Addr1],16
- fma.s1 acos_tx2 = acos_tx,acos_tx,f0
- nop.i 999
-}
-{ .mfi
- ldfe acos_coeff_P6 = [ASIN_Addr2],16
- fma.s1 acos_t2 = acos_t,acos_t,f0
- nop.i 999;;
+{ .mfi
+ addl r0625 = 0x3FE4, r0 // high 16 bits of 0.625
+ // set p8 = 1 if x < 0
+ fcmp.lt.s1 p8, p9 = f8, f0
+ shl rSign = rSign, 63 // sign bit
+}
+{ .mfi
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ // 1 + x = 1 - |x| for negative x
+ fma.s1 f1pX = f1, f1, f8
+ adds rOne = 0x3FF, r0
}
-
-
-{ .mmf
- ldfe acos_coeff_P18 = [ASIN_Addr1],16
- ldfe acos_coeff_P17 = [ASIN_Addr2],16
- fclass.m.unc p8,p0 = f8, 0xc3 //@qnan |@snan
-}
;;
-
-
-{ .mmf
- ldfe acos_coeff_P5 = [ASIN_Addr1],16
- ldfe acos_coeff_P4 = [ASIN_Addr2],16
- frsqrta.s1 acos_y0,p0 = acos_t
-}
+{ .mfi
+ andcm rAbsXBits = rXBits, rSign // bits of |x|
+ fmerge.s fSignX = f8, f1 // signum(x)
+ shl r0625 = r0625, 48 // bits of DP representation of 0.625
+}
+{ .mfb
+ setf.exp fHalf = rHalf // load 1/2 to FP reg
+ fma.s1 fXSqr = f8, f8, f0 // x^2
+ // branch on special path if x is a NaN, denormal, or zero
+(p12) br.cond.spnt acos_special
+}
;;
-
-
-{ .mfi
- ldfe acos_coeff_P16 = [ASIN_Addr1],16
- fcmp.gt.s1 p9,p0 = acos_abs_x,f1
- nop.i 999
-}
-{ .mfb
- ldfe acos_coeff_P15 = [ASIN_Addr2],16
-(p8) fma.d f8 = f8,f1,f0
-(p8) br.ret.spnt b0
+{ .mfi
+ adds rPiBy2Ptr = 272, rTblAddr
+ nop.f 0
+ shl rOne = rOne, 52 // bits of 1.0
+}
+{ .mfi
+ adds rTmpPtr1 = 16, rTblAddr
+ nop.f 0
+ // set p6 = 1 if |x| < 0.625
+ cmp.lt p6, p7 = rAbsXBits, r0625
}
;;
-
-
-{ .mmf
- ldfe acos_coeff_P20 = [ASIN_Addr1],16
- ldfe acos_coeff_P19 = [ASIN_Addr2],16
- fclass.m.unc p10,p0 = f8, 0x07 //@zero
-}
+{ .mfi
+ ldfpd fA29, fA31 = [rTblAddr] // A29, fA31
+ // 1 - x = 1 - |x| for positive x
+(p9) fms.s1 fR = f1, f1, f8
+ // point to coefficient of "near 1" polynomial
+(p7) adds rTmpPtr2 = 176, rTblAddr
+}
+{ .mfi
+ ldfpd fA33, fA35 = [rTmpPtr1], 16 // A33, fA35
+ // 1 + x = 1 - |x| for negative x
+(p8) fma.s1 fR = f1, f1, f8
+(p6) adds rTmpPtr2 = 48, rTblAddr
+}
;;
-
-
-{ .mfi
- ldfe acos_coeff_P9 = [ASIN_Addr1],16
- fma.s1 acos_t4 = acos_t2,acos_t2,f0
-(p9) mov GR_Parameter_Tag = 58
-}
-{ .mfi
- ldfe acos_coeff_P8 = [ASIN_Addr2],16
- fma.s1 acos_3by2 = acos_1by2,f1,f1
- nop.i 999;;
+{ .mfi
+ ldfe fB0 = [rTmpPtr1], 16 // B0
+ nop.f 0
+ nop.i 0
}
-
-
-{ .mfi
- ldfe acos_coeff_P2 = [ASIN_Addr2],16
- fma.s1 acos_tx4 = acos_tx2,acos_tx2,f0
- nop.i 999
-}
-{ .mfb
- ldfe acos_coeff_P3 = [ASIN_Addr1],16
- fma.s1 acos_t3 = acos_t,acos_t2,f0
-(p9) br.cond.spnt __libm_error_region
+{ .mib
+ adds rTmpPtr3 = 16, rTmpPtr2
+ // set p10 = 1 if |x| = 1.0
+ cmp.eq p10, p0 = rAbsXBits, rOne
+ // branch on special path for |x| = 1.0
+(p10) br.cond.spnt acos_abs_1
}
;;
-
-
-{ .mfi
- ldfe acos_coeff_P13 = [ASIN_Addr2],16
- fma.s1 acos_H0 = acos_y0,acos_1by2,f0
- nop.i 999
-}
-{ .mfi
- ldfe acos_coeff_P14 = [ASIN_Addr1],16
- fma.s1 acos_S0 = acos_y0,acos_t,f0
- nop.i 999;;
+{ .mfi
+ ldfe fA3 = [rTmpPtr2], 48 // A3 or B1
+ nop.f 0
+ adds rTmpPtr1 = 64, rTmpPtr3
}
-
-
-{ .mfi
- ldfe acos_coeff_P11 = [ASIN_Addr2],16
- fcmp.eq.s1 p6,p0 = acos_abs_x, f1
- nop.i 999
-}
-{ .mfi
- ldfe acos_coeff_P12 = [ASIN_Addr1],16
- fma.s1 acos_tx3 = acos_tx,acos_tx2,f0
- nop.i 999
+{ .mib
+ ldfpd fA9, fA11 = [rTmpPtr3], 16 // A9, A11 or B10, B11
+ // set p11 = 1 if |x| > 1.0
+ cmp.gt p11, p0 = rAbsXBits, rOne
+ // branch on special path for |x| > 1.0
+(p11) br.cond.spnt acos_abs_gt_1
}
;;
-
-
-{ .mfi
- ldfe acos_coeff_P10 = [ASIN_Addr2],16
- fma.s1 acos_1poly_p6 = acos_tx,acos_coeff_P7,acos_coeff_P6
- nop.i 999
-}
-{ .mfi
- ldfe acos_coeff_P1 = [ASIN_Addr1],16
- fma.s1 acos_poly_p6 = acos_t,acos_coeff_P7,acos_coeff_P6
- nop.i 999;;
+{ .mfi
+ ldfpd fA17, fA19 = [rTmpPtr2], 16 // A17, A19 or B6, B7
+ // initial approximation of 1 / sqrt(1 - x)
+ frsqrta.s1 f1mXRcp, p0 = f1mX
+ nop.i 0
}
-
-
-{ .mfi
- ldfe acos_const_sqrt2by2 = [ASIN_Addr2],16
- fma.s1 acos_5by2 = acos_3by2,f1,f1
- nop.i 999
-}
-{ .mfi
- ldfe acos_coeff_P21 = [ASIN_Addr1],16
- fma.s1 acos_11by4 = acos_3by2,acos_3by2,acos_1by2
- nop.i 999;;
+{ .mfi
+ ldfpd fA13, fA15 = [rTmpPtr3] // A13, A15 or B4, B5
+ fma.s1 fXCube = fXSqr, f8, f0 // x^3
+ nop.i 0
}
-
-
-{ .mfi
- ldfe acos_const_piby2 = [ASIN_Addr1],16
- fma.s1 acos_poly_p17 = acos_t,acos_coeff_P18,acos_coeff_P17
- nop.i 999
-}
-{ .mfb
- nop.m 999
- fma.s1 acos_3by4 = acos_3by2,acos_1by2,f0
-(p10) br.cond.spnt L(ACOS_ZERO) // Branch to short path if x=0
+;;
+{ .mfi
+ ldfe fA5 = [rTmpPtr2], 48 // A5 or B2
+ // initial approximation of 1 / sqrt(1 + x)
+ frsqrta.s1 f1pXRcp, p0 = f1pX
+ nop.i 0
+}
+{ .mfi
+ ldfpd fA21, fA23 = [rTmpPtr1], 16 // A21, A23 or B3, B8
+ fma.s1 fXQuadr = fXSqr, fXSqr, f0 // x^4
+ nop.i 0
}
;;
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p15 = acos_t,acos_coeff_P16,acos_coeff_P15
- nop.i 999
-}
-{ .mfb
- nop.m 999
- fnma.s1 acos_d = acos_S0,acos_H0,acos_1by2
-(p6) br.cond.spnt L(ACOS_ABS_ONE) // Branch to short path if |x|=1
+{ .mfi
+ ldfe fA7 = [rTmpPtr1] // A7 or Pi/2
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+ nop.i 0
+}
+{ .mfb
+ ldfpd fA25, fA27 = [rTmpPtr2] // A25, A27 or B9, B12
+ nop.f 0
+(p6) br.cond.spnt acos_base_range;
}
;;
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p19 = acos_t,acos_coeff_P20,acos_coeff_P19
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p4 = acos_t,acos_coeff_P5,acos_coeff_P4
- nop.i 999;;
+{ .mfi
+ nop.m 0
+(p9) fma.s1 fH = fHalf, f1mXRcp, f0 // H0 for x > 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p17 = acos_tx,acos_coeff_P18,acos_coeff_P17
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p8 = acos_t,acos_coeff_P9,acos_coeff_P8
- nop.i 999;;
+{ .mfi
+ nop.m 0
+(p9) fma.s1 fS = f1mX, f1mXRcp, f0 // S0 for x > 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fms.s1 acos_35by8 = acos_5by2,acos_11by4,acos_5by2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_63by8 = acos_5by2,acos_11by4,f1
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fH = fHalf, f1pXRcp, f0 // H0 for x < 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p13 = acos_t,acos_coeff_P14,acos_coeff_P13
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_18by4 = acos_3by2,acos_5by2,acos_3by4
- nop.i 999;;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fS = f1pX, f1pXRcp, f0 // S0 for x < 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_l1 = acos_5by2,acos_d,acos_3by2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_d2 = acos_d,acos_d,f0
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fRQuadr = fRSqr, fRSqr, f0 // R^4
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p15 = acos_t2,acos_poly_p17,acos_poly_p15
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_T0 = acos_d,acos_S0,f0
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB11 = fB11, fR, fB10
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p19 = acos_t2,acos_coeff_P21,acos_poly_p19
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p4 = acos_t2,acos_poly_p6,acos_poly_p4
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB1 = fB1, fR, fB0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_d1 = acos_35by8,acos_d,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_231by16 = acos_3by2,acos_35by8,acos_63by8
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB5 = fB5, fR, fB4
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p2 = acos_t,acos_coeff_P3,acos_coeff_P2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p8 = acos_t2,acos_coeff_P10,acos_poly_p8
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fR, fB6
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p11 = acos_t,acos_coeff_P12,acos_coeff_P11
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_e0 = acos_d2,acos_l1,acos_d
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB3 = fB3, fR, fB2
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p15 = acos_tx,acos_coeff_P16,acos_coeff_P15
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p0 = acos_t,acos_coeff_P1,f1
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d0 = 1/2 - H0*S0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p19 = acos_tx,acos_coeff_P20,acos_coeff_P19
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p4 = acos_tx,acos_coeff_P5,acos_coeff_P4
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fR8 = fRQuadr, fRQuadr, f0 // R^8
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p8 = acos_tx,acos_coeff_P9,acos_coeff_P8
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_l2 = acos_231by16,acos_d,acos_63by8
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB9 = fB9, fR, fB8
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_d3 = acos_d2,acos_d,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_T3 = acos_d2,acos_T0,f0
- nop.i 999;;
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fB12 = fB12, fRSqr, fB11
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_429by16 = acos_18by4,acos_11by4,acos_231by16
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_S1 = acos_e0,acos_S0,acos_S0
- nop.i 999;;
+{.mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fRSqr, fB5
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p4 = acos_t4,acos_poly_p8,acos_poly_p4
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p15 = acos_t4,acos_poly_p19,acos_poly_p15
- nop.i 999;;
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fB3 = fB3, fRSqr, fB1
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p0 = acos_t2,acos_poly_p2,acos_poly_p0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p11 = acos_t2,acos_poly_p13,acos_poly_p11
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fH = fH, fD, fH // H1 = H0 + H0*d0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_t8 = acos_t4,acos_t4,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_e1 = acos_d2,acos_l2,acos_d1
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS, fD, fS // S1 = S0 + S0*d0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p4 = acos_tx2,acos_1poly_p6,acos_1poly_p4
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p15 = acos_tx2,acos_1poly_p17,acos_1poly_p15
- nop.i 999;;
+;;
+{.mfi
+ nop.m 0
+(p9) fma.s1 fCpi = f1, f0, f0 // Cpi = 0 if x > 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p8 = acos_tx2,acos_coeff_P10,acos_1poly_p8
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p19 = acos_tx2,acos_coeff_P21,acos_1poly_p19
- nop.i 999;;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fCpi = fPiBy2, f1, fPiBy2 // Cpi = Pi if x < 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p2 = acos_tx,acos_coeff_P3,acos_coeff_P2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p13 = acos_tx,acos_coeff_P14,acos_coeff_P13
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB12 = fB12, fRSqr, fB9
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p0 = acos_tx,acos_coeff_P1,f1
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p11 = acos_tx,acos_coeff_P12,acos_coeff_P11
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fRQuadr, fB3
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_l3 = acos_429by16,acos_d,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_z = acos_e1,acos_T3,acos_S1
- nop.i 999;;
+;;
+{.mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d1 = 1/2 - H1*S1
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p11 = acos_t4,acos_poly_p15,acos_poly_p11
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_T6 = acos_T3,acos_d3,f0
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fnma.s1 fSignedS = fSignX, fS, f0 // -signum(x)*S1
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_t11 = acos_t8,acos_t3,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_poly_p0 = acos_t4,acos_poly_p4,acos_poly_p0
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fCloseTo1Pol = fB12, fR8, fB7
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p4 = acos_tx4,acos_1poly_p8,acos_1poly_p4
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p15 = acos_tx4,acos_1poly_p19,acos_1poly_p15
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fH = fH, fD, fH // H2 = H1 + H1*d1
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p0 = acos_tx2,acos_1poly_p2,acos_1poly_p0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p11 = acos_tx2,acos_1poly_p13,acos_1poly_p11
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS, fD, fS // S2 = S1 + S1*d1
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
-// fcmp.le.s1 acos_pred_LEsqrt2by2,acos_pred_GTsqrt2by2 = acos_abs_x,acos_const_sqrt2by2
- fcmp.le.s1 p7,p8 = acos_abs_x,acos_const_sqrt2by2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_tx8 = acos_tx4,acos_tx4,f0
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ // -signum(x)* S2 = -signum(x)*(S1 + S1*d1)
+ fma.s1 fSignedS = fSignedS, fD, fSignedS
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_z = acos_l3,acos_T6,acos_z
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 acos_series_t = acos_t11,acos_poly_p11,acos_poly_p0
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p11) fma.s1 acos_const_add = acos_const_piby2, f1, acos_const_piby2
- nop.i 999
+;;
+{.mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d2 = 1/2 - H2*S2
+ nop.i 0
}
;;
-
{ .mfi
- nop.m 999
-(p12) fma.s1 acos_const_add = f1,f0,f0
- nop.i 999
+ nop.m 0
+ // Cpi + signum(x)*PolB*S2
+ fnma.s1 fCpi = fSignedS, fCloseTo1Pol, fCpi
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // signum(x)*PolB * S2
+ fnma.s1 fCloseTo1Pol = fSignedS, fCloseTo1Pol, f0
+ nop.i 0
}
;;
-
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p0 = acos_tx4,acos_1poly_p4,acos_1poly_p0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 acos_1poly_p11 = acos_tx4,acos_1poly_p15,acos_1poly_p11
- nop.i 999;;
+{ .mfb
+ nop.m 0
+ // final result for 0.625 <= |x| < 1
+ fma.d.s0 f8 = fCloseTo1Pol, fD, fCpi
+ // exit here for 0.625 <= |x| < 1
+ br.ret.sptk b0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 acos_tx11 = acos_tx8,acos_tx3,f0
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
-//(acos_pred_GTsqrt2by2) fnma.s1 answer2 = acos_z,acos_series_t,acos_const_piby2
-(p8) fnma.s1 answer2 = acos_z,acos_series_t,f0
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 acos_series_tx = acos_tx11,acos_1poly_p11,acos_1poly_p0
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
-//(acos_pred_GTsqrt2by2) fnma.d f8 = acos_sgn_x,answer2,acos_const_piby2
-(p8) fnma.d f8 = acos_sgn_x,answer2,acos_const_add
- nop.i 999;;
-}
-
-{ .mfb
- nop.m 999
-//(acos_pred_LEsqrt2by2) fnma.d f8 = f8,acos_series_tx,acos_const_piby2
-(p7) fnma.d f8 = f8,acos_series_tx,acos_const_piby2
- br.ret.sptk b0 ;;
-}
+;;
-L(ACOS_ZERO):
-// Here if x=0
-{ .mfb
- nop.m 999
- fma.d f8 = acos_const_piby2,f1,f0
- br.ret.sptk b0 ;;
-}
+// here if |x| < 0.625
+.align 32
+acos_base_range:
+{ .mfi
+ ldfe fCpi = [rPiBy2Ptr] // Pi/2
+ fma.s1 fA33 = fA33, fXSqr, fA31
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA15 = fA15, fXSqr, fA13
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA29 = fA29, fXSqr, fA27
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA25 = fA25, fXSqr, fA23
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA21 = fA21, fXSqr, fA19
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA9 = fA9, fXSqr, fA7
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA5 = fA5, fXSqr, fA3
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fXQuadr, fA33
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fXQuadr, fA15
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fX8 = fXQuadr, fXQuadr, f0 // x^8
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA25 = fA25, fXQuadr, fA21
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA9 = fA9, fXQuadr, fA5
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fms.s1 fCpi = fCpi, f1, f8 // Pi/2 - x
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fXQuadr, fA29
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fXSqr, fA11
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fX16 = fX8, fX8, f0 // x^16
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fX8, fA25
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fX8, fA9
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fBaseP = fA35, fX16, fA17
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for |x| < 0.625
+ fnma.d.s0 f8 = fBaseP, fXCube, fCpi
+ // exit here for |x| < 0.625 path
+ br.ret.sptk b0
+}
+;;
+// here if |x| = 1
+// acos(1) = 0
+// acos(-1) = Pi
+.align 32
+acos_abs_1:
+{ .mfi
+ ldfe fPiBy2 = [rPiBy2Ptr] // Pi/2
+ nop.f 0
+ nop.i 0
+}
+;;
+.pred.rel "mutex", p8, p9
+{ .mfi
+ nop.m 0
+ // result for x = 1.0
+(p9) fma.d.s0 f8 = f1, f0, f0 // 0.0
+ nop.i 0
+}
+{.mfb
+ nop.m 0
+ // result for x = -1.0
+(p8) fma.d.s0 f8 = fPiBy2, f1, fPiBy2 // Pi
+ // exit here for |x| = 1.0
+ br.ret.sptk b0
+}
+;;
-L(ACOS_ABS_ONE):
-.pred.rel "mutex",p11,p12
-// Here if |x|=1
-{ .mfi
- nop.m 999
-(p11) fma.d f8 = acos_const_piby2,f1,acos_const_piby2 // acos(-1)=pi
- nop.i 999
-}
-{ .mfb
- nop.m 999
-(p12) fma.d f8 = f1,f0,f0 // acos(1)=0
- br.ret.sptk b0 ;;
-}
+// here if x is a NaN, denormal, or zero
+.align 32
+acos_special:
+{ .mfi
+ // point to Pi/2
+ adds rPiBy2Ptr = 272, rTblAddr
+ // set p12 = 1 if x is a NaN
+ fclass.m p12, p0 = f8, 0xc3
+ nop.i 0
+}
+{ .mlx
+ nop.m 0
+ // smallest positive DP normalized number
+ movl rDenoBound = 0x0010000000000000
+}
+;;
+{ .mfi
+ ldfe fPiBy2 = [rPiBy2Ptr] // Pi/2
+ // set p13 = 1 if x = 0.0
+ fclass.m p13, p0 = f8, 0x07
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNormX = f8
+ nop.i 0
+}
+;;
+{ .mfb
+ // load smallest normal to FP reg
+ setf.d fDenoBound = rDenoBound
+ // answer if x is a NaN
+(p12) fma.d.s0 f8 = f8,f1,f0
+ // exit here if x is a NaN
+(p12) br.ret.spnt b0
+}
+;;
+{ .mfi
+ nop.m 0
+ // absolute value of normalized x
+ fmerge.s fNormX = f1, fNormX
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for x = 0
+(p13) fma.d.s0 f8 = fPiBy2, f1, f8
+ // exit here if x = 0.0
+(p13) br.ret.spnt b0
+}
+;;
+// if we are still here then x is denormal or unnormal
+{ .mfi
+ nop.m 0
+ // set p14 = 1 if |normalized x| is greater than or
+ // equal to the smallest normalized DP value.
+ // So, if p14 is set to 1 it means that we deal with
+ // an unnormal rather than with a "true" denormal
+ fcmp.ge.s1 p14, p0 = fNormX, fDenoBound
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+(p14) fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag if x unnormal
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ // normalize unnormal input
+(p14) fnorm.s1 f8 = f8
+ // return to the main path
+(p14) br.cond.sptk acos_unnormal_back
+}
+;;
+// if we are still here it means that the input is a "true" denormal
+{ .mfb
+ nop.m 0
+ // final result if x is denormal
+ fms.d.s0 f8 = fPiBy2, f1, f8 // Pi/2 - x
+ // exit here if x is denormal
+ br.ret.sptk b0
+}
+;;
+// here if |x| > 1.0
+// error handler should be called
+.align 32
+acos_abs_gt_1:
+{ .mfi
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ fmerge.s FR_X = f8,f8
+ nop.i 0
+}
+{ .mfb
+ mov GR_Parameter_TAG = 58 // error code
+ frcpa.s0 FR_RESULT, p0 = f0,f0
+ // call error handler routine
+ br.cond.sptk __libm_error_region
+}
+;;
+GLOBAL_LIBM_END(acos)
-.endp acos
-ASM_SIZE_DIRECTIVE(acos)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
- nop.f 999
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
@@ -879,28 +839,29 @@ __libm_error_region:
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
- stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
-
.body
- frcpa.s0 f9,p0 = f0,f0
-;;
-
{ .mib
- stfd [GR_Parameter_X] = f8 // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = f9,-16 // Store Parameter 3 on stack
- adds r32 = 48,sp
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- ldfd f8 = [r32] // Get return result off stack
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
@@ -909,11 +870,8 @@ __libm_error_region:
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
-
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.type __libm_error_support,@function
-.global __libm_error_support
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S
index a3425414cf..417f5b7ddc 100644
--- a/sysdeps/ia64/fpu/e_acosf.S
+++ b/sysdeps/ia64/fpu/e_acosf.S
@@ -1,10 +1,10 @@
.file "acosf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,19 +35,23 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
-// 2/02/00 Initial revision
-// 6/28/00 Improved speed
-// 6/31/00 Changed register allocation because of some duplicate macros
+// 02/02/00 Initial version
+// 06/28/00 Improved speed
+// 06/31/00 Changed register allocation because of some duplicate macros
// moved nan exit bundle up to gain a cycle.
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 8/17/00 Changed predicate register macro-usage to direct predicate
+// 08/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
+// 03/13/01 Corrected sign of imm1 value in dep instruction.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 04/17/03 Moved mutex after label
// Description
@@ -115,7 +119,6 @@
// answer2 = sign(x) z P(t) if x>0
// = sign(x) z P(t) + pi if x<0
-#include "libm_support.h"
//
// Assembly macros
@@ -222,42 +225,30 @@ acosf_poly_p1a = f90
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-acosf_coeff_1_table:
-ASM_TYPE_DIRECTIVE(acosf_coeff_1_table,@object)
+LOCAL_OBJECT_START(acosf_coeff_1_table)
data8 0x3FC5555607DCF816 // P1
data8 0x3F9CF81AD9BAB2C6 // P4
data8 0x3FC59E0975074DF3 // P7
data8 0xBFA6F4CC2780AA1D // P6
data8 0x3FC2DD45292E93CB // P9
data8 0x3fe6a09e667f3bcd // sqrt(2)/2
-ASM_SIZE_DIRECTIVE(acosf_coeff_1_table)
+LOCAL_OBJECT_END(acosf_coeff_1_table)
-acosf_coeff_2_table:
-ASM_TYPE_DIRECTIVE(acosf_coeff_2_table,@object)
+LOCAL_OBJECT_START(acosf_coeff_2_table)
data8 0x3FA6F108E31EFBA6 // P3
data8 0xBFCA31BF175D82A0 // P8
data8 0x3FA30C0337F6418B // P5
data8 0x3FB332C9266CB1F9 // P2
data8 0x3ff921fb54442d18 // pi_by_2
-ASM_SIZE_DIRECTIVE(acosf_coeff_2_table)
+LOCAL_OBJECT_END(acosf_coeff_2_table)
-.align 32
-.global acosf
-ASM_TYPE_DIRECTIVE(acosf,@function)
.section .text
-.proc acosf
-.align 32
-
-acosf:
+GLOBAL_LIBM_ENTRY(acosf)
// Load the addresses of the two tables.
// Then, load the coefficients and other constants.
@@ -342,7 +333,7 @@ acosf:
}
{ .mfb
nop.m 999
-(p8) fma.s f8 = f8,f1,f0
+(p8) fma.s.s0 f8 = f8,f1,f0
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@@ -350,7 +341,7 @@ acosf:
{ .mfb
nop.m 999
fcmp.eq.s1 p6,p0 = acosf_abs_x,f1
-(p10) br.cond.spnt L(ACOSF_ZERO) ;; // Branch if x=0
+(p10) br.cond.spnt ACOSF_ZERO ;; // Branch if x=0
}
{ .mfi
@@ -367,7 +358,7 @@ acosf:
{ .mfb
nop.m 999
fma.s1 acosf_t4 = acosf_t2,acosf_t2,f0
-(p6) br.cond.spnt L(ACOSF_ABS_ONE) ;; // Branch if |x|=1
+(p6) br.cond.spnt ACOSF_ABS_ONE ;; // Branch if |x|=1
}
{ .mfi
@@ -575,42 +566,40 @@ acosf:
.pred.rel "mutex",p8,p7 //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2
{ .mfi
nop.m 999
-(p8) fma.s f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2
+(p8) fma.s.s0 f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2
nop.i 999
}
{ .mfb
nop.m 999
-(p7) fms.s f8 = acosf_const_piby2,f1,acosf_sinf1
+(p7) fms.s.s0 f8 = acosf_const_piby2,f1,acosf_sinf1
br.ret.sptk b0 ;;
}
-L(ACOSF_ZERO):
+ACOSF_ZERO:
// Here if x=0
{ .mfb
nop.m 999
- fma.s f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2
+ fma.s.s0 f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2
br.ret.sptk b0 ;;
}
-L(ACOSF_ABS_ONE):
+ACOSF_ABS_ONE:
.pred.rel "mutex",p11,p12
// Here if |x|=1
{ .mfi
nop.m 999
-(p11) fma.s f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
+(p11) fma.s.s0 f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
nop.i 999
}
{ .mfb
nop.m 999
-(p12) fma.s f8 = f1,f0,f0 // acosf(1)=0
+(p12) fma.s.s0 f8 = f1,f0,f0 // acosf(1)=0
br.ret.sptk b0 ;;
}
-.endp acosf
-ASM_SIZE_DIRECTIVE(acosf)
-
+GLOBAL_LIBM_END(acosf)
// Stack operations when calling error support.
// (1) (2)
@@ -642,8 +631,7 @@ ASM_SIZE_DIRECTIVE(acosf)
// restore ar.pfs
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -699,8 +687,7 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S
index ab1bbf41a7..daa75b18a5 100644
--- a/sysdeps/ia64/fpu/e_acosl.S
+++ b/sysdeps/ia64/fpu/e_acosl.S
@@ -1,10 +1,10 @@
.file "acosl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2001 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2001 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1027 +20,2469 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 2/07/00 Modified calculation of acos_corr to correct acosl
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
-// set [the previously overwritten] GR_Parameter_RESULT.
-// 12/20/00 Set denormal flag properly.
+// 08/28/01 New version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
-// double-extended = acosl (double-extended)
-// input floating point f8
-// output floating point f8
+// long double acosl(long double)
//
-// Registers used
+// Overview of operation
//==============================================================
+// Background
//
-// predicate registers used:
-// p6 -> p12
+// Implementation
//
-// floating-point registers used:
-// f8 has input, then output
-// f8 -> f15, f32 ->f99
+// For |s| in [2^{-4}, sqrt(2)/2]:
+// Let t= 2^k*1.b1 b2..b6 1, where s= 2^k*1.b1 b2.. b52
+// acos(s)= pi/2-asin(t)-asin(r), where r= s*sqrt(1-t^2)-t*sqrt(1-s^2), i.e.
+// r= (s-t)*sqrt(1-t^2)-t*sqrt(1-t^2)*(sqrt((1-s^2)/(1-t^2))-1)
+// asin(r)-r evaluated as 9-degree polynomial (c3*r^3+c5*r^5+c7*r^7+c9*r^9)
+// The 64-bit significands of sqrt(1-t^2), 1/(1-t^2) are read from the table,
+// along with the high and low parts of asin(t) (stored as two double precision
+// values)
//
-// general registers used:
-// r32 -> r48
+// |s| in (sqrt(2)/2, sqrt(255/256)):
+// Let t= 2^k*1.b1 b2..b6 1, where (1-s^2)*frsqrta(1-s^2)= 2^k*1.b1 b2..b6..
+// acos(|s|)= asin(t)-asin(r)
+// acos(-|s|)=pi-asin(t)+asin(r), r= s*t-sqrt(1-s^2)*sqrt(1-t^2)
+// To minimize accumulated errors, r is computed as
+// r= (t*s)_s-t^2*y*z+z*y*(t^2-1+s^2)_s+z*y*(1-s^2)_s*x+z'*y*(1-s^2)*PS29+
+// +(t*s-(t*s)_s)+z*y*((t^2-1-(t^2-1+s^2)_s)+s^2)+z*y*(1-s^2-(1-s^2)_s)+
+// +ez*z'*y*(1-s^2)*(1-x),
+// where y= frsqrta(1-s^2), z= (sqrt(1-t^2))_s (rounded to 24 significant bits)
+// z'= sqrt(1-t^2), x= ((1-s^2)*y^2-1)/2
+//
+// |s|<2^{-4}: evaluate asin(s) as 17-degree polynomial, return pi/2-asin(s)
+// (or simply return pi/2-s, if |s|<2^{-64})
+//
+// |s| in [sqrt(255/256), 1): acos(|s|)= asin(sqrt(1-s^2))
+// acos(-|s|)= pi-asin(sqrt(1-s^2))
+// use 17-degree polynomial for asin(sqrt(1-s^2)),
+// 9-degree polynomial to evaluate sqrt(1-s^2)
+// High order term is (pi)_high-(y*(1-s^2))_high, for s<0,
+// or y*(1-s^2)_s, for s>0
//
-// Overview of operation
-//==============================================================
-// There are three paths
-// 1. |x| < 2^-25 ACOS_TINY
-// 2. 2^-25 <= |x| < 1/4 ACOS_POLY
-// 3. 1/4 <= |x| < 1 ACOS_ATAN
-#include "libm_support.h"
-// Assembly macros
+
+// Registers used
//==============================================================
+// f6-f15, f32-f36
+// r2-r3, r15, r20-r29
+// p6, p7, p8, p12
+//
-// f8 is input, but acos_V must be put in f8
-// when __libm_atan2_reg is called, f8 must get V
-// f9 gets U when __libm_atan2_reg is called
+ GR_SAVE_B0= r33
+ GR_SAVE_PFS= r34
+ GR_SAVE_GP= r35 // This reg. can safely be used
+ GR_SAVE_SP= r36
-// __libm_atan2_reg returns
-// f8 = Z_hi
-// f10 = Z_lo
-// f11 = s_lo
+ GR_Parameter_X= r37
+ GR_Parameter_Y= r38
+ GR_Parameter_RESULT= r39
+ GR_Parameter_TAG= r40
-acos_Z_hi = f8
-acos_Z_lo = f10
-acos_S_lo = f11
+ FR_X= f10
+ FR_Y= f1
+ FR_RESULT= f8
-// When we call __libm_atan2_reg, we must save
-// the following:
-acos_corr = f12
-acos_X = f13
-acos_pi_hi = f14
-acos_pi_lo = f15
-// The rest of the assembly macros
-
-acos_P79 = f32
-acos_P59 = f33
-acos_P39 = f34
-acos_P19 = f35
+RODATA
-acos_P810 = f36
-acos_P610 = f37
-acos_P410 = f38
-acos_P210 = f39
+.align 16
-acos_A1 = f41
-acos_A2 = f42
-acos_A3 = f43
-acos_A4 = f44
-acos_A5 = f45
-acos_A6 = f46
-acos_A7 = f47
-acos_A8 = f48
-acos_A9 = f49
-acos_A10 = f50
+LOCAL_OBJECT_START(T_table)
+
+// stores 64-bit significand of 1/(1-t^2), 64-bit significand of sqrt(1-t^2),
+// asin(t)_high (double precision), asin(t)_low (double precision)
+
+data8 0x80828692b71c4391, 0xff7ddcec2d87e879
+data8 0x3fb022bc0ae531a0, 0x3c9f599c7bb42af6
+data8 0x80869f0163d0b082, 0xff79cad2247914d3
+data8 0x3fb062dd26afc320, 0x3ca4eff21bd49c5c
+data8 0x808ac7d5a8690705, 0xff75a89ed6b626b9
+data8 0x3fb0a2ff4a1821e0, 0x3cb7e33b58f164cc
+data8 0x808f0112ad8ad2e0, 0xff7176517c2cc0cb
+data8 0x3fb0e32279319d80, 0x3caee31546582c43
+data8 0x80934abba8a1da0a, 0xff6d33e949b1ed31
+data8 0x3fb12346b8101da0, 0x3cb8bfe463d087cd
+data8 0x8097a4d3dbe63d8f, 0xff68e16571015c63
+data8 0x3fb1636c0ac824e0, 0x3c8870a7c5a3556f
+data8 0x809c0f5e9662b3dd, 0xff647ec520bca0f0
+data8 0x3fb1a392756ed280, 0x3c964f1a927461ae
+data8 0x80a08a5f33fadc66, 0xff600c07846a6830
+data8 0x3fb1e3b9fc19e580, 0x3c69eb3576d56332
+data8 0x80a515d91d71acd4, 0xff5b892bc475affa
+data8 0x3fb223e2a2dfbe80, 0x3c6a4e19fd972fb6
+data8 0x80a9b1cfc86ff7cd, 0xff56f631062cf93d
+data8 0x3fb2640c6dd76260, 0x3c62041160e0849e
+data8 0x80ae5e46b78b0d68, 0xff5253166bc17794
+data8 0x3fb2a43761187c80, 0x3cac61651af678c0
+data8 0x80b31b417a4b756b, 0xff4d9fdb14463dc8
+data8 0x3fb2e46380bb6160, 0x3cb06ef23eeba7a1
+data8 0x80b7e8c3ad33c369, 0xff48dc7e1baf6738
+data8 0x3fb32490d0d910c0, 0x3caa05f480b300d5
+data8 0x80bcc6d0f9c784d6, 0xff4408fe9ad13e37
+data8 0x3fb364bf558b3820, 0x3cb01e7e403aaab9
+data8 0x80c1b56d1692492d, 0xff3f255ba75f5f4e
+data8 0x3fb3a4ef12ec3540, 0x3cb4fe8fcdf5f5f1
+data8 0x80c6b49bc72ec446, 0xff3a319453ebd961
+data8 0x3fb3e5200d171880, 0x3caf2dc089b2b7e2
+data8 0x80cbc460dc4e0ae8, 0xff352da7afe64ac6
+data8 0x3fb425524827a720, 0x3cb75a855e7c6053
+data8 0x80d0e4c033bee9c4, 0xff301994c79afb32
+data8 0x3fb46585c83a5e00, 0x3cb3264981c019ab
+data8 0x80d615bdb87556db, 0xff2af55aa431f291
+data8 0x3fb4a5ba916c73c0, 0x3c994251d94427b5
+data8 0x80db575d6291fd8a, 0xff25c0f84bae0cb9
+data8 0x3fb4e5f0a7dbdb20, 0x3cbee2fcc4c786cb
+data8 0x80e0a9a33769e535, 0xff207c6cc0ec09fd
+data8 0x3fb526280fa74620, 0x3c940656e5549b91
+data8 0x80e60c93498e32cd, 0xff1b27b703a19c98
+data8 0x3fb56660ccee2740, 0x3ca7082374d7b2cd
+data8 0x80eb8031b8d4052d, 0xff15c2d6105c72f8
+data8 0x3fb5a69ae3d0b520, 0x3c7c4d46e09ac68a
+data8 0x80f10482b25c6c8a, 0xff104dc8e0813ed4
+data8 0x3fb5e6d6586fec20, 0x3c9aa84ffd9b4958
+data8 0x80f6998a709c7cfb, 0xff0ac88e6a4ab926
+data8 0x3fb627132eed9140, 0x3cbced2cbbbe7d16
+data8 0x80fc3f4d3b657c44, 0xff053325a0c8a2ec
+data8 0x3fb667516b6c34c0, 0x3c6489c5fc68595a
+data8 0x8101f5cf67ed2af8, 0xfeff8d8d73dec2bb
+data8 0x3fb6a791120f33a0, 0x3cbe12acf159dfad
+data8 0x8107bd1558d6291f, 0xfef9d7c4d043df29
+data8 0x3fb6e7d226fabba0, 0x3ca386d099cd0dc7
+data8 0x810d95237e38766a, 0xfef411ca9f80b5f7
+data8 0x3fb72814ae53cc20, 0x3cb9f35731e71dd6
+data8 0x81137dfe55aa0e29, 0xfeee3b9dc7eef009
+data8 0x3fb76858ac403a00, 0x3c74df3dd959141a
+data8 0x811977aa6a479f0f, 0xfee8553d2cb8122c
+data8 0x3fb7a89e24e6b0e0, 0x3ca6034406ee42bc
+data8 0x811f822c54bd5ef8, 0xfee25ea7add46a91
+data8 0x3fb7e8e51c6eb6a0, 0x3cb82f8f78e68ed7
+data8 0x81259d88bb4ffac1, 0xfedc57dc2809fb1d
+data8 0x3fb8292d9700ad60, 0x3cbebb73c0e653f9
+data8 0x812bc9c451e5a257, 0xfed640d974eb6068
+data8 0x3fb8697798c5d620, 0x3ca2feee76a9701b
+data8 0x813206e3da0f3124, 0xfed0199e6ad6b585
+data8 0x3fb8a9c325e852e0, 0x3cb9e88f2f4d0efe
+data8 0x813854ec231172f9, 0xfec9e229dcf4747d
+data8 0x3fb8ea1042932a00, 0x3ca5ff40d81f66fd
+data8 0x813eb3e209ee858f, 0xfec39a7a9b36538b
+data8 0x3fb92a5ef2f247c0, 0x3cb5e3bece4d6b07
+data8 0x814523ca796f56ce, 0xfebd428f72561efe
+data8 0x3fb96aaf3b3281a0, 0x3cb7b9e499436d7c
+data8 0x814ba4aa6a2d3ff9, 0xfeb6da672bd48fe4
+data8 0x3fb9ab011f819860, 0x3cb9168143cc1a7f
+data8 0x81523686e29bbdd7, 0xfeb062008df81f50
+data8 0x3fb9eb54a40e3ac0, 0x3cb6e544197eb1e1
+data8 0x8158d964f7124614, 0xfea9d95a5bcbd65a
+data8 0x3fba2ba9cd080800, 0x3ca9a717be8f7446
+data8 0x815f8d49c9d639e4, 0xfea34073551e1ac8
+data8 0x3fba6c009e9f9260, 0x3c741e989a60938a
+data8 0x8166523a8b24f626, 0xfe9c974a367f785c
+data8 0x3fbaac591d0661a0, 0x3cb2c1290107e57d
+data8 0x816d283c793e0114, 0xfe95ddddb94166cb
+data8 0x3fbaecb34c6ef600, 0x3c9c7d5fbaec405d
+data8 0x81740f54e06d55bd, 0xfe8f142c93750c50
+data8 0x3fbb2d0f310cca00, 0x3cbc09479a9cbcfb
+data8 0x817b07891b15cd5e, 0xfe883a3577e9fceb
+data8 0x3fbb6d6ccf1455e0, 0x3cb9450bff4ee307
+data8 0x818210de91bba6c8, 0xfe814ff7162cf62f
+data8 0x3fbbadcc2abb1180, 0x3c9227fda12a8d24
+data8 0x81892b5abb0f2bf9, 0xfe7a55701a8697b1
+data8 0x3fbbee2d48377700, 0x3cb6fad72acfe356
+data8 0x819057031bf7760e, 0xfe734a9f2dfa1810
+data8 0x3fbc2e902bc10600, 0x3cb4465b588d16ad
+data8 0x819793dd479d4fbe, 0xfe6c2f82f643f68b
+data8 0x3fbc6ef4d9904580, 0x3c8b9ac54823960d
+data8 0x819ee1eedf76367a, 0xfe65041a15d8a92c
+data8 0x3fbcaf5b55dec6a0, 0x3ca2b8d28a954db2
+data8 0x81a6413d934f7a66, 0xfe5dc8632be3477f
+data8 0x3fbcefc3a4e727a0, 0x3c9380da83713ab4
+data8 0x81adb1cf21597d4b, 0xfe567c5cd44431d5
+data8 0x3fbd302dcae51600, 0x3ca995b83421756a
+data8 0x81b533a9563310b8, 0xfe4f2005a78fb50f
+data8 0x3fbd7099cc155180, 0x3caefa2f7a817d5f
+data8 0x81bcc6d20cf4f373, 0xfe47b35c3b0caaeb
+data8 0x3fbdb107acb5ae80, 0x3cb455fc372dd026
+data8 0x81c46b4f2f3d6e68, 0xfe40365f20b316d6
+data8 0x3fbdf177710518c0, 0x3cbee3dcc5b01434
+data8 0x81cc2126b53c1144, 0xfe38a90ce72abf36
+data8 0x3fbe31e91d439620, 0x3cb3e131c950aebd
+data8 0x81d3e85ea5bd8ee2, 0xfe310b6419c9c33a
+data8 0x3fbe725cb5b24900, 0x3c01d3fac6029027
+data8 0x81dbc0fd1637b9c1, 0xfe295d6340932d15
+data8 0x3fbeb2d23e937300, 0x3c6304cc44aeedd1
+data8 0x81e3ab082ad5a0a4, 0xfe219f08e03580b3
+data8 0x3fbef349bc2a77e0, 0x3cac1d2d6abe9c72
+data8 0x81eba6861683cb97, 0xfe19d0537a0946e2
+data8 0x3fbf33c332bbe020, 0x3ca0909dba4e96ca
+data8 0x81f3b37d1afc9979, 0xfe11f1418c0f94e2
+data8 0x3fbf743ea68d5b60, 0x3c937fc12a2a779a
+data8 0x81fbd1f388d4be45, 0xfe0a01d190f09063
+data8 0x3fbfb4bc1be5c340, 0x3cbf51a504b55813
+data8 0x820401efbf87e248, 0xfe020201fff9efea
+data8 0x3fbff53b970d1e80, 0x3ca625444b260078
+data8 0x82106ad2ffdca049, 0xfdf5e3940a49135e
+data8 0x3fc02aff52065460, 0x3c9125d113e22a57
+data8 0x8221343d6ea1d3e2, 0xfde581a45429b0a0
+data8 0x3fc06b84f8e03220, 0x3caccf362295894b
+data8 0x82324434adbf99c2, 0xfdd4de1a001fb775
+data8 0x3fc0ac0ed1fe7240, 0x3cc22f676096b0af
+data8 0x82439aee8d0c7747, 0xfdc3f8e8269d1f03
+data8 0x3fc0ec9cee9e4820, 0x3cca147e2886a628
+data8 0x825538a1d0fcb2f0, 0xfdb2d201a9b1ba66
+data8 0x3fc12d2f6006f0a0, 0x3cc72b36633bc2d4
+data8 0x82671d86345c5cee, 0xfda1695934d723e7
+data8 0x3fc16dc63789de60, 0x3cb11f9c47c7b83f
+data8 0x827949d46a121770, 0xfd8fbee13cbbb823
+data8 0x3fc1ae618682e620, 0x3cce1b59020cef8e
+data8 0x828bbdc61eeab9ba, 0xfd7dd28bff0c9f34
+data8 0x3fc1ef015e586c40, 0x3cafec043e0225ee
+data8 0x829e7995fb6de9e1, 0xfd6ba44b823ee1ca
+data8 0x3fc22fa5d07b90c0, 0x3cba905409caf8e3
+data8 0x82b17d7fa5bbc982, 0xfd5934119557883a
+data8 0x3fc2704eee685da0, 0x3cb5ef21838a823e
+data8 0x82c4c9bfc373d276, 0xfd4681cfcfb2c161
+data8 0x3fc2b0fcc9a5f3e0, 0x3ccc7952c5e0e312
+data8 0x82d85e93fba50136, 0xfd338d7790ca0f41
+data8 0x3fc2f1af73c6ba00, 0x3cbecf5f977d1ca9
+data8 0x82ec3c3af8c76b32, 0xfd2056f9fff97727
+data8 0x3fc33266fe6889a0, 0x3c9d329c022ebdb5
+data8 0x830062f46abf6022, 0xfd0cde480c43b327
+data8 0x3fc373237b34de60, 0x3cc95806d4928adb
+data8 0x8314d30108ea35f0, 0xfcf923526c1562b2
+data8 0x3fc3b3e4fbe10520, 0x3cbc299fe7223d54
+data8 0x83298ca29434df97, 0xfce526099d0737ed
+data8 0x3fc3f4ab922e4a60, 0x3cb59d8bb8fdbccc
+data8 0x833e901bd93c7009, 0xfcd0e65de39f1f7c
+data8 0x3fc435774fea2a60, 0x3c9ec18b43340914
+data8 0x8353ddb0b278aad8, 0xfcbc643f4b106055
+data8 0x3fc4764846ee80a0, 0x3cb90402efd87ed6
+data8 0x836975a60a70c52e, 0xfca79f9da4fab13a
+data8 0x3fc4b71e8921b860, 0xbc58f23449ed6365
+data8 0x837f5841ddfa7a46, 0xfc92986889284148
+data8 0x3fc4f7fa2876fca0, 0xbc6294812bf43acd
+data8 0x839585cb3e839773, 0xfc7d4e8f554ab12f
+data8 0x3fc538db36ee6960, 0x3cb910b773d4c578
+data8 0x83abfe8a5466246f, 0xfc67c2012cb6fa68
+data8 0x3fc579c1c6953cc0, 0x3cc5ede909fc47fc
+data8 0x83c2c2c861474d91, 0xfc51f2acf82041d5
+data8 0x3fc5baade9860880, 0x3cac63cdfc3588e5
+data8 0x83d9d2cfc2813637, 0xfc3be08165519325
+data8 0x3fc5fb9fb1e8e3a0, 0x3cbf7c8466578c29
+data8 0x83f12eebf397daac, 0xfc258b6ce6e6822f
+data8 0x3fc63c9731f39d40, 0x3cb6d2a7ffca3e9e
+data8 0x8408d76990b9296e, 0xfc0ef35db402af94
+data8 0x3fc67d947be9eec0, 0x3cb1980da09e6566
+data8 0x8420cc9659487cd7, 0xfbf81841c8082dc4
+data8 0x3fc6be97a21daf00, 0x3cc2ac8330e59aa5
+data8 0x84390ec132759ecb, 0xfbe0fa06e24cc390
+data8 0x3fc6ffa0b6ef05e0, 0x3ccc1a030fee56c4
+data8 0x84519e3a29df811a, 0xfbc9989a85ce0954
+data8 0x3fc740afcccca000, 0x3cc19692a5301ca6
+data8 0x846a7b527842d61b, 0xfbb1f3e9f8e45dc4
+data8 0x3fc781c4f633e2c0, 0x3cc0e98f3868a508
+data8 0x8483a65c8434b5f0, 0xfb9a0be244f4af45
+data8 0x3fc7c2e045b12140, 0x3cb2a8d309754420
+data8 0x849d1fabe4e97dd7, 0xfb81e070362116d1
+data8 0x3fc80401cddfd120, 0x3ca7a44544aa4ce6
+data8 0x84b6e795650817ea, 0xfb6971805af8411e
+data8 0x3fc84529a16ac020, 0x3c9e3b709c7d6f94
+data8 0x84d0fe6f0589da92, 0xfb50beff0423a2f5
+data8 0x3fc88657d30c49e0, 0x3cc60d65a7f0a278
+data8 0x84eb649000a73014, 0xfb37c8d84414755c
+data8 0x3fc8c78c758e8e80, 0x3cc94b2ee984c2b7
+data8 0x85061a50ccd13781, 0xfb1e8ef7eeaf764b
+data8 0x3fc908c79bcba900, 0x3cc8540ae794a2fe
+data8 0x8521200b1fb8916e, 0xfb05114998f76a83
+data8 0x3fc94a0958ade6c0, 0x3ca127f49839fa9c
+data8 0x853c7619f1618bf6, 0xfaeb4fb898b65d19
+data8 0x3fc98b51bf2ffee0, 0x3c8c9ba7a803909a
+data8 0x85581cd97f45e274, 0xfad14a3004259931
+data8 0x3fc9cca0e25d4ac0, 0x3cba458e91d3bf54
+data8 0x857414a74f8446b4, 0xfab7009ab1945a54
+data8 0x3fca0df6d551fe80, 0x3cc78ea1d329d2b2
+data8 0x85905de2341dea46, 0xfa9c72e3370d2fbc
+data8 0x3fca4f53ab3b6200, 0x3ccf60dca86d57ef
+data8 0x85acf8ea4e423ff8, 0xfa81a0f3e9fa0ee9
+data8 0x3fca90b777580aa0, 0x3ca4c4e2ec8a867e
+data8 0x85c9e62111a92e7d, 0xfa668ab6dec711b1
+data8 0x3fcad2224cf814e0, 0x3c303de5980d071c
+data8 0x85e725e947fbee97, 0xfa4b3015e883dbfe
+data8 0x3fcb13943f7d5f80, 0x3cc29d4eefa5cb1e
+data8 0x8604b8a7144cd054, 0xfa2f90fa9883a543
+data8 0x3fcb550d625bc6a0, 0x3c9e01a746152daf
+data8 0x86229ebff69e2415, 0xfa13ad4e3dfbe1c1
+data8 0x3fcb968dc9195ea0, 0x3ccc091bd73ae518
+data8 0x8640d89acf78858c, 0xf9f784f9e5a1877b
+data8 0x3fcbd815874eb160, 0x3cb5f4b89875e187
+data8 0x865f669fe390c7f5, 0xf9db17e65944eacf
+data8 0x3fcc19a4b0a6f9c0, 0x3cc5c0bc2b0bbf14
+data8 0x867e4938df7dc45f, 0xf9be65fc1f6c2e6e
+data8 0x3fcc5b3b58e061e0, 0x3cc1ca70df8f57e7
+data8 0x869d80d0db7e4c0c, 0xf9a16f237aec427a
+data8 0x3fcc9cd993cc4040, 0x3cbae93acc85eccf
+data8 0x86bd0dd45f4f8265, 0xf98433446a806e70
+data8 0x3fccde7f754f5660, 0x3cb22f70e64568d0
+data8 0x86dcf0b16613e37a, 0xf966b246a8606170
+data8 0x3fcd202d11620fa0, 0x3c962030e5d4c849
+data8 0x86fd29d7624b3d5d, 0xf948ec11a9d4c45b
+data8 0x3fcd61e27c10c0a0, 0x3cc7083c91d59217
+data8 0x871db9b741dbe44a, 0xf92ae08c9eca4941
+data8 0x3fcda39fc97be7c0, 0x3cc9258579e57211
+data8 0x873ea0c3722d6af2, 0xf90c8f9e71633363
+data8 0x3fcde5650dd86d60, 0x3ca4755a9ea582a9
+data8 0x875fdf6fe45529e8, 0xf8edf92dc5875319
+data8 0x3fce27325d6fe520, 0x3cbc1e2b6c1954f9
+data8 0x878176321154e2bc, 0xf8cf1d20f87270b8
+data8 0x3fce6907cca0d060, 0x3cb6ca4804750830
+data8 0x87a36580fe6bccf5, 0xf8affb5e20412199
+data8 0x3fceaae56fdee040, 0x3cad6b310d6fd46c
+data8 0x87c5add5417a5cb9, 0xf89093cb0b7c0233
+data8 0x3fceeccb5bb33900, 0x3cc16e99cedadb20
+data8 0x87e84fa9057914ca, 0xf870e64d40a15036
+data8 0x3fcf2eb9a4bcb600, 0x3cc75ee47c8b09e9
+data8 0x880b4b780f02b709, 0xf850f2c9fdacdf78
+data8 0x3fcf70b05fb02e20, 0x3cad6350d379f41a
+data8 0x882ea1bfc0f228ac, 0xf830b926379e6465
+data8 0x3fcfb2afa158b8a0, 0x3cce0ccd9f829985
+data8 0x885252ff21146108, 0xf810394699fe0e8e
+data8 0x3fcff4b77e97f3e0, 0x3c9b30faa7a4c703
+data8 0x88765fb6dceebbb3, 0xf7ef730f865f6df0
+data8 0x3fd01b6406332540, 0x3cdc5772c9e0b9bd
+data8 0x88ad1f69be2cc730, 0xf7bdc59bc9cfbd97
+data8 0x3fd04cf8ad203480, 0x3caeef44fe21a74a
+data8 0x88f763f70ae2245e, 0xf77a91c868a9c54e
+data8 0x3fd08f23ce0162a0, 0x3cd6290ab3fe5889
+data8 0x89431fc7bc0c2910, 0xf73642973c91298e
+data8 0x3fd0d1610f0c1ec0, 0x3cc67401a01f08cf
+data8 0x8990573407c7738e, 0xf6f0d71d1d7a2dd6
+data8 0x3fd113b0c65d88c0, 0x3cc7aa4020fe546f
+data8 0x89df0eb108594653, 0xf6aa4e6a05cfdef2
+data8 0x3fd156134ada6fe0, 0x3cc87369da09600c
+data8 0x8a2f4ad16e0ed78a, 0xf662a78900c35249
+data8 0x3fd19888f43427a0, 0x3cc62b220f38e49c
+data8 0x8a811046373e0819, 0xf619e180181d97cc
+data8 0x3fd1db121aed7720, 0x3ca3ede7490b52f4
+data8 0x8ad463df6ea0fa2c, 0xf5cffb504190f9a2
+data8 0x3fd21daf185fa360, 0x3caafad98c1d6c1b
+data8 0x8b294a8cf0488daf, 0xf584f3f54b8604e6
+data8 0x3fd2606046bf95a0, 0x3cdb2d704eeb08fa
+data8 0x8b7fc95f35647757, 0xf538ca65c960b582
+data8 0x3fd2a32601231ec0, 0x3cc661619fa2f126
+data8 0x8bd7e588272276f8, 0xf4eb7d92ff39fccb
+data8 0x3fd2e600a3865760, 0x3c8a2a36a99aca4a
+data8 0x8c31a45bf8e9255e, 0xf49d0c68cd09b689
+data8 0x3fd328f08ad12000, 0x3cb9efaf1d7ab552
+data8 0x8c8d0b520a35eb18, 0xf44d75cd993cfad2
+data8 0x3fd36bf614dcc040, 0x3ccacbb590bef70d
+data8 0x8cea2005d068f23d, 0xf3fcb8a23ab4942b
+data8 0x3fd3af11a079a6c0, 0x3cd9775872cf037d
+data8 0x8d48e837c8cd5027, 0xf3aad3c1e2273908
+data8 0x3fd3f2438d754b40, 0x3ca03304f667109a
+data8 0x8da969ce732f3ac7, 0xf357c60202e2fd7e
+data8 0x3fd4358c3ca032e0, 0x3caecf2504ff1a9d
+data8 0x8e0baad75555e361, 0xf3038e323ae9463a
+data8 0x3fd478ec0fd419c0, 0x3cc64bdc3d703971
+data8 0x8e6fb18807ba877e, 0xf2ae2b1c3a6057f7
+data8 0x3fd4bc6369fa40e0, 0x3cbb7122ec245cf2
+data8 0x8ed5843f4bda74d5, 0xf2579b83aa556f0c
+data8 0x3fd4fff2af11e2c0, 0x3c9cfa2dc792d394
+data8 0x8f3d29862c861fef, 0xf1ffde2612ca1909
+data8 0x3fd5439a4436d000, 0x3cc38d46d310526b
+data8 0x8fa6a81128940b2d, 0xf1a6f1bac0075669
+data8 0x3fd5875a8fa83520, 0x3cd8bf59b8153f8a
+data8 0x901206c1686317a6, 0xf14cd4f2a730d480
+data8 0x3fd5cb33f8cf8ac0, 0x3c9502b5c4d0e431
+data8 0x907f4ca5fe9cf739, 0xf0f186784a125726
+data8 0x3fd60f26e847b120, 0x3cc8a1a5e0acaa33
+data8 0x90ee80fd34aeda5e, 0xf09504ef9a212f18
+data8 0x3fd65333c7e43aa0, 0x3cae5b029cb1f26e
+data8 0x915fab35e37421c6, 0xf0374ef5daab5c45
+data8 0x3fd6975b02b8e360, 0x3cd5aa1c280c45e6
+data8 0x91d2d2f0d894d73c, 0xefd86321822dbb51
+data8 0x3fd6db9d05213b20, 0x3cbecf2c093ccd8b
+data8 0x9248000249200009, 0xef7840021aca5a72
+data8 0x3fd71ffa3cc87fc0, 0x3cb8d273f08d00d9
+data8 0x92bf3a7351f081d2, 0xef16e42021d7cbd5
+data8 0x3fd7647318b1ad20, 0x3cbce099d79cdc46
+data8 0x93388a8386725713, 0xeeb44dfce6820283
+data8 0x3fd7a908093fc1e0, 0x3ccb033ec17a30d9
+data8 0x93b3f8aa8e653812, 0xee507c126774fa45
+data8 0x3fd7edb9803e3c20, 0x3cc10aedb48671eb
+data8 0x94318d99d341ade4, 0xedeb6cd32f891afb
+data8 0x3fd83287f0e9cf80, 0x3c994c0c1505cd2a
+data8 0x94b1523e3dedc630, 0xed851eaa3168f43c
+data8 0x3fd87773cff956e0, 0x3cda3b7bce6a6b16
+data8 0x95334fc20577563f, 0xed1d8ffaa2279669
+data8 0x3fd8bc7d93a70440, 0x3cd4922edc792ce2
+data8 0x95b78f8e8f92f274, 0xecb4bf1fd2be72da
+data8 0x3fd901a5b3b9cf40, 0x3cd3fea1b00f9d0d
+data8 0x963e1b4e63a87c3f, 0xec4aaa6d08694cc1
+data8 0x3fd946eca98f2700, 0x3cdba4032d968ff1
+data8 0x96c6fcef314074fc, 0xebdf502d53d65fea
+data8 0x3fd98c52f024e800, 0x3cbe7be1ab8c95c9
+data8 0x97523ea3eab028b2, 0xeb72aea36720793e
+data8 0x3fd9d1d904239860, 0x3cd72d08a6a22b70
+data8 0x97dfeae6f4ee4a9a, 0xeb04c4096a884e94
+data8 0x3fda177f63e8ef00, 0x3cd818c3c1ebfac7
+data8 0x98700c7c6d85d119, 0xea958e90cfe1efd7
+data8 0x3fda5d468f92a540, 0x3cdf45fbfaa080fe
+data8 0x9902ae7487a9caa1, 0xea250c6224aab21a
+data8 0x3fdaa32f090998e0, 0x3cd715a9353cede4
+data8 0x9997dc2e017a9550, 0xe9b33b9ce2bb7638
+data8 0x3fdae939540d3f00, 0x3cc545c014943439
+data8 0x9a2fa158b29b649b, 0xe9401a573f8aa706
+data8 0x3fdb2f65f63f6c60, 0x3cd4a63c2f2ca8e2
+data8 0x9aca09f835466186, 0xe8cba69df9f0bf35
+data8 0x3fdb75b5773075e0, 0x3cda310ce1b217ec
+data8 0x9b672266ab1e0136, 0xe855de74266193d4
+data8 0x3fdbbc28606babc0, 0x3cdc84b75cca6c44
+data8 0x9c06f7579f0b7bd5, 0xe7debfd2f98c060b
+data8 0x3fdc02bf3d843420, 0x3cd225d967ffb922
+data8 0x9ca995db058cabdc, 0xe76648a991511c6e
+data8 0x3fdc497a9c224780, 0x3cde08101c5b825b
+data8 0x9d4f0b605ce71e88, 0xe6ec76dcbc02d9a7
+data8 0x3fdc905b0c10d420, 0x3cb1abbaa3edf120
+data8 0x9df765b9eecad5e6, 0xe6714846bdda7318
+data8 0x3fdcd7611f4b8a00, 0x3cbf6217ae80aadf
+data8 0x9ea2b320350540fe, 0xe5f4bab71494cd6b
+data8 0x3fdd1e8d6a0d56c0, 0x3cb726e048cc235c
+data8 0x9f51023562fc5676, 0xe576cbf239235ecb
+data8 0x3fdd65e082df5260, 0x3cd9e66872bd5250
+data8 0xa002620915c2a2f6, 0xe4f779b15f5ec5a7
+data8 0x3fddad5b02a82420, 0x3c89743b0b57534b
+data8 0xa0b6e21c2caf9992, 0xe476c1a233a7873e
+data8 0x3fddf4fd84bbe160, 0x3cbf7adea9ee3338
+data8 0xa16e9264cc83a6b2, 0xe3f4a16696608191
+data8 0x3fde3cc8a6ec6ee0, 0x3cce46f5a51f49c6
+data8 0xa22983528f3d8d49, 0xe3711694552da8a8
+data8 0x3fde84bd099a6600, 0x3cdc78f6490a2d31
+data8 0xa2e7c5d2e2e69460, 0xe2ec1eb4e1e0a5fb
+data8 0x3fdeccdb4fc685c0, 0x3cdd3aedb56a4825
+data8 0xa3a96b5599bd2532, 0xe265b74506fbe1c9
+data8 0x3fdf15241f23b3e0, 0x3cd440f3c6d65f65
+data8 0xa46e85d1ae49d7de, 0xe1ddddb499b3606f
+data8 0x3fdf5d98202994a0, 0x3cd6c44bd3fb745a
+data8 0xa53727ca3e11b99e, 0xe1548f662951b00d
+data8 0x3fdfa637fe27bf60, 0x3ca8ad1cd33054dd
+data8 0xa6036453bdc20186, 0xe0c9c9aeabe5e481
+data8 0x3fdfef0467599580, 0x3cc0f1ac0685d78a
+data8 0xa6d34f1969dda338, 0xe03d89d5281e4f81
+data8 0x3fe01bff067d6220, 0x3cc0731e8a9ef057
+data8 0xa7a6fc62f7246ff3, 0xdfafcd125c323f54
+data8 0x3fe04092d1ae3b40, 0x3ccabda24b59906d
+data8 0xa87e811a861df9b9, 0xdf20909061bb9760
+data8 0x3fe0653df0fd9fc0, 0x3ce94c8dcc722278
+data8 0xa959f2d2dd687200, 0xde8fd16a4e5f88bd
+data8 0x3fe08a00c1cae320, 0x3ce6b888bb60a274
+data8 0xaa3967cdeea58bda, 0xddfd8cabd1240d22
+data8 0x3fe0aedba3221c00, 0x3ced5941cd486e46
+data8 0xab904fd587263c84, 0xdd1f4472e1cf64ed
+data8 0x3fe0e651e85229c0, 0x3cdb6701042299b1
+data8 0xad686d44dd5a74bb, 0xdbf173e1f6b46e92
+data8 0x3fe1309cbf4cdb20, 0x3cbf1be7bb3f0ec5
+data8 0xaf524e15640ebee4, 0xdabd54896f1029f6
+data8 0x3fe17b4ee1641300, 0x3ce81dd055b792f1
+data8 0xb14eca24ef7db3fa, 0xd982cb9ae2f47e41
+data8 0x3fe1c66b9ffd6660, 0x3cd98ea31eb5ddc7
+data8 0xb35ec807669920ce, 0xd841bd1b8291d0b6
+data8 0x3fe211f66db3a5a0, 0x3ca480c35a27b4a2
+data8 0xb5833e4755e04dd1, 0xd6fa0bd3150b6930
+data8 0x3fe25df2e05b6c40, 0x3ca4bc324287a351
+data8 0xb7bd34c8000b7bd3, 0xd5ab9939a7d23aa1
+data8 0x3fe2aa64b32f7780, 0x3cba67314933077c
+data8 0xba0dc64d126cc135, 0xd4564563ce924481
+data8 0x3fe2f74fc9289ac0, 0x3cec1a1dc0efc5ec
+data8 0xbc76222cbbfa74a6, 0xd2f9eeed501125a8
+data8 0x3fe344b82f859ac0, 0x3ceeef218de413ac
+data8 0xbef78e31985291a9, 0xd19672e2182f78be
+data8 0x3fe392a22087b7e0, 0x3cd2619ba201204c
+data8 0xc19368b2b0629572, 0xd02baca5427e436a
+data8 0x3fe3e11206694520, 0x3cb5d0b3143fe689
+data8 0xc44b2ae8c6733e51, 0xceb975d60b6eae5d
+data8 0x3fe4300c7e945020, 0x3cbd367143da6582
+data8 0xc7206b894212dfef, 0xcd3fa6326ff0ac9a
+data8 0x3fe47f965d201d60, 0x3ce797c7a4ec1d63
+data8 0xca14e1b0622de526, 0xcbbe13773c3c5338
+data8 0x3fe4cfb4b09d1a20, 0x3cedfadb5347143c
+data8 0xcd2a6825eae65f82, 0xca34913d425a5ae9
+data8 0x3fe5206cc637e000, 0x3ce2798b38e54193
+data8 0xd06301095e1351ee, 0xc8a2f0d3679c08c0
+data8 0x3fe571c42e3d0be0, 0x3ccd7cb9c6c2ca68
+data8 0xd3c0d9f50057adda, 0xc70901152d59d16b
+data8 0x3fe5c3c0c108f940, 0x3ceb6c13563180ab
+data8 0xd74650a98cc14789, 0xc5668e3d4cbf8828
+data8 0x3fe61668a46ffa80, 0x3caa9092e9e3c0e5
+data8 0xdaf5f8579dcc8f8f, 0xc3bb61b3eed42d02
+data8 0x3fe669c251ad69e0, 0x3cccf896ef3b4fee
+data8 0xded29f9f9a6171b4, 0xc20741d7f8e8e8af
+data8 0x3fe6bdd49bea05c0, 0x3cdc6b29937c575d
+data8 0xe2df5765854ccdb0, 0xc049f1c2d1b8014b
+data8 0x3fe712a6b76c6e80, 0x3ce1ddc6f2922321
+data8 0xe71f7a9b94fcb4c3, 0xbe833105ec291e91
+data8 0x3fe76840418978a0, 0x3ccda46e85432c3d
+data8 0xeb96b72d3374b91e, 0xbcb2bb61493b28b3
+data8 0x3fe7bea9496d5a40, 0x3ce37b42ec6e17d3
+data8 0xf049183c3f53c39b, 0xbad848720223d3a8
+data8 0x3fe815ea59dab0a0, 0x3cb03ad41bfc415b
+data8 0xf53b11ec7f415f15, 0xb8f38b57c53c9c48
+data8 0x3fe86e0c84010760, 0x3cc03bfcfb17fe1f
+data8 0xfa718f05adbf2c33, 0xb70432500286b185
+data8 0x3fe8c7196b9225c0, 0x3ced99fcc6866ba9
+data8 0xfff200c3f5489608, 0xb509e6454dca33cc
+data8 0x3fe9211b54441080, 0x3cb789cb53515688
+// The following table entries are not used
+//data8 0x82e138a0fac48700, 0xb3044a513a8e6132
+//data8 0x3fe97c1d30f5b7c0, 0x3ce1eb765612d1d0
+//data8 0x85f4cc7fc670d021, 0xb0f2fb2ea6cbbc88
+//data8 0x3fe9d82ab4b5fde0, 0x3ced3fe6f27e8039
+//data8 0x89377c1387d5b908, 0xaed58e9a09014d5c
+//data8 0x3fea355065f87fa0, 0x3cbef481d25f5b58
+//data8 0x8cad7a2c98dec333, 0xacab929ce114d451
+//data8 0x3fea939bb451e2a0, 0x3c8e92b4fbf4560f
+//data8 0x905b7dfc99583025, 0xaa748cc0dbbbc0ec
+//data8 0x3feaf31b11270220, 0x3cdced8c61bd7bd5
+//data8 0x9446d8191f80dd42, 0xa82ff92687235baf
+//data8 0x3feb53de0bcffc20, 0x3cbe1722fb47509e
+//data8 0x98758ba086e4000a, 0xa5dd497a9c184f58
+//data8 0x3febb5f571cb0560, 0x3ce0c7774329a613
+//data8 0x9cee6c7bf18e4e24, 0xa37be3c3cd1de51b
+//data8 0x3fec197373bc7be0, 0x3ce08ebdb55c3177
+//data8 0xa1b944000a1b9440, 0xa10b2101b4f27e03
+//data8 0x3fec7e6bd023da60, 0x3ce5fc5fd4995959
+//data8 0xa6defd8ba04d3e38, 0x9e8a4b93cad088ec
+//data8 0x3fece4f404e29b20, 0x3cea3413401132b5
+//data8 0xac69dd408a10c62d, 0x9bf89d5d17ddae8c
+//data8 0x3fed4d2388f63600, 0x3cd5a7fb0d1d4276
+//data8 0xb265c39cbd80f97a, 0x99553d969fec7beb
+//data8 0x3fedb714101e0a00, 0x3cdbda21f01193f2
+//data8 0xb8e081a16ae4ae73, 0x969f3e3ed2a0516c
+//data8 0x3fee22e1da97bb00, 0x3ce7231177f85f71
+//data8 0xbfea427678945732, 0x93d5990f9ee787af
+//data8 0x3fee90ac13b18220, 0x3ce3c8a5453363a5
+//data8 0xc79611399b8c90c5, 0x90f72bde80febc31
+//data8 0x3fef009542b712e0, 0x3ce218fd79e8cb56
+//data8 0xcffa8425040624d7, 0x8e02b4418574ebed
+//data8 0x3fef72c3d2c57520, 0x3cd32a717f82203f
+//data8 0xd93299cddcf9cf23, 0x8af6ca48e9c44024
+//data8 0x3fefe762b77744c0, 0x3ce53478a6bbcf94
+//data8 0xe35eda760af69ad9, 0x87d1da0d7f45678b
+//data8 0x3ff02f511b223c00, 0x3ced6e11782c28fc
+//data8 0xeea6d733421da0a6, 0x84921bbe64ae029a
+//data8 0x3ff06c5c6f8ce9c0, 0x3ce71fc71c1ffc02
+//data8 0xfb3b2c73fc6195cc, 0x813589ba3a5651b6
+//data8 0x3ff0aaf2613700a0, 0x3cf2a72d2fd94ef3
+//data8 0x84ac1fcec4203245, 0xfb73a828893df19e
+//data8 0x3ff0eb367c3fd600, 0x3cf8054c158610de
+//data8 0x8ca50621110c60e6, 0xf438a14c158d867c
+//data8 0x3ff12d51caa6b580, 0x3ce6bce9748739b6
+//data8 0x95b8c2062d6f8161, 0xecb3ccdd37b369da
+//data8 0x3ff1717418520340, 0x3ca5c2732533177c
+//data8 0xa0262917caab4ad1, 0xe4dde4ddc81fd119
+//data8 0x3ff1b7d59dd40ba0, 0x3cc4c7c98e870ff5
+//data8 0xac402c688b72f3f4, 0xdcae469be46d4c8d
+//data8 0x3ff200b93cc5a540, 0x3c8dd6dc1bfe865a
+//data8 0xba76968b9eabd9ab, 0xd41a8f3df1115f7f
+//data8 0x3ff24c6f8f6affa0, 0x3cf1acb6d2a7eff7
+//data8 0xcb63c87c23a71dc5, 0xcb161074c17f54ec
+//data8 0x3ff29b5b338b7c80, 0x3ce9b5845f6ec746
+//data8 0xdfe323b8653af367, 0xc19107d99ab27e42
+//data8 0x3ff2edf6fac7f5a0, 0x3cf77f961925fa02
+//data8 0xf93746caaba3e1f1, 0xb777744a9df03bff
+//data8 0x3ff344df237486c0, 0x3cf6ddf5f6ddda43
+//data8 0x8ca77052f6c340f0, 0xacaf476f13806648
+//data8 0x3ff3a0dfa4bb4ae0, 0x3cfee01bbd761bff
+//data8 0xa1a48604a81d5c62, 0xa11575d30c0aae50
+//data8 0x3ff4030b73c55360, 0x3cf1cf0e0324d37c
+//data8 0xbe45074b05579024, 0x9478e362a07dd287
+//data8 0x3ff46ce4c738c4e0, 0x3ce3179555367d12
+//data8 0xe7a08b5693d214ec, 0x8690e3575b8a7c3b
+//data8 0x3ff4e0a887c40a80, 0x3cfbd5d46bfefe69
+//data8 0x94503d69396d91c7, 0xedd2ce885ff04028
+//data8 0x3ff561ebd9c18cc0, 0x3cf331bd176b233b
+//data8 0xced1d96c5bb209e6, 0xc965278083808702
+//data8 0x3ff5f71d7ff42c80, 0x3ce3301cc0b5a48c
+//data8 0xabac2cee0fc24e20, 0x9c4eb1136094cbbd
+//data8 0x3ff6ae4c63222720, 0x3cf5ff46874ee51e
+//data8 0x8040201008040201, 0xb4d7ac4d9acb1bf4
+//data8 0x3ff7b7d33b928c40, 0x3cfacdee584023bb
+LOCAL_OBJECT_END(T_table)
-acos_X2 = f51
-acos_X4 = f52
-acos_B = f53
-acos_Bb = f54
-acos_A = f55
-acos_Aa = f56
-acos_1mA = f57
+.align 16
-acos_W = f58
-acos_Ww = f59
+LOCAL_OBJECT_START(poly_coeffs)
+ // C_3
+data8 0xaaaaaaaaaaaaaaab, 0x0000000000003ffc
+ // C_5
+data8 0x999999999999999a, 0x0000000000003ffb
+ // C_7, C_9
+data8 0x3fa6db6db6db6db7, 0x3f9f1c71c71c71c8
+ // pi/2 (low, high)
+data8 0x3C91A62633145C07, 0x3FF921FB54442D18
+ // C_11, C_13
+data8 0x3f96e8ba2e8ba2e9, 0x3f91c4ec4ec4ec4e
+ // C_15, C_17
+data8 0x3f8c99999999999a, 0x3f87a87878787223
+ // pi (low, high)
+data8 0x3CA1A62633145C07, 0x400921FB54442D18
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+R_DBL_S = r21
+R_EXP0 = r22
+R_EXP = r15
+R_SGNMASK = r23
+R_TMP = r24
+R_TMP2 = r25
+R_INDEX = r26
+R_TMP3 = r27
+R_TMP03 = r27
+R_TMP4 = r28
+R_TMP5 = r23
+R_TMP6 = r22
+R_TMP7 = r21
+R_T = r29
+R_BIAS = r20
+
+F_T = f6
+F_1S2 = f7
+F_1S2_S = f9
+F_INV_1T2 = f10
+F_SQRT_1T2 = f11
+F_S2T2 = f12
+F_X = f13
+F_D = f14
+F_2M64 = f15
+
+F_CS2 = f32
+F_CS3 = f33
+F_CS4 = f34
+F_CS5 = f35
+F_CS6 = f36
+F_CS7 = f37
+F_CS8 = f38
+F_CS9 = f39
+F_S23 = f40
+F_S45 = f41
+F_S67 = f42
+F_S89 = f43
+F_S25 = f44
+F_S69 = f45
+F_S29 = f46
+F_X2 = f47
+F_X4 = f48
+F_TSQRT = f49
+F_DTX = f50
+F_R = f51
+F_R2 = f52
+F_R3 = f53
+F_R4 = f54
+
+F_C3 = f55
+F_C5 = f56
+F_C7 = f57
+F_C9 = f58
+F_P79 = f59
+F_P35 = f60
+F_P39 = f61
+
+F_ATHI = f62
+F_ATLO = f63
+
+F_T1 = f64
+F_Y = f65
+F_Y2 = f66
+F_ANDMASK = f67
+F_ORMASK = f68
+F_S = f69
+F_05 = f70
+F_SQRT_1S2 = f71
+F_DS = f72
+F_Z = f73
+F_1T2 = f74
+F_DZ = f75
+F_ZE = f76
+F_YZ = f77
+F_Y1S2 = f78
+F_Y1S2X = f79
+F_1X = f80
+F_ST = f81
+F_1T2_ST = f82
+F_TSS = f83
+F_Y1S2X2 = f84
+F_DZ_TERM = f85
+F_DTS = f86
+F_DS2X = f87
+F_T2 = f88
+F_ZY1S2S = f89
+F_Y1S2_1X = f90
+F_TS = f91
+F_PI2_LO = f92
+F_PI2_HI = f93
+F_S19 = f94
+F_INV1T2_2 = f95
+F_CORR = f96
+F_DZ0 = f97
+
+F_C11 = f98
+F_C13 = f99
+F_C15 = f100
+F_C17 = f101
+F_P1113 = f102
+F_P1517 = f103
+F_P1117 = f104
+F_P317 = f105
+F_R8 = f106
+F_HI = f107
+F_1S2_HI = f108
+F_DS2 = f109
+F_Y2_2 = f110
+//F_S2 = f111
+//F_S_DS2 = f112
+F_S_1S2S = f113
+F_XL = f114
+F_2M128 = f115
+F_1AS = f116
+F_AS = f117
-acos_y0 = f60
-acos_y1 = f61
-acos_y2 = f62
-acos_H = f63
-acos_Hh = f64
-acos_t1 = f65
-acos_t2 = f66
-acos_t3 = f67
-acos_t4 = f68
-acos_t5 = f69
+.section .text
+GLOBAL_LIBM_ENTRY(acosl)
-acos_Pseries = f70
-acos_NORM_f8 = f71
-acos_ABS_NORM_f8 = f72
+{.mfi
+ // get exponent, mantissa (rounded to double precision) of s
+ getf.d R_DBL_S = f8
+ // 1-s^2
+ fnma.s1 F_1S2 = f8, f8, f1
+ // r2 = pointer to T_table
+ addl r2 = @ltoff(T_table), gp
+}
-acos_2 = f73
-acos_P1P2 = f74
-acos_HALF = f75
-acos_U = f76
+{.mfi
+ // sign mask
+ mov R_SGNMASK = 0x20000
+ nop.f 0
+ // bias-63-1
+ mov R_TMP03 = 0xffff-64;;
+}
-acos_1mB = f77
-acos_V = f78
-acos_S = f79
-acos_BmUU = f80
-acos_BmUUpb = f81
-acos_2U = f82
-acos_1d2U = f83
+{.mfi
+ // get exponent of s
+ getf.exp R_EXP = f8
+ nop.f 0
+ // R_TMP4 = 2^45
+ shl R_TMP4 = R_SGNMASK, 45-17
+}
-acos_Dd = f84
+{.mlx
+ // load bias-4
+ mov R_TMP = 0xffff-4
+ // load RU(sqrt(2)/2) to integer register (in double format, shifted left by 1)
+ movl R_TMP2 = 0x7fcd413cccfe779a;;
+}
-acos_pi_by_2_hi = f85
-acos_pi_by_2_lo = f86
-acos_xmpi_by_2_lo = f87
-acos_xPmw = f88
-acos_Uu = f89
-acos_AmVV = f90
-acos_AmVVpa = f91
+{.mfi
+ // load 2^{-64} in FP register
+ setf.exp F_2M64 = R_TMP03
+ nop.f 0
+ // index = (0x7-exponent)|b1 b2.. b6
+ extr.u R_INDEX = R_DBL_S, 46, 9
+}
-acos_2V = f92
-acos_1d2V = f93
-acos_Vv = f94
+{.mfi
+ // get t = sign|exponent|b1 b2.. b6 1 x.. x
+ or R_T = R_DBL_S, R_TMP4
+ nop.f 0
+ // R_TMP4 = 2^45-1
+ sub R_TMP4 = R_TMP4, r0, 1;;
+}
-acos_Vu = f95
-acos_Uv = f96
-
-acos_2_Z_hi = f97
-acos_s_lo_Z_lo = f98
-acos_result_lo = f99
-
-acos_Z_hi = f8
-acos_Z_lo = f10
-acos_s_lo = f11
-
-acos_GR_17_ones = r33
-acos_GR_16_ones = r34
-acos_GR_signexp_f8 = r35
-acos_GR_exp = r36
-acos_GR_true_exp = r37
-acos_GR_fffe = r38
-
-GR_SAVE_PFS = r43
-GR_SAVE_B0 = r39
-GR_SAVE_GP = r41
-
-// r40 is address of table of coefficients
-// r42
-
-GR_Parameter_X = r44
-GR_Parameter_Y = r45
-GR_Parameter_RESULT = r46
-GR_Parameter_TAG = r47
-
-
-// 2^-40:
-// A true exponent of -40 is
-// : -40 + register_bias
-// : -28 + ffff = ffd7
-// A true exponent of 1 is
-// : 1 + register_bias
-// : 1 + ffff = 10000
+{.mfi
+ // get t = sign|exponent|b1 b2.. b6 1 0.. 0
+ andcm R_T = R_T, R_TMP4
+ nop.f 0
+ // eliminate sign from R_DBL_S (shift left by 1)
+ shl R_TMP3 = R_DBL_S, 1
+}
-// Data tables
-//==============================================================
+{.mfi
+ // R_BIAS = 3*2^6
+ mov R_BIAS = 0xc0
+ nop.f 0
+ // eliminate sign from R_EXP
+ andcm R_EXP0 = R_EXP, R_SGNMASK;;
+}
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-.align 16
-acos_coefficients:
-ASM_TYPE_DIRECTIVE(acos_coefficients,@object)
-data8 0xc90fdaa22168c234, 0x00003FFF // pi_by_2_hi
-data8 0xc4c6628b80dc1cd1, 0x00003FBF // pi_by_2_lo
-data8 0xc90fdaa22168c234, 0x00004000 // pi_hi
-data8 0xc4c6628b80dc1cd1, 0x00003FC0 // pi_lo
-
-data8 0xBB08911F2013961E, 0x00003FF8 // A10
-data8 0x981F1095A23A87D3, 0x00003FF8 // A9
-data8 0xBDF09C6C4177BCC6, 0x00003FF8 // A8
-data8 0xE4C3A60B049ACCEA, 0x00003FF8 // A7
-data8 0x8E2789F4E8A8F1AD, 0x00003FF9 // A6
-data8 0xB745D09B2B0E850B, 0x00003FF9 // A5
-data8 0xF8E38E3BC4C50920, 0x00003FF9 // A4
-data8 0xB6DB6DB6D89FCD81, 0x00003FFA // A3
-data8 0x99999999999AF376, 0x00003FFB // A2
-data8 0xAAAAAAAAAAAAAA71, 0x00003FFC // A1
-ASM_SIZE_DIRECTIVE(acos_coefficients)
-
-
-.align 32
-.global acosl#
-ASM_TYPE_DIRECTIVE(acosl#,@function)
+{.mfi
+ // load start address for T_table
+ ld8 r2 = [r2]
+ nop.f 0
+ // p8 = 1 if |s|> = sqrt(2)/2
+ cmp.geu p8, p0 = R_TMP3, R_TMP2
+}
-.section .text
-.proc acosl#
-.align 32
+{.mlx
+ // p7 = 1 if |s|<2^{-4} (exponent of s<bias-4)
+ cmp.lt p7, p0 = R_EXP0, R_TMP
+ // sqrt coefficient cs8 = -33*13/128
+ movl R_TMP2 = 0xc0568000;;
+}
-acosl:
-// After normalizing f8, get its true exponent
-{ .mfi
- alloc r32 = ar.pfs,1,11,4,0
-(p0) fnorm.s1 acos_NORM_f8 = f8
-(p0) mov acos_GR_17_ones = 0x1ffff
+{.mbb
+ // load t in FP register
+ setf.d F_T = R_T
+ // if |s|<2^{-4}, take alternate path
+ (p7) br.cond.spnt SMALL_S
+ // if |s|> = sqrt(2)/2, take alternate path
+ (p8) br.cond.sptk LARGE_S
}
-{ .mmi
-(p0) mov acos_GR_16_ones = 0xffff
-(p0) addl r40 = @ltoff(acos_coefficients), gp
- nop.i 999
+{.mlx
+ // index = (4-exponent)|b1 b2.. b6
+ sub R_INDEX = R_INDEX, R_BIAS
+ // sqrt coefficient cs9 = 55*13/128
+ movl R_TMP = 0x40b2c000;;
}
-;;
-// Set denormal flag on denormal input with fcmp
-{ .mfi
- ld8 r40 = [r40]
- fcmp.eq p6,p0 = f8,f0
- nop.i 999
+
+{.mfi
+ // sqrt coefficient cs8 = -33*13/128
+ setf.s F_CS8 = R_TMP2
+ nop.f 0
+ // shift R_INDEX by 5
+ shl R_INDEX = R_INDEX, 5
}
-;;
+{.mfi
+ // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
+ mov R_TMP4 = 0xffff - 1
+ nop.f 0
+ // sqrt coefficient cs6 = -21/16
+ mov R_TMP6 = 0xbfa8;;
+}
-// Load the constants pi_by_2 and pi.
-// Each is stored as hi and lo values
-// Also load the coefficients for ACOS_POLY
-{ .mmi
-(p0) ldfe acos_pi_by_2_hi = [r40],16 ;;
-(p0) ldfe acos_pi_by_2_lo = [r40],16
- nop.i 999 ;;
+{.mlx
+ // table index
+ add r2 = r2, R_INDEX
+ // sqrt coefficient cs7 = 33/16
+ movl R_TMP2 = 0x40040000;;
}
-{ .mmi
-(p0) ldfe acos_pi_hi = [r40],16 ;;
-(p0) ldfe acos_pi_lo = [r40],16
- nop.i 999 ;;
+
+{.mmi
+ // load cs9 = 55*13/128
+ setf.s F_CS9 = R_TMP
+ // sqrt coefficient cs5 = 7/8
+ mov R_TMP3 = 0x3f60
+ // sqrt coefficient cs6 = -21/16
+ shl R_TMP6 = R_TMP6, 16;;
}
-{ .mmi
-(p0) ldfe acos_A10 = [r40],16 ;;
-(p0) ldfe acos_A9 = [r40],16
- nop.i 999 ;;
+
+{.mmi
+ // load significand of 1/(1-t^2)
+ ldf8 F_INV_1T2 = [r2], 8
+ // sqrt coefficient cs7 = 33/16
+ setf.s F_CS7 = R_TMP2
+ // sqrt coefficient cs4 = -5/8
+ mov R_TMP5 = 0xbf20;;
}
-// Take the absolute value of f8
-{ .mmf
- nop.m 999
-(p0) getf.exp acos_GR_signexp_f8 = acos_NORM_f8
-(p0) fmerge.s acos_ABS_NORM_f8 = f0, acos_NORM_f8
+
+{.mmi
+ // load significand of sqrt(1-t^2)
+ ldf8 F_SQRT_1T2 = [r2], 8
+ // sqrt coefficient cs6 = -21/16
+ setf.s F_CS6 = R_TMP6
+ // sqrt coefficient cs5 = 7/8
+ shl R_TMP3 = R_TMP3, 16;;
}
-{ .mii
-(p0) ldfe acos_A8 = [r40],16
- nop.i 999 ;;
-(p0) and acos_GR_exp = acos_GR_signexp_f8, acos_GR_17_ones ;;
+
+{.mmi
+ // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
+ setf.exp F_CS3 = R_TMP4
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp
+ // sqrt coefficient cs4 = -5/8
+ shl R_TMP5 = R_TMP5, 16;;
}
-// case 1: |x| < 2^-25 ==> p6 ACOS_TINY
-// case 2: 2^-25 <= |x| < 2^-2 ==> p8 ACOS_POLY
-// case 3: 2^-2 <= |x| < 1 ==> p9 ACOS_ATAN
-// case 4: 1 <= |x| ==> p11 ACOS_ERROR_RETURN
-// Admittedly |x| = 1 is not an error but this is where that case is
-// handled.
-{ .mii
-(p0) ldfe acos_A7 = [r40],16
-(p0) sub acos_GR_true_exp = acos_GR_exp, acos_GR_16_ones ;;
-(p0) cmp.ge.unc p6, p7 = -26, acos_GR_true_exp ;;
+{.mfi
+ // sqrt coefficient cs5 = 7/8
+ setf.s F_CS5 = R_TMP3
+ // d = s-t
+ fms.s1 F_D = f8, f1, F_T
+ // set p6 = 1 if s<0, p11 = 1 if s> = 0
+ cmp.ge p6, p11 = R_EXP, R_DBL_S
}
-{ .mii
-(p0) ldfe acos_A6 = [r40],16
-(p7) cmp.ge.unc p8, p9 = -3, acos_GR_true_exp ;;
-(p9) cmp.ge.unc p10, p11 = -1, acos_GR_true_exp
+{.mfi
+ // r3 = load start address to polynomial coefficients
+ ld8 r3 = [r3]
+ // s+t
+ fma.s1 F_S2T2 = f8, f1, F_T
+ nop.i 0;;
}
-{ .mmi
-(p0) ldfe acos_A5 = [r40],16 ;;
-(p0) ldfe acos_A4 = [r40],16
- nop.i 999 ;;
+
+{.mfi
+ // sqrt coefficient cs4 = -5/8
+ setf.s F_CS4 = R_TMP5
+ // s^2-t^2
+ fma.s1 F_S2T2 = F_S2T2, F_D, f0
+ nop.i 0;;
}
-{ .mmi
-(p0) ldfe acos_A3 = [r40],16 ;;
-(p0) ldfe acos_A2 = [r40],16
- nop.i 999 ;;
+
+{.mfi
+ // load C3
+ ldfe F_C3 = [r3], 16
+ // 0.5/(1-t^2) = 2^{-64}*(2^63/(1-t^2))
+ fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
+ nop.i 0;;
}
-// ACOS_ERROR_RETURN ==> p11 is true
-// case 4: |x| >= 1
-{ .mib
-(p0) ldfe acos_A1 = [r40],16
- nop.i 999
-(p11) br.spnt L(ACOS_ERROR_RETURN) ;;
+{.mfi
+ // load C_5
+ ldfe F_C5 = [r3], 16
+ // set correct exponent for sqrt(1-t^2)
+ fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
+ nop.i 0;;
}
-// ACOS_TINY ==> p6 is true
-// case 1: |x| < 2^-25
-{ .mfi
- nop.m 999
-(p6) fms.s1 acos_xmpi_by_2_lo = acos_NORM_f8,f1, acos_pi_by_2_lo
- nop.i 999 ;;
+
+{.mfi
+ // load C_7, C_9
+ ldfpd F_C7, F_C9 = [r3], 16
+ // x = -(s^2-t^2)/(1-t^2)/2
+ fnma.s1 F_X = F_INV_1T2, F_S2T2, f0
+ nop.i 0;;
}
-{ .mfb
- nop.m 999
-(p6) fms.s0 f8 = acos_pi_by_2_hi,f1, acos_xmpi_by_2_lo
-(p6) br.ret.spnt b0 ;;
+
+{.mmf
+ // load asin(t)_high, asin(t)_low
+ ldfpd F_ATHI, F_ATLO = [r2]
+ // load pi/2
+ ldfpd F_PI2_LO, F_PI2_HI = [r3]
+ // t*sqrt(1-t^2)
+ fma.s1 F_TSQRT = F_T, F_SQRT_1T2, f0;;
}
+{.mfi
+ nop.m 0
+ // cs9*x+cs8
+ fma.s1 F_S89 = F_CS9, F_X, F_CS8
+ nop.i 0
+}
-// ACOS_POLY ==> p8 is true
-// case 2: 2^-25 <= |x| < 2^-2
-{ .mfi
- nop.m 999
-(p8) fms.s1 acos_W = acos_pi_by_2_hi, f1, acos_NORM_f8
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // cs7*x+cs6
+ fma.s1 F_S67 = F_CS7, F_X, F_CS6
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_X2 = f8,f8, f0
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // cs5*x+cs4
+ fma.s1 F_S45 = F_CS5, F_X, F_CS4
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p8) fms.s1 acos_Ww = acos_pi_by_2_hi, f1, acos_W
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // x*x
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_X4 = acos_X2,acos_X2, f0
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // (s-t)-t*x
+ fnma.s1 F_DTX = F_T, F_X, F_D
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p8) fms.s1 acos_Ww = acos_Ww, f1, acos_NORM_f8
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // cs3*x+cs2 (cs2 = -0.5 = -cs3)
+ fms.s1 F_S23 = F_CS3, F_X, F_CS3
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P810 = acos_X4, acos_A10, acos_A8
- nop.i 999
+{.mfi
+ nop.m 0
+ // if sign is negative, negate table values: asin(t)_low
+ (p6) fnma.s1 F_ATLO = F_ATLO, f1, f0
+ nop.i 0
}
-// acos_P79 = X4*A9 + A7
-// acos_P810 = X4*A10 + A8
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P79 = acos_X4, acos_A9, acos_A7
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // if sign is negative, negate table values: asin(t)_high
+ (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_Ww = acos_Ww, f1, acos_pi_by_2_lo
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // cs9*x^3+cs8*x^2+cs7*x+cs6
+ fma.s1 F_S69 = F_S89, F_X2, F_S67
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P610 = acos_X4, acos_P810, acos_A6
- nop.i 999
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
}
-// acos_P59 = X4*(X4*A9 + A7) + A5
-// acos_P610 = X4*(X4*A10 + A8) + A6
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P59 = acos_X4, acos_P79, acos_A5
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // t*sqrt(1-t^2)*x^2
+ fma.s1 F_TSQRT = F_TSQRT, F_X2, f0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P410 = acos_X4, acos_P610, acos_A4
- nop.i 999
+{.mfi
+ nop.m 0
+ // cs5*x^3+cs4*x^2+cs3*x+cs2
+ fma.s1 F_S25 = F_S45, F_X2, F_S23
+ nop.i 0;;
}
-// acos_P39 = X4*(X4*(X4*A9 + A7) + A5) + A3
-// acos_P410 = X4*(X4*(X4*A10 + A8) + A6) + A4
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P39 = acos_X4, acos_P59, acos_A3
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // ((s-t)-t*x)*sqrt(1-t^2)
+ fma.s1 F_DTX = F_DTX, F_SQRT_1T2, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P210 = acos_X4, acos_P410, acos_A2
- nop.i 999
+{.mfi
+ nop.m 0
+ // (pi/2)_high - asin(t)_high
+ fnma.s1 F_ATHI = F_ATHI, f1, F_PI2_HI
+ nop.i 0
}
-// acos_P19 = X4*(X4*(X4*(X4*A9 + A7) + A5) + A3) + A1 = P1
-// acos_P210 = X4*(X4*(X4*(X4*A10 + A8) + A6) + A4) + A2 = P2
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P19 = acos_X4, acos_P39, acos_A1
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // asin(t)_low - (pi/2)_low
+ fnma.s1 F_ATLO = F_PI2_LO, f1, F_ATLO
+ nop.i 0;;
}
-// acos_P1P2 = Xsq*P2 + P1
-// acos_P1P2 = Xsq*(Xsq*P2 + P1)
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P1P2 = acos_X2, acos_P210, acos_P19
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // PS29 = cs9*x^7+..+cs5*x^3+cs4*x^2+cs3*x+cs2
+ fma.s1 F_S29 = F_S69, F_X4, F_S25
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 acos_P1P2 = acos_X2, acos_P1P2, f0
- nop.i 999 ;;
+
+
+{.mfi
+ nop.m 0
+ // R = ((s-t)-t*x)*sqrt(1-t^2)-t*sqrt(1-t^2)*x^2*PS29
+ fnma.s1 F_R = F_S29, F_TSQRT, F_DTX
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fms.s1 acos_xPmw = acos_NORM_f8, acos_P1P2, acos_Ww
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
}
-{ .mfb
- nop.m 999
-(p8) fms.s0 f8 = acos_W, f1, acos_xPmw
-(p8) br.ret.spnt b0 ;;
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
}
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
-// ACOS_ATAN
-// case 3: 2^-2 <= |x| < 1
-// case 3: 2^-2 <= |x| < 1 ==> p9 ACOS_ATAN
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
-// Step 1.1: Get A,B and a,b
-// A + a = 1- |X|
-// B + b = 1+ |X|
-// Note also that we will use acos_corr (f13)
-// and acos_W
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
-// Step 2
-// Call __libm_atan2_reg
-{ .mfi
-(p0) mov acos_GR_fffe = 0xfffe
-(p0) fma.s1 acos_B = f1,f1, acos_ABS_NORM_f8
-(p0) mov GR_SAVE_B0 = b0 ;;
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
}
-{ .mmf
-(p0) mov GR_SAVE_GP = gp
- nop.m 999
-(p0) fms.s1 acos_A = f1,f1, acos_ABS_NORM_f8
+
+{.mfi
+ nop.m 0
+ // asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_P39 = F_P39, F_R3, F_ATLO
+ nop.i 0;;
}
-{ .mfi
-(p0) setf.exp acos_HALF = acos_GR_fffe
- nop.f 999
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_P39 = F_P39, f1, F_R
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fms.s1 acos_1mB = f1,f1, acos_B
- nop.i 999 ;;
+
+{.mfb
+ nop.m 0
+ // result = (pi/2)-asin(t)_high+R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fnma.s0 f8 = F_P39, f1, F_ATHI
+ // return
+ br.ret.sptk b0;;
}
-// We want atan2(V,U)
-// so put V in f8 and U in f9
-// but save X in acos_X
-{ .mfi
- nop.m 999
-(p0) fmerge.se acos_X = f8, f8
- nop.i 999 ;;
+
+
+LARGE_S:
+
+{.mfi
+ // bias-1
+ mov R_TMP3 = 0xffff - 1
+ // y ~ 1/sqrt(1-s^2)
+ frsqrta.s1 F_Y, p7 = F_1S2
+ // c9 = 55*13*17/128
+ mov R_TMP4 = 0x10af7b
}
-// Step 1.2:
-/////////////////////////
-// Get U = sqrt(B)
-/////////////////////////
+{.mlx
+ // c8 = -33*13*15/128
+ mov R_TMP5 = 0x184923
+ movl R_TMP2 = 0xff00000000000000;;
+}
-{ .mfi
- nop.m 999
-(p0) frsqrta.s1 acos_y0,p8 = acos_B
- nop.i 999
+{.mfi
+ // set p6 = 1 if s<0, p11 = 1 if s>0
+ cmp.ge p6, p11 = R_EXP, R_DBL_S
+ // 1-s^2
+ fnma.s1 F_1S2 = f8, f8, f1
+ // set p9 = 1
+ cmp.eq p9, p0 = r0, r0;;
}
-{ .mfi
- nop.m 999
-(p0) fms.s1 acos_1mA = f1,f1, acos_A
- nop.i 999 ;;
+
+{.mfi
+ // load 0.5
+ setf.exp F_05 = R_TMP3
+ // (1-s^2) rounded to single precision
+ fnma.s.s1 F_1S2_S = f8, f8, f1
+ // c9 = 55*13*17/128
+ shl R_TMP4 = R_TMP4, 10
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_Bb = acos_1mB,f1, acos_ABS_NORM_f8
- nop.i 999 ;;
+{.mlx
+ // AND mask for getting t ~ sqrt(1-s^2)
+ setf.sig F_ANDMASK = R_TMP2
+ // OR mask
+ movl R_TMP2 = 0x0100000000000000;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_Hh = acos_HALF, acos_B, f0
- nop.i 999 ;;
+.pred.rel "mutex", p6, p11
+{.mfi
+ nop.m 0
+ // 1-|s|
+ (p6) fma.s1 F_1AS = f8, f1, f1
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_t1 = acos_y0, acos_y0, f0
- nop.i 999
+{.mfi
+ nop.m 0
+ // 1-|s|
+ (p11) fnma.s1 F_1AS = f8, f1, f1
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fms.s1 acos_Aa = acos_1mA,f1, acos_ABS_NORM_f8
- nop.i 999 ;;
+
+{.mfi
+ // c9 = 55*13*17/128
+ setf.s F_CS9 = R_TMP4
+ // |s|
+ (p6) fnma.s1 F_AS = f8, f1, f0
+ // c8 = -33*13*15/128
+ shl R_TMP5 = R_TMP5, 11
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 acos_t2 = acos_t1, acos_Hh, acos_HALF
- nop.i 999 ;;
+{.mfi
+ // c7 = 33*13/16
+ mov R_TMP4 = 0x41d68
+ // |s|
+ (p11) fma.s1 F_AS = f8, f1, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_y1 = acos_t2, acos_y0, acos_y0
- nop.i 999
+
+{.mfi
+ setf.sig F_ORMASK = R_TMP2
+ // y^2
+ fma.s1 F_Y2 = F_Y, F_Y, f0
+ // c7 = 33*13/16
+ shl R_TMP4 = R_TMP4, 12
}
+{.mfi
+ // c6 = -33*7/16
+ mov R_TMP6 = 0xc1670
+ // y' ~ sqrt(1-s^2)
+ fma.s1 F_T1 = F_Y, F_1S2, f0
+ // c5 = 63/8
+ mov R_TMP7 = 0x40fc;;
+}
-// Step 1.2:
-/////////////////////////
-// Get V = sqrt(A)
-/////////////////////////
-{ .mfi
- nop.m 999
-(p0) frsqrta.s1 acos_y0,p8 = acos_A
- nop.i 999 ;;
+
+{.mlx
+ // load c8 = -33*13*15/128
+ setf.s F_CS8 = R_TMP5
+ // c4 = -35/8
+ movl R_TMP5 = 0xc08c0000;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_t3 = acos_y1, acos_Hh, f0
- nop.i 999 ;;
+{.mfi
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp
+ // 1-s-(1-s^2)_s
+ fnma.s1 F_DS = F_1S2_S, f1, F_1AS
+ // p9 = 0 if p7 = 1 (p9 = 1 for special cases only)
+ (p7) cmp.ne p9, p0 = r0, r0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_t1 = acos_y0, acos_y0, f0
- nop.i 999 ;;
+{.mlx
+ // load c7 = 33*13/16
+ setf.s F_CS7 = R_TMP4
+ // c3 = 5/2
+ movl R_TMP4 = 0x40200000;;
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 acos_t4 = acos_t3, acos_y1, acos_HALF
- nop.i 999 ;;
+
+{.mlx
+ // load c4 = -35/8
+ setf.s F_CS4 = R_TMP5
+ // c2 = -3/2
+ movl R_TMP5 = 0xbfc00000;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_y2 = acos_t4, acos_y1, acos_y1
- nop.i 999 ;;
+
+{.mfi
+ // load c3 = 5/2
+ setf.s F_CS3 = R_TMP4
+ // x = (1-s^2)_s*y^2-1
+ fms.s1 F_X = F_1S2_S, F_Y2, f1
+ // c6 = -33*7/16
+ shl R_TMP6 = R_TMP6, 12
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_S = acos_B, acos_y2, f0
- nop.i 999
+{.mfi
+ nop.m 0
+ // y^2/2
+ fma.s1 F_Y2_2 = F_Y2, F_05, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_H = acos_y2, acos_HALF, f0
- nop.i 999 ;;
+
+{.mfi
+ // load c6 = -33*7/16
+ setf.s F_CS6 = R_TMP6
+ // eliminate lower bits from y'
+ fand F_T = F_T1, F_ANDMASK
+ // c5 = 63/8
+ shl R_TMP7 = R_TMP7, 16
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_t5 = acos_Hh, acos_y2, f0
- nop.i 999
+
+{.mfb
+ // r3 = load start address to polynomial coefficients
+ ld8 r3 = [r3]
+ // 1-(1-s^2)_s-s^2
+ fma.s1 F_DS = F_AS, F_1AS, F_DS
+ // p9 = 1 if s is a special input (NaN, or |s|> = 1)
+ (p9) br.cond.spnt acosl_SPECIAL_CASES;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_Hh = acos_HALF, acos_A, f0
- nop.i 999 ;;
+{.mmf
+ // get exponent, significand of y' (in single prec.)
+ getf.s R_TMP = F_T1
+ // load c2 = -3/2
+ setf.s F_CS2 = R_TMP5
+ // y*(1-s^2)
+ fma.s1 F_Y1S2 = F_Y, F_1S2, f0;;
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 acos_Dd = acos_S, acos_S, acos_B
- nop.i 999 ;;
+
+
+{.mfi
+ nop.m 0
+ // if s<0, set s = -s
+ (p6) fnma.s1 f8 = f8, f1, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 acos_t2 = acos_t1, acos_Hh, acos_HALF
- nop.i 999 ;;
+
+{.mfi
+ // load c5 = 63/8
+ setf.s F_CS5 = R_TMP7
+ // x = (1-s^2)_s*y^2-1+(1-(1-s^2)_s-s^2)*y^2
+ fma.s1 F_X = F_DS, F_Y2, F_X
+ // for t = 2^k*1.b1 b2.., get 7-k|b1.. b6
+ extr.u R_INDEX = R_TMP, 17, 9;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_U = acos_Dd, acos_H, acos_S
- nop.i 999 ;;
+
+{.mmi
+ // index = (4-exponent)|b1 b2.. b6
+ sub R_INDEX = R_INDEX, R_BIAS
+ nop.m 0
+ // get exponent of y'
+ shr.u R_TMP2 = R_TMP, 23;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_y1 = acos_t2, acos_y0, acos_y0
- nop.i 999 ;;
+{.mmi
+ // load C3
+ ldfe F_C3 = [r3], 16
+ // set p8 = 1 if y'<2^{-4}
+ cmp.gt p8, p0 = 0x7b, R_TMP2
+ // shift R_INDEX by 5
+ shl R_INDEX = R_INDEX, 5;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_2U = acos_U, f1, acos_U
- nop.i 999 ;;
+
+{.mfb
+ // get table index for sqrt(1-t^2)
+ add r2 = r2, R_INDEX
+ // get t = 2^k*1.b1 b2.. b7 1
+ for F_T = F_T, F_ORMASK
+ (p8) br.cond.spnt VERY_LARGE_INPUT;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_t3 = acos_y1, acos_Hh, f0
- nop.i 999
+
+
+{.mmf
+ // load C5
+ ldfe F_C5 = [r3], 16
+ // load 1/(1-t^2)
+ ldfp8 F_INV_1T2, F_SQRT_1T2 = [r2], 16
+ // x = ((1-s^2)*y^2-1)/2
+ fma.s1 F_X = F_X, F_05, f0;;
}
-// Step 1.3:
-// sqrt(A + a) = V + v
-// sqrt(B + b) = U + u
-/////////////////////////
-// Get u
-/////////////////////////
+{.mmf
+ nop.m 0
+ // C7, C9
+ ldfpd F_C7, F_C9 = [r3], 16
+ // set correct exponent for t
+ fmerge.se F_T = F_T1, F_T;;
+}
-// acos_BmUU = B - UU
-// acos_BmUUpb = (B - UU) + b
-{ .mfi
- nop.m 999
-(p0) fnma.s1 acos_BmUU = acos_U, acos_U, acos_B
- nop.i 999 ;;
+
+{.mfi
+ // get address for loading pi
+ add r3 = 48, r3
+ // c9*x+c8
+ fma.s1 F_S89 = F_X, F_CS9, F_CS8
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fmerge.se f9 = acos_U, acos_U
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // x^2
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 acos_t4 = acos_t3, acos_y1, acos_HALF
- nop.i 999 ;;
+
+{.mfi
+ // pi (low, high)
+ ldfpd F_PI2_LO, F_PI2_HI = [r3]
+ // y*(1-s^2)*x
+ fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
+ nop.i 0
}
-// acos_1d2U = frcpa(2U)
-{ .mfi
- nop.m 999
-(p0) frcpa.s1 acos_1d2U,p9 = f1, acos_2U
- nop.i 999
+{.mfi
+ nop.m 0
+ // c7*x+c6
+ fma.s1 F_S67 = F_X, F_CS7, F_CS6
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_BmUUpb = acos_BmUU, f1, acos_Bb
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // 1-x
+ fnma.s1 F_1X = F_X, f1, f1
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_y2 = acos_t4, acos_y1, acos_y1
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // c3*x+c2
+ fma.s1 F_S23 = F_X, F_CS3, F_CS2
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-// acos_Uu = ((B - UU) + b) * frcpa(2U)
-(p0) fma.s1 acos_Uu = acos_BmUUpb, acos_1d2U, f0
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // 1-t^2
+ fnma.s1 F_1T2 = F_T, F_T, f1
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_S = acos_A, acos_y2, f0
- nop.i 999
+{.mfi
+ // load asin(t)_high, asin(t)_low
+ ldfpd F_ATHI, F_ATLO = [r2]
+ // c5*x+c4
+ fma.s1 F_S45 = F_X, F_CS5, F_CS4
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_H = acos_y2, acos_HALF, f0
- nop.i 999 ;;
+
+
+{.mfi
+ nop.m 0
+ // t*s
+ fma.s1 F_TS = F_T, f8, f0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_t5 = acos_Hh, acos_y2, f0
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // 0.5/(1-t^2)
+ fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 acos_Dd = acos_S, acos_S, acos_A
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // z~sqrt(1-t^2), rounded to 24 significant bits
+ fma.s.s1 F_Z = F_SQRT_1T2, F_2M64, f0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_V = acos_Dd, acos_H, acos_S
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // sqrt(1-t^2)
+ fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_2V = acos_V, f1, acos_V
- nop.i 999
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x^2
+ fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
+ nop.i 0
}
-// Step 3
-/////////////////////////
-// Calculate the correction, acos_corr
-/////////////////////////
-// acos_corr = U*v - (V*u)
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_Vu = acos_V,acos_Uu, f0
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // s*t rounded to 24 significant bits
+ fma.s.s1 F_TSS = F_T, f8, f0
+ nop.i 0
}
-/////////////////////////
-// Get v
-/////////////////////////
-// acos_AmVV = A - VV
-// acos_AmVVpa = (A - VV) + a
+{.mfi
+ nop.m 0
+ // c9*x^3+..+c6
+ fma.s1 F_S69 = F_X2, F_S89, F_S67
+ nop.i 0;;
+}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 acos_AmVV = acos_V, acos_V, acos_A
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // ST = (t^2-1+s^2) rounded to 24 significant bits
+ fms.s.s1 F_ST = f8, f8, F_1T2
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fmerge.se f8 = acos_V, acos_V
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // c5*x^3+..+c2
+ fma.s1 F_S25 = F_X2, F_S45, F_S23
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_AmVVpa = acos_AmVV, f1, acos_Aa
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // 0.25/(1-t^2)
+ fma.s1 F_INV1T2_2 = F_05, F_INV_1T2, f0
+ nop.i 0
}
-// acos_1d2V = frcpa(2V)
-{ .mfi
- nop.m 999
-(p0) frcpa.s1 acos_1d2V,p9 = f1, acos_2V
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // t*s-sqrt(1-t^2)*(1-s^2)*y
+ fnma.s1 F_TS = F_Y1S2, F_SQRT_1T2, F_TS
+ nop.i 0;;
}
-// acos_Vv = ((A - VV) + a) * frcpa(2V)
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_Vv = acos_AmVVpa, acos_1d2V, f0
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // z*0.5/(1-t^2)
+ fma.s1 F_ZE = F_INV_1T2, F_SQRT_1T2, f0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_Uv = acos_U,acos_Vv, f0
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // z^2+t^2-1
+ fms.s1 F_DZ0 = F_Z, F_Z, F_1T2
+ nop.i 0;;
}
-.endp acosl#
-ASM_SIZE_DIRECTIVE(acosl#)
+{.mfi
+ nop.m 0
+ // (1-s^2-(1-s^2)_s)*x
+ fma.s1 F_DS2X = F_X, F_DS, f0
+ nop.i 0;;
+}
-.proc __libm_callout
-__libm_callout:
-.prologue
-{ .mfi
- nop.m 0
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+{.mfi
+ nop.m 0
+ // t*s-(t*s)_s
+ fms.s1 F_DTS = F_T, f8, F_TSS
+ nop.i 0
}
-;;
-{ .mfi
- mov GR_SAVE_GP=gp
- nop.f 0
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+{.mfi
+ nop.m 0
+ // c9*x^7+..+c2
+ fma.s1 F_S29 = F_X4, F_S69, F_S25
+ nop.i 0;;
}
-.body
-{ .mfb
- nop.m 999
-(p0) fms.s1 acos_corr = acos_Uv,f1, acos_Vu
-(p0) br.call.sptk.many b0=__libm_atan2_reg# ;;
+
+{.mfi
+ nop.m 0
+ // y*z
+ fma.s1 F_YZ = F_Z, F_Y, f0
+ nop.i 0
}
+{.mfi
+ nop.m 0
+ // t^2
+ fma.s1 F_T2 = F_T, F_T, f0
+ nop.i 0;;
+}
-// p6 ==> X is negative
-// p7 ==> x is positive
-// We know that |X| >= 1/4
-{ .mfi
-(p0) mov gp = GR_SAVE_GP
-(p0) fcmp.lt.unc p6,p7 = acos_X , f0
-(p0) mov b0 = GR_SAVE_B0 ;;
+{.mfi
+ nop.m 0
+ // 1-t^2+ST
+ fma.s1 F_1T2_ST = F_ST, f1, F_1T2
+ nop.i 0;;
}
-// acos_2_Z_hi = 2 * acos_Z_hi
-// acos_s_lo_Z_lo = s_lo * Z_lo
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_2_Z_hi = acos_Z_hi, f1, acos_Z_hi
-(p0) mov ar.pfs = GR_SAVE_PFS
+{.mfi
+ nop.m 0
+ // y*(1-s^2)(1-x)
+ fma.s1 F_Y1S2_1X = F_Y1S2, F_1X, f0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_s_lo_Z_lo = acos_s_lo, acos_Z_lo, f0
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // dz ~ sqrt(1-t^2)-z
+ fma.s1 F_DZ = F_DZ0, F_ZE, f0
+ nop.i 0;;
}
-// 2 is a constant needed later
-{ .mfi
- nop.m 999
-(p0) fma.s1 acos_2 = f1,f1,f1
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // -1+correction for sqrt(1-t^2)-z
+ fnma.s1 F_CORR = F_INV1T2_2, F_DZ0, f0
+ nop.i 0;;
}
-// X >= 1/4
-// acos_result_lo = 2(s_lo * Z_lo) - corr
-// f8 = (2*Z_hi) + (2(s_lo * Z_lo) - corr)
-{ .mfi
- nop.m 999
-(p7) fma.s1 acos_result_lo = acos_s_lo_Z_lo, acos_2, acos_corr
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // (PS29*x^2+x)*y*(1-s^2)
+ fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p7) fma.s0 f8 = acos_2_Z_hi, f1, acos_result_lo
- nop.i 999
+{.mfi
+ nop.m 0
+ // z*y*(1-s^2)_s
+ fma.s1 F_ZY1S2S = F_YZ, F_1S2_S, f0
+ nop.i 0
}
-// acos_result_lo = (pi_lo - corr)
-// acos_result_lo = (pi_lo - corr) + acos_Ww
-{ .mfi
- nop.m 999
-(p6) fms.s1 acos_result_lo = acos_pi_lo, f1, acos_corr
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // s^2-(1-t^2+ST)
+ fms.s1 F_1T2_ST = f8, f8, F_1T2_ST
+ nop.i 0;;
}
-// X <= -1/4
-// acos_W = pi_hi - 2 * Z_hi
-{ .mfi
- nop.m 999
-(p6) fnma.s1 acos_W = acos_2, acos_Z_hi, acos_pi_hi
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x
+ fma.s1 F_DTS = F_YZ, F_DS2X, F_DTS
+ nop.i 0
}
-// acos_Ww = pi_hi - W
-// acos_Ww = (pi_hi - W) + (2 * Z_hi)
-{ .mfi
- nop.m 999
-(p6) fms.s1 acos_Ww = acos_pi_hi, f1, acos_W
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ, F_Y1S2_1X, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p6) fms.s1 acos_Ww = acos_Ww, f1, acos_2_Z_hi
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // R = t*s-sqrt(1-t^2)*(1-s^2)*y+sqrt(1-t^2)*(1-s^2)*y*PS19
+ // (used for polynomial evaluation)
+ fma.s1 F_R = F_S19, F_SQRT_1T2, F_TS
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p6) fma.s1 acos_result_lo = acos_result_lo, f1, acos_Ww
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2)*y*(1-s^2)
+ fma.s1 F_S29 = F_Y1S2X2, F_S29, f0
+ nop.i 0
}
-// acos_Z_lo = ((pi_lo - corr) + acos_Ww) - 2 * (s_lo * Z_lo)
-{ .mfi
- nop.m 999
-(p6) fnma.s1 acos_Z_lo = acos_s_lo_Z_lo, acos_2, acos_result_lo
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // apply correction to dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ_TERM, F_CORR, F_DZ_TERM
+ nop.i 0;;
}
-{ .mfb
- nop.m 999
-(p6) fma.s0 f8 = acos_W, f1, acos_Z_lo
-(p0) br.ret.sptk b0 ;;
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
}
-.endp __libm_callout
-ASM_SIZE_DIRECTIVE(__libm_callout)
-.proc SPECIAL
-SPECIAL:
-L(ACOS_NAN):
-{ .mfb
- nop.m 999
-(p0) fma.s0 f8 = f8,f1,f0
-(p0) br.ret.sptk b0 ;;
+
+{.mfi
+ nop.m 0
+ // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x+dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ_TERM, f1, F_DTS
+ nop.i 0;;
}
-L(ACOS_ERROR_RETURN):
-// Save ar.pfs, b0, and gp; restore on exit
-// qnan snan inf norm unorm 0 -+
-// 1 1 0 0 0 0 11 = 0xc3
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
-// Coming in as X = +- 1
-// What should we return?
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
-// If X is 1, return (sign of X)pi/2
+{.mfi
+ nop.m 0
+ // asin(t)_low-(pi)_low (if s<0)
+ (p6) fms.s1 F_ATLO = F_ATLO, f1, F_PI2_LO
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
-{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc p6,p7 = acos_ABS_NORM_f8,f1
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p6) fcmp.lt.unc p8,p9 = f8,f0
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // (t*s)_s-t^2*y*z
+ fnma.s1 F_TSS = F_T2, F_YZ, F_TSS
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p8) fma.s0 f8 = acos_pi_hi, f1, acos_pi_lo
- nop.i 999
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)
+ fma.s1 F_DZ_TERM = F_YZ, F_1T2_ST, F_DZ_TERM
+ nop.i 0;;
}
-{ .mfb
- nop.m 999
-(p9) fmerge.s f8 = f8,f0
-(p6) br.ret.spnt b0 ;;
+
+{.mfi
+ nop.m 0
+ // (pi)_hi-asin(t)_hi (if s<0)
+ (p6) fms.s1 F_ATHI = F_PI2_HI, f1, F_ATHI
+ nop.i 0
}
-// If X is a NAN, leave
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p12,p0 = f8, 0xc3
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
}
-{ .mfb
- nop.m 999
-(p12) fma.s0 f8 = f8,f1,f0
-(p12) br.ret.spnt b0 ;;
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)+
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29
+ fma.s1 F_DZ_TERM = F_SQRT_1T2, F_S29, F_DZ_TERM
+ nop.i 0;;
}
-{ .mfi
-(p0) mov GR_Parameter_TAG = 57
-(p0) frcpa f10, p6 = f0, f0
-nop.i 999
-};;
-.endp SPECIAL
-ASM_SIZE_DIRECTIVE(SPECIAL)
+{.mfi
+ nop.m 0
+ // (t*s)_s-t^2*y*z+z*y*ST
+ fma.s1 F_TSS = F_YZ, F_ST, F_TSS
+ nop.i 0
+}
-.proc __libm_error_region
-__libm_error_region:
+{.mfi
+ nop.m 0
+ // -asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fms.s1 F_P39 = F_P39, F_R3, F_ATLO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_DZ_TERM = F_P39, f1, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_DZ_TERM = F_ZY1S2S, F_X, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6) +
+ // + (t*s)_s-t^2*y*z+z*y*ST
+ fma.s1 F_DZ_TERM = F_TSS, f1, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+.pred.rel "mutex", p6, p11
+{.mfi
+ nop.m 0
+ // result: add high part of table value
+ // s>0 in this case
+ (p11) fnma.s0 f8 = F_DZ_TERM, f1, F_ATHI
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // result: add high part of pi-table value
+ // if s<0
+ (p6) fma.s0 f8 = F_DZ_TERM, f1, F_ATHI
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+
+SMALL_S:
+
+ // use 15-term polynomial approximation
+
+{.mmi
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp;;
+ // load start address for coefficients
+ ld8 r3 = [r3]
+ mov R_TMP = 0x3fbf;;
+}
+
+
+{.mmi
+ add r2 = 64, r3
+ ldfe F_C3 = [r3], 16
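+ // p7 = 1 if |s|<2^{-64} (exponent of s<bias-64); NOTE(review): R_TMP = 0x3fbf here, while R_EXP0 is compared against 0xffff-based values above -- confirm 0xffbf was not intended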
+ cmp.lt p7, p0 = R_EXP0, R_TMP;;
+}
+
+{.mmf
+ ldfe F_C5 = [r3], 16
+ ldfpd F_C11, F_C13 = [r2], 16
+ nop.f 0;;
+}
+
+{.mmf
+ ldfpd F_C7, F_C9 = [r3], 16
+ ldfpd F_C15, F_C17 = [r2]
+ nop.f 0;;
+}
+
+
+
+{.mfb
+ // load pi/2
+ ldfpd F_PI2_LO, F_PI2_HI = [r3]
+ // s^2
+ fma.s1 F_R2 = f8, f8, f0
+ // |s|<2^{-64}
+ (p7) br.cond.spnt RETURN_PI2;;
+}
+
+
+{.mfi
+ nop.m 0
+ // s^3
+ fma.s1 F_R3 = f8, F_R2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // s^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*s^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c11+c13*s^2
+ fma.s1 F_P1113 = F_C13, F_R2, F_C11
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*s^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c15+c17*s^2
+ fma.s1 F_P1517 = F_C17, F_R2, F_C15
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // (pi/2)_high-s_high
+ fnma.s1 F_T = f8, f1, F_PI2_HI
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // s^8
+ fma.s1 F_R8 = F_R4, F_R4, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*s^2+c7*s^4+c9*s^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c11+c13*s^2+c15*s^4+c17*s^6
+ fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // -s_high
+ fms.s1 F_S = F_T, f1, F_PI2_HI
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // c3+..+c17*s^14
+ fma.s1 F_P317 = F_R8, F_P1117, F_P39
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // s_low
+ fma.s1 F_DS = f8, f1, F_S
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // (pi/2)_low-s^3*(c3+..+c17*s^14)
+ fnma.s0 F_P317 = F_P317, F_R3, F_PI2_LO
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // (pi/2)_low-s_low-s^3*(c3+..+c17*s^14)
+ fms.s1 F_P317 = F_P317, f1, F_DS
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // result: pi/2-s-c3*s^3-..-c17*s^17
+ fma.s0 f8 = F_T, f1, F_P317
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+RETURN_PI2:
+
+{.mfi
+ nop.m 0
+ // (pi/2)_low-s
+ fms.s0 F_PI2_LO = F_PI2_LO, f1, f8
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // (pi/2)-s
+ fma.s0 f8 = F_PI2_HI, f1, F_PI2_LO
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+VERY_LARGE_INPUT:
+
+
+{.mmf
+ // pointer to pi_low, pi_high
+ add r2 = 80, r3
+ // load C5
+ ldfe F_C5 = [r3], 16
+ // x = ((1-(s^2)_s)*y^2-1)/2-(s^2-(s^2)_s)*y^2/2
+ fma.s1 F_X = F_X, F_05, f0;;
+}
+
+.pred.rel "mutex", p6, p11
+{.mmf
+ // load pi (low, high), if s<0
+ (p6) ldfpd F_PI2_LO, F_PI2_HI = [r2]
+ // C7, C9
+ ldfpd F_C7, F_C9 = [r3], 16
+ // if s>0, set F_PI2_HI=0
+ (p11) fma.s1 F_PI2_HI = f0, f0, f0;;
+}
+
+{.mfi
+ nop.m 0
+ (p11) fma.s1 F_PI2_LO = f0, f0, f0
+ nop.i 0;;
+}
+
+{.mfi
+ // adjust address for C_11
+ add r3 = 16, r3
+ // c9*x+c8
+ fma.s1 F_S89 = F_X, F_CS9, F_CS8
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^2
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x
+ fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
+ nop.i 0
+}
+
+{.mfi
+ // C11, C13
+ ldfpd F_C11, F_C13 = [r3], 16
+ // c7*x+c6
+ fma.s1 F_S67 = F_X, F_CS7, F_CS6
+ nop.i 0;;
+}
+
+
+{.mfi
+ // C15, C17
+ ldfpd F_C15, F_C17 = [r3], 16
+ // c3*x+c2
+ fma.s1 F_S23 = F_X, F_CS3, F_CS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c5*x+c4
+ fma.s1 F_S45 = F_X, F_CS5, F_CS4
+ nop.i 0;;
+}
+
+
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x^2
+ fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c9*x^3+..+c6
+ fma.s1 F_S69 = F_X2, F_S89, F_S67
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c5*x^3+..+c2
+ fma.s1 F_S25 = F_X2, F_S45, F_S23
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // (pi)_high-y*(1-s^2)_s
+ fnma.s1 F_HI = F_Y, F_1S2_S, F_PI2_HI
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c9*x^7+..+c2
+ fma.s1 F_S29 = F_X4, F_S69, F_S25
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // -(y*(1-s^2)_s)_high
+ fms.s1 F_1S2_HI = F_HI, f1, F_PI2_HI
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2+x)*y*(1-s^2)
+ fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)_s-(y*(1-s^2))_high
+ fma.s1 F_DS2 = F_Y, F_1S2_S, F_1S2_HI
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R ~ sqrt(1-s^2)
+ // (used for polynomial evaluation)
+ fnma.s1 F_R = F_S19, f1, F_Y1S2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)-(y*(1-s^2))_high
+ fma.s1 F_DS2 = F_Y, F_DS, F_DS2
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // (pi)_low+(PS29*x^2)*y*(1-s^2)
+ fma.s1 F_S29 = F_Y1S2X2, F_S29, F_PI2_LO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if s<0
+ // (pi)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)
+ fms.s1 F_S29 = F_S29, f1, F_DS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c11+c13*R^2
+ fma.s1 F_P1113 = F_C13, F_R2, F_C11
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c15+c17*R^2
+ fma.s1 F_P1517 = F_C17, F_R2, F_C15
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)+y*(1-s^2)*x
+ fma.s1 F_S29 = F_Y1S2, F_X, F_S29
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c11+c13*R^2+c15*R^4+c17*R^6
+ fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R^8
+ fma.s1 F_R8 = F_R4, F_R4, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6+..+c17*R^14
+ fma.s1 F_P317 = F_P1117, F_R8, F_P39
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi)_low-(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
+ // -(y*(1-s^2))_high)+y*(1-s^2)*x - P3, 17
+ fnma.s1 F_S29 = F_P317, F_R3, F_S29
+ nop.i 0;;
+}
+
+.pred.rel "mutex", p6, p11
+{.mfi
+ nop.m 0
+ // Result (if s<0):
+ // (pi)_low-(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
+ // -(y*(1-s^2))_high)+y*(1-s^2)*x - P3, 17
+ // +(pi)_high-(y*(1-s^2))_high
+ (p6) fma.s0 f8 = F_S29, f1, F_HI
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // Result (if s>0):
+ // (PS29*x^2)*y*(1-s^2)-
+ // -y*(1-s^2)*x + P3, 17
+ // +(y*(1-s^2))
+ (p11) fms.s0 f8 = F_Y, F_1S2_S, F_S29
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+
+acosl_SPECIAL_CASES:
+
+{.mfi
+ alloc r32 = ar.pfs, 1, 4, 4, 0
+ // check if the input is a NaN, or unsupported format
+ // (i.e. not infinity or normal/denormal)
+ fclass.nm p7, p8 = f8, 0x3f
+ // pointer to pi/2
+ add r3 = 96, r3;;
+}
+
+
+{.mfi
+ // load pi/2
+ ldfpd F_PI2_HI, F_PI2_LO = [r3]
+ // get |s|
+ fmerge.s F_S = f0, f8
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // if NaN, quietize it, and return
+ (p7) fma.s0 f8 = f8, f1, f0
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // |s| = 1 ?
+ fcmp.eq.s0 p9, p10 = F_S, f1
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // load FR_X
+ fma.s1 FR_X = f8, f1, f0
+ // load error tag
+ mov GR_Parameter_TAG = 57;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if s = 1, result is 0
+ (p9) fma.s0 f8 = f0, f0, f0
+ // set p6=0 for |s|>1
+ (p10) cmp.ne p6, p0 = r0, r0;;
+}
+
+
+{.mfb
+ nop.m 0
+ // if s = -1, result is pi
+ (p6) fma.s0 f8 = F_PI2_HI, f1, F_PI2_LO
+ // return if |s| = 1
+ (p9) br.ret.sptk b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // get Infinity
+ frcpa.s1 FR_RESULT, p0 = f1, f0
+ nop.i 0;;
+}
+
+
+{.mfb
+ nop.m 0
+ // return QNaN indefinite (0*Infinity)
+ fma.s0 FR_RESULT = f0, FR_RESULT, f0
+ nop.b 0;;
+}
+
+
+GLOBAL_LIBM_END(acosl)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)
{ .mfi
@@ -1068,12 +2510,12 @@ __libm_error_region:
.body
// (3)
{ .mib
- stfe [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
{ .mib
- stfe [GR_Parameter_Y] = f10 // Store Parameter 3 on stack
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
@@ -1097,11 +2539,13 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
-.type __libm_atan2_reg#,@function
-.global __libm_atan2_reg#
+
+
+
+
+
diff --git a/sysdeps/ia64/fpu/e_asin.S b/sysdeps/ia64/fpu/e_asin.S
index bb4c242fb2..398079eae4 100644
--- a/sysdeps/ia64/fpu/e_asin.S
+++ b/sysdeps/ia64/fpu/e_asin.S
@@ -1,10 +1,10 @@
.file "asin.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003 Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,818 +35,776 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
-// 2/02/00 Initial version
-// 8/17/00 New and much faster algorithm.
-// 8/31/00 Avoided bank conflicts on loads, shortened |x|=1 path,
+// 02/02/00 Initial version
+// 08/17/00 New and much faster algorithm.
+// 08/31/00 Avoided bank conflicts on loads, shortened |x|=1 path,
// fixed mfb split issue stalls.
// 12/19/00 Fixed small arg cases to force inexact, or inexact and underflow.
+// 08/02/02 New and much faster algorithm II
+// 02/06/03 Reordered header: .section, .global, .proc, .align
// Description
//=========================================
-// The asin function computes the principle value of the arc sine of x.
+// The asin function computes the principal value of the arc sine of x.
// asin(0) returns 0, asin(1) returns pi/2, asin(-1) returns -pi/2.
// A doman error occurs for arguments not in the range [-1,+1].
-
+//
// The asin function returns the arc sine in the range [-pi/2, +pi/2] radians.
+//
+// There are 8 paths:
+// 1. x = +/-0.0
+// Return asin(x) = +/-0.0
+//
+// 2. 0.0 < |x| < 0.625
+// Return asin(x) = x + x^3 *PolA(x^2)
+// where PolA(x^2) = A3 + A5*x^2 + A7*x^4 +...+ A35*x^32
+//
+// 3. 0.625 <=|x| < 1.0
+// Return asin(x) = sign(x) * ( Pi/2 - sqrt(R) * PolB(R))
+// Where R = 1 - |x|,
+// PolB(R) = B0 + B1*R + B2*R^2 +...+B12*R^12
+//
+// sqrt(R) is approximated using the following sequence:
+// y0 = (1 + eps)/sqrt(R) - initial approximation by frsqrta,
+// |eps| < 2^(-8)
+// Then 3 iterations are used to refine the result:
+// H0 = 0.5*y0
+// S0 = R*y0
+//
+// d0 = 0.5 - H0*S0
+// H1 = H0 + d0*H0
+// S1 = S0 + d0*S0
+//
+// d1 = 0.5 - H1*S1
+// H2 = H1 + d1*H1
+// S2 = S1 + d1*S1
+//
+// d2 = 0.5 - H2*S2
+// S3 = S2 + d2*S2
+//
+// S3 approximates sqrt(R) with enough accuracy for this algorithm
+//
+// So, the result should be reconstructed as follows:
+// asin(x) = sign(x) * (Pi/2 - S3*PolB(R))
+//
+// But for optimization purposes the reconstruction step is slightly
+// changed:
+// asin(x) = sign(x)*(Pi/2 - PolB(R)*S2) - sign(x)*d2*S2*PolB(R)
+//
+// 4. |x| = 1.0
+// Return asin(x) = sign(x)*Pi/2
+//
+// 5. 1.0 < |x| <= +INF
+// A domain error occurs for arguments not in the range [-1,+1]
+//
+// 6. x = [S,Q]NaN
+// Return asin(x) = QNaN
+//
+// 7. x is denormal
+// Return asin(x) = x + x^3,
+//
+// 8. x is unnormal
+// Normalize input in f8 and return to the very beginning of the function
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6, f7, f9 -> f15, f32 -> f63
-#include "libm_support.h"
+// General registers used:
+// r3, r21 -> r31, r32 -> r38
+
+// Predicate registers used:
+// p0, p6 -> p14
//
// Assembly macros
//=========================================
-
-
-// predicate registers
-//asin_pred_LEsqrt2by2 = p7
-//asin_pred_GTsqrt2by2 = p8
-
-// integer registers
-ASIN_Addr1 = r33
-ASIN_Addr2 = r34
-ASIN_FFFE = r35
-ASIN_lnorm_sig = r36
-ASIN_snorm_exp = r37
-
-GR_SAVE_B0 = r36
-GR_SAVE_PFS = r37
-GR_SAVE_GP = r38
-
-GR_Parameter_X = r39
-GR_Parameter_Y = r40
-GR_Parameter_RESULT = r41
-GR_Parameter_Tag = r42
-
-// floating point registers
-asin_coeff_P1 = f32
-asin_coeff_P2 = f33
-asin_coeff_P3 = f34
-asin_coeff_P4 = f35
-
-asin_coeff_P5 = f36
-asin_coeff_P6 = f37
-asin_coeff_P7 = f38
-asin_coeff_P8 = f39
-asin_coeff_P9 = f40
-
-asin_coeff_P10 = f41
-asin_coeff_P11 = f42
-asin_coeff_P12 = f43
-asin_coeff_P13 = f44
-asin_coeff_P14 = f45
-
-asin_coeff_P15 = f46
-asin_coeff_P16 = f47
-asin_coeff_P17 = f48
-asin_coeff_P18 = f49
-asin_coeff_P19 = f50
-
-asin_coeff_P20 = f51
-asin_coeff_P21 = f52
-asin_const_sqrt2by2 = f53
-asin_const_piby2 = f54
-asin_abs_x = f55
-
-asin_tx = f56
-asin_tx2 = f57
-asin_tx3 = f58
-asin_tx4 = f59
-asin_tx8 = f60
-
-asin_tx11 = f61
-asin_1poly_p8 = f62
-asin_1poly_p19 = f63
-asin_1poly_p4 = f64
-asin_1poly_p15 = f65
-
-asin_1poly_p6 = f66
-asin_1poly_p17 = f67
-asin_1poly_p0 = f68
-asin_1poly_p11 = f69
-asin_1poly_p2 = f70
-
-asin_1poly_p13 = f71
-asin_series_tx = f72
-asin_t = f73
-asin_t2 = f74
-asin_t3 = f75
-
-asin_t4 = f76
-asin_t8 = f77
-asin_t11 = f78
-asin_poly_p8 = f79
-asin_poly_p19 = f80
-
-asin_poly_p4 = f81
-asin_poly_p15 = f82
-asin_poly_p6 = f83
-asin_poly_p17 = f84
-asin_poly_p0 = f85
-
-asin_poly_p11 = f86
-asin_poly_p2 = f87
-asin_poly_p13 = f88
-asin_series_t = f89
-asin_1by2 = f90
-
-asin_3by2 = f91
-asin_5by2 = f92
-asin_11by4 = f93
-asin_35by8 = f94
-asin_63by8 = f95
-
-asin_231by16 = f96
-asin_y0 = f97
-asin_H0 = f98
-asin_S0 = f99
-asin_d = f100
-
-asin_l1 = f101
-asin_d2 = f102
-asin_T0 = f103
-asin_d1 = f104
-asin_e0 = f105
-
-asin_l2 = f106
-asin_d3 = f107
-asin_T3 = f108
-asin_S1 = f109
-asin_e1 = f110
-
-asin_z = f111
-answer2 = f112
-asin_sgn_x = f113
-asin_429by16 = f114
-asin_18by4 = f115
-
-asin_3by4 = f116
-asin_l3 = f117
-asin_T6 = f118
-asin_eps_exp = f119
-asin_eps_sig = f120
-asin_eps = f120
-
+// integer registers used
+// scratch
+rTblAddr = r3
+
+rPiBy2Ptr = r21
+rTmpPtr3 = r22
+rDenoBound = r23
+rOne = r24
+rAbsXBits = r25
+rHalf = r26
+r0625 = r27
+rSign = r28
+rXBits = r29
+rTmpPtr2 = r30
+rTmpPtr1 = r31
+
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+
+// scratch
+fXSqr = f6
+fXCube = f7
+fXQuadr = f9
+f1pX = f10
+f1mX = f11
+f1pXRcp = f12
+f1mXRcp = f13
+fH = f14
+fS = f15
+// stacked
+fA3 = f32
+fB1 = f32
+fA5 = f33
+fB2 = f33
+fA7 = f34
+fPiBy2 = f34
+fA9 = f35
+fA11 = f36
+fB10 = f35
+fB11 = f36
+fA13 = f37
+fA15 = f38
+fB4 = f37
+fB5 = f38
+fA17 = f39
+fA19 = f40
+fB6 = f39
+fB7 = f40
+fA21 = f41
+fA23 = f42
+fB3 = f41
+fB8 = f42
+fA25 = f43
+fA27 = f44
+fB9 = f43
+fB12 = f44
+fA29 = f45
+fA31 = f46
+fA33 = f47
+fA35 = f48
+fBaseP = f49
+fB0 = f50
+fSignedS = f51
+fD = f52
+fHalf = f53
+fR = f54
+fCloseTo1Pol = f55
+fSignX = f56
+fDenoBound = f57
+fNormX = f58
+fX8 = f59
+fRSqr = f60
+fRQuadr = f61
+fR8 = f62
+fX16 = f63
// Data tables
//==============================================================
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
+RODATA
.align 16
-
-asin_coeff_1_table:
-ASM_TYPE_DIRECTIVE(asin_coeff_1_table,@object)
-data8 0xE4E7E0A423A21249 , 0x00003FF8 //P7
-data8 0xC2F7EE0200FCE2A5 , 0x0000C003 //P18
-data8 0xB745D7F6C65C20E0 , 0x00003FF9 //P5
-data8 0xF75E381A323D4D94 , 0x0000C002 //P16
-data8 0x8959C2629C1024C0 , 0x0000C002 //P20
-data8 0xAFF68E7D241292C5 , 0x00003FF8 //P9
-data8 0xB6DB6DB7260AC30D , 0x00003FFA //P3
-data8 0xD0417CE2B41CB7BF , 0x0000C000 //P14
-data8 0x81D570FEA724E3E4 , 0x0000BFFD //P12
-data8 0xAAAAAAAAAAAAC277 , 0x00003FFC //P1
-data8 0xF534912FF3E7B76F , 0x00003FFF //P21
-data8 0xc90fdaa22168c235 , 0x00003fff // pi/2
-data8 0x0000000000000000 , 0x00000000 // pad to avoid data bank conflict
-ASM_SIZE_DIRECTIVE(asin_coeff_1_table)
-
-
-asin_coeff_2_table:
-ASM_TYPE_DIRECTIVE(asin_coeff_2_table,@object)
-data8 0x8E26AF5F29B39A2A , 0x00003FF9 //P6
-data8 0xB4F118A4B1015470 , 0x00004003 //P17
-data8 0xF8E38E10C25990E0 , 0x00003FF9 //P4
-data8 0x80F50489AEF1CAC6 , 0x00004002 //P15
-data8 0x92728015172CFE1C , 0x00004003 //P19
-data8 0xBBC3D831D4595971 , 0x00003FF8 //P8
-data8 0x999999999952A5C3 , 0x00003FFB //P2
-data8 0x855576BE6F0975EC , 0x00003FFF //P13
-data8 0xF12420E778077D89 , 0x00003FFA //P11
-data8 0xB6590FF4D23DE003 , 0x00003FF3 //P10
-data8 0xb504f333f9de6484 , 0x00003ffe // sqrt(2)/2
-ASM_SIZE_DIRECTIVE(asin_coeff_2_table)
+LOCAL_OBJECT_START(asin_base_range_table)
+// Ai: Polynomial coefficients for the asin(x), |x| < .625000
+// Bi: Polynomial coefficients for the asin(x), |x| > .625000
+data8 0xBFDAAB56C01AE468 //A29
+data8 0x3FE1C470B76A5B2B //A31
+data8 0xBFDC5FF82A0C4205 //A33
+data8 0x3FC71FD88BFE93F0 //A35
+data8 0xB504F333F9DE6487, 0x00003FFF //B0
+data8 0xAAAAAAAAAAAAFC18, 0x00003FFC //A3
+data8 0x3F9F1C71BC4A7823 //A9
+data8 0x3F96E8BBAAB216B2 //A11
+data8 0x3F91C4CA1F9F8A98 //A13
+data8 0x3F8C9DDCEDEBE7A6 //A15
+data8 0x3F877784442B1516 //A17
+data8 0x3F859C0491802BA2 //A19
+data8 0x9999999998C88B8F, 0x00003FFB //A5
+data8 0x3F6BD7A9A660BF5E //A21
+data8 0x3F9FC1659340419D //A23
+data8 0xB6DB6DB798149BDF, 0x00003FFA //A7
+data8 0xBFB3EF18964D3ED3 //A25
+data8 0x3FCD285315542CF2 //A27
+data8 0xF15BEEEFF7D2966A, 0x00003FFB //B1
+data8 0x3EF0DDA376D10FB3 //B10
+data8 0xBEB83CAFE05EBAC9 //B11
+data8 0x3F65FFB67B513644 //B4
+data8 0x3F5032FBB86A4501 //B5
+data8 0x3F392162276C7CBA //B6
+data8 0x3F2435949FD98BDF //B7
+data8 0xD93923D7FA08341C, 0x00003FF9 //B2
+data8 0x3F802995B6D90BDB //B3
+data8 0x3F10DF86B341A63F //B8
+data8 0xC90FDAA22168C235, 0x00003FFF // Pi/2
+data8 0x3EFA3EBD6B0ECB9D //B9
+data8 0x3EDE18BA080E9098 //B12
+LOCAL_OBJECT_END(asin_base_range_table)
-
-.align 32
-.global asin
-
.section .text
-.proc asin
-.align 32
-
-
-asin:
-
-{ .mfi
- alloc r32 = ar.pfs,1,6,4,0
- fma.s1 asin_tx = f8,f8,f0
- addl ASIN_Addr2 = @ltoff(asin_coeff_2_table),gp
-}
-{ .mfi
- mov ASIN_FFFE = 0xFFFE
- fnma.s1 asin_t = f8,f8,f1
- addl ASIN_Addr1 = @ltoff(asin_coeff_1_table),gp
+GLOBAL_LIBM_ENTRY(asin)
+asin_unnormal_back:
+{ .mfi
+ getf.d rXBits = f8 // grab bits of input value
+ // set p12 = 1 if x is a NaN, denormal, or zero
+ fclass.m p12, p0 = f8, 0xcf
+ adds rSign = 1, r0
+}
+{ .mfi
+ addl rTblAddr = @ltoff(asin_base_range_table),gp
+ // 1 - x = 1 - |x| for positive x
+ fms.s1 f1mX = f1, f1, f8
+ addl rHalf = 0xFFFE, r0 // exponent of 1/2
}
;;
-
-
-{ .mfi
- setf.exp asin_1by2 = ASIN_FFFE
- fmerge.s asin_abs_x = f1,f8
- nop.i 999 ;;
-}
-
-{ .mmf
- ld8 ASIN_Addr1 = [ASIN_Addr1]
- ld8 ASIN_Addr2 = [ASIN_Addr2]
- fmerge.s asin_sgn_x = f8,f1 ;;
-}
-
-
-{ .mfi
- ldfe asin_coeff_P7 = [ASIN_Addr1],16
- fma.s1 asin_tx2 = asin_tx,asin_tx,f0
- nop.i 999
-}
-{ .mfi
- ldfe asin_coeff_P6 = [ASIN_Addr2],16
- fma.s1 asin_t2 = asin_t,asin_t,f0
- nop.i 999;;
+{ .mfi
+ addl r0625 = 0x3FE4, r0 // high 16 bits of 0.625
+ // set p8 = 1 if x < 0
+ fcmp.lt.s1 p8, p9 = f8, f0
+ shl rSign = rSign, 63 // sign bit
}
-
-
-{ .mmf
- ldfe asin_coeff_P18 = [ASIN_Addr1],16
- ldfe asin_coeff_P17 = [ASIN_Addr2],16
- fclass.m.unc p8,p0 = f8, 0xc3 //@qnan |@snan
-}
-;;
-
-{ .mmf
- ldfe asin_coeff_P5 = [ASIN_Addr1],16
- ldfe asin_coeff_P4 = [ASIN_Addr2],16
- frsqrta.s1 asin_y0,p0 = asin_t
-}
-;;
-
-{ .mfi
- ldfe asin_coeff_P16 = [ASIN_Addr1],16
- fcmp.gt.s1 p9,p0 = asin_abs_x,f1
- nop.i 999
-}
-{ .mfb
- ldfe asin_coeff_P15 = [ASIN_Addr2],16
-(p8) fma.d f8 = f8,f1,f0
-(p8) br.ret.spnt b0
+{ .mfi
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ // 1 + x = 1 - |x| for negative x
+ fma.s1 f1pX = f1, f1, f8
+ adds rOne = 0x3FF, r0
}
;;
-
-
-{ .mmf
- ldfe asin_coeff_P20 = [ASIN_Addr1],16
- ldfe asin_coeff_P19 = [ASIN_Addr2],16
- fclass.m.unc p8,p0 = f8, 0x07 //@zero
-}
-;;
-
-
-{ .mfi
- ldfe asin_coeff_P9 = [ASIN_Addr1],16
- fma.s1 asin_t4 = asin_t2,asin_t2,f0
-(p9) mov GR_Parameter_Tag = 61
-}
-{ .mfi
- ldfe asin_coeff_P8 = [ASIN_Addr2],16
- fma.s1 asin_3by2 = asin_1by2,f1,f1
- nop.i 999;;
+{ .mfi
+ andcm rAbsXBits = rXBits, rSign // bits of |x|
+ fmerge.s fSignX = f8, f1 // signum(x)
+ shl r0625 = r0625, 48 // bits of DP representation of 0.625
}
-
-
-{ .mfi
- ldfe asin_coeff_P2 = [ASIN_Addr2],16
- fma.s1 asin_tx4 = asin_tx2,asin_tx2,f0
- nop.i 999
-}
-{ .mfb
- ldfe asin_coeff_P3 = [ASIN_Addr1],16
- fma.s1 asin_t3 = asin_t,asin_t2,f0
-(p8) br.ret.spnt b0
+{ .mfb
+ setf.exp fHalf = rHalf // load 1/2 to FP reg (rHalf = exponent of 1/2)
+ fma.s1 fXSqr = f8, f8, f0 // x^2
+ // branch on special path if x is a NaN, denormal, or zero
+(p12) br.cond.spnt asin_special
}
;;
-
-
-{ .mfi
- ldfe asin_coeff_P13 = [ASIN_Addr2],16
- fma.s1 asin_H0 = asin_y0,asin_1by2,f0
- nop.i 999
-}
-{ .mfb
- ldfe asin_coeff_P14 = [ASIN_Addr1],16
- fma.s1 asin_S0 = asin_y0,asin_t,f0
-(p9) br.cond.spnt __libm_error_region
+{ .mfi
+ adds rPiBy2Ptr = 272, rTblAddr
+ nop.f 0
+ shl rOne = rOne, 52 // bits of 1.0
+}
+{ .mfi
+ adds rTmpPtr1 = 16, rTblAddr
+ nop.f 0
+ // set p6 = 1 if |x| < 0.625
+ cmp.lt p6, p7 = rAbsXBits, r0625
}
;;
-
-
-{ .mfi
- ldfe asin_coeff_P11 = [ASIN_Addr2],16
- fcmp.eq.s1 p6,p0 = asin_abs_x,f1
- nop.i 999
-}
-{ .mfi
- ldfe asin_coeff_P12 = [ASIN_Addr1],16
- fma.s1 asin_tx3 = asin_tx,asin_tx2,f0
- nop.i 999;;
+{ .mfi
+ ldfpd fA29, fA31 = [rTblAddr] // A29, fA31
+ // 1 - x = 1 - |x| for positive x
+(p9) fms.s1 fR = f1, f1, f8
+ // point to coefficient of "near 1" polynomial
+(p7) adds rTmpPtr2 = 176, rTblAddr
}
-
-
-{ .mfi
- ldfe asin_coeff_P10 = [ASIN_Addr2],16
- fma.s1 asin_1poly_p6 = asin_tx,asin_coeff_P7,asin_coeff_P6
- nop.i 999
-}
-{ .mfi
- ldfe asin_coeff_P1 = [ASIN_Addr1],16
- fma.s1 asin_poly_p6 = asin_t,asin_coeff_P7,asin_coeff_P6
- nop.i 999;;
+{ .mfi
+ ldfpd fA33, fA35 = [rTmpPtr1], 16 // A33, fA35
+ // 1 + x = 1 - |x| for negative x
+(p8) fma.s1 fR = f1, f1, f8
+(p6) adds rTmpPtr2 = 48, rTblAddr
}
-
-
-{ .mfi
- ldfe asin_const_sqrt2by2 = [ASIN_Addr2],16
- fma.s1 asin_5by2 = asin_3by2,f1,f1
- nop.i 999
-}
-{ .mfi
- ldfe asin_coeff_P21 = [ASIN_Addr1],16
- fma.s1 asin_11by4 = asin_3by2,asin_3by2,asin_1by2
- nop.i 999;;
+;;
+{ .mfi
+ ldfe fB0 = [rTmpPtr1], 16 // B0
+ nop.f 0
+ nop.i 0
}
-
-
-{ .mfi
- ldfe asin_const_piby2 = [ASIN_Addr1],16
- fma.s1 asin_poly_p17 = asin_t,asin_coeff_P18,asin_coeff_P17
- nop.i 999
-}
-{ .mfb
- nop.m 999
- fma.s1 asin_3by4 = asin_3by2,asin_1by2,f0
-(p6) br.cond.spnt L(ASIN_ABS_1) // Branch to short exit if |x|=1
+{ .mib
+ adds rTmpPtr3 = 16, rTmpPtr2
+ // set p10 = 1 if |x| = 1.0
+ cmp.eq p10, p0 = rAbsXBits, rOne
+ // branch on special path for |x| = 1.0
+(p10) br.cond.spnt asin_abs_1
}
;;
-
-
-{ .mfi
- addl ASIN_lnorm_sig = -0x1,r0 // Form significand 0xffffffffffffffff
- fma.s1 asin_poly_p15 = asin_t,asin_coeff_P16,asin_coeff_P15
- nop.i 999
-}
-{ .mfi
- addl ASIN_snorm_exp = 0x0c001,r0 // Form small exponent
- fnma.s1 asin_d = asin_S0,asin_H0,asin_1by2
- nop.i 999;;
+{ .mfi
+ ldfe fA3 = [rTmpPtr2], 48 // A3 or B1
+ nop.f 0
+ adds rTmpPtr1 = 64, rTmpPtr3
}
-
-
-// Form the exponent and significand of a small number
-{ .mfi
- setf.sig asin_eps_sig = ASIN_lnorm_sig
- fma.s1 asin_poly_p19 = asin_t,asin_coeff_P20,asin_coeff_P19
- nop.i 999
-}
-{ .mfi
- setf.exp asin_eps_exp = ASIN_snorm_exp
- fma.s1 asin_poly_p4 = asin_t,asin_coeff_P5,asin_coeff_P4
- nop.i 999;;
+{ .mib
+ ldfpd fA9, fA11 = [rTmpPtr3], 16 // A9, A11 or B10, B11
+ // set p11 = 1 if |x| > 1.0
+ cmp.gt p11, p0 = rAbsXBits, rOne
+ // branch on special path for |x| > 1.0
+(p11) br.cond.spnt asin_abs_gt_1
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p17 = asin_tx,asin_coeff_P18,asin_coeff_P17
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p8 = asin_t,asin_coeff_P9,asin_coeff_P8
- nop.i 999;;
+;;
+{ .mfi
+ ldfpd fA17, fA19 = [rTmpPtr2], 16 // A17, A19 or B6, B7
+ // initial approximation of 1 / sqrt(1 - x)
+ frsqrta.s1 f1mXRcp, p0 = f1mX
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fms.s1 asin_35by8 = asin_5by2,asin_11by4,asin_5by2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_63by8 = asin_5by2,asin_11by4,f1
- nop.i 999;;
+{ .mfi
+ ldfpd fA13, fA15 = [rTmpPtr3] // A13, A15 or B4, B5
+ fma.s1 fXCube = fXSqr, f8, f0 // x^3
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p13 = asin_t,asin_coeff_P14,asin_coeff_P13
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_18by4 = asin_3by2,asin_5by2,asin_3by4
- nop.i 999;;
+;;
+{ .mfi
+ ldfe fA5 = [rTmpPtr2], 48 // A5 or B2
+ // initial approximation of 1 / sqrt(1 + x)
+ frsqrta.s1 f1pXRcp, p0 = f1pX
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_l1 = asin_5by2,asin_d,asin_3by2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_d2 = asin_d,asin_d,f0
- nop.i 999;;
+{ .mfi
+ ldfpd fA21, fA23 = [rTmpPtr1], 16 // A21, A23 or B3, B8
+ fma.s1 fXQuadr = fXSqr, fXSqr, f0 // x^4
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p15 = asin_t2,asin_poly_p17,asin_poly_p15
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_T0 = asin_d,asin_S0,f0
- nop.i 999;;
+;;
+{ .mfi
+ ldfe fA7 = [rTmpPtr1] // A7 or Pi/2
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p19 = asin_t2,asin_coeff_P21,asin_poly_p19
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p4 = asin_t2,asin_poly_p6,asin_poly_p4
- nop.i 999;;
+{ .mfb
+ ldfpd fA25, fA27 = [rTmpPtr2] // A25, A27 or B9, B12
+ nop.f 0
+(p6) br.cond.spnt asin_base_range;
}
+;;
-
-{ .mfi
- nop.m 999
- fma.s1 asin_d1 = asin_35by8,asin_d,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_231by16 = asin_3by2,asin_35by8,asin_63by8
- nop.i 999;;
+{ .mfi
+ nop.m 0
+(p9) fma.s1 fH = fHalf, f1mXRcp, f0 // H0 for x > 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p2 = asin_t,asin_coeff_P3,asin_coeff_P2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p8 = asin_t2,asin_coeff_P10,asin_poly_p8
- nop.i 999;;
+{ .mfi
+ nop.m 0
+(p9) fma.s1 fS = f1mX, f1mXRcp, f0 // S0 for x > 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p11 = asin_t,asin_coeff_P12,asin_coeff_P11
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_e0 = asin_d2,asin_l1,asin_d
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fH = fHalf, f1pXRcp, f0 // H0 for x < 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p15 = asin_tx,asin_coeff_P16,asin_coeff_P15
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p0 = asin_t,asin_coeff_P1,f1
- nop.i 999;;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fS = f1pX, f1pXRcp, f0 // S0 for x < 0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p19 = asin_tx,asin_coeff_P20,asin_coeff_P19
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p4 = asin_tx,asin_coeff_P5,asin_coeff_P4
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fRQuadr = fRSqr, fRSqr, f0 // R^4
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p8 = asin_tx,asin_coeff_P9,asin_coeff_P8
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_l2 = asin_231by16,asin_d,asin_63by8
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB11 = fB11, fR, fB10
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_d3 = asin_d2,asin_d,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_T3 = asin_d2,asin_T0,f0
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB1 = fB1, fR, fB0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_429by16 = asin_18by4,asin_11by4,asin_231by16
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_S1 = asin_e0,asin_S0,asin_S0
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB5 = fB5, fR, fB4
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p4 = asin_t4,asin_poly_p8,asin_poly_p4
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p15 = asin_t4,asin_poly_p19,asin_poly_p15
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fR, fB6
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p0 = asin_t2,asin_poly_p2,asin_poly_p0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p11 = asin_t2,asin_poly_p13,asin_poly_p11
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB3 = fB3, fR, fB2
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_t8 = asin_t4,asin_t4,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_e1 = asin_d2,asin_l2,asin_d1
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d0 = 1/2 - H0*S0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p4 = asin_tx2,asin_1poly_p6,asin_1poly_p4
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p15 = asin_tx2,asin_1poly_p17,asin_1poly_p15
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fR8 = fRQuadr, fRQuadr, f0 // R^8
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p8 = asin_tx2,asin_coeff_P10,asin_1poly_p8
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p19 = asin_tx2,asin_coeff_P21,asin_1poly_p19
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB9 = fB9, fR, fB8
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p2 = asin_tx,asin_coeff_P3,asin_coeff_P2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p13 = asin_tx,asin_coeff_P14,asin_coeff_P13
- nop.i 999;;
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fB12 = fB12, fRSqr, fB11
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p0 = asin_tx,asin_coeff_P1,f1
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p11 = asin_tx,asin_coeff_P12,asin_coeff_P11
- nop.i 999;;
+{.mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fRSqr, fB5
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_l3 = asin_429by16,asin_d,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_z = asin_e1,asin_T3,asin_S1
- nop.i 999;;
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fB3 = fB3, fRSqr, fB1
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p11 = asin_t4,asin_poly_p15,asin_poly_p11
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_T6 = asin_T3,asin_d3,f0
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fH = fH, fD, fH // H1 = H0 + H0*d0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_t11 = asin_t8,asin_t3,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_poly_p0 = asin_t4,asin_poly_p4,asin_poly_p0
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS, fD, fS // S1 = S0 + S0*d0
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p4 = asin_tx4,asin_1poly_p8,asin_1poly_p4
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p15 = asin_tx4,asin_1poly_p19,asin_1poly_p15
- nop.i 999;;
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fPiBy2 = fPiBy2, fSignX, f0 // signum(x)*Pi/2
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p0 = asin_tx2,asin_1poly_p2,asin_1poly_p0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p11 = asin_tx2,asin_1poly_p13,asin_1poly_p11
- nop.i 999;;
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB12 = fB12, fRSqr, fB9
+ nop.i 0
}
-
-
-{ .mfi
- nop.m 999
-// fcmp.le.s1 asin_pred_LEsqrt2by2,asin_pred_GTsqrt2by2 = asin_abs_x,asin_const_sqrt2by2
- fcmp.le.s1 p7,p8 = asin_abs_x,asin_const_sqrt2by2
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_tx8 = asin_tx4,asin_tx4,f0
- nop.i 999;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fRQuadr, fB3
+ nop.i 0
}
-
-
-// Form a small number to force inexact flag for small args
-{ .mfi
- nop.m 999
- fmerge.se asin_eps = asin_eps_exp,asin_eps_sig
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_z = asin_l3,asin_T6,asin_z
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 asin_series_t = asin_t11,asin_poly_p11,asin_poly_p0
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p0 = asin_tx4,asin_1poly_p4,asin_1poly_p0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 asin_1poly_p11 = asin_tx4,asin_1poly_p15,asin_1poly_p11
- nop.i 999;;
+;;
+{.mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d1 = 1/2 - H1*S1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 fSignedS = fSignX, fS, f0 // -signum(x)*S1
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fCloseTo1Pol = fB12, fR8, fB7
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fH = fH, fD, fH // H2 = H1 + H1*d1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS, fD, fS // S2 = S1 + S1*d1
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ // -signum(x)* S2 = -signum(x)*(S1 + S1*d1)
+ fma.s1 fSignedS = fSignedS, fD, fSignedS
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d2 = 1/2 - H2*S2
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ // signum(x)*(Pi/2 - PolB*S2)
+ fma.s1 fPiBy2 = fSignedS, fCloseTo1Pol, fPiBy2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // -signum(x)*PolB * S2
+ fma.s1 fCloseTo1Pol = fSignedS, fCloseTo1Pol, f0
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for 0.625 <= |x| < 1
+ fma.d.s0 f8 = fCloseTo1Pol, fD, fPiBy2
+ // exit here for 0.625 <= |x| < 1
+ br.ret.sptk b0
}
+;;
-
-{ .mfi
- nop.m 999
- fma.s1 asin_tx11 = asin_tx8,asin_tx3,f0
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
-//(asin_pred_GTsqrt2by2) fnma.s1 answer2 = asin_z,asin_series_t,asin_const_piby2
-(p8) fnma.s1 answer2 = asin_z,asin_series_t,asin_const_piby2
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 asin_series_tx = asin_tx11,asin_1poly_p11,asin_1poly_p0
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
-//(asin_pred_GTsqrt2by2) fma.d f8 = asin_sgn_x,answer2,f0
-(p8) fma.d f8 = asin_sgn_x,answer2,f0
- nop.i 999;;
-}
-
-// asin_eps is added only to force inexact and possibly underflow flag
-// in case asin_series_tx is zero
-//
-{ .mfi
- nop.m 999
-(p7) fma.d asin_eps = f8,asin_series_tx,asin_eps
- nop.i 999
-}
-{ .mfb
- nop.m 999
-//(asin_pred_LEsqrt2by2) fma.d f8 = f8,asin_series_tx,f0
-(p7) fma.d f8 = f8,asin_series_tx,f0
- br.ret.sptk b0
-}
+
+// here if |x| < 0.625
+.align 32
+asin_base_range:
+{ .mfi
+ nop.m 0
+ fma.s1 fA33 = fA33, fXSqr, fA31
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA15 = fA15, fXSqr, fA13
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA29 = fA29, fXSqr, fA27
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA25 = fA25, fXSqr, fA23
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA21 = fA21, fXSqr, fA19
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA9 = fA9, fXSqr, fA7
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA5 = fA5, fXSqr, fA3
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fXQuadr, fA33
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fXQuadr, fA15
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fX8 = fXQuadr, fXQuadr, f0 // x^8
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA25 = fA25, fXQuadr, fA21
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA9 = fA9, fXQuadr, fA5
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fXQuadr, fA29
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fXSqr, fA11
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fX16 = fX8, fX8, f0 // x^16
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fX8, fA25
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fX8, fA9
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fBaseP = fA35, fX16, fA17
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for |x| < 0.625
+ fma.d.s0 f8 = fBaseP, fXCube, f8
+ // exit here for |x| < 0.625 path
+ br.ret.sptk b0
+}
;;
+// here if |x| = 1
+// asin(x) = sign(x) * Pi/2
+.align 32
+asin_abs_1:
+{ .mfi
+ ldfe fPiBy2 = [rPiBy2Ptr] // Pi/2
+ nop.f 0
+ nop.i 0
+}
+;;
+{.mfb
+ nop.m 0
+ // result for |x| = 1.0
+ fma.d.s0 f8 = fPiBy2, fSignX, f0
+ // exit here for |x| = 1.0
+ br.ret.sptk b0
+}
+;;
-L(ASIN_ABS_1):
-// Here for short exit if |x|=1
-{ .mfb
- nop.m 999
- fma.d f8 = asin_sgn_x,asin_const_piby2,f0
- br.ret.sptk b0
-}
+// here if x is a NaN, denormal, or zero
+.align 32
+asin_special:
+{ .mfi
+ nop.m 0
+ // set p12 = 1 if x is a NaN
+ fclass.m p12, p0 = f8, 0xc3
+ nop.i 0
+}
+{ .mlx
+ nop.m 0
+ // smallest positive DP normalized number
+ movl rDenoBound = 0x0010000000000000
+}
+;;
+{ .mfi
+ nop.m 0
+ // set p13 = 1 if x = 0.0
+ fclass.m p13, p0 = f8, 0x07
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNormX = f8
+ nop.i 0
+}
+;;
+{ .mfb
+ // load smallest normal to FP reg
+ setf.d fDenoBound = rDenoBound
+ // answer if x is a NaN
+(p12) fma.d.s0 f8 = f8,f1,f0
+ // exit here if x is a NaN
+(p12) br.ret.spnt b0
+}
+;;
+{ .mfb
+ nop.m 0
+ nop.f 0
+ // exit here if x = 0.0
+(p13) br.ret.spnt b0
+}
+;;
+// if we are still here then x is denormal or unnormal
+{ .mfi
+ nop.m 0
+ // absolute value of normalized x
+ fmerge.s fNormX = f1, fNormX
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ // set p14 = 1 if normalized x is greater than or
+ // equal to the smallest normalized value
+ // So, if p14 is set to 1 it means that we deal with
+ // unnormal rather than with "true" denormal
+ fcmp.ge.s1 p14, p0 = fNormX, fDenoBound
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+(p14) fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag if x unnormal
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ // normalize unnormal input
+(p14) fnorm.s1 f8 = f8
+ // return to the main path
+(p14) br.cond.sptk asin_unnormal_back
+}
+;;
+// if we are still here it means that input is "true" denormal
+{ .mfb
+ nop.m 0
+ // final result if x is denormal
+ fma.d.s0 f8 = f8, fXSqr, f8
+ // exit here if x is denormal
+ br.ret.sptk b0
+}
;;
+// here if |x| > 1.0
+// error handler should be called
+.align 32
+asin_abs_gt_1:
+{ .mfi
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ fmerge.s FR_X = f8,f8
+ nop.i 0
+}
+{ .mfb
+ mov GR_Parameter_TAG = 61 // error code
+ frcpa.s0 FR_RESULT, p0 = f0,f0
+ // call error handler routine
+ br.cond.sptk __libm_error_region
+}
+;;
+GLOBAL_LIBM_END(asin)
-.endp asin
-ASM_SIZE_DIRECTIVE(asin)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
- nop.f 999
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
@@ -857,28 +815,29 @@ __libm_error_region:
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
- stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
-
.body
- frcpa.s0 f9,p0 = f0,f0
-;;
-
{ .mib
- stfd [GR_Parameter_X] = f8 // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = f9,-16 // Store Parameter 3 on stack
- adds r32 = 48,sp
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- ldfd f8 = [r32] // Get return result off stack
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
@@ -887,11 +846,8 @@ __libm_error_region:
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
-
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.type __libm_error_support,@function
-.global __libm_error_support
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S
index ddae85880b..f9a1312b26 100644
--- a/sysdeps/ia64/fpu/e_asinf.S
+++ b/sysdeps/ia64/fpu/e_asinf.S
@@ -1,10 +1,10 @@
.file "asinf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/02/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,21 +35,25 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
-// 2/02/00 Initial revision
-// 6/28/00 Improved speed
-// 6/31/00 Changed register allocation because of some duplicate macros
+// 02/02/00 Initial version
+// 06/28/00 Improved speed
+// 06/31/00 Changed register allocation because of some duplicate macros
// moved nan exit bundle up to gain a cycle.
-// 8/08/00 Improved speed by avoiding SIR flush.
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 08/08/00 Improved speed by avoiding SIR flush.
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 8/17/00 Changed predicate register macro-usage to direct predicate
+// 08/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
+// 03/13/01 Corrected sign of imm1 value in dep instruction.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+
// Description
//=========================================
// The asinf function computes the arc sine of x in the range [-pi,+pi].
@@ -119,7 +123,6 @@
// answer2 = - sign(x) z P(t) + (sign(x) pi/2)
//
-#include "libm_support.h"
// Assembly macros
//=========================================
@@ -225,42 +228,30 @@ asinf_poly_p1a = f90
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-asinf_coeff_1_table:
-ASM_TYPE_DIRECTIVE(asinf_coeff_1_table,@object)
+LOCAL_OBJECT_START(asinf_coeff_1_table)
data8 0x3FC5555607DCF816 // P1
data8 0x3F9CF81AD9BAB2C6 // P4
data8 0x3FC59E0975074DF3 // P7
data8 0xBFA6F4CC2780AA1D // P6
data8 0x3FC2DD45292E93CB // P9
data8 0x3fe6a09e667f3bcd // sqrt(2)/2
-ASM_SIZE_DIRECTIVE(asinf_coeff_1_table)
+LOCAL_OBJECT_END(asinf_coeff_1_table)
-asinf_coeff_2_table:
-ASM_TYPE_DIRECTIVE(asinf_coeff_2_table,@object)
+LOCAL_OBJECT_START(asinf_coeff_2_table)
data8 0x3FA6F108E31EFBA6 // P3
data8 0xBFCA31BF175D82A0 // P8
data8 0x3FA30C0337F6418B // P5
data8 0x3FB332C9266CB1F9 // P2
data8 0x3ff921fb54442d18 // pi_by_2
-ASM_SIZE_DIRECTIVE(asinf_coeff_2_table)
+LOCAL_OBJECT_END(asinf_coeff_2_table)
-.align 32
-.global asinf
-
.section .text
-.proc asinf
-.align 32
-
-asinf:
+GLOBAL_LIBM_ENTRY(asinf)
// Load the addresses of the two tables.
// Then, load the coefficients and other constants.
@@ -345,7 +336,7 @@ asinf:
}
{ .mfb
nop.m 999
-(p8) fma.s f8 = f8,f1,f0
+(p8) fma.s.s0 f8 = f8,f1,f0
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@@ -370,7 +361,7 @@ asinf:
{ .mfb
nop.m 999
fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0
-(p6) br.cond.spnt L(ASINF_ABS_ONE) ;; // Branch if |x|=1
+(p6) br.cond.spnt ASINF_ABS_ONE ;; // Branch if |x|=1
}
{ .mfi
@@ -572,28 +563,26 @@ asinf:
.pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2
{ .mfi
nop.m 999
-(p8) fnma.s f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2
+(p8) fnma.s.s0 f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2
nop.i 999
}
{ .mfb
nop.m 999
-(p7) fma.s f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax
+(p7) fma.s.s0 f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax
br.ret.sptk b0 ;;
}
-L(ASINF_ABS_ONE):
+ASINF_ABS_ONE:
// Here for short exit if |x|=1
{ .mfb
nop.m 999
- fma.s f8 = asinf_sgn_x,asinf_const_piby2,f0
+ fma.s.s0 f8 = asinf_sgn_x,asinf_const_piby2,f0
br.ret.sptk b0
}
;;
-.endp asinf
-ASM_SIZE_DIRECTIVE(asinf)
-
+GLOBAL_LIBM_END(asinf)
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +
@@ -623,8 +612,7 @@ ASM_SIZE_DIRECTIVE(asinf)
// restore gp
// restore ar.pfs
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -680,8 +668,7 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_asinl.S b/sysdeps/ia64/fpu/e_asinl.S
index 9153832090..bf5feba155 100644
--- a/sysdeps/ia64/fpu/e_asinl.S
+++ b/sysdeps/ia64/fpu/e_asinl.S
@@ -1,10 +1,10 @@
.file "asinl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2001 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2001 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,720 +20,2448 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
-// set [the previously overwritten] GR_Parameter_RESULT.
+// 08/28/01 New version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
-// long double = asinl(long double)
-// input floating point f8
-// output floating point f8
+// long double asinl(long double)
//
-// Registers used
+// Overview of operation
//==============================================================
+// Background
//
-// predicate registers used:
-// p6 -> p12
+// Implementation
//
-// floating-point registers used:
-// f8 has input, then output
-// f32 -> f87, f8 -> f13, f32 -> f87
+// For |s| in [2^{-4}, sqrt(2)/2]:
+// Let t= 2^k*1.b1 b2..b6 1, where s= 2^k*1.b1 b2.. b52
+// asin(s)= asin(t)+asin(r), where r= s*sqrt(1-t^2)-t*sqrt(1-s^2), i.e.
+// r= (s-t)*sqrt(1-t^2)-t*sqrt(1-t^2)*(sqrt((1-s^2)/(1-t^2))-1)
+// asin(r)-r evaluated as 9-degree polynomial (c3*r^3+c5*r^5+c7*r^7+c9*r^9)
+// The 64-bit significands of sqrt(1-t^2), 1/(1-t^2) are read from the table,
+// along with the high and low parts of asin(t) (stored as two double precision
+// values)
//
-// general registers used:
-// r32 -> r47
+// |s| in (sqrt(2)/2, sqrt(255/256)):
+// Let t= 2^k*1.b1 b2..b6 1, where (1-s^2)*frsqrta(1-s^2)= 2^k*1.b1 b2..b6..
+// asin(|s|)= pi/2-asin(t)+asin(r), r= s*t-sqrt(1-s^2)*sqrt(1-t^2)
+// To minimize accumulated errors, r is computed as
+// r= (t*s)_s-t^2*y*z+z*y*(t^2-1+s^2)_s+z*y*(1-s^2)_s*x+z'*y*(1-s^2)*PS29+
+// +(t*s-(t*s)_s)+z*y*((t^2-1-(t^2-1+s^2)_s)+s^2)+z*y*(1-s^2-(1-s^2)_s)+
+// +ez*z'*y*(1-s^2)*(1-x),
+// where y= frsqrta(1-s^2), z= (sqrt(1-t^2))_s (rounded to 24 significant bits)
+// z'= sqrt(1-t^2), x= ((1-s^2)*y^2-1)/2
+//
+// |s|<2^{-4}: evaluate as 17-degree polynomial
+// (or simply return s, if |s|<2^{-64})
+//
+// |s| in [sqrt(255/256), 1): asin(|s|)= pi/2-asin(sqrt(1-s^2))
+// use 17-degree polynomial for asin(sqrt(1-s^2)),
+// 9-degree polynomial to evaluate sqrt(1-s^2)
+// High order term is (pi/2)_high-(y*(1-s^2))_high
//
-// Overview of operation
-//==============================================================
-// There are three paths
-// 1. |x| < 2^-40 ASIN_TINY
-// 2. 2^-40 <= |x| < 1/4 ASIN_POLY
-// 3. 1/4 <= |x| < 1 ASIN_ATAN
-#include "libm_support.h"
-// Assembly macros
-//==============================================================
-FR_RESULT = f10
-FR_X = f8
-FR_Y = f1
-asin_P79 = f32
-asin_P59 = f33
-asin_P39 = f34
-asin_P19 = f35
-
-asin_P810 = f36
-asin_P610 = f37
-asin_P410 = f38
-asin_P210 = f39
-
-asin_A1 = f41
-asin_A2 = f42
-asin_A3 = f43
-asin_A4 = f44
-asin_A5 = f45
-asin_A6 = f46
-asin_A7 = f47
-asin_A8 = f48
-asin_A9 = f49
-asin_A10 = f50
-
-asin_X2 = f51
-asin_X4 = f52
-
-asin_B = f53
-asin_Bb = f54
-asin_C = f55
-asin_Cc = f56
-asin_D = f57
-
-asin_W = f58
-asin_Ww = f59
-
-asin_y0 = f60
-asin_y1 = f61
-asin_y2 = f62
-
-asin_H = f63
-asin_Hh = f64
-
-asin_t1 = f65
-asin_t2 = f66
-asin_t3 = f67
-asin_t4 = f68
-asin_t5 = f69
-
-asin_Pseries = f70
-asin_NORM_f8 = f71
-asin_ABS_NORM_f8 = f72
-
-asin_2m100 = f73
-asin_P1P2 = f74
-asin_HALF = f75
-asin_1mD = f76
-
-asin_1mB = f77
-asin_1mBmC = f78
-asin_S = f79
-
-asin_BmWW = f80
-asin_BmWWpb = f81
-asin_2W = f82
-asin_1d2W = f83
-asin_Dd = f84
-
-asin_XWw = f85
-asin_low = f86
-
-asin_pi_by_2 = f87
-asin_pi_by_2_lo = f88
-
-asin_GR_17_ones = r33
-asin_GR_16_ones = r34
-asin_GR_signexp_f8 = r35
-asin_GR_exp = r36
-asin_GR_true_exp = r37
-asin_GR_ff9b = r38
-
-GR_SAVE_B0 = r39
-GR_SAVE_SP = r40
-GR_SAVE_PFS = r33
-// r33 can be used safely.
-// r40 is address of table of coefficients
-// Later it is used to save sp across calls
-GR_SAVE_GP = r41
-asin_GR_fffe = r42
-asin_GR_retval = r43
-
-GR_Parameter_X = r44
-GR_Parameter_Y = r45
-GR_Parameter_RESULT = r46
-GR_Parameter_TAG = r47
-
-
-// 2^-40:
-// A true exponent of -40 is
-// : -40 + register_bias
-// : -28 + ffff = ffd7
-
-// A true exponent of -100 is
-// : -100 + register_bias
-// : -64 + ffff = ff9b
-
-// Data tables
+
+// Registers used
//==============================================================
+// f6-f15, f32-f36
+// r2-r3, r23-r23
+// p6, p7, p8, p12
+//
+
+
+ GR_SAVE_B0= r33
+ GR_SAVE_PFS= r34
+ GR_SAVE_GP= r35 // This reg. can safely be used
+ GR_SAVE_SP= r36
+
+ GR_Parameter_X= r37
+ GR_Parameter_Y= r38
+ GR_Parameter_RESULT= r39
+ GR_Parameter_TAG= r40
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+ FR_X= f10
+ FR_Y= f1
+ FR_RESULT= f8
+
+
+
+RODATA
.align 16
-asin_coefficients:
-ASM_TYPE_DIRECTIVE(asin_coefficients,@object)
-data8 0xBB08911F2013961E, 0x00003FF8 // A10
-data8 0x981F1095A23A87D3, 0x00003FF8 // A9
-data8 0xBDF09C6C4177BCC6, 0x00003FF8 // A8
-data8 0xE4C3A60B049ACCEA, 0x00003FF8 // A7
-data8 0x8E2789F4E8A8F1AD, 0x00003FF9 // A6
-data8 0xB745D09B2B0E850B, 0x00003FF9 // A5
-data8 0xF8E38E3BC4C50920, 0x00003FF9 // A4
-data8 0xB6DB6DB6D89FCD81, 0x00003FFA // A3
-data8 0x99999999999AF376, 0x00003FFB // A2
-data8 0xAAAAAAAAAAAAAA71, 0x00003FFC // A1
-
-data8 0xc90fdaa22168c234, 0x00003FFF // pi_by_2_hi
-data8 0xc4c6628b80dc1cd1, 0x00003FBF // pi_by_2_lo
-ASM_SIZE_DIRECTIVE(asin_coefficients)
-
-.align 32
-.global asinl#
+
+
+LOCAL_OBJECT_START(T_table)
+
+// stores 64-bit significand of 1/(1-t^2), 64-bit significand of sqrt(1-t^2),
+// asin(t)_high (double precision), asin(t)_low (double precision)
+
+data8 0x80828692b71c4391, 0xff7ddcec2d87e879
+data8 0x3fb022bc0ae531a0, 0x3c9f599c7bb42af6
+data8 0x80869f0163d0b082, 0xff79cad2247914d3
+data8 0x3fb062dd26afc320, 0x3ca4eff21bd49c5c
+data8 0x808ac7d5a8690705, 0xff75a89ed6b626b9
+data8 0x3fb0a2ff4a1821e0, 0x3cb7e33b58f164cc
+data8 0x808f0112ad8ad2e0, 0xff7176517c2cc0cb
+data8 0x3fb0e32279319d80, 0x3caee31546582c43
+data8 0x80934abba8a1da0a, 0xff6d33e949b1ed31
+data8 0x3fb12346b8101da0, 0x3cb8bfe463d087cd
+data8 0x8097a4d3dbe63d8f, 0xff68e16571015c63
+data8 0x3fb1636c0ac824e0, 0x3c8870a7c5a3556f
+data8 0x809c0f5e9662b3dd, 0xff647ec520bca0f0
+data8 0x3fb1a392756ed280, 0x3c964f1a927461ae
+data8 0x80a08a5f33fadc66, 0xff600c07846a6830
+data8 0x3fb1e3b9fc19e580, 0x3c69eb3576d56332
+data8 0x80a515d91d71acd4, 0xff5b892bc475affa
+data8 0x3fb223e2a2dfbe80, 0x3c6a4e19fd972fb6
+data8 0x80a9b1cfc86ff7cd, 0xff56f631062cf93d
+data8 0x3fb2640c6dd76260, 0x3c62041160e0849e
+data8 0x80ae5e46b78b0d68, 0xff5253166bc17794
+data8 0x3fb2a43761187c80, 0x3cac61651af678c0
+data8 0x80b31b417a4b756b, 0xff4d9fdb14463dc8
+data8 0x3fb2e46380bb6160, 0x3cb06ef23eeba7a1
+data8 0x80b7e8c3ad33c369, 0xff48dc7e1baf6738
+data8 0x3fb32490d0d910c0, 0x3caa05f480b300d5
+data8 0x80bcc6d0f9c784d6, 0xff4408fe9ad13e37
+data8 0x3fb364bf558b3820, 0x3cb01e7e403aaab9
+data8 0x80c1b56d1692492d, 0xff3f255ba75f5f4e
+data8 0x3fb3a4ef12ec3540, 0x3cb4fe8fcdf5f5f1
+data8 0x80c6b49bc72ec446, 0xff3a319453ebd961
+data8 0x3fb3e5200d171880, 0x3caf2dc089b2b7e2
+data8 0x80cbc460dc4e0ae8, 0xff352da7afe64ac6
+data8 0x3fb425524827a720, 0x3cb75a855e7c6053
+data8 0x80d0e4c033bee9c4, 0xff301994c79afb32
+data8 0x3fb46585c83a5e00, 0x3cb3264981c019ab
+data8 0x80d615bdb87556db, 0xff2af55aa431f291
+data8 0x3fb4a5ba916c73c0, 0x3c994251d94427b5
+data8 0x80db575d6291fd8a, 0xff25c0f84bae0cb9
+data8 0x3fb4e5f0a7dbdb20, 0x3cbee2fcc4c786cb
+data8 0x80e0a9a33769e535, 0xff207c6cc0ec09fd
+data8 0x3fb526280fa74620, 0x3c940656e5549b91
+data8 0x80e60c93498e32cd, 0xff1b27b703a19c98
+data8 0x3fb56660ccee2740, 0x3ca7082374d7b2cd
+data8 0x80eb8031b8d4052d, 0xff15c2d6105c72f8
+data8 0x3fb5a69ae3d0b520, 0x3c7c4d46e09ac68a
+data8 0x80f10482b25c6c8a, 0xff104dc8e0813ed4
+data8 0x3fb5e6d6586fec20, 0x3c9aa84ffd9b4958
+data8 0x80f6998a709c7cfb, 0xff0ac88e6a4ab926
+data8 0x3fb627132eed9140, 0x3cbced2cbbbe7d16
+data8 0x80fc3f4d3b657c44, 0xff053325a0c8a2ec
+data8 0x3fb667516b6c34c0, 0x3c6489c5fc68595a
+data8 0x8101f5cf67ed2af8, 0xfeff8d8d73dec2bb
+data8 0x3fb6a791120f33a0, 0x3cbe12acf159dfad
+data8 0x8107bd1558d6291f, 0xfef9d7c4d043df29
+data8 0x3fb6e7d226fabba0, 0x3ca386d099cd0dc7
+data8 0x810d95237e38766a, 0xfef411ca9f80b5f7
+data8 0x3fb72814ae53cc20, 0x3cb9f35731e71dd6
+data8 0x81137dfe55aa0e29, 0xfeee3b9dc7eef009
+data8 0x3fb76858ac403a00, 0x3c74df3dd959141a
+data8 0x811977aa6a479f0f, 0xfee8553d2cb8122c
+data8 0x3fb7a89e24e6b0e0, 0x3ca6034406ee42bc
+data8 0x811f822c54bd5ef8, 0xfee25ea7add46a91
+data8 0x3fb7e8e51c6eb6a0, 0x3cb82f8f78e68ed7
+data8 0x81259d88bb4ffac1, 0xfedc57dc2809fb1d
+data8 0x3fb8292d9700ad60, 0x3cbebb73c0e653f9
+data8 0x812bc9c451e5a257, 0xfed640d974eb6068
+data8 0x3fb8697798c5d620, 0x3ca2feee76a9701b
+data8 0x813206e3da0f3124, 0xfed0199e6ad6b585
+data8 0x3fb8a9c325e852e0, 0x3cb9e88f2f4d0efe
+data8 0x813854ec231172f9, 0xfec9e229dcf4747d
+data8 0x3fb8ea1042932a00, 0x3ca5ff40d81f66fd
+data8 0x813eb3e209ee858f, 0xfec39a7a9b36538b
+data8 0x3fb92a5ef2f247c0, 0x3cb5e3bece4d6b07
+data8 0x814523ca796f56ce, 0xfebd428f72561efe
+data8 0x3fb96aaf3b3281a0, 0x3cb7b9e499436d7c
+data8 0x814ba4aa6a2d3ff9, 0xfeb6da672bd48fe4
+data8 0x3fb9ab011f819860, 0x3cb9168143cc1a7f
+data8 0x81523686e29bbdd7, 0xfeb062008df81f50
+data8 0x3fb9eb54a40e3ac0, 0x3cb6e544197eb1e1
+data8 0x8158d964f7124614, 0xfea9d95a5bcbd65a
+data8 0x3fba2ba9cd080800, 0x3ca9a717be8f7446
+data8 0x815f8d49c9d639e4, 0xfea34073551e1ac8
+data8 0x3fba6c009e9f9260, 0x3c741e989a60938a
+data8 0x8166523a8b24f626, 0xfe9c974a367f785c
+data8 0x3fbaac591d0661a0, 0x3cb2c1290107e57d
+data8 0x816d283c793e0114, 0xfe95ddddb94166cb
+data8 0x3fbaecb34c6ef600, 0x3c9c7d5fbaec405d
+data8 0x81740f54e06d55bd, 0xfe8f142c93750c50
+data8 0x3fbb2d0f310cca00, 0x3cbc09479a9cbcfb
+data8 0x817b07891b15cd5e, 0xfe883a3577e9fceb
+data8 0x3fbb6d6ccf1455e0, 0x3cb9450bff4ee307
+data8 0x818210de91bba6c8, 0xfe814ff7162cf62f
+data8 0x3fbbadcc2abb1180, 0x3c9227fda12a8d24
+data8 0x81892b5abb0f2bf9, 0xfe7a55701a8697b1
+data8 0x3fbbee2d48377700, 0x3cb6fad72acfe356
+data8 0x819057031bf7760e, 0xfe734a9f2dfa1810
+data8 0x3fbc2e902bc10600, 0x3cb4465b588d16ad
+data8 0x819793dd479d4fbe, 0xfe6c2f82f643f68b
+data8 0x3fbc6ef4d9904580, 0x3c8b9ac54823960d
+data8 0x819ee1eedf76367a, 0xfe65041a15d8a92c
+data8 0x3fbcaf5b55dec6a0, 0x3ca2b8d28a954db2
+data8 0x81a6413d934f7a66, 0xfe5dc8632be3477f
+data8 0x3fbcefc3a4e727a0, 0x3c9380da83713ab4
+data8 0x81adb1cf21597d4b, 0xfe567c5cd44431d5
+data8 0x3fbd302dcae51600, 0x3ca995b83421756a
+data8 0x81b533a9563310b8, 0xfe4f2005a78fb50f
+data8 0x3fbd7099cc155180, 0x3caefa2f7a817d5f
+data8 0x81bcc6d20cf4f373, 0xfe47b35c3b0caaeb
+data8 0x3fbdb107acb5ae80, 0x3cb455fc372dd026
+data8 0x81c46b4f2f3d6e68, 0xfe40365f20b316d6
+data8 0x3fbdf177710518c0, 0x3cbee3dcc5b01434
+data8 0x81cc2126b53c1144, 0xfe38a90ce72abf36
+data8 0x3fbe31e91d439620, 0x3cb3e131c950aebd
+data8 0x81d3e85ea5bd8ee2, 0xfe310b6419c9c33a
+data8 0x3fbe725cb5b24900, 0x3c01d3fac6029027
+data8 0x81dbc0fd1637b9c1, 0xfe295d6340932d15
+data8 0x3fbeb2d23e937300, 0x3c6304cc44aeedd1
+data8 0x81e3ab082ad5a0a4, 0xfe219f08e03580b3
+data8 0x3fbef349bc2a77e0, 0x3cac1d2d6abe9c72
+data8 0x81eba6861683cb97, 0xfe19d0537a0946e2
+data8 0x3fbf33c332bbe020, 0x3ca0909dba4e96ca
+data8 0x81f3b37d1afc9979, 0xfe11f1418c0f94e2
+data8 0x3fbf743ea68d5b60, 0x3c937fc12a2a779a
+data8 0x81fbd1f388d4be45, 0xfe0a01d190f09063
+data8 0x3fbfb4bc1be5c340, 0x3cbf51a504b55813
+data8 0x820401efbf87e248, 0xfe020201fff9efea
+data8 0x3fbff53b970d1e80, 0x3ca625444b260078
+data8 0x82106ad2ffdca049, 0xfdf5e3940a49135e
+data8 0x3fc02aff52065460, 0x3c9125d113e22a57
+data8 0x8221343d6ea1d3e2, 0xfde581a45429b0a0
+data8 0x3fc06b84f8e03220, 0x3caccf362295894b
+data8 0x82324434adbf99c2, 0xfdd4de1a001fb775
+data8 0x3fc0ac0ed1fe7240, 0x3cc22f676096b0af
+data8 0x82439aee8d0c7747, 0xfdc3f8e8269d1f03
+data8 0x3fc0ec9cee9e4820, 0x3cca147e2886a628
+data8 0x825538a1d0fcb2f0, 0xfdb2d201a9b1ba66
+data8 0x3fc12d2f6006f0a0, 0x3cc72b36633bc2d4
+data8 0x82671d86345c5cee, 0xfda1695934d723e7
+data8 0x3fc16dc63789de60, 0x3cb11f9c47c7b83f
+data8 0x827949d46a121770, 0xfd8fbee13cbbb823
+data8 0x3fc1ae618682e620, 0x3cce1b59020cef8e
+data8 0x828bbdc61eeab9ba, 0xfd7dd28bff0c9f34
+data8 0x3fc1ef015e586c40, 0x3cafec043e0225ee
+data8 0x829e7995fb6de9e1, 0xfd6ba44b823ee1ca
+data8 0x3fc22fa5d07b90c0, 0x3cba905409caf8e3
+data8 0x82b17d7fa5bbc982, 0xfd5934119557883a
+data8 0x3fc2704eee685da0, 0x3cb5ef21838a823e
+data8 0x82c4c9bfc373d276, 0xfd4681cfcfb2c161
+data8 0x3fc2b0fcc9a5f3e0, 0x3ccc7952c5e0e312
+data8 0x82d85e93fba50136, 0xfd338d7790ca0f41
+data8 0x3fc2f1af73c6ba00, 0x3cbecf5f977d1ca9
+data8 0x82ec3c3af8c76b32, 0xfd2056f9fff97727
+data8 0x3fc33266fe6889a0, 0x3c9d329c022ebdb5
+data8 0x830062f46abf6022, 0xfd0cde480c43b327
+data8 0x3fc373237b34de60, 0x3cc95806d4928adb
+data8 0x8314d30108ea35f0, 0xfcf923526c1562b2
+data8 0x3fc3b3e4fbe10520, 0x3cbc299fe7223d54
+data8 0x83298ca29434df97, 0xfce526099d0737ed
+data8 0x3fc3f4ab922e4a60, 0x3cb59d8bb8fdbccc
+data8 0x833e901bd93c7009, 0xfcd0e65de39f1f7c
+data8 0x3fc435774fea2a60, 0x3c9ec18b43340914
+data8 0x8353ddb0b278aad8, 0xfcbc643f4b106055
+data8 0x3fc4764846ee80a0, 0x3cb90402efd87ed6
+data8 0x836975a60a70c52e, 0xfca79f9da4fab13a
+data8 0x3fc4b71e8921b860, 0xbc58f23449ed6365
+data8 0x837f5841ddfa7a46, 0xfc92986889284148
+data8 0x3fc4f7fa2876fca0, 0xbc6294812bf43acd
+data8 0x839585cb3e839773, 0xfc7d4e8f554ab12f
+data8 0x3fc538db36ee6960, 0x3cb910b773d4c578
+data8 0x83abfe8a5466246f, 0xfc67c2012cb6fa68
+data8 0x3fc579c1c6953cc0, 0x3cc5ede909fc47fc
+data8 0x83c2c2c861474d91, 0xfc51f2acf82041d5
+data8 0x3fc5baade9860880, 0x3cac63cdfc3588e5
+data8 0x83d9d2cfc2813637, 0xfc3be08165519325
+data8 0x3fc5fb9fb1e8e3a0, 0x3cbf7c8466578c29
+data8 0x83f12eebf397daac, 0xfc258b6ce6e6822f
+data8 0x3fc63c9731f39d40, 0x3cb6d2a7ffca3e9e
+data8 0x8408d76990b9296e, 0xfc0ef35db402af94
+data8 0x3fc67d947be9eec0, 0x3cb1980da09e6566
+data8 0x8420cc9659487cd7, 0xfbf81841c8082dc4
+data8 0x3fc6be97a21daf00, 0x3cc2ac8330e59aa5
+data8 0x84390ec132759ecb, 0xfbe0fa06e24cc390
+data8 0x3fc6ffa0b6ef05e0, 0x3ccc1a030fee56c4
+data8 0x84519e3a29df811a, 0xfbc9989a85ce0954
+data8 0x3fc740afcccca000, 0x3cc19692a5301ca6
+data8 0x846a7b527842d61b, 0xfbb1f3e9f8e45dc4
+data8 0x3fc781c4f633e2c0, 0x3cc0e98f3868a508
+data8 0x8483a65c8434b5f0, 0xfb9a0be244f4af45
+data8 0x3fc7c2e045b12140, 0x3cb2a8d309754420
+data8 0x849d1fabe4e97dd7, 0xfb81e070362116d1
+data8 0x3fc80401cddfd120, 0x3ca7a44544aa4ce6
+data8 0x84b6e795650817ea, 0xfb6971805af8411e
+data8 0x3fc84529a16ac020, 0x3c9e3b709c7d6f94
+data8 0x84d0fe6f0589da92, 0xfb50beff0423a2f5
+data8 0x3fc88657d30c49e0, 0x3cc60d65a7f0a278
+data8 0x84eb649000a73014, 0xfb37c8d84414755c
+data8 0x3fc8c78c758e8e80, 0x3cc94b2ee984c2b7
+data8 0x85061a50ccd13781, 0xfb1e8ef7eeaf764b
+data8 0x3fc908c79bcba900, 0x3cc8540ae794a2fe
+data8 0x8521200b1fb8916e, 0xfb05114998f76a83
+data8 0x3fc94a0958ade6c0, 0x3ca127f49839fa9c
+data8 0x853c7619f1618bf6, 0xfaeb4fb898b65d19
+data8 0x3fc98b51bf2ffee0, 0x3c8c9ba7a803909a
+data8 0x85581cd97f45e274, 0xfad14a3004259931
+data8 0x3fc9cca0e25d4ac0, 0x3cba458e91d3bf54
+data8 0x857414a74f8446b4, 0xfab7009ab1945a54
+data8 0x3fca0df6d551fe80, 0x3cc78ea1d329d2b2
+data8 0x85905de2341dea46, 0xfa9c72e3370d2fbc
+data8 0x3fca4f53ab3b6200, 0x3ccf60dca86d57ef
+data8 0x85acf8ea4e423ff8, 0xfa81a0f3e9fa0ee9
+data8 0x3fca90b777580aa0, 0x3ca4c4e2ec8a867e
+data8 0x85c9e62111a92e7d, 0xfa668ab6dec711b1
+data8 0x3fcad2224cf814e0, 0x3c303de5980d071c
+data8 0x85e725e947fbee97, 0xfa4b3015e883dbfe
+data8 0x3fcb13943f7d5f80, 0x3cc29d4eefa5cb1e
+data8 0x8604b8a7144cd054, 0xfa2f90fa9883a543
+data8 0x3fcb550d625bc6a0, 0x3c9e01a746152daf
+data8 0x86229ebff69e2415, 0xfa13ad4e3dfbe1c1
+data8 0x3fcb968dc9195ea0, 0x3ccc091bd73ae518
+data8 0x8640d89acf78858c, 0xf9f784f9e5a1877b
+data8 0x3fcbd815874eb160, 0x3cb5f4b89875e187
+data8 0x865f669fe390c7f5, 0xf9db17e65944eacf
+data8 0x3fcc19a4b0a6f9c0, 0x3cc5c0bc2b0bbf14
+data8 0x867e4938df7dc45f, 0xf9be65fc1f6c2e6e
+data8 0x3fcc5b3b58e061e0, 0x3cc1ca70df8f57e7
+data8 0x869d80d0db7e4c0c, 0xf9a16f237aec427a
+data8 0x3fcc9cd993cc4040, 0x3cbae93acc85eccf
+data8 0x86bd0dd45f4f8265, 0xf98433446a806e70
+data8 0x3fccde7f754f5660, 0x3cb22f70e64568d0
+data8 0x86dcf0b16613e37a, 0xf966b246a8606170
+data8 0x3fcd202d11620fa0, 0x3c962030e5d4c849
+data8 0x86fd29d7624b3d5d, 0xf948ec11a9d4c45b
+data8 0x3fcd61e27c10c0a0, 0x3cc7083c91d59217
+data8 0x871db9b741dbe44a, 0xf92ae08c9eca4941
+data8 0x3fcda39fc97be7c0, 0x3cc9258579e57211
+data8 0x873ea0c3722d6af2, 0xf90c8f9e71633363
+data8 0x3fcde5650dd86d60, 0x3ca4755a9ea582a9
+data8 0x875fdf6fe45529e8, 0xf8edf92dc5875319
+data8 0x3fce27325d6fe520, 0x3cbc1e2b6c1954f9
+data8 0x878176321154e2bc, 0xf8cf1d20f87270b8
+data8 0x3fce6907cca0d060, 0x3cb6ca4804750830
+data8 0x87a36580fe6bccf5, 0xf8affb5e20412199
+data8 0x3fceaae56fdee040, 0x3cad6b310d6fd46c
+data8 0x87c5add5417a5cb9, 0xf89093cb0b7c0233
+data8 0x3fceeccb5bb33900, 0x3cc16e99cedadb20
+data8 0x87e84fa9057914ca, 0xf870e64d40a15036
+data8 0x3fcf2eb9a4bcb600, 0x3cc75ee47c8b09e9
+data8 0x880b4b780f02b709, 0xf850f2c9fdacdf78
+data8 0x3fcf70b05fb02e20, 0x3cad6350d379f41a
+data8 0x882ea1bfc0f228ac, 0xf830b926379e6465
+data8 0x3fcfb2afa158b8a0, 0x3cce0ccd9f829985
+data8 0x885252ff21146108, 0xf810394699fe0e8e
+data8 0x3fcff4b77e97f3e0, 0x3c9b30faa7a4c703
+data8 0x88765fb6dceebbb3, 0xf7ef730f865f6df0
+data8 0x3fd01b6406332540, 0x3cdc5772c9e0b9bd
+data8 0x88ad1f69be2cc730, 0xf7bdc59bc9cfbd97
+data8 0x3fd04cf8ad203480, 0x3caeef44fe21a74a
+data8 0x88f763f70ae2245e, 0xf77a91c868a9c54e
+data8 0x3fd08f23ce0162a0, 0x3cd6290ab3fe5889
+data8 0x89431fc7bc0c2910, 0xf73642973c91298e
+data8 0x3fd0d1610f0c1ec0, 0x3cc67401a01f08cf
+data8 0x8990573407c7738e, 0xf6f0d71d1d7a2dd6
+data8 0x3fd113b0c65d88c0, 0x3cc7aa4020fe546f
+data8 0x89df0eb108594653, 0xf6aa4e6a05cfdef2
+data8 0x3fd156134ada6fe0, 0x3cc87369da09600c
+data8 0x8a2f4ad16e0ed78a, 0xf662a78900c35249
+data8 0x3fd19888f43427a0, 0x3cc62b220f38e49c
+data8 0x8a811046373e0819, 0xf619e180181d97cc
+data8 0x3fd1db121aed7720, 0x3ca3ede7490b52f4
+data8 0x8ad463df6ea0fa2c, 0xf5cffb504190f9a2
+data8 0x3fd21daf185fa360, 0x3caafad98c1d6c1b
+data8 0x8b294a8cf0488daf, 0xf584f3f54b8604e6
+data8 0x3fd2606046bf95a0, 0x3cdb2d704eeb08fa
+data8 0x8b7fc95f35647757, 0xf538ca65c960b582
+data8 0x3fd2a32601231ec0, 0x3cc661619fa2f126
+data8 0x8bd7e588272276f8, 0xf4eb7d92ff39fccb
+data8 0x3fd2e600a3865760, 0x3c8a2a36a99aca4a
+data8 0x8c31a45bf8e9255e, 0xf49d0c68cd09b689
+data8 0x3fd328f08ad12000, 0x3cb9efaf1d7ab552
+data8 0x8c8d0b520a35eb18, 0xf44d75cd993cfad2
+data8 0x3fd36bf614dcc040, 0x3ccacbb590bef70d
+data8 0x8cea2005d068f23d, 0xf3fcb8a23ab4942b
+data8 0x3fd3af11a079a6c0, 0x3cd9775872cf037d
+data8 0x8d48e837c8cd5027, 0xf3aad3c1e2273908
+data8 0x3fd3f2438d754b40, 0x3ca03304f667109a
+data8 0x8da969ce732f3ac7, 0xf357c60202e2fd7e
+data8 0x3fd4358c3ca032e0, 0x3caecf2504ff1a9d
+data8 0x8e0baad75555e361, 0xf3038e323ae9463a
+data8 0x3fd478ec0fd419c0, 0x3cc64bdc3d703971
+data8 0x8e6fb18807ba877e, 0xf2ae2b1c3a6057f7
+data8 0x3fd4bc6369fa40e0, 0x3cbb7122ec245cf2
+data8 0x8ed5843f4bda74d5, 0xf2579b83aa556f0c
+data8 0x3fd4fff2af11e2c0, 0x3c9cfa2dc792d394
+data8 0x8f3d29862c861fef, 0xf1ffde2612ca1909
+data8 0x3fd5439a4436d000, 0x3cc38d46d310526b
+data8 0x8fa6a81128940b2d, 0xf1a6f1bac0075669
+data8 0x3fd5875a8fa83520, 0x3cd8bf59b8153f8a
+data8 0x901206c1686317a6, 0xf14cd4f2a730d480
+data8 0x3fd5cb33f8cf8ac0, 0x3c9502b5c4d0e431
+data8 0x907f4ca5fe9cf739, 0xf0f186784a125726
+data8 0x3fd60f26e847b120, 0x3cc8a1a5e0acaa33
+data8 0x90ee80fd34aeda5e, 0xf09504ef9a212f18
+data8 0x3fd65333c7e43aa0, 0x3cae5b029cb1f26e
+data8 0x915fab35e37421c6, 0xf0374ef5daab5c45
+data8 0x3fd6975b02b8e360, 0x3cd5aa1c280c45e6
+data8 0x91d2d2f0d894d73c, 0xefd86321822dbb51
+data8 0x3fd6db9d05213b20, 0x3cbecf2c093ccd8b
+data8 0x9248000249200009, 0xef7840021aca5a72
+data8 0x3fd71ffa3cc87fc0, 0x3cb8d273f08d00d9
+data8 0x92bf3a7351f081d2, 0xef16e42021d7cbd5
+data8 0x3fd7647318b1ad20, 0x3cbce099d79cdc46
+data8 0x93388a8386725713, 0xeeb44dfce6820283
+data8 0x3fd7a908093fc1e0, 0x3ccb033ec17a30d9
+data8 0x93b3f8aa8e653812, 0xee507c126774fa45
+data8 0x3fd7edb9803e3c20, 0x3cc10aedb48671eb
+data8 0x94318d99d341ade4, 0xedeb6cd32f891afb
+data8 0x3fd83287f0e9cf80, 0x3c994c0c1505cd2a
+data8 0x94b1523e3dedc630, 0xed851eaa3168f43c
+data8 0x3fd87773cff956e0, 0x3cda3b7bce6a6b16
+data8 0x95334fc20577563f, 0xed1d8ffaa2279669
+data8 0x3fd8bc7d93a70440, 0x3cd4922edc792ce2
+data8 0x95b78f8e8f92f274, 0xecb4bf1fd2be72da
+data8 0x3fd901a5b3b9cf40, 0x3cd3fea1b00f9d0d
+data8 0x963e1b4e63a87c3f, 0xec4aaa6d08694cc1
+data8 0x3fd946eca98f2700, 0x3cdba4032d968ff1
+data8 0x96c6fcef314074fc, 0xebdf502d53d65fea
+data8 0x3fd98c52f024e800, 0x3cbe7be1ab8c95c9
+data8 0x97523ea3eab028b2, 0xeb72aea36720793e
+data8 0x3fd9d1d904239860, 0x3cd72d08a6a22b70
+data8 0x97dfeae6f4ee4a9a, 0xeb04c4096a884e94
+data8 0x3fda177f63e8ef00, 0x3cd818c3c1ebfac7
+data8 0x98700c7c6d85d119, 0xea958e90cfe1efd7
+data8 0x3fda5d468f92a540, 0x3cdf45fbfaa080fe
+data8 0x9902ae7487a9caa1, 0xea250c6224aab21a
+data8 0x3fdaa32f090998e0, 0x3cd715a9353cede4
+data8 0x9997dc2e017a9550, 0xe9b33b9ce2bb7638
+data8 0x3fdae939540d3f00, 0x3cc545c014943439
+data8 0x9a2fa158b29b649b, 0xe9401a573f8aa706
+data8 0x3fdb2f65f63f6c60, 0x3cd4a63c2f2ca8e2
+data8 0x9aca09f835466186, 0xe8cba69df9f0bf35
+data8 0x3fdb75b5773075e0, 0x3cda310ce1b217ec
+data8 0x9b672266ab1e0136, 0xe855de74266193d4
+data8 0x3fdbbc28606babc0, 0x3cdc84b75cca6c44
+data8 0x9c06f7579f0b7bd5, 0xe7debfd2f98c060b
+data8 0x3fdc02bf3d843420, 0x3cd225d967ffb922
+data8 0x9ca995db058cabdc, 0xe76648a991511c6e
+data8 0x3fdc497a9c224780, 0x3cde08101c5b825b
+data8 0x9d4f0b605ce71e88, 0xe6ec76dcbc02d9a7
+data8 0x3fdc905b0c10d420, 0x3cb1abbaa3edf120
+data8 0x9df765b9eecad5e6, 0xe6714846bdda7318
+data8 0x3fdcd7611f4b8a00, 0x3cbf6217ae80aadf
+data8 0x9ea2b320350540fe, 0xe5f4bab71494cd6b
+data8 0x3fdd1e8d6a0d56c0, 0x3cb726e048cc235c
+data8 0x9f51023562fc5676, 0xe576cbf239235ecb
+data8 0x3fdd65e082df5260, 0x3cd9e66872bd5250
+data8 0xa002620915c2a2f6, 0xe4f779b15f5ec5a7
+data8 0x3fddad5b02a82420, 0x3c89743b0b57534b
+data8 0xa0b6e21c2caf9992, 0xe476c1a233a7873e
+data8 0x3fddf4fd84bbe160, 0x3cbf7adea9ee3338
+data8 0xa16e9264cc83a6b2, 0xe3f4a16696608191
+data8 0x3fde3cc8a6ec6ee0, 0x3cce46f5a51f49c6
+data8 0xa22983528f3d8d49, 0xe3711694552da8a8
+data8 0x3fde84bd099a6600, 0x3cdc78f6490a2d31
+data8 0xa2e7c5d2e2e69460, 0xe2ec1eb4e1e0a5fb
+data8 0x3fdeccdb4fc685c0, 0x3cdd3aedb56a4825
+data8 0xa3a96b5599bd2532, 0xe265b74506fbe1c9
+data8 0x3fdf15241f23b3e0, 0x3cd440f3c6d65f65
+data8 0xa46e85d1ae49d7de, 0xe1ddddb499b3606f
+data8 0x3fdf5d98202994a0, 0x3cd6c44bd3fb745a
+data8 0xa53727ca3e11b99e, 0xe1548f662951b00d
+data8 0x3fdfa637fe27bf60, 0x3ca8ad1cd33054dd
+data8 0xa6036453bdc20186, 0xe0c9c9aeabe5e481
+data8 0x3fdfef0467599580, 0x3cc0f1ac0685d78a
+data8 0xa6d34f1969dda338, 0xe03d89d5281e4f81
+data8 0x3fe01bff067d6220, 0x3cc0731e8a9ef057
+data8 0xa7a6fc62f7246ff3, 0xdfafcd125c323f54
+data8 0x3fe04092d1ae3b40, 0x3ccabda24b59906d
+data8 0xa87e811a861df9b9, 0xdf20909061bb9760
+data8 0x3fe0653df0fd9fc0, 0x3ce94c8dcc722278
+data8 0xa959f2d2dd687200, 0xde8fd16a4e5f88bd
+data8 0x3fe08a00c1cae320, 0x3ce6b888bb60a274
+data8 0xaa3967cdeea58bda, 0xddfd8cabd1240d22
+data8 0x3fe0aedba3221c00, 0x3ced5941cd486e46
+data8 0xab904fd587263c84, 0xdd1f4472e1cf64ed
+data8 0x3fe0e651e85229c0, 0x3cdb6701042299b1
+data8 0xad686d44dd5a74bb, 0xdbf173e1f6b46e92
+data8 0x3fe1309cbf4cdb20, 0x3cbf1be7bb3f0ec5
+data8 0xaf524e15640ebee4, 0xdabd54896f1029f6
+data8 0x3fe17b4ee1641300, 0x3ce81dd055b792f1
+data8 0xb14eca24ef7db3fa, 0xd982cb9ae2f47e41
+data8 0x3fe1c66b9ffd6660, 0x3cd98ea31eb5ddc7
+data8 0xb35ec807669920ce, 0xd841bd1b8291d0b6
+data8 0x3fe211f66db3a5a0, 0x3ca480c35a27b4a2
+data8 0xb5833e4755e04dd1, 0xd6fa0bd3150b6930
+data8 0x3fe25df2e05b6c40, 0x3ca4bc324287a351
+data8 0xb7bd34c8000b7bd3, 0xd5ab9939a7d23aa1
+data8 0x3fe2aa64b32f7780, 0x3cba67314933077c
+data8 0xba0dc64d126cc135, 0xd4564563ce924481
+data8 0x3fe2f74fc9289ac0, 0x3cec1a1dc0efc5ec
+data8 0xbc76222cbbfa74a6, 0xd2f9eeed501125a8
+data8 0x3fe344b82f859ac0, 0x3ceeef218de413ac
+data8 0xbef78e31985291a9, 0xd19672e2182f78be
+data8 0x3fe392a22087b7e0, 0x3cd2619ba201204c
+data8 0xc19368b2b0629572, 0xd02baca5427e436a
+data8 0x3fe3e11206694520, 0x3cb5d0b3143fe689
+data8 0xc44b2ae8c6733e51, 0xceb975d60b6eae5d
+data8 0x3fe4300c7e945020, 0x3cbd367143da6582
+data8 0xc7206b894212dfef, 0xcd3fa6326ff0ac9a
+data8 0x3fe47f965d201d60, 0x3ce797c7a4ec1d63
+data8 0xca14e1b0622de526, 0xcbbe13773c3c5338
+data8 0x3fe4cfb4b09d1a20, 0x3cedfadb5347143c
+data8 0xcd2a6825eae65f82, 0xca34913d425a5ae9
+data8 0x3fe5206cc637e000, 0x3ce2798b38e54193
+data8 0xd06301095e1351ee, 0xc8a2f0d3679c08c0
+data8 0x3fe571c42e3d0be0, 0x3ccd7cb9c6c2ca68
+data8 0xd3c0d9f50057adda, 0xc70901152d59d16b
+data8 0x3fe5c3c0c108f940, 0x3ceb6c13563180ab
+data8 0xd74650a98cc14789, 0xc5668e3d4cbf8828
+data8 0x3fe61668a46ffa80, 0x3caa9092e9e3c0e5
+data8 0xdaf5f8579dcc8f8f, 0xc3bb61b3eed42d02
+data8 0x3fe669c251ad69e0, 0x3cccf896ef3b4fee
+data8 0xded29f9f9a6171b4, 0xc20741d7f8e8e8af
+data8 0x3fe6bdd49bea05c0, 0x3cdc6b29937c575d
+data8 0xe2df5765854ccdb0, 0xc049f1c2d1b8014b
+data8 0x3fe712a6b76c6e80, 0x3ce1ddc6f2922321
+data8 0xe71f7a9b94fcb4c3, 0xbe833105ec291e91
+data8 0x3fe76840418978a0, 0x3ccda46e85432c3d
+data8 0xeb96b72d3374b91e, 0xbcb2bb61493b28b3
+data8 0x3fe7bea9496d5a40, 0x3ce37b42ec6e17d3
+data8 0xf049183c3f53c39b, 0xbad848720223d3a8
+data8 0x3fe815ea59dab0a0, 0x3cb03ad41bfc415b
+data8 0xf53b11ec7f415f15, 0xb8f38b57c53c9c48
+data8 0x3fe86e0c84010760, 0x3cc03bfcfb17fe1f
+data8 0xfa718f05adbf2c33, 0xb70432500286b185
+data8 0x3fe8c7196b9225c0, 0x3ced99fcc6866ba9
+data8 0xfff200c3f5489608, 0xb509e6454dca33cc
+data8 0x3fe9211b54441080, 0x3cb789cb53515688
+// The following table entries are not used
+//data8 0x82e138a0fac48700, 0xb3044a513a8e6132
+//data8 0x3fe97c1d30f5b7c0, 0x3ce1eb765612d1d0
+//data8 0x85f4cc7fc670d021, 0xb0f2fb2ea6cbbc88
+//data8 0x3fe9d82ab4b5fde0, 0x3ced3fe6f27e8039
+//data8 0x89377c1387d5b908, 0xaed58e9a09014d5c
+//data8 0x3fea355065f87fa0, 0x3cbef481d25f5b58
+//data8 0x8cad7a2c98dec333, 0xacab929ce114d451
+//data8 0x3fea939bb451e2a0, 0x3c8e92b4fbf4560f
+//data8 0x905b7dfc99583025, 0xaa748cc0dbbbc0ec
+//data8 0x3feaf31b11270220, 0x3cdced8c61bd7bd5
+//data8 0x9446d8191f80dd42, 0xa82ff92687235baf
+//data8 0x3feb53de0bcffc20, 0x3cbe1722fb47509e
+//data8 0x98758ba086e4000a, 0xa5dd497a9c184f58
+//data8 0x3febb5f571cb0560, 0x3ce0c7774329a613
+//data8 0x9cee6c7bf18e4e24, 0xa37be3c3cd1de51b
+//data8 0x3fec197373bc7be0, 0x3ce08ebdb55c3177
+//data8 0xa1b944000a1b9440, 0xa10b2101b4f27e03
+//data8 0x3fec7e6bd023da60, 0x3ce5fc5fd4995959
+//data8 0xa6defd8ba04d3e38, 0x9e8a4b93cad088ec
+//data8 0x3fece4f404e29b20, 0x3cea3413401132b5
+//data8 0xac69dd408a10c62d, 0x9bf89d5d17ddae8c
+//data8 0x3fed4d2388f63600, 0x3cd5a7fb0d1d4276
+//data8 0xb265c39cbd80f97a, 0x99553d969fec7beb
+//data8 0x3fedb714101e0a00, 0x3cdbda21f01193f2
+//data8 0xb8e081a16ae4ae73, 0x969f3e3ed2a0516c
+//data8 0x3fee22e1da97bb00, 0x3ce7231177f85f71
+//data8 0xbfea427678945732, 0x93d5990f9ee787af
+//data8 0x3fee90ac13b18220, 0x3ce3c8a5453363a5
+//data8 0xc79611399b8c90c5, 0x90f72bde80febc31
+//data8 0x3fef009542b712e0, 0x3ce218fd79e8cb56
+//data8 0xcffa8425040624d7, 0x8e02b4418574ebed
+//data8 0x3fef72c3d2c57520, 0x3cd32a717f82203f
+//data8 0xd93299cddcf9cf23, 0x8af6ca48e9c44024
+//data8 0x3fefe762b77744c0, 0x3ce53478a6bbcf94
+//data8 0xe35eda760af69ad9, 0x87d1da0d7f45678b
+//data8 0x3ff02f511b223c00, 0x3ced6e11782c28fc
+//data8 0xeea6d733421da0a6, 0x84921bbe64ae029a
+//data8 0x3ff06c5c6f8ce9c0, 0x3ce71fc71c1ffc02
+//data8 0xfb3b2c73fc6195cc, 0x813589ba3a5651b6
+//data8 0x3ff0aaf2613700a0, 0x3cf2a72d2fd94ef3
+//data8 0x84ac1fcec4203245, 0xfb73a828893df19e
+//data8 0x3ff0eb367c3fd600, 0x3cf8054c158610de
+//data8 0x8ca50621110c60e6, 0xf438a14c158d867c
+//data8 0x3ff12d51caa6b580, 0x3ce6bce9748739b6
+//data8 0x95b8c2062d6f8161, 0xecb3ccdd37b369da
+//data8 0x3ff1717418520340, 0x3ca5c2732533177c
+//data8 0xa0262917caab4ad1, 0xe4dde4ddc81fd119
+//data8 0x3ff1b7d59dd40ba0, 0x3cc4c7c98e870ff5
+//data8 0xac402c688b72f3f4, 0xdcae469be46d4c8d
+//data8 0x3ff200b93cc5a540, 0x3c8dd6dc1bfe865a
+//data8 0xba76968b9eabd9ab, 0xd41a8f3df1115f7f
+//data8 0x3ff24c6f8f6affa0, 0x3cf1acb6d2a7eff7
+//data8 0xcb63c87c23a71dc5, 0xcb161074c17f54ec
+//data8 0x3ff29b5b338b7c80, 0x3ce9b5845f6ec746
+//data8 0xdfe323b8653af367, 0xc19107d99ab27e42
+//data8 0x3ff2edf6fac7f5a0, 0x3cf77f961925fa02
+//data8 0xf93746caaba3e1f1, 0xb777744a9df03bff
+//data8 0x3ff344df237486c0, 0x3cf6ddf5f6ddda43
+//data8 0x8ca77052f6c340f0, 0xacaf476f13806648
+//data8 0x3ff3a0dfa4bb4ae0, 0x3cfee01bbd761bff
+//data8 0xa1a48604a81d5c62, 0xa11575d30c0aae50
+//data8 0x3ff4030b73c55360, 0x3cf1cf0e0324d37c
+//data8 0xbe45074b05579024, 0x9478e362a07dd287
+//data8 0x3ff46ce4c738c4e0, 0x3ce3179555367d12
+//data8 0xe7a08b5693d214ec, 0x8690e3575b8a7c3b
+//data8 0x3ff4e0a887c40a80, 0x3cfbd5d46bfefe69
+//data8 0x94503d69396d91c7, 0xedd2ce885ff04028
+//data8 0x3ff561ebd9c18cc0, 0x3cf331bd176b233b
+//data8 0xced1d96c5bb209e6, 0xc965278083808702
+//data8 0x3ff5f71d7ff42c80, 0x3ce3301cc0b5a48c
+//data8 0xabac2cee0fc24e20, 0x9c4eb1136094cbbd
+//data8 0x3ff6ae4c63222720, 0x3cf5ff46874ee51e
+//data8 0x8040201008040201, 0xb4d7ac4d9acb1bf4
+//data8 0x3ff7b7d33b928c40, 0x3cfacdee584023bb
+LOCAL_OBJECT_END(T_table)
+
+
+
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+ // C_3 ~ 1/6, read with ldfe: 64-bit significand, then sign/exponent word
+data8 0xaaaaaaaaaaaaaaab, 0x0000000000003ffc
+ // C_5 ~ 3/40, read with ldfe (same layout as C_3)
+data8 0x999999999999999a, 0x0000000000003ffb
+ // C_7, C_9 (pair of doubles, read with ldfpd): ~5/112, ~35/1152
+data8 0x3fa6db6db6db6db7, 0x3f9f1c71c71c71c8
+ // pi/2 (low, high), pair of doubles read with ldfpd
+data8 0x3C91A62633145C07, 0x3FF921FB54442D18
+ // C_11, C_13 (pair of doubles at byte offset 64): ~63/2816, ~231/13312
+data8 0x3f96e8ba2e8ba2e9, 0x3f91c4ec4ec4ec4e
+ // C_15, C_17 (pair of doubles): ~143/10240, ~6435/557056
+data8 0x3f8c99999999999a, 0x3f87a87878787223
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+R_DBL_S = r21
+R_EXP0 = r22
+R_EXP = r15
+R_SGNMASK = r23
+R_TMP = r24
+R_TMP2 = r25
+R_INDEX = r26
+R_TMP3 = r27
+R_TMP03 = r27 // same register as R_TMP3
+R_TMP4 = r28
+R_TMP5 = r23 // shares r23 with R_SGNMASK
+R_TMP6 = r22 // shares r22 with R_EXP0
+R_TMP7 = r21 // shares r21 with R_DBL_S
+R_T = r29
+R_BIAS = r20
+
+F_T = f6 // t
+F_1S2 = f7 // 1-s^2
+F_1S2_S = f9 // (1-s^2) rounded to single precision
+F_INV_1T2 = f10 // table value for 1/(1-t^2)
+F_SQRT_1T2 = f11 // table value for sqrt(1-t^2)
+F_S2T2 = f12
+F_X = f13
+F_D = f14 // s-t
+F_2M64 = f15 // 2^{-64}
+
+F_CS2 = f32 // F_CS2..F_CS9: sqrt coefficients
+F_CS3 = f33
+F_CS4 = f34
+F_CS5 = f35
+F_CS6 = f36
+F_CS7 = f37
+F_CS8 = f38
+F_CS9 = f39
+F_S23 = f40
+F_S45 = f41
+F_S67 = f42
+F_S89 = f43
+F_S25 = f44
+F_S69 = f45
+F_S29 = f46
+F_X2 = f47
+F_X4 = f48
+F_TSQRT = f49
+F_DTX = f50
+F_R = f51
+F_R2 = f52
+F_R3 = f53
+F_R4 = f54
+
+F_C3 = f55 // F_C3..F_C9: coefficients loaded from poly_coeffs
+F_C5 = f56
+F_C7 = f57
+F_C9 = f58
+F_P79 = f59
+F_P35 = f60
+F_P39 = f61
+
+F_ATHI = f62 // asin(t)_high (table value)
+F_ATLO = f63 // asin(t)_low (table value)
+
+F_T1 = f64
+F_Y = f65
+F_Y2 = f66
+F_ANDMASK = f67
+F_ORMASK = f68
+F_S = f69
+F_05 = f70 // 0.5
+F_SQRT_1S2 = f71
+F_DS = f72
+F_Z = f73
+F_1T2 = f74
+F_DZ = f75
+F_ZE = f76
+F_YZ = f77
+F_Y1S2 = f78
+F_Y1S2X = f79
+F_1X = f80
+F_ST = f81
+F_1T2_ST = f82
+F_TSS = f83
+F_Y1S2X2 = f84
+F_DZ_TERM = f85
+F_DTS = f86
+F_DS2X = f87
+F_T2 = f88
+F_ZY1S2S = f89
+F_Y1S2_1X = f90
+F_TS = f91
+F_PI2_LO = f92 // pi/2, low part
+F_PI2_HI = f93 // pi/2, high part
+F_S19 = f94
+F_INV1T2_2 = f95
+F_CORR = f96
+F_DZ0 = f97
+
+F_C11 = f98 // F_C11, F_C13: coefficients loaded from poly_coeffs
+F_C13 = f99
+F_C15 = f100
+F_C17 = f101
+F_P1113 = f102
+F_P1517 = f103
+F_P1117 = f104
+F_P317 = f105
+F_R8 = f106
+F_HI = f107
+F_1S2_HI = f108
+F_DS2 = f109
+F_Y2_2 = f110
+F_S2 = f111
+F_S_DS2 = f112
+F_S_1S2S = f113
+F_XL = f114
+F_2M128 = f115 // 2^{-128}
+
.section .text
-.proc asinl#
-.align 32
+GLOBAL_LIBM_ENTRY(asinl)
+
+{.mfi
+ // get exponent, mantissa (rounded to double precision) of s
+ getf.d R_DBL_S = f8
+ // 1-s^2
+ fnma.s1 F_1S2 = f8, f8, f1
+ // r2 = pointer to T_table
+ addl r2 = @ltoff(T_table), gp
+}
+{.mfi
+ // sign mask
+ mov R_SGNMASK = 0x20000
+ nop.f 0
+ // bias-63-1
+ mov R_TMP03 = 0xffff-64;;
+}
-asinl:
-{ .mfi
- alloc r32 = ar.pfs,1,11,4,0
-(p0) fnorm asin_NORM_f8 = f8
-(p0) mov asin_GR_17_ones = 0x1ffff
+{.mfi
+ // get exponent of s
+ getf.exp R_EXP = f8
+ nop.f 0
+ // R_TMP4 = 2^45
+ shl R_TMP4 = R_SGNMASK, 45-17
}
-{ .mii
-(p0) mov asin_GR_16_ones = 0xffff
-(p0) mov asin_GR_ff9b = 0xff9b ;;
- nop.i 999
+{.mlx
+ // load bias-4
+ mov R_TMP = 0xffff-4
+ // load RU(sqrt(2)/2) to integer register (in double format, shifted left by 1)
+ movl R_TMP2 = 0x7fcd413cccfe779a;;
}
-{ .mmi
-(p0) setf.exp asin_2m100 = asin_GR_ff9b
-(p0) addl r40 = @ltoff(asin_coefficients), gp
- nop.i 999
+{.mfi
+ // load 2^{-64} in FP register
+ setf.exp F_2M64 = R_TMP03
+ nop.f 0
+ // index = (0x7-exponent)|b1 b2.. b6
+ extr.u R_INDEX = R_DBL_S, 46, 9
}
-;;
-{ .mmi
- ld8 r40 = [r40]
- nop.m 999
- nop.i 999
+{.mfi
+ // get t = sign|exponent|b1 b2.. b6 1 x.. x
+ or R_T = R_DBL_S, R_TMP4
+ nop.f 0
+ // R_TMP4 = 2^45-1
+ sub R_TMP4 = R_TMP4, r0, 1;;
}
-;;
+{.mfi
+ // get t = sign|exponent|b1 b2.. b6 1 0.. 0
+ andcm R_T = R_T, R_TMP4
+ nop.f 0
+ // eliminate sign from R_DBL_S (shift left by 1)
+ shl R_TMP3 = R_DBL_S, 1
+}
-// Load the constants
+{.mfi
+ // R_BIAS = 3*2^6
+ mov R_BIAS = 0xc0
+ nop.f 0
+ // eliminate sign from R_EXP
+ andcm R_EXP0 = R_EXP, R_SGNMASK;;
+}
-{ .mmi
-(p0) ldfe asin_A10 = [r40],16 ;;
-(p0) ldfe asin_A9 = [r40],16
- nop.i 999 ;;
+
+
+{.mfi
+ // load start address for T_table
+ ld8 r2 = [r2]
+ nop.f 0
+ // p8 = 1 if |s|> = sqrt(2)/2
+ cmp.geu p8, p0 = R_TMP3, R_TMP2
}
-{ .mmi
-(p0) ldfe asin_A8 = [r40],16 ;;
-(p0) ldfe asin_A7 = [r40],16
- nop.i 999 ;;
+{.mlx
+ // p7 = 1 if |s|<2^{-4} (exponent of s<bias-4)
+ cmp.lt p7, p0 = R_EXP0, R_TMP
+ // sqrt coefficient cs8 = -33*13/128
+ movl R_TMP2 = 0xc0568000;;
}
-{ .mmi
-(p0) ldfe asin_A6 = [r40],16 ;;
-(p0) getf.exp asin_GR_signexp_f8 = asin_NORM_f8
- nop.i 999
+
+
+{.mbb
+ // load t in FP register
+ setf.d F_T = R_T
+ // if |s|<2^{-4}, take alternate path
+ (p7) br.cond.spnt SMALL_S
+ // if |s|> = sqrt(2)/2, take alternate path
+ (p8) br.cond.sptk LARGE_S
}
-{ .mmi
-(p0) ldfe asin_A5 = [r40],16 ;;
-(p0) ldfe asin_A4 = [r40],16
- nop.i 999 ;;
+{.mlx
+ // index = (4-exponent)|b1 b2.. b6
+ sub R_INDEX = R_INDEX, R_BIAS
+ // sqrt coefficient cs9 = 55*13/128
+ movl R_TMP = 0x40b2c000;;
}
-{ .mfi
- nop.m 999
-(p0) fmerge.s asin_ABS_NORM_f8 = f0, asin_NORM_f8
-(p0) and asin_GR_exp = asin_GR_signexp_f8, asin_GR_17_ones ;;
+
+{.mfi
+ // sqrt coefficient cs8 = -33*13/128
+ setf.s F_CS8 = R_TMP2
+ nop.f 0
+ // shift R_INDEX by 5
+ shl R_INDEX = R_INDEX, 5
+}
+
+{.mfi
+ // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
+ mov R_TMP4 = 0xffff - 1
+ nop.f 0
+ // sqrt coefficient cs6 = -21/16
+ mov R_TMP6 = 0xbfa8;;
}
-// case 1: |x| < 2^-40 ==> p6 (includes x = +-0)
-// case 2: 2^-40 <= |x| < 2^-2 ==> p8
-// case 3: 2^-2 <= |x| < 1 ==> p9
-// case 4: 1 <= |x| ==> p11
-// In case 4, we pick up the special case x = +-1 and return +-pi/2
-{ .mii
-(p0) ldfe asin_A3 = [r40],16
-(p0) sub asin_GR_true_exp = asin_GR_exp, asin_GR_16_ones ;;
-(p0) cmp.ge.unc p6, p7 = -41, asin_GR_true_exp ;;
+{.mlx
+ // table index
+ add r2 = r2, R_INDEX
+ // sqrt coefficient cs7 = 33/16
+ movl R_TMP2 = 0x40040000;;
}
-{ .mii
-(p0) ldfe asin_A2 = [r40],16
-(p7) cmp.ge.unc p8, p9 = -3, asin_GR_true_exp ;;
-(p9) cmp.ge.unc p10, p11 = -1, asin_GR_true_exp
+
+{.mmi
+ // load cs9 = 55*13/128
+ setf.s F_CS9 = R_TMP
+ // sqrt coefficient cs5 = 7/8
+ mov R_TMP3 = 0x3f60
+ // sqrt coefficient cs6 = -21/16
+ shl R_TMP6 = R_TMP6, 16;;
}
-{ .mmi
-(p0) ldfe asin_A1 = [r40],16 ;;
-(p0) ldfe asin_pi_by_2 = [r40],16
- nop.i 999
+
+{.mmi
+ // load significand of 1/(1-t^2)
+ ldf8 F_INV_1T2 = [r2], 8
+ // sqrt coefficient cs7 = 33/16
+ setf.s F_CS7 = R_TMP2
+ // sqrt coefficient cs4 = -5/8
+ mov R_TMP5 = 0xbf20;;
}
-// case 4: |x| >= 1
-{ .mib
- nop.m 999
- nop.i 999
-(p11) br.spnt L(ASIN_ERROR_RETURN) ;;
+
+{.mmi
+ // load significand of sqrt(1-t^2)
+ ldf8 F_SQRT_1T2 = [r2], 8
+ // sqrt coefficient cs6 = -21/16
+ setf.s F_CS6 = R_TMP6
+ // sqrt coefficient cs5 = 7/8
+ shl R_TMP3 = R_TMP3, 16;;
}
-// case 1: |x| < 2^-40
-{ .mfb
- nop.m 999
-(p6) fma.s0 f8 = asin_2m100,f8,f8
-(p6) br.ret.spnt b0 ;;
+
+{.mmi
+ // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
+ setf.exp F_CS3 = R_TMP4
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp
+ // sqrt coefficient cs4 = -5/8
+ shl R_TMP5 = R_TMP5, 16;;
}
-// case 2: 2^-40 <= |x| < 2^-2 ==> p8
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_X2 = f8,f8, f0
- nop.i 999 ;;
+{.mfi
+ // sqrt coefficient cs5 = 7/8
+ setf.s F_CS5 = R_TMP3
+ // d = s-t
+ fms.s1 F_D = f8, f1, F_T
+ // set p6 = 1 if s<0, p11 = 1 if s> = 0
+ cmp.ge p6, p11 = R_EXP, R_DBL_S
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_X4 = asin_X2,asin_X2, f0
- nop.i 999 ;;
+{.mfi
+ // r3 = load start address to polynomial coefficients
+ ld8 r3 = [r3]
+ // s+t
+ fma.s1 F_S2T2 = f8, f1, F_T
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P810 = asin_X4, asin_A10, asin_A8
- nop.i 999
+
+{.mfi
+ // sqrt coefficient cs4 = -5/8
+ setf.s F_CS4 = R_TMP5
+ // s^2-t^2
+ fma.s1 F_S2T2 = F_S2T2, F_D, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P79 = asin_X4, asin_A9, asin_A7
- nop.i 999 ;;
+
+{.mfi
+ // load C3
+ ldfe F_C3 = [r3], 16
+ // 0.5/(1-t^2) = 2^{-64}*(2^63/(1-t^2))
+ fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P610 = asin_X4, asin_P810, asin_A6
- nop.i 999
+{.mfi
+ // load C_5
+ ldfe F_C5 = [r3], 16
+ // set correct exponent for sqrt(1-t^2)
+ fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P59 = asin_X4, asin_P79, asin_A5
- nop.i 999 ;;
+
+{.mfi
+ // load C_7, C_9
+ ldfpd F_C7, F_C9 = [r3]
+ // x = -(s^2-t^2)/(1-t^2)/2
+ fnma.s1 F_X = F_INV_1T2, F_S2T2, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P410 = asin_X4, asin_P610, asin_A4
- nop.i 999
+
+{.mfi
+ // load asin(t)_high, asin(t)_low
+ ldfpd F_ATHI, F_ATLO = [r2]
+ // t*sqrt(1-t^2)
+ fma.s1 F_TSQRT = F_T, F_SQRT_1T2, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P39 = asin_X4, asin_P59, asin_A3
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // cs9*x+cs8
+ fma.s1 F_S89 = F_CS9, F_X, F_CS8
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P210 = asin_X4, asin_P410, asin_A2
- nop.i 999
+{.mfi
+ nop.m 0
+ // cs7*x+cs6
+ fma.s1 F_S67 = F_CS7, F_X, F_CS6
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P19 = asin_X4, asin_P39, asin_A1
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // cs5*x+cs4
+ fma.s1 F_S45 = F_CS5, F_X, F_CS4
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P1P2 = asin_X2, asin_P210, asin_P19
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // x*x
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p8) fma.s1 asin_P1P2 = asin_X2, asin_P1P2, f0
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // (s-t)-t*x
+ fnma.s1 F_DTX = F_T, F_X, F_D
+ nop.i 0
}
-{ .mfb
- nop.m 999
-(p8) fma.s0 f8 = asin_NORM_f8, asin_P1P2, asin_NORM_f8
-(p8) br.ret.spnt b0 ;;
+{.mfi
+ nop.m 0
+ // cs3*x+cs2 (cs2 = -0.5 = -cs3)
+ fms.s1 F_S23 = F_CS3, F_X, F_CS3
+ nop.i 0;;
}
-// case 3: 2^-2 <= |x| < 1
-// 1- X*X is computed as B + b
-// Step 1.1: Get B and b
-// atan2 will return
-// f8 = Z_hi
-// f10 = Z_lo
-// f11 = s_lo
+{.mfi
+ nop.m 0
+ // cs9*x^3+cs8*x^2+cs7*x+cs6
+ fma.s1 F_S69 = F_S89, F_X2, F_S67
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
-{ .mfi
-(p0) mov asin_GR_fffe = 0xfffe
-(p0) fmerge.se f8 = asin_ABS_NORM_f8, asin_ABS_NORM_f8
-nop.i 0
-};;
-{ .mmf
-nop.m 0
-(p0) setf.exp asin_HALF = asin_GR_fffe
-(p0) fmerge.se f12 = asin_NORM_f8, asin_NORM_f8 ;;
+{.mfi
+ nop.m 0
+ // t*sqrt(1-t^2)*x^2
+ fma.s1 F_TSQRT = F_TSQRT, F_X2, f0
+ nop.i 0
}
+{.mfi
+ nop.m 0
+ // cs5*x^3+cs4*x^2+cs3*x+cs2
+ fma.s1 F_S25 = F_S45, F_X2, F_S23
+ nop.i 0;;
+}
-{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p6,p7 = asin_ABS_NORM_f8, asin_HALF
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // ((s-t)-t*x)*sqrt(1-t^2)
+ fma.s1 F_DTX = F_DTX, F_SQRT_1T2, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p7) fma.s1 asin_D = f1,f1,asin_ABS_NORM_f8
- nop.i 999
+
+{.mfi
+ nop.m 0
+ // if sign is negative, negate table values: asin(t)_low
+ (p6) fnma.s1 F_ATLO = F_ATLO, f1, f0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p7) fms.s1 asin_C = f1,f1,asin_ABS_NORM_f8
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // PS29 = cs9*x^7+..+cs5*x^3+cs4*x^2+cs3*x+cs2
+ fma.s1 F_S29 = F_S69, F_X4, F_S25
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p7) fma.s1 asin_B = asin_C, asin_D, f0
- nop.i 999
+
+{.mfi
+ nop.m 0
+ // if sign is negative, negate table values: asin(t)_high
+ (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p7) fms.s1 asin_1mD = f1,f1,asin_D
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // R = ((s-t)-t*x)*sqrt(1-t^2)-t*sqrt(1-t^2)*x^2*PS29
+ fnma.s1 F_R = F_S29, F_TSQRT, F_DTX
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p7) fma.s1 asin_Dd = asin_1mD,f1, asin_ABS_NORM_f8
- nop.i 999
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p7) fms.s1 asin_Bb = asin_C, asin_D, asin_B
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p7) fma.s1 asin_Bb = asin_C, asin_Dd, asin_Bb
- nop.i 999
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p6) fma.s1 asin_C = asin_ABS_NORM_f8, asin_ABS_NORM_f8, f0
- nop.i 999 ;;
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p6) fms.s1 asin_B = f1, f1, asin_C
- nop.i 999
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p6) fms.s1 asin_Cc = asin_ABS_NORM_f8, asin_ABS_NORM_f8, asin_C
- nop.i 999 ;;
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_Hh = asin_HALF, asin_B, f0
- nop.i 999
+
+{.mfi
+ nop.m 0
+ // asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_P39 = F_P39, F_R3, F_ATLO
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p6) fms.s1 asin_1mB = f1, f1, asin_B
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_P39 = F_P39, f1, F_R
+ nop.i 0;;
}
-// Step 1.2:
-// sqrt(B + b) is computed as W + w
-// Get W
-{ .mfi
- nop.m 999
-(p0) frsqrta.s1 asin_y0,p8 = asin_B
- nop.i 999 ;;
+{.mfb
+ nop.m 0
+ // result = asin(t)_high+R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s0 f8 = F_ATHI, f1, F_P39
+ // return
+ br.ret.sptk b0;;
}
-{ .mfi
- nop.m 999
-(p6) fms.s1 asin_1mBmC = asin_1mB, f1, asin_C
- nop.i 999 ;;
+
+
+
+LARGE_S:
+
+{.mfi
+ // bias-1
+ mov R_TMP3 = 0xffff - 1
+ // y ~ 1/sqrt(1-s^2)
+ frsqrta.s1 F_Y, p7 = F_1S2
+ // c9 = 55*13*17/128
+ mov R_TMP4 = 0x10af7b
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_t1 = asin_y0, asin_y0, f0
- nop.i 999 ;;
+{.mlx
+ // c8 = -33*13*15/128
+ mov R_TMP5 = 0x184923
+ movl R_TMP2 = 0xff00000000000000;;
}
-{ .mfi
- nop.m 999
-(p6) fms.s1 asin_Bb = asin_1mBmC, f1, asin_Cc
- nop.i 999 ;;
+{.mfi
+ // set p6 = 1 if s<0, p11 = 1 if s>0
+ cmp.ge p6, p11 = R_EXP, R_DBL_S
+ // 1-s^2
+ fnma.s1 F_1S2 = f8, f8, f1
+ // set p9 = 1
+ cmp.eq p9, p0 = r0, r0;;
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 asin_t2 = asin_t1, asin_Hh, asin_HALF
- nop.i 999 ;;
+
+{.mfi
+ // load 0.5
+ setf.exp F_05 = R_TMP3
+ // (1-s^2) rounded to single precision
+ fnma.s.s1 F_1S2_S = f8, f8, f1
+ // c9 = 55*13*17/128
+ shl R_TMP4 = R_TMP4, 10
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_y1 = asin_t2, asin_y0, asin_y0
- nop.i 999 ;;
+{.mlx
+ // AND mask for getting t ~ sqrt(1-s^2)
+ setf.sig F_ANDMASK = R_TMP2
+ // OR mask
+ movl R_TMP2 = 0x0100000000000000;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_t3 = asin_y1, asin_Hh, f0
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // (s^2)_s
+ fma.s.s1 F_S2 = f8, f8, f0
+ nop.i 0;;
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 asin_t4 = asin_t3, asin_y1, asin_HALF
- nop.i 999 ;;
+
+{.mmi
+ // c9 = 55*13*17/128
+ setf.s F_CS9 = R_TMP4
+ // c7 = 33*13/16
+ mov R_TMP4 = 0x41d68
+ // c8 = -33*13*15/128
+ shl R_TMP5 = R_TMP5, 11;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_y2 = asin_t4, asin_y1, asin_y1
- nop.i 999 ;;
+
+{.mfi
+ setf.sig F_ORMASK = R_TMP2
+ // y^2
+ fma.s1 F_Y2 = F_Y, F_Y, f0
+ // c7 = 33*13/16
+ shl R_TMP4 = R_TMP4, 12
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_S = asin_B, asin_y2, f0
- nop.i 999
+{.mfi
+ // c6 = -33*7/16
+ mov R_TMP6 = 0xc1670
+ // y' ~ sqrt(1-s^2)
+ fma.s1 F_T1 = F_Y, F_1S2, f0
+ // c5 = 63/8
+ mov R_TMP7 = 0x40fc;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_H = asin_y2, asin_HALF, f0
- nop.i 999 ;;
+
+{.mlx
+ // load c8 = -33*13*15/128
+ setf.s F_CS8 = R_TMP5
+ // c4 = -35/8
+ movl R_TMP5 = 0xc08c0000;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_t5 = asin_Hh, asin_y2, f0
- nop.i 999 ;;
+{.mfi
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp
+ // 1-(1-s^2)_s
+ fnma.s1 F_DS = F_1S2_S, f1, f1
+ // p9 = 0 if p7 = 1 (p9 = 1 for special cases only)
+ (p7) cmp.ne p9, p0 = r0, r0
}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 asin_Dd = asin_S, asin_S, asin_B
- nop.i 999 ;;
+{.mlx
+ // load c7 = 33*13/16
+ setf.s F_CS7 = R_TMP4
+ // c3 = 5/2
+ movl R_TMP4 = 0x40200000;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_W = asin_Dd, asin_H, asin_S
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // 1-(s^2)_s
+ fnma.s1 F_S_1S2S = F_S2, f1, f1
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_2W = asin_W, f1, asin_W
- nop.i 999
+{.mlx
+ // load c4 = -35/8
+ setf.s F_CS4 = R_TMP5
+ // c2 = -3/2
+ movl R_TMP5 = 0xbfc00000;;
}
-// Step 1.3
-// Get w
-{ .mfi
- nop.m 999
-(p0) fnma.s1 asin_BmWW = asin_W, asin_W, asin_B
- nop.i 999 ;;
+
+{.mfi
+ // load c3 = 5/2
+ setf.s F_CS3 = R_TMP4
+ // x = (1-s^2)_s*y^2-1
+ fms.s1 F_X = F_1S2_S, F_Y2, f1
+ // c6 = -33*7/16
+ shl R_TMP6 = R_TMP6, 12
}
-// Step 2
-// asin(x) = atan2(X,sqrt(1-X*X))
-// = atan2(X, W) -Xw
-// corr = Xw
-// asin(x) = Z_hi + (s_lo*Z_lo - corr)
-// Call atan2(X, W)
-// Save W in f9
-// Save X in f12
-// Save w in f13
+{.mfi
+ nop.m 0
+ // y^2/2
+ fma.s1 F_Y2_2 = F_Y2, F_05, f0
+ nop.i 0;;
+}
-{ .mfi
- nop.m 999
-(p0) fmerge.se f9 = asin_W, asin_W
- nop.i 999 ;;
+
+{.mfi
+ // load c6 = -33*7/16
+ setf.s F_CS6 = R_TMP6
+ // eliminate lower bits from y'
+ fand F_T = F_T1, F_ANDMASK
+ // c5 = 63/8
+ shl R_TMP7 = R_TMP7, 16
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_BmWWpb = asin_BmWW, f1, asin_Bb
- nop.i 999 ;;
+{.mfb
+ // r3 = load start address to polynomial coefficients
+ ld8 r3 = [r3]
+ // 1-(1-s^2)_s-s^2
+ fnma.s1 F_DS = f8, f8, F_DS
+ // p9 = 1 if s is a special input (NaN, or |s|> = 1)
+ (p9) br.cond.spnt ASINL_SPECIAL_CASES;;
}
-{ .mfi
- nop.m 999
-(p0) frcpa.s1 asin_1d2W,p9 = f1, asin_2W
- nop.i 999 ;;
+{.mmf
+ // get exponent, significand of y' (in single prec.)
+ getf.s R_TMP = F_T1
+ // load c2 = -3/2
+ setf.s F_CS2 = R_TMP5
+ // y*(1-s^2)
+ fma.s1 F_Y1S2 = F_Y, F_1S2, f0;;
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 asin_Ww = asin_BmWWpb, asin_1d2W, f0
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // x' = (y^2/2)*(1-(s^2)_s)-0.5
+ fms.s1 F_XL = F_Y2_2, F_S_1S2S, F_05
+ nop.i 0
}
-.endp asinl
-ASM_SIZE_DIRECTIVE(asinl)
-.proc __libm_callout
-__libm_callout:
-.prologue
-{ .mfi
- nop.m 0
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
-};;
-{ .mfi
- mov GR_SAVE_GP=gp // Save gp
- nop.f 0
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+{.mfi
+ nop.m 0
+ // s^2-(s^2)_s
+ fms.s1 F_S_DS2 = f8, f8, F_S2
+ nop.i 0;;
}
-.body
+
+
+{.mfi
+ nop.m 0
+ // if s<0, set s = -s
+ (p6) fnma.s1 f8 = f8, f1, f0
+ nop.i 0;;
+}
+
+{.mfi
+ // load c5 = 63/8
+ setf.s F_CS5 = R_TMP7
+ // x = (1-s^2)_s*y^2-1+(1-(1-s^2)_s-s^2)*y^2
+ fma.s1 F_X = F_DS, F_Y2, F_X
+ // for t = 2^k*1.b1 b2.., get 7-k|b1.. b6
+ extr.u R_INDEX = R_TMP, 17, 9;;
+}
+
+
+{.mmi
+ // index = (4-exponent)|b1 b2.. b6
+ sub R_INDEX = R_INDEX, R_BIAS
+ nop.m 0
+ // get exponent of y'
+ shr.u R_TMP2 = R_TMP, 23;;
+}
+
+{.mmi
+ // load C3
+ ldfe F_C3 = [r3], 16
+ // set p8 = 1 if y'<2^{-4}
+ cmp.gt p8, p0 = 0x7b, R_TMP2
+ // shift R_INDEX by 5
+ shl R_INDEX = R_INDEX, 5;;
+}
+
+
{.mfb
- nop.m 0
-(p0) fmerge.se f13 = asin_Ww, asin_Ww
-(p0) br.call.sptk.many b0=__libm_atan2_reg#
-};;
-{ .mfi
- mov gp = GR_SAVE_GP // Restore gp
-(p0) fma.s1 asin_XWw = asin_ABS_NORM_f8,f13,f0
- mov b0 = GR_SAVE_B0 // Restore return address
-};;
-// asin_XWw = Xw = corr
-// asin_low = (s_lo * Z_lo - corr)
-// f8 = Z_hi + (s_lo * Z_lo - corr)
+ // get table index for sqrt(1-t^2)
+ add r2 = r2, R_INDEX
+ // get t = 2^k*1.b1 b2.. b7 1
+ for F_T = F_T, F_ORMASK
+ (p8) br.cond.spnt VERY_LARGE_INPUT;;
+}
-{ .mfi
- nop.m 999
-(p0) fms.s1 asin_low = f11, f10, asin_XWw
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
-};;
-{ .mfi
- nop.m 999
-(p0) fma.s0 f8 = f8, f1, asin_low
- nop.i 999 ;;
+
+{.mmf
+ // load C5
+ ldfe F_C5 = [r3], 16
+ // load significands of 1/(1-t^2), sqrt(1-t^2)
+ ldfp8 F_INV_1T2, F_SQRT_1T2 = [r2], 16
+ // x = ((1-s^2)*y^2-1)/2
+ fma.s1 F_X = F_X, F_05, f0;;
}
-{ .mfb
- nop.m 999
-(p0) fmerge.s f8 = f12,f8
-(p0) br.ret.sptk b0 ;;
+
+
+{.mmf
+ nop.m 0
+ // C7, C9
+ ldfpd F_C7, F_C9 = [r3], 16
+ // set correct exponent for t
+ fmerge.se F_T = F_T1, F_T;;
}
-.endp __libm_callout
-ASM_SIZE_DIRECTIVE(__libm_callout)
-.proc SPECIAL
-SPECIAL:
-L(ASIN_ERROR_RETURN):
-// If X is 1, return (sign of X)pi/2
-{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc p6,p7 = asin_ABS_NORM_f8,f1
- nop.i 999 ;;
+{.mfi
+ // pi/2 (low, high)
+ ldfpd F_PI2_LO, F_PI2_HI = [r3]
+ // c9*x+c8
+ fma.s1 F_S89 = F_X, F_CS9, F_CS8
+ nop.i 0
}
-{ .mfb
-(p6) ldfe asin_pi_by_2_lo = [r40]
-(p6) fmerge.s asin_pi_by_2 = f8,asin_pi_by_2
- nop.b 0;;
+{.mfi
+ nop.m 0
+ // x^2
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
}
-// If X is a NAN, leave
-// qnan snan inf norm unorm 0 -+
-// 1 1 0 0 0 0 11
-{ .mfb
- nop.m 999
-(p6) fma.s0 f8 = f8,asin_pi_by_2_lo,asin_pi_by_2
-(p6) br.ret.spnt b0
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x
+ fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p12,p0 = f8, 0xc3
- nop.i 999 ;;
+
+{.mfi
+ nop.m 0
+ // c7*x+c6
+ fma.s1 F_S67 = F_X, F_CS7, F_CS6
+ nop.i 0;;
}
-{ .mfb
- nop.m 999
-(p12) fma.s0 f8 = f8,f1,f0
-(p12) br.ret.spnt b0 ;;
+
+{.mfi
+ nop.m 0
+ // 1-x
+ fnma.s1 F_1X = F_X, f1, f1
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3*x+c2
+ fma.s1 F_S23 = F_X, F_CS3, F_CS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-t^2
+ fnma.s1 F_1T2 = F_T, F_T, f1
+ nop.i 0
+}
+
+{.mfi
+ // load asin(t)_high, asin(t)_low
+ ldfpd F_ATHI, F_ATLO = [r2]
+ // c5*x+c4
+ fma.s1 F_S45 = F_X, F_CS5, F_CS4
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // t*s
+ fma.s1 F_TS = F_T, f8, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // 0.5/(1-t^2)
+ fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // z~sqrt(1-t^2), rounded to 24 significant bits
+ fma.s.s1 F_Z = F_SQRT_1T2, F_2M64, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // sqrt(1-t^2)
+ fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x^2
+ fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // s*t rounded to 24 significant bits
+ fma.s.s1 F_TSS = F_T, f8, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c9*x^3+..+c6
+ fma.s1 F_S69 = F_X2, F_S89, F_S67
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // ST = (t^2-1+s^2) rounded to 24 significant bits
+ fms.s.s1 F_ST = f8, f8, F_1T2
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c5*x^3+..+c2
+ fma.s1 F_S25 = F_X2, F_S45, F_S23
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 0.25/(1-t^2)
+ fma.s1 F_INV1T2_2 = F_05, F_INV_1T2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // t*s-sqrt(1-t^2)*(1-s^2)*y
+ fnma.s1 F_TS = F_Y1S2, F_SQRT_1T2, F_TS
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // z*0.5/(1-t^2)
+ fma.s1 F_ZE = F_INV_1T2, F_SQRT_1T2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // z^2+t^2-1
+ fms.s1 F_DZ0 = F_Z, F_Z, F_1T2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (1-s^2-(1-s^2)_s)*x
+ fma.s1 F_DS2X = F_X, F_DS, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // t*s-(t*s)_s
+ fms.s1 F_DTS = F_T, f8, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c9*x^7+..+c2
+ fma.s1 F_S29 = F_X4, F_S69, F_S25
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*z
+ fma.s1 F_YZ = F_Z, F_Y, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // t^2
+ fma.s1 F_T2 = F_T, F_T, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-t^2+ST
+ fma.s1 F_1T2_ST = F_ST, f1, F_1T2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)(1-x)
+ fma.s1 F_Y1S2_1X = F_Y1S2, F_1X, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // dz ~ sqrt(1-t^2)-z
+ fma.s1 F_DZ = F_DZ0, F_ZE, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // -1+correction for sqrt(1-t^2)-z
+ fnma.s1 F_CORR = F_INV1T2_2, F_DZ0, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2+x)*y*(1-s^2)
+ fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // z*y*(1-s^2)_s
+ fma.s1 F_ZY1S2S = F_YZ, F_1S2_S, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // s^2-(1-t^2+ST)
+ fms.s1 F_1T2_ST = f8, f8, F_1T2_ST
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x
+ fma.s1 F_DTS = F_YZ, F_DS2X, F_DTS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ, F_Y1S2_1X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R = t*s-sqrt(1-t^2)*(1-s^2)*y+sqrt(1-t^2)*(1-s^2)*y*PS19
+ // (used for polynomial evaluation)
+ fma.s1 F_R = F_S19, F_SQRT_1T2, F_TS
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2)*y*(1-s^2)
+ fma.s1 F_S29 = F_Y1S2X2, F_S29, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // apply correction to dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ_TERM, F_CORR, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x+dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ_TERM, f1, F_DTS
+ nop.i 0;;
}
-{ .mfi
-(p0) mov GR_Parameter_TAG = 60
-(p0) frcpa f10, p6 = f0, f0
-nop.i 0
-};;
-.endp SPECIAL
-ASM_SIZE_DIRECTIVE(SPECIAL)
-.proc __libm_error_region
-__libm_error_region:
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // asin(t)_low-(pi/2)_low
+ fms.s1 F_ATLO = F_ATLO, f1, F_PI2_LO
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s)_s-t^2*y*z
+ fnma.s1 F_TSS = F_T2, F_YZ, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)
+ fma.s1 F_DZ_TERM = F_YZ, F_1T2_ST, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi/2)_hi-asin(t)_hi
+ fms.s1 F_ATHI = F_PI2_HI, f1, F_ATHI
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)+
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29
+ fma.s1 F_DZ_TERM = F_SQRT_1T2, F_S29, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s)_s-t^2*y*z+z*y*ST
+ fma.s1 F_TSS = F_YZ, F_ST, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // -asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fms.s1 F_P39 = F_P39, F_R3, F_ATLO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if s<0, change sign of F_ATHI
+ (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_DZ_TERM = F_P39, f1, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_DZ_TERM = F_ZY1S2S, F_X, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6) +
+ // + (t*s)_s-t^2*y*z+z*y*ST
+ fma.s1 F_DZ_TERM = F_TSS, f1, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+.pred.rel "mutex", p6, p11
+{.mfi
+ nop.m 0
+ // result: add high part of pi/2-table value
+ // s>0 in this case
+ (p11) fma.s0 f8 = F_DZ_TERM, f1, F_ATHI
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // result: add high part of pi/2-table value
+ // if s<0
+ (p6) fnma.s0 f8 = F_DZ_TERM, f1, F_ATHI
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+
+SMALL_S:
+
+ // use 15-term polynomial approximation
+
+{.mmi
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp;;
+ // load start address for coefficients
+ ld8 r3 = [r3]
+ mov R_TMP = 0x3fbf;;
+}
+
+
+{.mmi
+ add r2 = 64, r3
+ ldfe F_C3 = [r3], 16
+ // p7 = 1 if |s|<2^{-64} (exponent of s<bias-64)
+ cmp.lt p7, p0 = R_EXP0, R_TMP;;
+}
+
+{.mmf
+ ldfe F_C5 = [r3], 16
+ ldfpd F_C11, F_C13 = [r2], 16
+ // 2^{-128}
+ fma.s1 F_2M128 = F_2M64, F_2M64, f0;;
+}
+
+{.mmf
+ ldfpd F_C7, F_C9 = [r3]
+ ldfpd F_C15, F_C17 = [r2]
+ // if |s|<2^{-64}, return s+2^{-128}*s
+ (p7) fma.s0 f8 = f8, F_2M128, f8;;
+}
+
+
+
+{.mfb
+ nop.m 0
+ // s^2
+ fma.s1 F_R2 = f8, f8, f0
+ // if |s|<2^{-64}, return (f8 already holds s+2^{-128}*s)
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // s^3
+ fma.s1 F_R3 = f8, F_R2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // s^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*s^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c11+c13*s^2
+ fma.s1 F_P1113 = F_C13, F_R2, F_C11
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*s^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c15+c17*s^2
+ fma.s1 F_P1517 = F_C17, F_R2, F_C15
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // s^8
+ fma.s1 F_R8 = F_R4, F_R4, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*s^2+c7*s^4+c9*s^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c11+c13*s^2+c15*s^4+c17*s^6
+ fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c3+..+c17*s^14
+ fma.s1 F_P317 = F_R8, F_P1117, F_P39
+ nop.i 0;;
+}
+
+
+{.mfb
+ nop.m 0
+ // result
+ fma.s0 f8 = F_P317, F_R3, f8
+ br.ret.sptk b0;;
+}
+
+
+{.mfb
+ nop.m 0
+ fma.s0 f8 = F_P317, F_R3, f0//F_P317, F_R3, F_S29
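+ // NOTE(review): no label precedes this bundle and the bundle above returns unconditionally, so it looks unreachable - confirm; was: nop.f 0//fma.s0 f8 = f13, f6, f0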
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+ VERY_LARGE_INPUT:
+
+{.mfi
+ nop.m 0
+ // s rounded to 24 significant bits
+ fma.s.s1 F_S = f8, f1, f0
+ nop.i 0
+}
+
+{.mfi
+ // load C5
+ ldfe F_C5 = [r3], 16
+ // x = ((1-(s^2)_s)*y^2-1)/2-(s^2-(s^2)_s)*y^2/2
+ fnma.s1 F_X = F_S_DS2, F_Y2_2, F_XL
+ nop.i 0;;
+}
+
+
+
+{.mmf
+ nop.m 0
+ // C7, C9
+ ldfpd F_C7, F_C9 = [r3], 16
+ nop.f 0;;
+}
+
+
+
+{.mfi
+ // pi/2 (low, high)
+ ldfpd F_PI2_LO, F_PI2_HI = [r3], 16
+ // c9*x+c8
+ fma.s1 F_S89 = F_X, F_CS9, F_CS8
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^2
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x
+ fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
+ nop.i 0
+}
+
+{.mfi
+ // C11, C13
+ ldfpd F_C11, F_C13 = [r3], 16
+ // c7*x+c6
+ fma.s1 F_S67 = F_X, F_CS7, F_CS6
+ nop.i 0;;
+}
+
+
+{.mfi
+ // C15, C17
+ ldfpd F_C15, F_C17 = [r3], 16
+ // c3*x+c2
+ fma.s1 F_S23 = F_X, F_CS3, F_CS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c5*x+c4
+ fma.s1 F_S45 = F_X, F_CS5, F_CS4
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (s_s)^2
+ fma.s1 F_DS = F_S, F_S, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // 1-(s_s)^2
+ fnma.s1 F_1S2_S = F_S, F_S, f1
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x^2
+ fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c9*x^3+..+c6
+ fma.s1 F_S69 = F_X2, F_S89, F_S67
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c5*x^3+..+c2
+ fma.s1 F_S25 = F_X2, F_S45, F_S23
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // ((s_s)^2-s^2)
+ fnma.s1 F_DS = f8, f8, F_DS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // (pi/2)_high-y*(1-(s_s)^2)
+ fnma.s1 F_HI = F_Y, F_1S2_S, F_PI2_HI
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c9*x^7+..+c2
+ fma.s1 F_S29 = F_X4, F_S69, F_S25
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // -(y*(1-(s_s)^2))_high
+ fms.s1 F_1S2_HI = F_HI, f1, F_PI2_HI
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2+x)*y*(1-s^2)
+ fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-(s_s)^2)-(y*(1-s^2))_high
+ fma.s1 F_DS2 = F_Y, F_1S2_S, F_1S2_HI
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R ~ sqrt(1-s^2)
+ // (used for polynomial evaluation)
+ fnma.s1 F_R = F_S19, f1, F_Y1S2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)-(y*(1-s^2))_high
+ fma.s1 F_DS2 = F_Y, F_DS, F_DS2
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // (pi/2)_low+(PS29*x^2)*y*(1-s^2)
+ fma.s1 F_S29 = F_Y1S2X2, F_S29, F_PI2_LO
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi/2)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)
+ fms.s1 F_S29 = F_S29, f1, F_DS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c11+c13*R^2
+ fma.s1 F_P1113 = F_C13, F_R2, F_C11
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c15+c17*R^2
+ fma.s1 F_P1517 = F_C17, F_R2, F_C15
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi/2)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)+y*(1-s^2)*x
+ fma.s1 F_S29 = F_Y1S2, F_X, F_S29
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c11+c13*R^2+c15*R^4+c17*R^6
+ fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R^8
+ fma.s1 F_R8 = F_R4, F_R4, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6+..+c17*R^14
+ fma.s1 F_P317 = F_P1117, F_R8, F_P39
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi/2)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
+ // -(y*(1-s^2))_high)+y*(1-s^2)*x - R^3*(c3+..+c17*R^14)
+ fnma.s1 F_S29 = F_P317, F_R3, F_S29
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // set sign
+ (p6) fnma.s1 F_S29 = F_S29, f1, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ (p6) fnma.s1 F_HI = F_HI, f1, f0
+ nop.i 0;;
+}
+
+
+{.mfb
+ nop.m 0
+ // Result:
+ // (pi/2)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
+ // -(y*(1-s^2))_high)+y*(1-s^2)*x - R^3*(c3+..+c17*R^14)
+ // +(pi/2)_high-(y*(1-s^2))_high
+ fma.s0 f8 = F_S29, f1, F_HI
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+
+
+
+
+ ASINL_SPECIAL_CASES:
+
+{.mfi
+ alloc r32 = ar.pfs, 1, 4, 4, 0
+ // check if the input is a NaN, or unsupported format
+ // (i.e. not infinity or normal/denormal)
+ fclass.nm p7, p8 = f8, 0x3f
+ // pointer to pi/2
+ add r3 = 48, r3;;
+}
+
+
+{.mfi
+ // load pi/2
+ ldfpd F_PI2_HI, F_PI2_LO = [r3]
+ // get |s|
+ fmerge.s F_S = f0, f8
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // if NaN, quietize it, and return
+ (p7) fma.s0 f8 = f8, f1, f0
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // |s| = 1 ?
+ fcmp.eq.s0 p9, p0 = F_S, f1
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // copy the input s into FR_X
+ fma.s1 FR_X = f8, f1, f0
+ // load error tag
+ mov GR_Parameter_TAG = 60;;
+}
+
+
+{.mfb
+ nop.m 0
+ // change sign if s = -1
+ (p6) fnma.s1 F_PI2_HI = F_PI2_HI, f1, f0
+ nop.b 0
+}
+
+{.mfb
+ nop.m 0
+ // change sign if s = -1
+ (p6) fnma.s1 F_PI2_LO = F_PI2_LO, f1, f0
+ nop.b 0;;
+}
+
+{.mfb
+ nop.m 0
+ // if |s| = 1, result is pi/2 (negated above when p6 is set)
+ (p9) fma.s0 f8 = F_PI2_HI, f1, F_PI2_LO
+ // return if |s| = 1
+ (p9) br.ret.sptk b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // get Infinity
+ frcpa.s1 FR_RESULT, p0 = f1, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // return QNaN indefinite (0*Infinity)
+ fma.s0 FR_RESULT = f0, FR_RESULT, f0
+ nop.i 0;;
+}
+
+
+GLOBAL_LIBM_END(asinl)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
+// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
@@ -742,24 +2470,29 @@ __libm_error_region:
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
+
+
+// (2)
{ .mmi
- stfe [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfe [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
+// (3)
{ .mib
- stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
{ .mib
- stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
@@ -768,23 +2501,27 @@ __libm_error_region:
nop.m 0
add GR_Parameter_RESULT = 48,sp
};;
+
+// (4)
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
+ mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
-};;
+};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
-.type __libm_atan2_reg#,@function
-.global __libm_atan2_reg#
+
+
+
+
diff --git a/sysdeps/ia64/fpu/e_atan2.S b/sysdeps/ia64/fpu/e_atan2.S
index 38dd2f749a..8be7c6cec5 100644
--- a/sysdeps/ia64/fpu/e_atan2.S
+++ b/sysdeps/ia64/fpu/e_atan2.S
@@ -1,10 +1,10 @@
.file "atan2.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,33 +20,38 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
-// set [the previously overwritten] GR_Parameter_RESULT.
-// 8/17/00 Changed predicate register macro-usage to direct predicate
-// names due to an assembler bug.
-// 9/28/00 Updated to set invalid on SNaN inputs
-// 1/19/01 Fixed flags for small results
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 08/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 09/28/00 Updated to set invalid on SNaN inputs
+// 01/19/01 Fixed flags for small results
+// 04/13/01 Rescheduled to make all paths faster
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/20/02 Corrected inexact flag and directed rounding symmetry bugs
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 04/17/03 Added missing mutex directive
//
// API
//==============================================================
@@ -55,10 +60,12 @@
// Overview of operation
//==============================================================
//
+// The atan2 function returns values in the interval [-pi,+pi].
+//
// There are two basic paths: swap true and swap false.
// atan2(Y,X) ==> atan2(V/U) where U >= V. If Y > X, we must swap.
//
-// p6 swap True |Y| > |X|
+// p6 swap True |Y| > |X|
// p7 swap False |Y| <= |X|
// p8 X+ (If swap=True p8=p9=0)
// p9 X-
@@ -66,21 +73,21 @@
// all the other predicates p10 thru p15 are false for the main path
//
// Simple trigonometric identities show
-// Region 1 (-45 to +45 degrees):
+// Region 1 (-45 to +45 degrees):
// X>0, |Y|<=X, V=Y, U=X atan2(Y,X) = sgnY * (0 + atan(V/U))
//
-// Region 2 (-90 to -45 degrees, and +45 to +90 degrees):
+// Region 2 (-90 to -45 degrees, and +45 to +90 degrees):
// X>0, |Y|>X, V=X, U=Y atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
//
-// Region 3 (-135 to -90 degrees, and +90 to +135 degrees):
+// Region 3 (-135 to -90 degrees, and +90 to +135 degrees):
// X<0, |Y|>X, V=X, U=Y atan2(Y,X) = sgnY * (pi/2 + atan(V/U))
//
-// Region 4 (-180 to -135 degrees, and +135 to +180 degrees):
+// Region 4 (-180 to -135 degrees, and +135 to +180 degrees):
// X<0, |Y|<=X, V=Y, U=X atan2(Y,X) = sgnY * (pi - atan(V/U))
//
// So the result is always of the form atan2(Y,X) = P + sgnXY * atan(V/U)
//
-// We compute atan(V/U) from the identity
+// We compute atan(V/U) from the identity
// atan(z) + atan([(V/U)-z] / [1+(V/U)z])
// where z is a limited precision approximation (16 bits) to V/U
//
@@ -124,13 +131,13 @@
// +number -0 +pi/2
// -number -0 -pi/2
//
-// +0 +number +0
-// -0 +number -0
+// +0 +number +0
+// -0 +number -0
// +0 -number +pi
// -0 -number -pi
//
-// +0 +0 +0
-// -0 +0 -0
+// +0 +0 +0
+// -0 +0 -0
// +0 -0 +pi
// -0 -0 -pi
//
@@ -138,16 +145,26 @@
// anything NaN quiet X
// atan2(+-0/+-0) sets double error tag to 37
-// atan2(+-0/+-0) sets single error tag to 38
-#include "libm_support.h"
+// Registers used
+//==============================================================
+
+// predicate registers used:
+// p6 -> p15
+
+// floating-point registers used:
+// f8, f9 input
+// f32 -> f119
+
+// general registers used
+// r32 -> r41
// Assembly macros
//==============================================================
EXP_AD_P1 = r33
EXP_AD_P2 = r34
-atan2_GR_sml_exp = r35
+rsig_near_one = r35
GR_SAVE_B0 = r35
@@ -159,22 +176,23 @@ GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
atan2_GR_tag = r41
-
-atan2_X = f9
atan2_Y = f8
+atan2_X = f9
atan2_u1_X = f32
atan2_u1_Y = f33
-atan2_Umax = f34
-atan2_Vmin = f35
+atan2_z2_X = f34
+atan2_z2_Y = f35
+
atan2_two = f36
-atan2_absX = f37
+atan2_B1sq_Y = f37
atan2_z1_X = f38
atan2_z1_Y = f39
atan2_B1X = f40
+
atan2_B1Y = f41
-atan2_wp = f42
-atan2_B1sq = f43
+atan2_wp_X = f42
+atan2_B1sq_X = f43
atan2_z = f44
atan2_w = f45
@@ -183,178 +201,149 @@ atan2_P1 = f47
atan2_P2 = f48
atan2_P3 = f49
atan2_P4 = f50
+
atan2_P5 = f51
atan2_P6 = f52
atan2_P7 = f53
atan2_P8 = f54
atan2_P9 = f55
+
atan2_P10 = f56
atan2_P11 = f57
atan2_P12 = f58
atan2_P13 = f59
atan2_P14 = f60
+
atan2_P15 = f61
atan2_P16 = f62
atan2_P17 = f63
atan2_P18 = f64
atan2_P19 = f65
+
atan2_P20 = f66
atan2_P21 = f67
atan2_P22 = f68
-atan2_Pi_by_2 = f69
-
+atan2_tmp = f68
+atan2_pi_by_2 = f69
+atan2_sgn_pi_by_2 = f69
atan2_V13 = f70
+
atan2_W11 = f71
atan2_E = f72
-atan2_gamma = f73
+atan2_wp_Y = f73
atan2_V11 = f74
atan2_V12 = f75
+
atan2_V7 = f76
atan2_V8 = f77
atan2_W7 = f78
atan2_W8 = f79
atan2_W3 = f80
+
atan2_W4 = f81
atan2_V3 = f82
atan2_V4 = f83
atan2_F = f84
atan2_gV = f85
+
atan2_V10 = f86
atan2_zcub = f87
atan2_V6 = f88
atan2_V9 = f89
atan2_W10 = f90
+
atan2_W6 = f91
atan2_W2 = f92
atan2_V2 = f93
-
atan2_alpha = f94
atan2_alpha_1 = f95
+
atan2_gVF = f96
atan2_V5 = f97
atan2_W12 = f98
atan2_W5 = f99
atan2_alpha_sq = f100
+
atan2_Cp = f101
atan2_V1 = f102
-
-atan2_sml_norm = f103
-atan2_FR_tmp = f103
-
+atan2_ysq = f103
atan2_W1 = f104
atan2_alpha_cub = f105
+
atan2_C = f106
-atan2_P = f107
+atan2_xsq = f107
atan2_d = f108
atan2_A_hi = f109
atan2_dsq = f110
+
atan2_pd = f111
atan2_A_lo = f112
atan2_A = f113
-
atan2_Pp = f114
+atan2_sgnY = f115
-atan2_sgnY = f116
+atan2_sig_near_one = f116
+atan2_near_one = f116
atan2_pi = f117
-atan2_sgnX = f118
-atan2_sgnXY = f119
-
-atan2_3pi_by_4 = f120
-atan2_pi_by_4 = f121
-
-//atan2_sF = p7
-//atan2_sT = p6
+atan2_sgn_pi = f117
+atan2_3pi_by_4 = f118
+atan2_pi_by_4 = f119
-// These coefficients are for atan2.
-// You can also use this set to substitute those used in the |X| <= 1 case for atan;
-// BUT NOT vice versa.
/////////////////////////////////////////////////////////////
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-atan2_tb1:
-ASM_TYPE_DIRECTIVE(atan2_tb1,@object)
-data8 0xB199DD6D2675C40F , 0x0000BFFA // P10
+LOCAL_OBJECT_START(atan2_tb1)
data8 0xA21922DC45605EA1 , 0x00003FFA // P11
-data8 0xD78F28FC2A592781 , 0x0000BFFA // P8
+data8 0xB199DD6D2675C40F , 0x0000BFFA // P10
data8 0xC2F01E5DDD100DBE , 0x00003FFA // P9
-data8 0x9D89D7D55C3287A5 , 0x00003FFB // P5
+data8 0xD78F28FC2A592781 , 0x0000BFFA // P8
data8 0xF0F03ADB3FC930D3 , 0x00003FFA // P7
-data8 0xF396268151CFB11C , 0x00003FF7 // P17
-data8 0x9D3436AABE218776 , 0x00003FF5 // P19
-data8 0x80D601879218B53A , 0x00003FFA // P13
-data8 0xA2270D30A90AA220 , 0x00003FF9 // P15
-data8 0xCCCCCCCCCCC906CD , 0x00003FFC // P1
+data8 0x88887EBB209E3543 , 0x0000BFFB // P6
+data8 0x9D89D7D55C3287A5 , 0x00003FFB // P5
+data8 0xBA2E8B9793955C77 , 0x0000BFFB // P4
data8 0xE38E38E320A8A098 , 0x00003FFB // P3
-data8 0xFE7E52D2A89995B3 , 0x0000BFEC // P22
-data8 0xC90FDAA22168C235 , 0x00003FFE // pi/4
-ASM_SIZE_DIRECTIVE(atan2_tb1)
+data8 0x9249249247E37913 , 0x0000BFFC // P2
+data8 0xCCCCCCCCCCC906CD , 0x00003FFC // P1
+data8 0xAAAAAAAAAAAAA8A9 , 0x0000BFFD // P0
+data8 0xC90FDAA22168C235 , 0x00004000 // pi
+LOCAL_OBJECT_END(atan2_tb1)
-atan2_tb2:
-ASM_TYPE_DIRECTIVE(atan2_tb2,@object)
-data8 0x9F90FB984D8E39D0 , 0x0000BFF3 // P20
+LOCAL_OBJECT_START(atan2_tb2)
data8 0xCE585A259BD8374C , 0x00003FF0 // P21
-data8 0xBA2E8B9793955C77 , 0x0000BFFB // P4
-data8 0x88887EBB209E3543 , 0x0000BFFB // P6
-data8 0xD818B4BB43D84BF2 , 0x0000BFF8 // P16
+data8 0x9F90FB984D8E39D0 , 0x0000BFF3 // P20
+data8 0x9D3436AABE218776 , 0x00003FF5 // P19
data8 0xDEC343E068A6D2A8 , 0x0000BFF6 // P18
-data8 0x9297B23CCFFB291F , 0x0000BFFA // P12
+data8 0xF396268151CFB11C , 0x00003FF7 // P17
+data8 0xD818B4BB43D84BF2 , 0x0000BFF8 // P16
+data8 0xA2270D30A90AA220 , 0x00003FF9 // P15
data8 0xD5F4F2182E7A8725 , 0x0000BFF9 // P14
-data8 0xAAAAAAAAAAAAA8A9 , 0x0000BFFD // P0
-data8 0x9249249247E37913 , 0x0000BFFC // P2
+data8 0x80D601879218B53A , 0x00003FFA // P13
+data8 0x9297B23CCFFB291F , 0x0000BFFA // P12
+data8 0xFE7E52D2A89995B3 , 0x0000BFEC // P22
data8 0xC90FDAA22168C235 , 0x00003FFF // pi/2
-data8 0xC90FDAA22168C235 , 0x00004000 // pi
+data8 0xC90FDAA22168C235 , 0x00003FFE // pi/4
data8 0x96cbe3f9990e91a8 , 0x00004000 // 3pi/4
-ASM_SIZE_DIRECTIVE(atan2_tb2)
-
-
+LOCAL_OBJECT_END(atan2_tb2)
-.align 32
-.global atan2#
-#ifdef _LIBC
-.global __atan2#
-.global __ieee754_atan2#
-#endif
-////////////////////////////////////////////////////////
.section .text
-.align 32
-
-.proc atan2#
-atan2:
-#ifdef _LIBC
-.proc __atan2#
-__atan2:
-.proc __ieee754_atan2#
-__ieee754_atan2:
-#endif
-// qnan snan inf norm unorm 0 -+
-// 0 0 1 0 0 0 11
-
-
-// Y NAN? p10 p11
-// p10 ==> quiet Y and return
-// p11 X NAN? p12, p13
-// p12 ==> quiet X and return
+GLOBAL_IEEE754_ENTRY(atan2)
{ .mfi
alloc r32 = ar.pfs,1,5,4,0
frcpa.s1 atan2_u1_X,p6 = f1,atan2_X
- addl EXP_AD_P2 = @ltoff(atan2_tb2), gp
+ nop.i 999
}
{ .mfi
addl EXP_AD_P1 = @ltoff(atan2_tb1), gp
- fclass.m.unc p10,p11 = f8, 0xc3
+ fma.s1 atan2_two = f1,f1,f1
nop.i 999
;;
}
@@ -366,256 +355,233 @@ __ieee754_atan2:
}
{ .mfi
nop.m 999
- fma.s1 atan2_two = f1,f1,f1
+ fma.s1 atan2_xsq = atan2_X,atan2_X,f0
nop.i 999
;;
}
-
{ .mfi
- ld8 EXP_AD_P2 = [ EXP_AD_P2]
- famax.s1 atan2_Umax = f8,f9
+ nop.m 999
+ fclass.m p10,p0 = atan2_Y, 0xc3 // Test for y=nan
nop.i 999
}
-;;
-
{ .mfi
nop.m 999
- fmerge.s atan2_absX = f0,atan2_X
+ fma.s1 atan2_ysq = atan2_Y,atan2_Y,f0
nop.i 999
}
;;
-// p10 Y NAN, quiet and return
{ .mfi
- ldfe atan2_P10 = [EXP_AD_P1],16
- fmerge.s atan2_sgnY = atan2_Y,f1
+ add EXP_AD_P2 = 0xd0,EXP_AD_P1
+ fclass.m p12,p0 = atan2_X, 0xc3 // Test for x nan
nop.i 999
}
-{ .mfb
- nop.m 999
-(p10) fma.d f8 = f8,f9,f0
-(p10) br.ret.spnt b0
;;
-}
-{ .mmf
+// p10 Y NAN, quiet and return
+{ .mfi
ldfe atan2_P11 = [EXP_AD_P1],16
- ldfe atan2_P20 = [EXP_AD_P2],16
- fmerge.s atan2_sgnX = atan2_X,f1
+ fmerge.s atan2_sgnY = atan2_Y,f1
+ nop.i 999
+}
+{ .mfb
+ ldfe atan2_P21 = [EXP_AD_P2],16
+(p10) fma.d.s0 f8 = atan2_Y,atan2_X,f0 // If y=nan, result quietized y
+(p10) br.ret.spnt b0 // Exit if y=nan
;;
}
-{ .mfi
- ldfe atan2_P8 = [EXP_AD_P1],16
+{ .mfi
+ ldfe atan2_P10 = [EXP_AD_P1],16
fma.s1 atan2_z1_X = atan2_u1_X, atan2_Y, f0
nop.i 999
}
-{ .mfi
-
- ldfe atan2_P21 = [EXP_AD_P2],16
- fma.s1 atan2_z1_Y = atan2_u1_Y, atan2_X, f0
+{ .mfi
+ ldfe atan2_P20 = [EXP_AD_P2],16
+ fnma.s1 atan2_B1X = atan2_u1_X, atan2_X, atan2_two
nop.i 999
;;
}
-{ .mfi
+{ .mfi
ldfe atan2_P9 = [EXP_AD_P1],16
- fnma.s1 atan2_B1X = atan2_u1_X, atan2_X, atan2_two
+ fma.s1 atan2_z1_Y = atan2_u1_Y, atan2_X, f0
nop.i 999
}
-{ .mfi
-
- ldfe atan2_P4 = [EXP_AD_P2],16
+{ .mfi
+ ldfe atan2_P19 = [EXP_AD_P2],16
fnma.s1 atan2_B1Y = atan2_u1_Y, atan2_Y, atan2_two
nop.i 999
-;;
}
-
-// p6 (atan2_sT) true if swap
-// p7 (atan2_sF) true if no swap
-// p11 ==> Y !NAN; X NAN?
+;;
{ .mfi
- ldfe atan2_P5 = [EXP_AD_P1],16
-// fcmp.eq.unc.s1 atan2_sF,atan2_sT = atan2_Umax, atan2_X
- fcmp.eq.unc.s1 p7,p6 = atan2_Umax, atan2_X
+ ldfe atan2_P8 = [EXP_AD_P1],16
+ fma.s1 atan2_z2_X = atan2_u1_X, atan2_ysq, f0
nop.i 999
}
{ .mfi
- ldfe atan2_P6 = [EXP_AD_P2],16
-(p11) fclass.m.unc p12,p13 = f9, 0xc3
+ ldfe atan2_P18 = [EXP_AD_P2],16
+ fma.s1 atan2_z2_Y = atan2_u1_Y, atan2_xsq, f0
nop.i 999
-;;
}
-
-{ .mmf
- ldfe atan2_P7 = [EXP_AD_P1],16
- ldfe atan2_P16 = [EXP_AD_P2],16
- famin.s1 atan2_Vmin = f8,f9
;;
-}
-// p8 true if X positive
-// p9 true if X negative
-// both are false is swap is true
+// p10 ==> x inf y ?
+// p11 ==> x !inf y ?
{ .mfi
- ldfe atan2_P17 = [EXP_AD_P1],16
-//(atan2_sF) fcmp.eq.unc.s1 p8,p9 = atan2_sgnX,f1
-(p7) fcmp.eq.unc.s1 p8,p9 = atan2_sgnX,f1
+ ldfe atan2_P7 = [EXP_AD_P1],16
+ fclass.m p10,p11 = atan2_X, 0x23 // test for x inf
nop.i 999
}
-{ .mfi
- ldfe atan2_P18 = [EXP_AD_P2],16
- fma.s1 atan2_sgnXY = atan2_sgnX, atan2_sgnY, f0
- nop.i 999
+{ .mfb
+ ldfe atan2_P17 = [EXP_AD_P2],16
+(p12) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If x nan, result quiet x
+(p12) br.ret.spnt b0 // Exit for x nan
;;
}
+// p6 true if swap, means |y| > |x| or ysq > xsq
+// p7 true if no swap, means |x| >= |y| or xsq >= ysq
+{ .mmf
+ ldfe atan2_P6 = [EXP_AD_P1],16
+ ldfe atan2_P16 = [EXP_AD_P2],16
+ fcmp.ge.s1 p7,p6 = atan2_xsq, atan2_ysq
+;;
+}
{ .mfi
- ldfe atan2_P19 = [EXP_AD_P1],16
-//(atan2_sF) fma.s1 atan2_wp = atan2_z1_X, atan2_z1_X, f0
-(p7) fma.s1 atan2_wp = atan2_z1_X, atan2_z1_X, f0
+ ldfe atan2_P5 = [EXP_AD_P1],16
+ fma.s1 atan2_wp_X = atan2_z1_X, atan2_z1_X, f0
nop.i 999
}
{ .mfi
- ldfe atan2_P12 = [EXP_AD_P2],16
-//(atan2_sT) fma.s1 atan2_wp = atan2_z1_Y, atan2_z1_Y, f0
-(p6) fma.s1 atan2_wp = atan2_z1_Y, atan2_z1_Y, f0
+ ldfe atan2_P15 = [EXP_AD_P2],16
+ fma.s1 atan2_B1sq_X = atan2_B1X, atan2_B1X, f0
nop.i 999
;;
}
-
{ .mfi
- ldfe atan2_P13 = [EXP_AD_P1],16
-//(atan2_sF) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0
-(p7) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0
+ ldfe atan2_P4 = [EXP_AD_P1],16
+(p6) fma.s1 atan2_wp_Y = atan2_z1_Y, atan2_z1_Y, f0
nop.i 999
}
{ .mfi
ldfe atan2_P14 = [EXP_AD_P2],16
-//(atan2_sT) fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0
-(p6) fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0
+(p6) fma.s1 atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0
nop.i 999
;;
}
-
{ .mfi
- ldfe atan2_P15 = [EXP_AD_P1],16
-//(atan2_sF) fma.s1 atan2_B1sq = atan2_B1X, atan2_B1X, f0
-(p7) fma.s1 atan2_B1sq = atan2_B1X, atan2_B1X, f0
+ ldfe atan2_P3 = [EXP_AD_P1],16
+(p6) fma.s1 atan2_E = atan2_z2_Y, atan2_B1Y, atan2_Y
nop.i 999
}
{ .mfi
- ldfe atan2_P0 = [EXP_AD_P2],16
-//(atan2_sT) fma.s1 atan2_B1sq = atan2_B1Y, atan2_B1Y, f0
-(p6) fma.s1 atan2_B1sq = atan2_B1Y, atan2_B1Y, f0
+ ldfe atan2_P13 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_E = atan2_z2_X, atan2_B1X, atan2_X
nop.i 999
;;
}
-// p12 ==> X NAN, quiet and return
{ .mfi
- ldfe atan2_P1 = [EXP_AD_P1],16
- fmerge.s atan2_Umax = f0,atan2_Umax
+ ldfe atan2_P2 = [EXP_AD_P1],16
+(p6) fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0
nop.i 999
}
-{ .mfb
- ldfe atan2_P2 = [EXP_AD_P2],16
-(p12) fma.d f8 = f9,f8,f0
-(p12) br.ret.spnt b0
+{ .mfi
+ ldfe atan2_P12 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0
+ nop.i 999
;;
}
-// p10 ==> x inf y ?
-// p11 ==> x !inf y ?
{ .mfi
- ldfe atan2_P3 = [EXP_AD_P1],16
- fmerge.s atan2_Vmin = f0,atan2_Vmin
+ ldfe atan2_P1 = [EXP_AD_P1],16
+ fcmp.eq.s0 p14,p15=atan2_X,atan2_Y // Dummy for denorm and invalid
nop.i 999
}
-{ .mfi
- ldfe atan2_Pi_by_2 = [EXP_AD_P2],16
- fclass.m.unc p10,p11 = f9, 0x23
- nop.i 999
+{ .mlx
+ ldfe atan2_P22 = [EXP_AD_P2],16
+ movl rsig_near_one = 0x8000000000000001 // signif near 1.0
;;
}
+// p12 ==> x inf y inf
+// p13 ==> x inf y !inf
{ .mmf
- ldfe atan2_P22 = [EXP_AD_P1],16
- ldfe atan2_pi = [EXP_AD_P2],16
- nop.f 999
+ ldfe atan2_P0 = [EXP_AD_P1],16
+ ldfe atan2_pi_by_2 = [EXP_AD_P2],16
+(p10) fclass.m.unc p12,p13 = atan2_Y, 0x23 // x inf, test if y inf
;;
}
{ .mfi
- nop.m 999
- fcmp.eq.s0 p12,p13=f9,f8 // Dummy to catch denormal and invalid
+ ldfe atan2_pi = [EXP_AD_P1],16
+(p6) fma.s1 atan2_w = atan2_wp_Y, atan2_B1sq_Y,f0
nop.i 999
-;;
}
-
-
{ .mfi
- ldfe atan2_pi_by_4 = [EXP_AD_P1],16
-//(atan2_sT) fmerge.ns atan2_sgnXY = atan2_sgnXY, atan2_sgnXY
-(p6) fmerge.ns atan2_sgnXY = atan2_sgnXY, atan2_sgnXY
+ ldfe atan2_pi_by_4 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_w = atan2_wp_X, atan2_B1sq_X,f0
nop.i 999
+;;
}
+
{ .mfi
ldfe atan2_3pi_by_4 = [EXP_AD_P2],16
- fma.s1 atan2_w = atan2_wp, atan2_B1sq,f0
+(p11) fclass.m.unc p9,p0 = atan2_Y, 0x23 // x not inf, test if y inf
nop.i 999
;;
}
-// p12 ==> x inf y inf
-// p13 ==> x inf y !inf
+{ .mfi
+ setf.sig atan2_sig_near_one = rsig_near_one
+(p12) fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x inf, y inf, test if x +inf
+ nop.i 999
+}
{ .mfi
nop.m 999
- fmerge.s atan2_z = f0, atan2_z
+(p6) fnma.s1 atan2_gV = atan2_Y, atan2_z, atan2_X
nop.i 999
;;
}
{ .mfi
- nop.m 99
-(p10) fclass.m.unc p12,p13 = f8, 0x23
+ nop.m 999
+ frcpa.s1 atan2_F,p0 = f1, atan2_E
nop.i 999
}
{ .mfi
- nop.m 99
-(p11) fclass.m.unc p14,p15 = f8, 0x23
+ nop.m 999
+(p7) fnma.s1 atan2_gV = atan2_X, atan2_z, atan2_Y
nop.i 999
;;
}
+// p13 ==> x inf y !inf
{ .mfi
nop.m 999
-(p12) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1
- nop.i 99
-;;
+(p13) fcmp.gt.unc.s1 p14,p15 = atan2_X,f0 // x inf, y !inf, test if x +inf
+ nop.i 999
}
-
-
{ .mfb
- mov atan2_GR_sml_exp = 0x1 // Small exponent for making small norm
-(p14) fma.d f8 = atan2_sgnY, atan2_Pi_by_2, f0
-(p14) br.ret.spnt b0
+ nop.m 999
+(p9) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_2, f0 // +-pi/2 if x !inf, y inf
+(p9) br.ret.spnt b0 // exit if x not inf, y inf, result is +-pi/2
;;
}
-// Make a very small normal in case need to force inexact and underflow
{ .mfi
- setf.exp atan2_sml_norm = atan2_GR_sml_exp
+ nop.m 999
fma.s1 atan2_V13 = atan2_w, atan2_P11, atan2_P10
nop.i 999
}
@@ -626,58 +592,58 @@ __ieee754_atan2:
;;
}
-
{ .mfi
nop.m 999
- fma.s1 atan2_E = atan2_Vmin, atan2_z, atan2_Umax
+ fma.s1 atan2_V11 = atan2_w, atan2_P9, atan2_P8
nop.i 999
}
{ .mfi
nop.m 999
- fnma.s1 atan2_gamma = atan2_Umax, atan2_z, f1
+ fma.s1 atan2_V12 = atan2_w, atan2_w, f0
nop.i 999
;;
}
{ .mfi
nop.m 999
- fma.s1 atan2_V11 = atan2_w, atan2_P9, atan2_P8
+ fma.s1 atan2_V8 = atan2_w, atan2_P7 , atan2_P6
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_V12 = atan2_w, atan2_w, f0
+ fma.s1 atan2_W8 = atan2_w, atan2_P19, atan2_P18
nop.i 999
;;
}
{ .mfi
nop.m 999
- fma.s1 atan2_V7 = atan2_w, atan2_P5 , atan2_P4
+ fnma.s1 atan2_alpha = atan2_E, atan2_F, f1
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_V8 = atan2_w, atan2_P7 , atan2_P6
+ fnma.s1 atan2_alpha_1 = atan2_E, atan2_F, atan2_two
nop.i 999
;;
}
+
{ .mfi
nop.m 999
- fma.s1 atan2_W7 = atan2_w, atan2_P17, atan2_P16
+ fma.s1 atan2_V7 = atan2_w, atan2_P5 , atan2_P4
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_W8 = atan2_w, atan2_P19, atan2_P18
+ fma.s1 atan2_W7 = atan2_w, atan2_P17, atan2_P16
nop.i 999
;;
}
{ .mfi
nop.m 999
- fma.s1 atan2_W3 = atan2_w, atan2_P13, atan2_P12
+ fma.s1 atan2_V4 = atan2_w, atan2_P3 , atan2_P2
nop.i 999
}
{ .mfi
@@ -689,55 +655,55 @@ __ieee754_atan2:
{ .mfi
nop.m 999
- fma.s1 atan2_V3 = atan2_w, atan2_P1 , atan2_P0
+ fma.s1 atan2_V3 = atan2_w, atan2_P1 , atan2_P0
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_V4 = atan2_w, atan2_P3 , atan2_P2
+ fma.s1 atan2_W3 = atan2_w, atan2_P13, atan2_P12
nop.i 999
;;
}
{ .mfi
nop.m 999
- fma.s1 atan2_zcub = atan2_z, atan2_w, f0
+ fma.s1 atan2_V10 = atan2_V12, atan2_V13, atan2_V11
nop.i 999
}
{ .mfi
nop.m 999
- fnma.s1 atan2_gV = atan2_Umax, atan2_z, atan2_Vmin
+ fma.s1 atan2_gVF = atan2_gV, atan2_F, f0
nop.i 999
;;
}
{ .mfi
nop.m 999
- frcpa.s1 atan2_F,p15 = f1, atan2_E
+ fma.s1 atan2_alpha_sq = atan2_alpha, atan2_alpha, f0
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_V10 = atan2_V12, atan2_V13, atan2_V11
+ fma.s1 atan2_Cp = atan2_alpha, atan2_alpha_1, f1
nop.i 999
;;
}
{ .mfi
nop.m 999
- fma.s1 atan2_V6 = atan2_V12, atan2_V8 , atan2_V7
+ fma.s1 atan2_V9 = atan2_V12, atan2_V12, f0
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_V9 = atan2_V12, atan2_V12, f0
+ fma.s1 atan2_W10 = atan2_V12, atan2_P22 , atan2_W11
nop.i 999
;;
}
{ .mfi
nop.m 999
- fma.s1 atan2_W10 = atan2_V12, atan2_P22 , atan2_W11
+ fma.s1 atan2_V6 = atan2_V12, atan2_V8 , atan2_V7
nop.i 999
}
{ .mfi
@@ -749,65 +715,47 @@ __ieee754_atan2:
{ .mfi
nop.m 999
- fma.s1 atan2_W2 = atan2_V12, atan2_W4 , atan2_W3
+ fma.s1 atan2_V2 = atan2_V12, atan2_V4 , atan2_V3
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_V2 = atan2_V12, atan2_V4 , atan2_V3
+ fma.s1 atan2_W2 = atan2_V12, atan2_W4 , atan2_W3
nop.i 999
;;
}
-
-// Both X and Y are INF
-// p10 ==> X +
-// p11 ==> X -
-.pred.rel "mutex",p10,p11
-{ .mfb
- nop.m 999
-(p10) fma.d f8 = atan2_sgnY, atan2_pi_by_4, f0
-(p10) br.ret.spnt b0
-}
-{ .mfb
- nop.m 999
-(p11) fma.d f8 = atan2_sgnY, atan2_3pi_by_4, f0
-(p11) br.ret.spnt b0
-;;
-}
-
-
-.pred.rel "mutex",p8,p9,p6
+// p8 ==> y 0 x?
+// p9 ==> y !0 x?
{ .mfi
nop.m 999
- fnma.s1 atan2_alpha = atan2_E, atan2_F, f1
+ fclass.m p8,p9 = atan2_Y, 0x07 // Test for y=0
nop.i 999
}
{ .mfi
nop.m 999
- fnma.s1 atan2_alpha_1 = atan2_E, atan2_F, atan2_two
+ fma.s1 atan2_zcub = atan2_z, atan2_w, f0
nop.i 999
;;
}
-
{ .mfi
nop.m 999
-//(atan2_sT) fmerge.s atan2_P = atan2_Y, atan2_Pi_by_2
-(p6) fmerge.s atan2_P = atan2_Y, atan2_Pi_by_2
+ fma.s1 atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_gVF = atan2_gV, atan2_F, f0
+ fma.s1 atan2_C = atan2_gVF, atan2_Cp, f0
nop.i 999
;;
}
-
+// p12 ==> y0 x0
+// p13 ==> y0 x!0
{ .mfi
nop.m 999
- fma.s1 atan2_V5 = atan2_V9, atan2_V10, atan2_V6
+(p8) fclass.m.unc p12,p13 = atan2_X, 0x07 // y=0, test if x is 0
nop.i 999
}
{ .mfi
@@ -817,11 +765,9 @@ __ieee754_atan2:
;;
}
-
-
{ .mfi
nop.m 999
-(p8) fmerge.s atan2_P = atan2_sgnY, f0
+ fma.s1 atan2_V5 = atan2_V9, atan2_V10, atan2_V6
nop.i 999
}
{ .mfi
@@ -832,249 +778,214 @@ __ieee754_atan2:
}
-
-
+// p9 ==> y!0 x0
{ .mfi
nop.m 999
-(p9) fmerge.s atan2_P = atan2_sgnY, atan2_pi
+(p9) fclass.m.unc p9,p0 = atan2_X, 0x07 // y not 0, test if x is 0
nop.i 999
+}
+// p10 ==> X +INF, Y +-INF
+{ .mfb
+ nop.m 999
+(p10) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_4, f0 // x=+inf, y=inf
+(p10) br.ret.spnt b0 // Exit for x=+inf, y=inf, result is +-pi/4
;;
}
-
+.pred.rel "mutex",p11,p14
{ .mfi
nop.m 999
- fma.s1 atan2_alpha_sq = atan2_alpha, atan2_alpha, f0
+(p14) fmerge.s f8 = atan2_sgnY, f0 // x=+inf, y !inf, result +-0
nop.i 999
}
-{ .mfi
+// p11 ==> X -INF, Y +-INF
+{ .mfb
nop.m 999
- fma.s1 atan2_Cp = atan2_alpha, atan2_alpha_1, f1
- nop.i 999
+(p11) fma.d.s0 f8 = atan2_sgnY, atan2_3pi_by_4, f0 // x=-inf, y=inf
+(p11) br.ret.spnt b0 // Exit for x=-inf, y=inf, result is +-3pi/4
;;
}
-
{ .mfi
nop.m 999
- fma.s1 atan2_V1 = atan2_V9, atan2_V5, atan2_V2
+(p13) fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x not 0, y=0, test if x>0
nop.i 999
}
-{ .mfi
+{ .mfb
nop.m 999
- fma.s1 atan2_W12 = atan2_V9, atan2_W12, f0
- nop.i 999
+ fma.s1 atan2_d = atan2_alpha_cub, atan2_C, atan2_C
+(p14) br.ret.spnt b0 // Exit if x=+inf, y !inf, result +-0
;;
}
-
-// p13 ==> x inf y !inf
{ .mfi
nop.m 999
- fma.s1 atan2_W1 = atan2_V9, atan2_W5, atan2_W2
+ fma.s1 atan2_W12 = atan2_V9, atan2_W12, f0
nop.i 999
}
-{ .mfi
+{ .mfb
nop.m 999
-(p13) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1
- nop.i 999
+(p9) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_2, f0 // x=0, y not 0
+(p9) br.ret.spnt b0 // Exit if x=0 and y not 0, result is +-pi/2
;;
}
-
{ .mfi
nop.m 999
- fma.s1 atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
+ fma.s1 atan2_V1 = atan2_V9, atan2_V5, atan2_V2
nop.i 999
}
-{ .mfi
+{ .mfb
nop.m 999
- fma.s1 atan2_C = atan2_gVF, atan2_Cp, f0
- nop.i 999
+ fma.s1 atan2_W1 = atan2_V9, atan2_W5, atan2_W2
+(p12) br.spnt ATAN2_ERROR // Branch if x=0 and y=0
;;
}
-.pred.rel "mutex",p10,p11
-// x inf y !inf
-{ .mfb
+{ .mfi
nop.m 999
-(p10) fmerge.s f8 = atan2_sgnY, f0
-(p10) br.ret.spnt b0
+(p10) fmerge.s f8 = atan2_sgnY, f0 // +-0 if x>0, y=0
+ nop.i 999
}
{ .mfb
nop.m 999
-(p11) fma.d f8 = atan2_sgnY, atan2_pi, f0
-(p11) br.ret.spnt b0
+(p11) fma.d.s0 f8 = atan2_sgnY, atan2_pi, f0 // +-pi if x<0, y=0
+(p13) br.ret.spnt b0 // Exit if x!0 and y=0
;;
}
-
-// p10 ==> y 0 x?
-// p11 ==> y !0 x?
{ .mfi
nop.m 999
- fclass.m.unc p10,p11 = f8, 0x07
+ fma.s1 atan2_pd = atan2_P0, atan2_d, f0
nop.i 999
-;;
}
-
{ .mfi
nop.m 999
-(p8) fmerge.s atan2_sml_norm = atan2_sgnY, atan2_sml_norm
+ fma.s1 atan2_dsq = atan2_d, atan2_d, f0
nop.i 999
;;
}
+
{ .mfi
nop.m 999
- fma.s1 atan2_Pp = atan2_W12, atan2_W1, atan2_V1
+ fmerge.se atan2_near_one = f1, atan2_sig_near_one // Const ~1.0
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_d = atan2_alpha_cub, atan2_C, atan2_C
+ fma.s1 atan2_Pp = atan2_W12, atan2_W1, atan2_V1
nop.i 999
;;
}
-// p12 ==> y0 x0
-// p13 ==> y0 x!0
-// p14 ==> y!0 x0
-// p15 ==> y!0 x!0
-{ .mfi
- nop.m 999
-(p10) fclass.m.unc p12,p13 = f9, 0x07
- nop.i 999
-}
+// p8 true if no swap and X positive
+// p9 true if no swap and X negative
+// both are false if swap is true
{ .mfi
nop.m 999
-(p11) fclass.m.unc p14,p15 = f9, 0x07
+(p7) fcmp.ge.unc.s1 p8,p9 = atan2_X,f0
nop.i 999
-;;
}
-
-
-
-
{ .mfb
nop.m 999
-(p13) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1
-(p12) br.spnt ATAN2_ERROR
+(p15) fma.d.s0 f8 = atan2_sgnY, atan2_pi, f0
+(p15) br.ret.spnt b0 // Exit if x=-inf, y !inf, result +-pi
;;
}
-
-
{ .mfi
nop.m 999
- fma.s1 atan2_pd = atan2_P0, atan2_d, f0
+ fma.s1 atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 atan2_dsq = atan2_d, atan2_d, f0
+ fma.s1 atan2_A_lo = atan2_pd, atan2_dsq, atan2_d
nop.i 999
;;
}
+
{ .mfi
nop.m 999
- fma.s1 atan2_A_hi = atan2_zcub, atan2_Pp, atan2_z
+ fma.s1 atan2_sgn_pi = atan2_pi, atan2_sgnY, f0
nop.i 999
}
-{ .mfb
+{ .mfi
nop.m 999
-(p14) fma.d f8 = atan2_sgnY, atan2_Pi_by_2, f0
-(p14) br.ret.spnt b0
+ fma.s1 atan2_A_hi = atan2_zcub, atan2_Pp, atan2_z
+ nop.i 999
;;
}
-
-{ .mfb
- nop.m 999
-(p10) fmerge.s f8 = atan2_sgnY, f0
-(p10) br.ret.spnt b0
-}
-{ .mfb
+// For |Y| <= |X| and X > 0, force inexact in case A_lo is zero
+{ .mfi
nop.m 999
-(p11) fma.d f8 = atan2_sgnY, atan2_pi, f0
-(p11) br.ret.spnt b0
+(p8) fmpy.s0 atan2_tmp = atan2_P22, atan2_P22
+ nop.i 999
;;
}
-
-
{ .mfi
nop.m 999
- fma.s1 atan2_A_lo = atan2_pd, atan2_dsq, atan2_d
+ fma.s1 atan2_A = atan2_A_hi, f1, atan2_A_lo
nop.i 999
-;;
}
-
-
+// For |Y| <= |X| and X > 0, result is A_hi + A_lo
{ .mfi
nop.m 999
- fma.s1 atan2_A = atan2_A_hi, f1, atan2_A_lo
+(p8) fma.d.s0 f8 = atan2_A_hi, f1, atan2_A_lo
nop.i 999
;;
}
-// Force inexact and possibly underflow if very small results
+.pred.rel "mutex",p6,p9
+// We perturb A by multiplying by 1.0+1ulp as we produce the result
+// in order to get symmetrically rounded results in directed rounding modes.
+// If we don't do this, there are a few cases where the trailing 11 bits of
+// the significand of the result, before converting to double, are zero. These
+// cases do not round symmetrically in round to +infinity or round to -infinity.
+// The perturbation also ensures that the inexact flag is set.
+// For |Y| > |X|, result is +- pi/2 - (A_hi + A_lo)
{ .mfi
nop.m 999
-(p8) fma.d atan2_FR_tmp = atan2_sgnXY, atan2_A, atan2_sml_norm
+(p6) fnma.d.s0 f8 = atan2_A, atan2_near_one, atan2_sgn_pi_by_2
nop.i 999
}
+// For |Y| <= |X|, and X < 0, result is +- pi + (A_hi + A_lo)
{ .mfb
nop.m 999
- fma.d f8 = atan2_sgnXY, atan2_A, atan2_P
- br.ret.sptk b0
+(p9) fma.d.s0 f8 = atan2_A, atan2_near_one, atan2_sgn_pi
+ br.ret.sptk b0
;;
}
ATAN2_ERROR:
-
+// Here if x=0 and y=0
{ .mfi
nop.m 999
- fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1
+ fclass.m p10,p11 = atan2_X,0x05 // Test if x=+0
nop.i 999
}
;;
{ .mfi
- mov atan2_GR_tag = 37
-(p10) fmerge.s f10 = atan2_sgnY, f0
- nop.i 999
+ mov atan2_GR_tag = 37
+(p10) fmerge.s f10 = atan2_sgnY, f0 // x=+0, y=0
+ nop.i 999
}
{ .mfi
nop.m 999
-(p11) fma.d f10 = atan2_sgnY, atan2_pi, f0
+(p11) fma.d.s0 f10 = atan2_sgnY, atan2_pi, f0 // x=-0, y=0
nop.i 999
;;
}
-.endp atan2#
-ASM_SIZE_DIRECTIVE(atan2#)
-
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
+GLOBAL_IEEE754_END(atan2)
-
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)
{ .mfi
@@ -1102,19 +1013,19 @@ __libm_error_region:
.body
// (3)
{ .mib
- stfd [GR_Parameter_X] = f9 // STORE Parameter 1 on stack
+ stfd [GR_Parameter_X] = f9 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = f10 // STORE Parameter 3 on stack
+ stfd [GR_Parameter_Y] = f10 // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
// (4)
@@ -1130,8 +1041,7 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_atan2f.S b/sysdeps/ia64/fpu/e_atan2f.S
index 03a4fed82f..c483a7ad34 100644
--- a/sysdeps/ia64/fpu/e_atan2f.S
+++ b/sysdeps/ia64/fpu/e_atan2f.S
@@ -1,10 +1,10 @@
.file "atan2f.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 6/1/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,18 +35,21 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
-// 6/01/00 Initial version
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 06/01/00 Initial version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 8/17/00 Changed predicate register macro-usage to direct predicate
+// 08/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
-// 1/05/01 Fixed flag settings for denormal input.
-// 1/19/01 Added documentation
-// 1/30/01 Improved speed
+// 01/05/01 Fixed flag settings for denormal input.
+// 01/19/01 Added documentation
+// 01/30/01 Improved speed
+// 02/06/02 Corrected .section statement
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
// Description
//=========================================
@@ -226,7 +229,6 @@
// atan2f(+-0/+-0) sets single error tag to 38
// These are domain errors.
-#include "libm_support.h"
//
// Assembly macros
@@ -324,22 +326,20 @@ atan2f_poly_atan_U = f88
//atan2f_Pred_Xneg = p9 // x < 0
-.data
+RODATA
.align 16
-atan2f_coef_table1:
-ASM_TYPE_DIRECTIVE(atan2f_coef_table1,@object)
+LOCAL_OBJECT_START(atan2f_coef_table1)
data8 0xBFD5555512191621 // p1
data8 0x3F522E5D33BC9BAA // p10
data8 0xBFA6E10BA401393F // p7
data8 0x3FB142A73D7C54E3 // p6
data8 0xBFC2473C5145EE38 // p3
data8 0x3FC9997E7AFBFF4E // p2
-ASM_SIZE_DIRECTIVE(atan2f_coef_table1)
+LOCAL_OBJECT_END(atan2f_coef_table1)
-atan2f_coef_table2:
-ASM_TYPE_DIRECTIVE(atan2f_coef_table2,@object)
+LOCAL_OBJECT_START(atan2f_coef_table2)
data8 0xBF7DEAADAA336451 // p9
data8 0x3F97105B4160F86B // p8
data8 0xBFB68EED6A8CFA32 // p5
@@ -348,29 +348,12 @@ data8 0x3ff921fb54442d18 // pi/2
data8 0x400921fb54442d18 // pi
data8 0x3fe921fb54442d18 // pi/4
data8 0x4002d97c7f3321d2 // 3pi/4
-ASM_SIZE_DIRECTIVE(atan2f_coef_table2)
-
+LOCAL_OBJECT_END(atan2f_coef_table2)
-.global atan2f
-#ifdef _LIBC
-.global __atan2f
-.global __ieee754_atan2f
-#endif
-
-.text
-.align 32
-
-atan2f:
-.proc atan2f
-#ifdef _LIBC
-.proc __atan2f
-__atan2f:
-.proc __ieee754_atan2f
-__ieee754_atan2f:
-#endif
-
+.section .text
+GLOBAL_IEEE754_ENTRY(atan2f)
{ .mfi
alloc r32 = ar.pfs,1,5,4,0
@@ -724,7 +707,7 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
-(p10) fma.s f8 = f9,f8,f0 // Result quietized y if y is nan
+(p10) fma.s.s0 f8 = f9,f8,f0 // Result quietized y if y is nan
(p10) br.ret.spnt b0 // Exit if y is nan
}
;;
@@ -737,7 +720,7 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
-(p12) fnorm.s f8 = f9 // Result quietized x if x is nan, y not nan
+(p12) fnorm.s.s0 f8 = f9 // Result quietized x if x is nan, y not nan
(p12) br.ret.spnt b0 // Exit if x is nan, y not nan
}
;;
@@ -757,7 +740,7 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
-(p7) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
+(p7) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
(p7) br.ret.spnt b0 // Exit if x +inf and y inf
}
;;
@@ -790,19 +773,19 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
-(p13) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
+(p13) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
(p13) br.ret.spnt b0 // Exit if x not -inf and y inf
}
;;
{ .mfi
nop.m 999
-(p14) fma.s f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
+(p14) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
nop.i 999
}
{ .mfb
nop.m 999
-(p15) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
+(p15) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
(p11) br.ret.spnt b0 // Exit if x -inf
}
;;
@@ -829,31 +812,28 @@ ATAN2F_XY_INF_NAN_ZERO:
}
{ .mfb
nop.m 999
-(p9) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
+(p9) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
(p10) br.cond.spnt __libm_error_region // Branch if x zero and y zero
}
;;
{ .mfb
nop.m 999
-(p11) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
+(p11) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
br.ret.sptk b0 // Final special case exit
}
;;
-.endp atan2f
-ASM_SIZE_DIRECTIVE(atan2f)
-
+GLOBAL_IEEE754_END(atan2f)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
mov GR_Parameter_TAG = 38
fclass.m p10,p11 = f9,0x5 // @zero | @pos
;;
(p10) fmerge.s f10 = f8, f0
-(p11) fma.s f10 = atan2f_sgn_Y, atan2f_const_pi,f0
+(p11) fma.s.s0 f10 = atan2f_sgn_Y, atan2f_const_pi,f0
;;
{ .mfi
@@ -913,8 +893,7 @@ __libm_error_region:
}
;;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_cosh.S b/sysdeps/ia64/fpu/e_cosh.S
index 205653d4bf..0c6c5b451e 100644
--- a/sysdeps/ia64/fpu/e_cosh.S
+++ b/sysdeps/ia64/fpu/e_cosh.S
@@ -1,10 +1,10 @@
.file "cosh.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1081 +20,799 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-//
+// 05/07/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/15/02 Improved speed with new algorithm
+
// API
//==============================================================
-// double = cosh(double)
-// input floating point f8
-// output floating point f8
-
+// double cosh(double)
// Overview of operation
//==============================================================
-// There are four paths
+// Case 1: 0 < |x| < 0.25
+// Evaluate cosh(x) by a 12th order polynomial
+// Care is taken for the order of multiplication; and A2 is not exactly 1/4!,
+// A3 is not exactly 1/6!, etc.
+// cosh(x) = 1 + (A1*x^2 + A2*x^4 + A3*x^6 + A4*x^8 + A5*x^10 + A6*x^12)
+//
+// Case 2: 0.25 < |x| < 710.47586
+// Algorithm is based on the identity cosh(x) = ( exp(x) + exp(-x) ) / 2.
+// The algorithm for exp is described as below. There are a number of
+// economies from evaluating both exp(x) and exp(-x). Although we
+// are evaluating both quantities, only where the quantities diverge do we
+// duplicate the computations. The basic algorithm for exp(x) is described
+// below.
+//
+// Take the input x. w is "how many log2/128 in x?"
+// w = x * 128/log2
+// n = int(w)
+// x = n log2/128 + r + delta
-// 1. |x| < 0.25 COSH_BY_POLY
-// 2. |x| < 32 COSH_BY_TBL
-// 3. |x| < 2^14 COSH_BY_EXP
-// 4. |x_ >= 2^14 COSH_HUGE
+// n = 128M + index_1 + 2^4 index_2
+// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
-// For paths 1, and 2 SAFE is always 1.
-// For path 4, Safe is always 0.
-// SAFE = 1 means we cannot overflow.
+// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
+// Construct 2^M
+// Get 2^(index_1/128) from table_1;
+// Get 2^(index_2/8) from table_2;
+// Calculate exp(r) by 5th order polynomial
+// r = x - n (log2/128)_high
+// delta = - n (log2/128)_low
+// Calculate exp(delta) as 1 + delta
-#include "libm_support.h"
-// Assembly macros
+// Special values
//==============================================================
-cosh_FR_X = f44
-cosh_FR_SGNX = f40
-
-cosh_FR_Inv_log2by64 = f9
-cosh_FR_log2by64_lo = f11
-cosh_FR_log2by64_hi = f10
-
-cosh_FR_A1 = f9
-cosh_FR_A2 = f10
-cosh_FR_A3 = f11
-
-cosh_FR_Rcub = f12
-cosh_FR_M_temp = f13
-cosh_FR_R_temp = f13
-cosh_FR_Rsq = f13
-cosh_FR_R = f14
-
-cosh_FR_M = f38
-
-cosh_FR_B1 = f15
-cosh_FR_B2 = f32
-cosh_FR_B3 = f33
-
-cosh_FR_peven_temp1 = f34
-cosh_FR_peven_temp2 = f35
-cosh_FR_peven = f36
-
-cosh_FR_podd_temp1 = f34
-cosh_FR_podd_temp2 = f35
-cosh_FR_podd = f37
-
-cosh_FR_J_temp = f9
-cosh_FR_J = f10
+// cosh(+0) = 1.0
+// cosh(-0) = 1.0
-cosh_FR_Mmj = f39
+// cosh(+qnan) = +qnan
+// cosh(-qnan) = -qnan
+// cosh(+snan) = +qnan
+// cosh(-snan) = -qnan
-cosh_FR_N_temp1 = f11
-cosh_FR_N_temp2 = f12
-cosh_FR_N = f13
+// cosh(-inf) = +inf
+// cosh(+inf) = +inf
-cosh_FR_spos = f14
-cosh_FR_sneg = f15
-
-cosh_FR_Tjhi = f32
-cosh_FR_Tjlo = f33
-cosh_FR_Tmjhi = f34
-cosh_FR_Tmjlo = f35
-
-GR_mJ = r35
-GR_J = r36
-
-AD_mJ = r38
-AD_J = r39
-
-cosh_FR_C_hi = f9
-cosh_FR_C_hi_temp = f10
-cosh_FR_C_lo_temp1 = f11
-cosh_FR_C_lo_temp2 = f12
-cosh_FR_C_lo_temp3 = f13
-
-cosh_FR_C_lo = f38
-cosh_FR_S_hi = f39
-
-cosh_FR_S_hi_temp1 = f10
-cosh_FR_Y_hi = f11
-cosh_FR_Y_lo_temp = f12
-cosh_FR_Y_lo = f13
-cosh_FR_COSH = f9
-
-cosh_FR_X2 = f9
-cosh_FR_X4 = f10
-
-cosh_FR_P1 = f14
-cosh_FR_P2 = f15
-cosh_FR_P3 = f32
-cosh_FR_P4 = f33
-cosh_FR_P5 = f34
-cosh_FR_P6 = f35
-
-cosh_FR_TINY_THRESH = f9
-
-cosh_FR_COSH_temp = f10
-cosh_FR_SCALE = f11
+// Overflow and Underflow
+//=======================
+// cosh(x) = largest double normal when
+// x = 710.47586 = 0x408633ce8fb9f87d
+//
+// There is no underflow.
-cosh_FR_hi_lo = f10
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6 -> f15, f32 -> f61
-cosh_FR_poly_podd_temp1 = f11
-cosh_FR_poly_podd_temp2 = f13
-cosh_FR_poly_peven_temp1 = f11
-cosh_FR_poly_peven_temp2 = f13
+// General registers used:
+// r14 -> r40
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
+// Predicate registers used:
+// p6 -> p15
-GR_Parameter_X = r44
-GR_Parameter_Y = r45
-GR_Parameter_RESULT = r46
+// Assembly macros
+//==============================================================
+rRshf = r14
+rN_neg = r14
+rAD_TB1 = r15
+rAD_TB2 = r16
+rAD_P = r17
+rN = r18
+rIndex_1 = r19
+rIndex_2_16 = r20
+rM = r21
+rBiased_M = r21
+rSig_inv_ln2 = r22
+rIndex_1_neg = r22
+rExp_bias = r23
+rExp_bias_minus_1 = r23
+rExp_mask = r24
+rTmp = r24
+rGt_ln = r24
+rIndex_2_16_neg = r24
+rM_neg = r25
+rBiased_M_neg = r25
+rRshf_2to56 = r26
+rAD_T1_neg = r26
+rExp_2tom56 = r28
+rAD_T2_neg = r28
+rAD_T1 = r29
+rAD_T2 = r30
+rSignexp_x = r31
+rExp_x = r31
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+fRSHF_2TO56 = f6
+fINV_LN2_2TO63 = f7
+fW_2TO56_RSH = f9
+f2TOM56 = f11
+fP5 = f12
+fP4 = f13
+fP3 = f14
+fP2 = f15
+
+fLn2_by_128_hi = f33
+fLn2_by_128_lo = f34
+
+fRSHF = f35
+fNfloat = f36
+fNormX = f37
+fR = f38
+fF = f39
+
+fRsq = f40
+f2M = f41
+fS1 = f42
+fT1 = f42
+fS2 = f43
+fT2 = f43
+fS = f43
+fWre_urm_f8 = f44
+fAbsX = f44
+
+fMIN_DBL_OFLOW_ARG = f45
+fMAX_DBL_NORM_ARG = f46
+fXsq = f47
+fX4 = f48
+fGt_pln = f49
+fTmp = f49
+
+fP54 = f50
+fP5432 = f50
+fP32 = f51
+fP = f52
+fP54_neg = f53
+fP5432_neg = f53
+fP32_neg = f54
+fP_neg = f55
+fF_neg = f56
+
+f2M_neg = f57
+fS1_neg = f58
+fT1_neg = f58
+fS2_neg = f59
+fT2_neg = f59
+fS_neg = f59
+fExp = f60
+fExp_neg = f61
+
+fA6 = f50
+fA65 = f50
+fA6543 = f50
+fA654321 = f50
+fA5 = f51
+fA4 = f52
+fA43 = f52
+fA3 = f53
+fA2 = f54
+fA21 = f54
+fA1 = f55
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
+RODATA
.align 16
-double_cosh_arg_reduction:
-ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
- data8 0xB8AA3B295C17F0BC, 0x00004005
- data8 0xB17217F7D1000000, 0x00003FF8
- data8 0xCF79ABC9E3B39804, 0x00003FD0
-ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
-
-double_cosh_p_table:
-ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
- data8 0x8000000000000000, 0x00003FFE
- data8 0xAAAAAAAAAAAAAB80, 0x00003FFA
- data8 0xB60B60B60B4FE884, 0x00003FF5
- data8 0xD00D00D1021D7370, 0x00003FEF
- data8 0x93F27740C0C2F1CC, 0x00003FE9
- data8 0x8FA02AC65BCBD5BC, 0x00003FE2
-ASM_SIZE_DIRECTIVE(double_cosh_p_table)
-
-double_cosh_ab_table:
-ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
- data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
- data8 0x88888888884ECDD5, 0x00003FF8
- data8 0xD00D0C6DCC26A86B, 0x00003FF2
- data8 0x8000000000000002, 0x00003FFE
- data8 0xAAAAAAAAAA402C77, 0x00003FFA
- data8 0xB60B6CC96BDB144D, 0x00003FF5
-ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
-
-double_cosh_j_table:
-ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
- data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
- data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
- data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
- data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
- data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
- data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
- data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
- data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
- data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
- data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
- data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
- data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
- data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
- data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
- data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
- data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
- data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
- data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
- data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
- data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
- data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
- data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
- data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
- data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
- data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
- data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
- data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
- data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
- data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
- data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
- data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
- data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
- data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
- data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
- data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
- data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
- data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
- data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
- data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
- data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
- data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
- data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
- data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
- data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
- data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
- data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
- data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
- data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
- data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
- data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
- data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
- data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
- data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
- data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
- data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
- data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
- data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
- data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
- data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
- data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
- data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
- data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
- data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
- data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
- data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
-ASM_SIZE_DIRECTIVE(double_cosh_j_table)
-
-.align 32
-.global cosh#
-.section .text
-.proc cosh#
-.align 32
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
-cosh:
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 128/ln(2) is needed for the computation of w. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*128/ln2 into the rightmost bits of the significand.
+// The result of this fma is fW_2TO56_RSH.
+// 2. fRSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
+// the integer part of w, n, as a floating-point number.
+// The result of this fms is fNfloat.
+
+
+LOCAL_OBJECT_START(exp_table_1)
+data8 0x408633ce8fb9f87e // smallest dbl overflow arg
+data8 0x408633ce8fb9f87d // largest dbl arg to give normal dbl result
+data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
+data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
+//
+// Table 1 is 2^(index_1/128) where
+// index_1 goes from 0 to 15
+//
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_1)
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7
+LOCAL_OBJECT_START(exp_table_2)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_2)
+
+LOCAL_OBJECT_START(exp_p_table)
+data8 0x3f8111116da21757 //P5
+data8 0x3fa55555d787761c //P4
+data8 0x3fc5555555555414 //P3
+data8 0x3fdffffffffffd6a //P2
+LOCAL_OBJECT_END(exp_p_table)
+
+LOCAL_OBJECT_START(cosh_p_table)
+data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // A6
+data8 0xD00D00D1021D7370, 0x00003FEF // A4
+data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // A2
+data8 0x93F27740C0C2F1CC, 0x00003FE9 // A5
+data8 0xB60B60B60B4FE884, 0x00003FF5 // A3
+data8 0x8000000000000000, 0x00003FFE // A1
+LOCAL_OBJECT_END(cosh_p_table)
-#ifdef _LIBC
-.global __ieee754_cosh#
-.proc __ieee754_cosh#
-__ieee754_cosh:
-#endif
-// X NAN?
+.section .text
+GLOBAL_IEEE754_ENTRY(cosh)
-{ .mfi
- alloc r32 = ar.pfs,0,12,4,0
-(p0) fclass.m.unc p6,p7 = f8, 0xc3 //@snan | @qnan
- nop.i 999
+{ .mlx
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
-;;
-
-
-{ .mfb
- nop.m 999
-(p6) fma.d.s0 f8 = f8,f1,f8
-(p6) br.ret.spnt b0 ;;
+{ .mlx
+ addl rAD_TB1 = @ltoff(exp_table_1), gp
+ movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56)
}
+;;
-
-// X infinity
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6,p0 = f8, 0x23 //@inf
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p6) fmerge.s f8 = f0,f8
-(p6) br.ret.spnt b0 ;;
+ ld8 rAD_TB1 = [rAD_TB1]
+ fclass.m p6,p0 = f8,0x0b // Test for x=unorm
+ mov rExp_mask = 0x1ffff
}
-
-
-
-// Put 0.25 in f9; p6 true if x < 0.25
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000000fffd ;;
-}
-
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ mov rExp_bias = 0xffff
+ fnorm.s1 fNormX = f8
+ mov rExp_2tom56 = 0xffff-56
}
+;;
+
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
{ .mfi
- nop.m 999
-(p0) fmerge.s cosh_FR_X = f0,f8
+ setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
+ fclass.m p8,p0 = f8,0x07 // Test for x=0
nop.i 999
}
-
-{ .mfi
- nop.m 999
-(p0) fmerge.s cosh_FR_SGNX = f8,f1
- nop.i 999 ;;
+{ .mlx
+ setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
+ movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
}
+;;
{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc p0,p7 = cosh_FR_X,f9
- nop.i 999 ;;
+ ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_NORM_ARG = [rAD_TB1],16
+ fclass.m p10,p0 = f8,0x1e3 // Test for x=inf, nan, NaT
+ nop.i 0
}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.sptk L(COSH_BY_TBL)
+{ .mfb
+ setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
+ nop.f 0
+(p6) br.cond.spnt COSH_UNORM // Branch if x=unorm
}
;;
-
-// COSH_BY_POLY:
-// POLY cannot overflow so there is no need to call __libm_error_support
-// Get the values of P_x from the table
-
-{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(double_cosh_p_table), gp
- nop.i 999
+COSH_COMMON:
+{ .mfi
+ ldfe fLn2_by_128_hi = [rAD_TB1],16
+ nop.f 0
+ nop.i 0
}
-;;
-
-{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+{ .mfb
+ setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63
+(p8) fma.d.s0 f8 = f1,f1,f0 // quick exit for x=0
+(p8) br.ret.spnt b0
}
;;
-
-// Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax
-{ .mmf
- nop.m 999
-(p0) ldfe cosh_FR_P1 = [r34],16
-(p0) fma.s1 cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;
-}
-
-{ .mmi
-(p0) ldfe cosh_FR_P2 = [r34],16 ;;
-(p0) ldfe cosh_FR_P3 = [r34],16
- nop.i 999 ;;
+{ .mfi
+ ldfe fLn2_by_128_lo = [rAD_TB1],16
+ nop.f 0
+ nop.i 0
}
-
-{ .mmi
-(p0) ldfe cosh_FR_P4 = [r34],16 ;;
-(p0) ldfe cosh_FR_P5 = [r34],16
- nop.i 999 ;;
+{ .mfb
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+(p10) fma.d.s0 f8 = f8,f8,f0 // Result if x=inf, nan, NaT
+(p10) br.ret.spnt b0 // quick exit for x=inf, nan, NaT
}
+;;
+// After that last load rAD_TB1 points to the beginning of table 1
{ .mfi
-(p0) ldfe cosh_FR_P6 = [r34],16
-(p0) fma.s1 cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0
- nop.i 999 ;;
+ nop.m 0
+ fcmp.eq.s0 p6,p0 = f8, f0 // Dummy to set D
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
}
+;;
-// Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3
- nop.i 999 ;;
+ nop.m 0
+ fmerge.s fAbsX = f0, fNormX // Form |x|
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_podd = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1
- nop.i 999
+{ .mfb
+ cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+(p7) br.cond.spnt COSH_SMALL // Branch if 0 < |x| < 2^-2
}
+;;
-// Calculate cosh_FR_peven = p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4
- nop.i 999 ;;
-}
+// W = X * Inv_log2_by_128
+// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
+// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2
- nop.i 999 ;;
+ add rAD_P = 0x180, rAD_TB1
+ fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
+ add rAD_TB2 = 0x100, rAD_TB1
}
+;;
+
+// Divide arguments into the following categories:
+// Certain Safe - 0.25 <= |x| <= MAX_DBL_NORM_ARG
+// Possible Overflow p14 - MAX_DBL_NORM_ARG < |x| < MIN_DBL_OFLOW_ARG
+// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= |x| < +inf
+//
+// If the input is really a double arg, then there will never be
+// "Possible Overflow" arguments.
+//
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_peven = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0
- nop.i 999 ;;
+ ldfpd fP5, fP4 = [rAD_P] ,16
+ fcmp.ge.s1 p15,p14 = fAbsX,fMIN_DBL_OFLOW_ARG
+ nop.i 0
}
+;;
+
+// Nfloat = round_int(W)
+// The significand of fW_2TO56_RSH contains the rounded integer part of W,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into rN.
+
+// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
+// Thus, fNfloat contains the floating point version of N
-// Y_lo = x2*p_odd + p_even
-// Calculate f8 = Y_hi + Y_lo
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_X2, cosh_FR_podd, cosh_FR_peven
- nop.i 999 ;;
+ ldfpd fP3, fP2 = [rAD_P]
+(p14) fcmp.gt.unc.s1 p14,p0 = fAbsX,fMAX_DBL_NORM_ARG
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fma.d.s0 f8 = f1, f1, cosh_FR_Y_lo
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
+(p15) br.cond.spnt COSH_CERTAIN_OVERFLOW
}
+;;
-
-L(COSH_BY_TBL):
-
-// Now that we are at TBL; so far all we know is that |x| >= 0.25.
-// The first two steps are the same for TBL and EXP, but if we are HUGE
-// Double
-// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
-// Single
-// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
-// we want to leave now. Go to HUGE if |x| >= 2^14
-// 1000d (register-biased) is e = 14 (true)
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010009 ;;
+{ .mfi
+ getf.sig rN = fW_2TO56_RSH
+ nop.f 0
+ mov rExp_bias_minus_1 = 0xfffe
}
+;;
+
+// rIndex_1 has index_1
+// rIndex_2_16 has index_2 * 16
+// rBiased_M has M
+// rM has true M
+// r = x - Nfloat * ln2_by_128_hi
+// f = 1 - Nfloat * ln2_by_128_lo
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ and rIndex_1 = 0x0f, rN
+ fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX
+ shr rM = rN, 0x7
}
-
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc p6,p7 = cosh_FR_X,f9
- nop.i 999 ;;
+ and rIndex_2_16 = 0x70, rN
+ fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1
+ sub rN_neg = r0, rN
}
+;;
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(COSH_HUGE) ;;
+{ .mmi
+ and rIndex_1_neg = 0x0f, rN_neg
+ add rBiased_M = rExp_bias_minus_1, rM
+ shr rM_neg = rN_neg, 0x7
}
-
-// r32 = 1
-// r34 = N-1
-// r35 = N
-// r36 = j
-// r37 = N+1
-
-// TBL can never overflow
-// cosh(x) = cosh(B+R)
-// = cosh(B) cosh(R) + sinh(B) sinh(R)
-// cosh(R) can be approximated by 1 + p_even
-// sinh(R) can be approximated by p_odd
-
-// ******************************************************
-// STEP 1 (TBL and EXP)
-// ******************************************************
-// Get the following constants.
-// f9 = Inv_log2by64
-// f10 = log2by64_hi
-// f11 = log2by64_lo
-
{ .mmi
-(p0) adds r32 = 0x1,r0
-(p0) addl r34 = @ltoff(double_cosh_arg_reduction), gp
- nop.i 999
+ and rIndex_2_16_neg = 0x70, rN_neg
+ add rAD_T2 = rAD_TB2, rIndex_2_16
+ shladd rAD_T1 = rIndex_1, 4, rAD_TB1
}
;;
-// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
-// put them in an exponent.
-// cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
-// r39 = 0xffff + (N-1) = 0xffff +N -1
-// r40 = 0xffff - (N +1) = 0xffff -N -1
-
-{ .mlx
- ld8 r34 = [r34]
-(p0) movl r38 = 0x000000000000fffe ;;
-}
+// rAD_T1 has address of T1
+// rAD_T2 has address of T2
{ .mmi
-(p0) ldfe cosh_FR_Inv_log2by64 = [r34],16 ;;
-(p0) ldfe cosh_FR_log2by64_hi = [r34],16
- nop.i 999 ;;
+ setf.exp f2M = rBiased_M
+ ldfe fT2 = [rAD_T2]
+ nop.i 0
}
-
-{ .mbb
-(p0) ldfe cosh_FR_log2by64_lo = [r34],16
- nop.b 999
- nop.b 999 ;;
-}
-
-// Get the A coefficients
-// f9 = A_1
-// f10 = A_2
-// f11 = A_3
-
{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(double_cosh_ab_table), gp
- nop.i 999
+ add rBiased_M_neg = rExp_bias_minus_1, rM_neg
+ add rAD_T2_neg = rAD_TB2, rIndex_2_16_neg
+ shladd rAD_T1_neg = rIndex_1_neg, 4, rAD_TB1
}
;;
+// Create Scale = 2^M
+// Load T1 and T2
{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+ ldfe fT1 = [rAD_T1]
+ nop.m 0
+ nop.i 0
+}
+{ .mmf
+ setf.exp f2M_neg = rBiased_M_neg
+ ldfe fT2_neg = [rAD_T2_neg]
+ fma.s1 fF_neg = fNfloat, fLn2_by_128_lo, f1
}
;;
-
-// Calculate M and keep it as integer and floating point.
-// M = round-to-integer(x*Inv_log2by64)
-// cosh_FR_M = M = truncate(ax/(log2/64))
-// Put the significand of M in r35
-// and the floating point representation of M in cosh_FR_M
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_M = cosh_FR_X, cosh_FR_Inv_log2by64, f0
- nop.i 999
+ nop.m 0
+ fma.s1 fRsq = fR, fR, f0
+ nop.i 0
}
-
{ .mfi
-(p0) ldfe cosh_FR_A1 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ ldfe fT1_neg = [rAD_T1_neg]
+ fma.s1 fP54 = fR, fP5, fP4
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fcvt.fx.s1 cosh_FR_M_temp = cosh_FR_M
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fP32 = fR, fP3, fP2
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fnorm.s1 cosh_FR_M = cosh_FR_M_temp
- nop.i 999 ;;
+ nop.m 0
+ fnma.s1 fP54_neg = fR, fP5, fP4
+ nop.i 0
}
+;;
{ .mfi
-(p0) getf.sig r35 = cosh_FR_M_temp
- nop.f 999
- nop.i 999 ;;
-}
-
-// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
-// has a range of -32 thru 31.
-// r35 = M
-// r36 = j
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) and r36 = 0x3f, r35 ;;
+ nop.m 0
+ fnma.s1 fP32_neg = fR, fP3, fP2
+ nop.i 0
}
-
-// Calculate R
-// f13 = f44 - f12*f10 = x - M*log2by64_hi
-// f14 = f13 - f8*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
+;;
{ .mfi
- nop.m 999
-(p0) fnma.s1 cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X
- nop.i 999
+ nop.m 0
+ fma.s1 fP5432 = fRsq, fP54, fP32
+ nop.i 0
}
-
{ .mfi
-(p0) ldfe cosh_FR_A2 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fS2 = fF,fT2,f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fnma.s1 cosh_FR_R = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp
- nop.i 999
+ nop.m 0
+ fma.s1 fS1 = f2M,fT1,f0
+ nop.i 0
}
-
-// Get the B coefficients
-// f15 = B_1
-// f32 = B_2
-// f33 = B_3
-
-{ .mmi
-(p0) ldfe cosh_FR_A3 = [r34],16 ;;
-(p0) ldfe cosh_FR_B1 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfe cosh_FR_B2 = [r34],16 ;;
-(p0) ldfe cosh_FR_B3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) shl r34 = r36, 0x2 ;;
-(p0) sxt1 r37 = r34 ;;
-}
-
-// ******************************************************
-// STEP 2 (TBL and EXP)
-// ******************************************************
-// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
-// f12 = R*R*R
-// f13 = R*R
-// f14 = R <== from above
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Rsq = cosh_FR_R, cosh_FR_R, f0
-(p0) shr r36 = r37, 0x2 ;;
-}
-
-// r34 = M-j = r35 - r36
-// r35 = N = (M-j)/64
-
-{ .mii
-(p0) sub r34 = r35, r36
- nop.i 999 ;;
-(p0) shr r35 = r34, 0x6 ;;
-}
-
-{ .mii
-(p0) sub r40 = r38, r35
-(p0) adds r37 = 0x1, r35
-(p0) add r39 = r38, r35 ;;
-}
-
-// Get the address of the J table, add the offset,
-// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
-// f32 = T(j)_hi
-// f33 = T(j)_lo
-// f34 = T(-j)_hi
-// f35 = T(-j)_lo
-
-{ .mmi
-(p0) sub r34 = r35, r32
-(p0) addl r37 = @ltoff(double_cosh_j_table), gp
- nop.i 999
+ nop.m 0
+ fma.s1 fP5432_neg = fRsq, fP54_neg, fP32_neg
+ nop.i 0
}
;;
{ .mfi
- ld8 r37 = [r37]
-(p0) fma.s1 cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0
- nop.i 999
+ nop.m 0
+ fma.s1 fS1_neg = f2M_neg,fT1_neg,f0
+ nop.i 0
}
-
-// ******************************************************
-// STEP 3 Now decide if we need to branch to EXP
-// ******************************************************
-// Put 32 in f9; p6 true if x < 32
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010004 ;;
-}
-
-// Calculate p_even
-// f34 = B_2 + Rsq *B_3
-// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
-// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3, cosh_FR_B2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fS2_neg = fF_neg,fT2_neg,f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1
- nop.i 999
+ nop.m 0
+ fma.s1 fP = fRsq, fP5432, fR
+ nop.i 0
}
-
-// Calculate p_odd
-// f34 = A_2 + Rsq *A_3
-// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
-// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_podd_temp1 = cosh_FR_Rsq, cosh_FR_A3, cosh_FR_A2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fS = fS1,fS2,f0
+ nop.i 0
}
+;;
{ .mfi
-(p0) setf.exp cosh_FR_N_temp1 = r39
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fms.s1 fP_neg = fRsq, fP5432_neg, fR
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_peven = cosh_FR_Rsq, cosh_FR_peven_temp2, f0
- nop.i 999
+ nop.m 0
+ fma.s1 fS_neg = fS1_neg,fS2_neg,f0
+ nop.i 0
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_podd_temp2 = cosh_FR_Rsq, cosh_FR_podd_temp1, cosh_FR_A1
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
+(p14) br.cond.spnt COSH_POSSIBLE_OVERFLOW
}
+;;
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fExp = fS, fP, fS
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_podd = cosh_FR_podd_temp2, cosh_FR_Rcub, cosh_FR_R
- nop.i 999
+ nop.m 0
+ fma.s1 fExp_neg = fS_neg, fP_neg, fS_neg
+ nop.i 0
}
+;;
-// sinh_GR_mj contains the table offset for -j
-// sinh_GR_j contains the table offset for +j
-// p6 is true when j <= 0
-
-{ .mlx
-(p0) setf.exp cosh_FR_N_temp2 = r40
-(p0) movl r40 = 0x0000000000000020 ;;
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fExp, f1, fExp_neg
+ br.ret.sptk b0 // Normal path exit
}
+;;
-{ .mfi
-(p0) sub GR_mJ = r40, r36
-(p0) fmerge.se cosh_FR_spos = cosh_FR_N_temp1, f1
-(p0) adds GR_J = 0x20, r36 ;;
+// Here if 0 < |x| < 0.25
+COSH_SMALL:
+{ .mmf
+ add rAD_T1 = 0x1a0, rAD_TB1
+ add rAD_T2 = 0x1d0, rAD_TB1
}
+;;
-{ .mii
- nop.m 999
-(p0) shl GR_mJ = GR_mJ, 5 ;;
-(p0) add AD_mJ = r37, GR_mJ ;;
+{ .mmf
+ ldfe fA6 = [rAD_T1],16
+ ldfe fA5 = [rAD_T2],16
+ nop.f 0
}
+;;
{ .mmi
- nop.m 999
-(p0) ldfe cosh_FR_Tmjhi = [AD_mJ],16
-(p0) shl GR_J = GR_J, 5 ;;
-}
-
-{ .mfi
-(p0) ldfs cosh_FR_Tmjlo = [AD_mJ],16
-(p0) fcmp.lt.unc.s1 p6,p7 = cosh_FR_X,f9
-(p0) add AD_J = r37, GR_J ;;
+ ldfe fA4 = [rAD_T1],16
+ ldfe fA3 = [rAD_T2],16
+ nop.i 0
}
+;;
{ .mmi
-(p0) ldfe cosh_FR_Tjhi = [AD_J],16 ;;
-(p0) ldfs cosh_FR_Tjlo = [AD_J],16
- nop.i 999 ;;
+ ldfe fA2 = [rAD_T1],16
+ ldfe fA1 = [rAD_T2],16
+ nop.i 0
}
-
-{ .mfb
- nop.m 999
-(p0) fmerge.se cosh_FR_sneg = cosh_FR_N_temp2, f1
-(p7) br.cond.spnt L(COSH_BY_EXP) ;;
-}
-
-// ******************************************************
-// If NOT branch to EXP
-// ******************************************************
-// Calculate C_hi
-// ******************************************************
-// cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
-// cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi_temp
- nop.i 999
-}
-
-// ******************************************************
-// Calculate S_hi
-// ******************************************************
-// cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
-// cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi_temp1
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_S_hi_temp1 = cosh_FR_sneg, cosh_FR_Tmjhi, f0
- nop.i 999 ;;
-}
-
-// ******************************************************
-// Calculate C_lo
-// ******************************************************
-// cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
-// cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjlo + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
-// cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
-// cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
-// cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
+;;
{ .mfi
- nop.m 999
-(p0) fms.s1 cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi
- nop.i 999
+ nop.m 0
+ fma.s1 fX4 = fXsq, fXsq, f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fms.s1 cosh_FR_S_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA65 = fXsq, fA6, fA5
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA43 = fXsq, fA4, fA3
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_lo_temp3 = cosh_FR_spos, cosh_FR_Tjlo, cosh_FR_C_lo_temp1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA21 = fXsq, fA2, fA1
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_lo = cosh_FR_C_lo_temp3, f1, cosh_FR_C_lo_temp2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA6543 = fX4, fA65, fA43
+ nop.i 0
}
-
-// ******************************************************
-// cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
-// cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
-// cosh_FR_COSH = Y_hi + Y_lo
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo_temp = cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA654321 = fX4, fA6543, fA21
+ nop.i 0
}
+;;
+// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp
- nop.i 999 ;;
+ nop.m 0
+ fmpy.s0 fTmp = fA6, fA6
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fma.d.s0 f8 = cosh_FR_C_hi, f1, cosh_FR_Y_lo
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ fma.d.s0 f8 = fA654321, fXsq, f1
+ br.ret.sptk b0 // Exit if 0 < |x| < 0.25
}
+;;
-L(COSH_BY_EXP):
-// When p7 is true, we know that an overflow is not going to happen
-// When p7 is false, we must check for possible overflow
-// p7 is the over_SAFE flag
-// f44 = Scale * (Y_hi + Y_lo)
-// = cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
+COSH_POSSIBLE_OVERFLOW:
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo_temp = cosh_FR_peven, f1, cosh_FR_podd
- nop.i 999
-}
-
-// Now we are in EXP. This is the only path where an overflow is possible
-// but not for certain. So this is the only path where over_SAFE has any use.
-// r34 still has N-1
-// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
-// There is a danger of double overflow if N-1 > 0x3fe = 1022
+// Here if fMAX_DBL_NORM_ARG < |x| < fMIN_DBL_OFLOW_ARG
+// This cannot happen if input is a double, only if input is higher precision.
+// Overflow is a possibility, not a certainty.
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x00000000000003fe ;;
-}
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest double, then we have
+// overflow
{ .mfi
-(p0) cmp.gt.unc p0,p7 = r34, r32
- nop.f 999
- nop.i 999 ;;
+ mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_Tjhi, cosh_FR_Y_lo_temp, cosh_FR_Tjlo
- nop.i 999 ;;
+ setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp
+ fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_COSH_temp = cosh_FR_Y_lo, f1, cosh_FR_Tjhi
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.d.s0 f44 = cosh_FR_spos, cosh_FR_COSH_temp, f0
- nop.i 999 ;;
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
}
+;;
-// If over_SAFE is set, return
{ .mfb
- nop.m 999
-(p7) fmerge.s f8 = f44,f44
-(p7) br.ret.sptk b0 ;;
-}
-
-// Else see if we overflowed
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// If WRE is set then an overflow will not occur in EXP.
-// The input value that would cause a register (WRE) value to overflow is about 2^15
-// and this input would go into the HUGE path.
-// Answer with WRE is in f43.
-
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.d.s2 f43 = cosh_FR_spos, cosh_FR_COSH_temp, f0
- nop.i 999 ;;
-}
-
-// 103FF => 103FF -FFFF = 400(true)
-// 400 + 3FF = 7FF, which is 1 more that the exponent of the largest
-// double (7FE). So 0 103FF 8000000000000000 is one ulp more than
-// largest double in register bias
-// Now set p8 if the answer with WRE is greater than or equal this value
-// Also set p9 if the answer with WRE is less than or equal to negative this value
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x00000000000103ff ;;
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt COSH_CERTAIN_OVERFLOW // Branch if overflow
}
+;;
-{ .mmf
- nop.m 999
-(p0) setf.exp f41 = r32
-(p0) fsetc.s2 0x7F,0x40 ;;
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS
+ br.ret.sptk b0 // Exit if really no overflow
}
+;;
-{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
- nop.i 999
+COSH_CERTAIN_OVERFLOW:
+{ .mmi
+ sub rTmp = rExp_mask, r0, 1
+;;
+ setf.exp fTmp = rTmp
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.ns f42 = f41, f41
- nop.i 999 ;;
+ alloc r32=ar.pfs,1,4,4,0
+ fmerge.s FR_X = f8,f8
+ nop.i 0
}
-
-// The error tag for overflow is 64
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p8) mov r47 = 64 ;;
-}
-
{ .mfb
- nop.m 999
-(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
-(p8) br.cond.spnt __libm_error_region ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) mov r47 = 64
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt __libm_error_region ;;
+ mov GR_Parameter_TAG = 64
+ fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
}
+;;
+// Here if x unorm
+COSH_UNORM:
{ .mfb
- nop.m 999
-(p0) fmerge.s f8 = f44,f44
-(p0) br.ret.sptk b0 ;;
-}
-
-
-// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
-// SAFE: SAFE is always 0 for HUGE
-
-L(COSH_HUGE):
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000015dbf ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_hi_lo = f1, f9, f1
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.d.s0 f44 = f9, cosh_FR_hi_lo, f0
-(p0) mov r47 = 64
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk COSH_COMMON
}
;;
-.endp cosh#
-ASM_SIZE_DIRECTIVE(cosh#)
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(cosh)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
@@ -1103,39 +821,32 @@ __libm_error_region:
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-
-// (2)
{ .mmi
- stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
-
.body
-// (3)
{ .mib
- stfd [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = f44 // STORE Parameter 3 on stack
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-
-// (4)
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
@@ -1148,8 +859,6 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_coshf.S b/sysdeps/ia64/fpu/e_coshf.S
index 969abc4ff6..91846e4717 100644
--- a/sysdeps/ia64/fpu/e_coshf.S
+++ b/sysdeps/ia64/fpu/e_coshf.S
@@ -1,10 +1,10 @@
.file "coshf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1127 +20,690 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
// History
-//==============================================================
-// 2/02/00 Initial version
-// 2/16/00 The error tag for coshf overflow changed to 65 (from 64).
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+//*********************************************************************
+// 02/02/00 Initial version
+// 02/16/00 The error tag for coshf overflow changed to 65 (from 64).
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/07/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/15/02 Improved algorithm based on expf
//
// API
-//==============================================================
-// float = coshf(float)
-// input floating point f8
-// output floating point f8
-
-
+//*********************************************************************
+// float coshf(float)
+//
// Overview of operation
-//==============================================================
-// There are four paths
-
-// 1. |x| < 0.25 COSH_BY_POLY
-// 2. |x| < 32 COSH_BY_TBL
-// 3. |x| < 2^14 COSH_BY_EXP
-// 4. |x_ >= 2^14 COSH_HUGE
-
-// For paths 1, and 2 SAFE is always 1.
-// For path 4, Safe is always 0.
-// SAFE = 1 means we cannot overflow.
-
-#include "libm_support.h"
-
-// Assembly macros
-//==============================================================
-coshf_FR_X = f44
-coshf_FR_SGNX = f40
-
-coshf_FR_Inv_log2by64 = f9
-coshf_FR_log2by64_lo = f11
-coshf_FR_log2by64_hi = f10
-
-coshf_FR_A1 = f9
-coshf_FR_A2 = f10
-coshf_FR_A3 = f11
-
-coshf_FR_Rcub = f12
-coshf_FR_M_temp = f13
-coshf_FR_R_temp = f13
-coshf_FR_Rsq = f13
-coshf_FR_R = f14
-
-coshf_FR_M = f38
-
-coshf_FR_B1 = f15
-coshf_FR_B2 = f32
-coshf_FR_B3 = f33
-
-coshf_FR_peven_temp1 = f34
-coshf_FR_peven_temp2 = f35
-coshf_FR_peven = f36
-
-coshf_FR_podd_temp1 = f34
-coshf_FR_podd_temp2 = f35
-coshf_FR_podd = f37
-
-coshf_FR_J_temp = f9
-coshf_FR_J = f10
-
-coshf_FR_Mmj = f39
-
-coshf_FR_N_temp1 = f11
-coshf_FR_N_temp2 = f12
-coshf_FR_N = f13
-
-coshf_FR_spos = f14
-coshf_FR_sneg = f15
-
-coshf_FR_Tjhi = f32
-coshf_FR_Tjlo = f33
-coshf_FR_Tmjhi = f34
-coshf_FR_Tmjlo = f35
-
-GR_mJ = r35
-GR_J = r36
-
-AD_mJ = r38
-AD_J = r39
-
-
-GR_SAVE_B0 = r42
-GR_SAVE_PFS = r41
-GR_SAVE_GP = r43
-
-GR_Parameter_X = r44
-GR_Parameter_Y = r45
-GR_Parameter_RESULT = r46
-GR_Parameter_TAG = r47
-
-FR_X = f8
-FR_Y = f0
-FR_RESULT = f44
-
-
-coshf_FR_C_hi = f9
-coshf_FR_C_hi_temp = f10
-coshf_FR_C_lo_temp1 = f11
-coshf_FR_C_lo_temp2 = f12
-coshf_FR_C_lo_temp3 = f13
-
-coshf_FR_C_lo = f38
-coshf_FR_S_hi = f39
+//*********************************************************************
+// Case 1: 0 < |x| < 0.25
+// Evaluate cosh(x) by an 8th order polynomial
+// Care is taken with the order of multiplication; and A2 is not exactly 1/4!,
+// A3 is not exactly 1/6!, etc.
+// cosh(x) = 1 + (A1*x^2 + A2*x^4 + A3*x^6 + A4*x^8)
+//
+// Case 2: 0.25 <= |x| < 89.41598
+// Algorithm is based on the identity cosh(x) = ( exp(x) + exp(-x) ) / 2.
+// The algorithm for exp is described below. There are a number of
+// economies from evaluating both exp(x) and exp(-x). Although we
+// are evaluating both quantities, only where the quantities diverge do we
+// duplicate the computations. The basic algorithm for exp(x) is described
+// below.
+//
+// Take the input x. w is "how many log2/64 in x?"
+// w = x * 64/log2
+// NJ = int(w)
+// x = NJ*log2/64 + R
-coshf_FR_S_hi_temp1 = f10
-coshf_FR_Y_hi = f11
-coshf_FR_Y_lo_temp = f12
-coshf_FR_Y_lo = f13
-coshf_FR_COSH = f9
+// NJ = 64*n + j
+// x = n*log2 + (log2/64)*j + R
+//
+// So, exp(x) = 2^n * 2^(j/64)* exp(R)
+//
+// T = 2^n * 2^(j/64)
+// Construct 2^n
+// Get 2^(j/64) table
+// actually all the entries of the 2^(j/64) table are stored in DP
+// with exponent bits set to 0 -> multiplication by 2^n can be
+// performed by a logical "or" operation with the bits representing 2^n
+
+// exp(R) = 1 + (exp(R) - 1)
+// P = exp(R) - 1 approximated by Taylor series of 3rd degree
+// P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
+//
-coshf_FR_X2 = f9
-coshf_FR_X4 = f10
+// The final result is reconstructed as follows
+// exp(x) = T + T*P
-coshf_FR_P1 = f14
-coshf_FR_P2 = f15
-coshf_FR_P3 = f32
-coshf_FR_P4 = f33
-coshf_FR_P5 = f34
-coshf_FR_P6 = f35
+// Special values
+//*********************************************************************
+// coshf(+0) = 1.0
+// coshf(-0) = 1.0
-coshf_FR_TINY_THRESH = f9
+// coshf(+qnan) = +qnan
+// coshf(-qnan) = -qnan
+// coshf(+snan) = +qnan
+// coshf(-snan) = -qnan
-coshf_FR_COSH_temp = f10
-coshf_FR_SCALE = f11
+// coshf(-inf) = +inf
+// coshf(+inf) = +inf
-coshf_FR_hi_lo = f10
+// Overflow and Underflow
+//*********************************************************************
+// coshf(x) = largest single normal when
+// x = 89.41598 = 0x42b2d4fc
+//
+// There is no underflow.
-coshf_FR_poly_podd_temp1 = f11
-coshf_FR_poly_podd_temp2 = f13
-coshf_FR_poly_peven_temp1 = f11
-coshf_FR_poly_peven_temp2 = f13
+// Registers used
+//*********************************************************************
+// Floating Point registers used:
+// f8 input, output
+// f6,f7, f9 -> f15, f32 -> f45
-// Data tables
-//==============================================================
+// General registers used:
+// r2, r3, r16 -> r38
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// Predicate registers used:
+// p6 -> p15
+// Assembly macros
+//*********************************************************************
+// integer registers used
+// scratch
+rNJ = r2
+rNJ_neg = r3
+
+rJ_neg = r16
+rN_neg = r17
+rSignexp_x = r18
+rExp_x = r18
+rExp_mask = r19
+rExp_bias = r20
+rAd1 = r21
+rAd2 = r22
+rJ = r23
+rN = r24
+rTblAddr = r25
+rA3 = r26
+rExpHalf = r27
+rLn2Div64 = r28
+rGt_ln = r29
+r17ones_m1 = r29
+rRightShifter = r30
+rJ_mask = r30
+r64DivLn2 = r31
+rN_mask = r31
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+// scratch
+fRightShifter = f6
+f64DivLn2 = f7
+fNormX = f9
+fNint = f10
+fN = f11
+fR = f12
+fLn2Div64 = f13
+fA2 = f14
+fA3 = f15
+// stacked
+fP = f32
+fT = f33
+fMIN_SGL_OFLOW_ARG = f34
+fMAX_SGL_NORM_ARG = f35
+fRSqr = f36
+fA1 = f37
+fA21 = f37
+fA4 = f38
+fA43 = f38
+fA4321 = f38
+fX4 = f39
+fTmp = f39
+fGt_pln = f39
+fWre_urm_f8 = f40
+fXsq = f40
+fP_neg = f41
+fT_neg = f42
+fExp = f43
+fExp_neg = f44
+fAbsX = f45
+
+
+RODATA
.align 16
-single_coshf_arg_reduction:
-ASM_TYPE_DIRECTIVE(single_coshf_arg_reduction,@object)
- data8 0xB8AA3B295C17F0BC, 0x00004005
- data8 0xB17217F7D1000000, 0x00003FF8
- data8 0xCF79ABC9E3B39804, 0x00003FD0
-ASM_SIZE_DIRECTIVE(single_coshf_arg_reduction)
-
-single_coshf_p_table:
-ASM_TYPE_DIRECTIVE(single_coshf_p_table,@object)
- data8 0x8000000000000000, 0x00003FFE
- data8 0xAAAAAAAAAAAAAB80, 0x00003FFA
- data8 0xB60B60B60B4FE884, 0x00003FF5
- data8 0xD00D00D1021D7370, 0x00003FEF
- data8 0x93F27740C0C2F1CC, 0x00003FE9
- data8 0x8FA02AC65BCBD5BC, 0x00003FE2
-ASM_SIZE_DIRECTIVE(single_coshf_p_table)
-
-single_coshf_ab_table:
-ASM_TYPE_DIRECTIVE(single_coshf_ab_table,@object)
- data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
- data8 0x88888888884ECDD5, 0x00003FF8
- data8 0xD00D0C6DCC26A86B, 0x00003FF2
- data8 0x8000000000000002, 0x00003FFE
- data8 0xAAAAAAAAAA402C77, 0x00003FFA
- data8 0xB60B6CC96BDB144D, 0x00003FF5
-ASM_SIZE_DIRECTIVE(single_coshf_ab_table)
-
-single_coshf_j_table:
-ASM_TYPE_DIRECTIVE(single_coshf_j_table,@object)
- data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
- data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
- data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
- data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
- data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
- data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
- data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
- data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
- data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
- data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
- data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
- data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
- data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
- data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
- data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
- data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
- data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
- data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
- data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
- data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
- data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
- data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
- data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
- data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
- data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
- data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
- data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
- data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
- data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
- data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
- data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
- data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
- data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
- data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
- data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
- data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
- data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
- data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
- data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
- data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
- data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
- data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
- data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
- data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
- data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
- data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
- data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
- data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
- data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
- data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
- data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
- data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
- data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
- data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
- data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
- data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
- data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
- data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
- data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
- data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
- data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
- data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
- data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
- data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
- data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
-ASM_SIZE_DIRECTIVE(single_coshf_j_table)
-
-.align 32
-.global coshf#
-
-.section .text
-.proc coshf#
-.align 32
-
-coshf:
-
-#ifdef _LIBC
-.global __ieee754_coshf#
-.proc __ieee754_coshf#
-__ieee754_coshf:
-#endif
-// X NAN?
-
-
-{ .mfi
- alloc r32 = ar.pfs,0,12,4,0
-(p0) fclass.m.unc p6,p7 = f8, 0xc3
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p6) fma.s.s0 f8 = f8,f1,f8
-(p6) br.ret.spnt b0 ;;
-}
-
-{ .mfi
- nop.m 999
- nop.f 999
- nop.i 999 ;;
-}
+LOCAL_OBJECT_START(_coshf_table)
+data4 0x42b2d4fd // Smallest single arg to overflow single result
+data4 0x42b2d4fc // Largest single arg to give normal single result
+data4 0x00000000 // pad
+data4 0x00000000 // pad
+//
+// 2^(j/64) table, j goes from 0 to 63
+data8 0x0000000000000000 // 2^(0/64)
+data8 0x00002C9A3E778061 // 2^(1/64)
+data8 0x000059B0D3158574 // 2^(2/64)
+data8 0x0000874518759BC8 // 2^(3/64)
+data8 0x0000B5586CF9890F // 2^(4/64)
+data8 0x0000E3EC32D3D1A2 // 2^(5/64)
+data8 0x00011301D0125B51 // 2^(6/64)
+data8 0x0001429AAEA92DE0 // 2^(7/64)
+data8 0x000172B83C7D517B // 2^(8/64)
+data8 0x0001A35BEB6FCB75 // 2^(9/64)
+data8 0x0001D4873168B9AA // 2^(10/64)
+data8 0x0002063B88628CD6 // 2^(11/64)
+data8 0x0002387A6E756238 // 2^(12/64)
+data8 0x00026B4565E27CDD // 2^(13/64)
+data8 0x00029E9DF51FDEE1 // 2^(14/64)
+data8 0x0002D285A6E4030B // 2^(15/64)
+data8 0x000306FE0A31B715 // 2^(16/64)
+data8 0x00033C08B26416FF // 2^(17/64)
+data8 0x000371A7373AA9CB // 2^(18/64)
+data8 0x0003A7DB34E59FF7 // 2^(19/64)
+data8 0x0003DEA64C123422 // 2^(20/64)
+data8 0x0004160A21F72E2A // 2^(21/64)
+data8 0x00044E086061892D // 2^(22/64)
+data8 0x000486A2B5C13CD0 // 2^(23/64)
+data8 0x0004BFDAD5362A27 // 2^(24/64)
+data8 0x0004F9B2769D2CA7 // 2^(25/64)
+data8 0x0005342B569D4F82 // 2^(26/64)
+data8 0x00056F4736B527DA // 2^(27/64)
+data8 0x0005AB07DD485429 // 2^(28/64)
+data8 0x0005E76F15AD2148 // 2^(29/64)
+data8 0x0006247EB03A5585 // 2^(30/64)
+data8 0x0006623882552225 // 2^(31/64)
+data8 0x0006A09E667F3BCD // 2^(32/64)
+data8 0x0006DFB23C651A2F // 2^(33/64)
+data8 0x00071F75E8EC5F74 // 2^(34/64)
+data8 0x00075FEB564267C9 // 2^(35/64)
+data8 0x0007A11473EB0187 // 2^(36/64)
+data8 0x0007E2F336CF4E62 // 2^(37/64)
+data8 0x00082589994CCE13 // 2^(38/64)
+data8 0x000868D99B4492ED // 2^(39/64)
+data8 0x0008ACE5422AA0DB // 2^(40/64)
+data8 0x0008F1AE99157736 // 2^(41/64)
+data8 0x00093737B0CDC5E5 // 2^(42/64)
+data8 0x00097D829FDE4E50 // 2^(43/64)
+data8 0x0009C49182A3F090 // 2^(44/64)
+data8 0x000A0C667B5DE565 // 2^(45/64)
+data8 0x000A5503B23E255D // 2^(46/64)
+data8 0x000A9E6B5579FDBF // 2^(47/64)
+data8 0x000AE89F995AD3AD // 2^(48/64)
+data8 0x000B33A2B84F15FB // 2^(49/64)
+data8 0x000B7F76F2FB5E47 // 2^(50/64)
+data8 0x000BCC1E904BC1D2 // 2^(51/64)
+data8 0x000C199BDD85529C // 2^(52/64)
+data8 0x000C67F12E57D14B // 2^(53/64)
+data8 0x000CB720DCEF9069 // 2^(54/64)
+data8 0x000D072D4A07897C // 2^(55/64)
+data8 0x000D5818DCFBA487 // 2^(56/64)
+data8 0x000DA9E603DB3285 // 2^(57/64)
+data8 0x000DFC97337B9B5F // 2^(58/64)
+data8 0x000E502EE78B3FF6 // 2^(59/64)
+data8 0x000EA4AFA2A490DA // 2^(60/64)
+data8 0x000EFA1BEE615A27 // 2^(61/64)
+data8 0x000F50765B6E4540 // 2^(62/64)
+data8 0x000FA7C1819E90D8 // 2^(63/64)
+LOCAL_OBJECT_END(_coshf_table)
+
+LOCAL_OBJECT_START(cosh_p_table)
+data8 0x3efa3001dcf5905b // A4
+data8 0x3f56c1437543543e // A3
+data8 0x3fa5555572601504 // A2
+data8 0x3fdfffffffe2f097 // A1
+LOCAL_OBJECT_END(cosh_p_table)
-// X infinity
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6,p0 = f8, 0x23
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p6) fmerge.s f8 = f0,f8
-(p6) br.ret.spnt b0 ;;
-}
+.section .text
+GLOBAL_IEEE754_ENTRY(coshf)
-// Put 0.25 in f9; p6 true if x < 0.25
{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000000fffd ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
}
-
-{ .mfi
- nop.m 999
-(p0) fmerge.s coshf_FR_X = f0,f8
- nop.i 999
+{ .mlx
+ addl rTblAddr = @ltoff(_coshf_table),gp
+ movl rRightShifter = 0x43E8000000000000 // DP Right Shifter
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.s coshf_FR_SGNX = f8,f1
- nop.i 999 ;;
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ fclass.m p6, p0 = f8, 0x0b // Test for x=unorm
+ addl rA3 = 0x3E2AA, r0 // high bits of 1.0/6.0 rounded to SP
}
-
{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc p0,p7 = coshf_FR_X,f9
- nop.i 999 ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.sptk L(COSH_BY_TBL) ;;
-}
-
-
-// COSH_BY_POLY:
-
-// POLY cannot overflow so there is no need to call __libm_error_support
-// Get the values of P_x from the table
-
-{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(single_coshf_p_table), gp
- nop.i 999
+ nop.m 0
+ fnorm.s1 fNormX = f8 // normalized x
+ addl rExpHalf = 0xFFFE, r0 // exponent of 1/2
}
;;
-{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
-}
-;;
-
-// Calculate coshf_FR_X2 = ax*ax and coshf_FR_X4 = ax*ax*ax*ax
-{ .mmf
- nop.m 999
-(p0) ldfe coshf_FR_P1 = [r34],16
-(p0) fma.s1 coshf_FR_X2 = coshf_FR_X, coshf_FR_X, f0 ;;
-}
-
-{ .mmi
-(p0) ldfe coshf_FR_P2 = [r34],16 ;;
-(p0) ldfe coshf_FR_P3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfe coshf_FR_P4 = [r34],16 ;;
-(p0) ldfe coshf_FR_P5 = [r34],16
- nop.i 999 ;;
-}
-
{ .mfi
-(p0) ldfe coshf_FR_P6 = [r34],16
-(p0) fma.s1 coshf_FR_X4 = coshf_FR_X2, coshf_FR_X2, f0
- nop.i 999 ;;
+ setf.d f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
+ fclass.m p15, p0 = f8, 0x1e3 // test for NaT,NaN,Inf
+ nop.i 0
}
-
-// Calculate coshf_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_poly_podd_temp1 = coshf_FR_X4, coshf_FR_P5, coshf_FR_P3
- nop.i 999 ;;
+{ .mlx
+ // load Right Shifter to FP reg
+ setf.d fRightShifter = rRightShifter
+ movl rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_podd = coshf_FR_X4, coshf_FR_poly_podd_temp1, coshf_FR_P1
- nop.i 999
+ mov rExp_mask = 0x1ffff
+ fcmp.eq.s1 p13, p0 = f0, f8 // test for x = 0.0
+ shl rA3 = rA3, 12 // 0x3E2AA000, approx to 1.0/6.0 in SP
}
-
-// Calculate coshf_FR_peven = p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_poly_peven_temp1 = coshf_FR_X4, coshf_FR_P6, coshf_FR_P4
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt COSH_UNORM // Branch if x=unorm
}
+;;
+COSH_COMMON:
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_poly_peven_temp2 = coshf_FR_X4, coshf_FR_poly_peven_temp1, coshf_FR_P2
- nop.i 999 ;;
+ setf.exp fA2 = rExpHalf // load A2 to FP reg
+ nop.f 0
+ mov rExp_bias = 0xffff
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_peven = coshf_FR_X4, coshf_FR_poly_peven_temp2, f0
- nop.i 999 ;;
+{ .mfb
+ setf.d fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
+(p15) fma.s.s0 f8 = f8, f8, f0 // result if x = NaT,NaN,Inf
+(p15) br.ret.spnt b0 // exit here if x = NaT,NaN,Inf
}
-
-// Y_lo = x2*p_odd + p_even
-// Calculate f8 = Y_hi + Y_lo
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_Y_lo = coshf_FR_X2, coshf_FR_podd, coshf_FR_peven
- nop.i 999 ;;
+ // min overflow and max normal threshold
+ ldfps fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
+ nop.f 0
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
}
-
{ .mfb
- nop.m 999
-(p0) fma.s.s0 f8 = f1, f1, coshf_FR_Y_lo
-(p0) br.ret.sptk b0 ;;
-}
-
-
-L(COSH_BY_TBL):
-
-// Now that we are at TBL; so far all we know is that |x| >= 0.25.
-// The first two steps are the same for TBL and EXP, but if we are HUGE
-// Double
-// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
-// Single
-// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
-// we want to leave now. Go to HUGE if |x| >= 2^14
-// 1000d (register-biased) is e = 14 (true)
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010006 ;;
+ setf.s fA3 = rA3 // load A3 to FP reg
+(p13) fma.s.s0 f8 = f1, f1, f0 // result if x = 0.0
+(p13) br.ret.spnt b0 // exit here if x =0.0
}
+;;
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
+ fmerge.s fAbsX = f0, fNormX // Form |x|
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc p6,p7 = coshf_FR_X,f9
- nop.i 999 ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(COSH_HUGE) ;;
+ nop.m 0
+ // x*(64/ln(2)) + Right Shifter
+ fma.s1 fNint = fNormX, f64DivLn2, fRightShifter
+ add rTblAddr = 8, rTblAddr
}
-
-// r32 = 1
-// r34 = N-1
-// r35 = N
-// r36 = j
-// r37 = N+1
-
-// TBL can never overflow
-// coshf(x) = coshf(B+R)
-// = coshf(B) coshf(R) + sinh(B) sinh(R)
-// coshf(R) can be approximated by 1 + p_even
-// sinh(R) can be approximated by p_odd
-
-// ******************************************************
-// STEP 1 (TBL and EXP)
-// ******************************************************
-// Get the following constants.
-// f9 = Inv_log2by64
-// f10 = log2by64_hi
-// f11 = log2by64_lo
-
-{ .mmi
-(p0) adds r32 = 0x1,r0
-(p0) addl r34 = @ltoff(single_coshf_arg_reduction), gp
- nop.i 999
+{ .mfb
+ cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+(p7) br.cond.spnt COSH_SMALL // Branch if 0 < |x| < 2^-2
}
;;
-
-// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
-// put them in an exponent.
-// coshf_FR_spos = 2^(N-1) and coshf_FR_sneg = 2^(-N-1)
-// r39 = 0xffff + (N-1) = 0xffff +N -1
-// r40 = 0xffff - (N +1) = 0xffff -N -1
-
-{ .mlx
- ld8 r34 = [r34]
-(p0) movl r38 = 0x000000000000fffe ;;
-}
-
-{ .mmi
-(p0) ldfe coshf_FR_Inv_log2by64 = [r34],16 ;;
-(p0) ldfe coshf_FR_log2by64_hi = [r34],16
- nop.i 999 ;;
-}
-
-{ .mbb
-(p0) ldfe coshf_FR_log2by64_lo = [r34],16
- nop.b 999
- nop.b 999 ;;
-}
-
-// Get the A coefficients
-// f9 = A_1
-// f10 = A_2
-// f11 = A_3
-
-{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(single_coshf_ab_table), gp
- nop.i 999
+{ .mfi
+ nop.m 0
+ // check for overflow
+ fcmp.ge.s1 p12, p13 = fAbsX, fMIN_SGL_OFLOW_ARG
+ mov rJ_mask = 0x3f // 6-bit mask for J
}
;;
-{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+{ .mfb
+ nop.m 0
+ fms.s1 fN = fNint, f1, fRightShifter // n in FP register
+ // branch out if overflow
+(p12) br.cond.spnt COSH_CERTAIN_OVERFLOW
}
;;
-
-// Calculate M and keep it as integer and floating point.
-// M = round-to-integer(x*Inv_log2by64)
-// coshf_FR_M = M = truncate(ax/(log2/64))
-// Put the significand of M in r35
-// and the floating point representation of M in coshf_FR_M
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_M = coshf_FR_X, coshf_FR_Inv_log2by64, f0
- nop.i 999
-}
-
-{ .mfi
-(p0) ldfe coshf_FR_A1 = [r34],16
- nop.f 999
- nop.i 999 ;;
-}
-
{ .mfi
- nop.m 999
-(p0) fcvt.fx.s1 coshf_FR_M_temp = coshf_FR_M
- nop.i 999 ;;
+ getf.sig rNJ = fNint // bits of n, j
+ // check for possible overflow
+ fcmp.gt.s1 p13, p0 = fAbsX, fMAX_SGL_NORM_ARG
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fnorm.s1 coshf_FR_M = coshf_FR_M_temp
- nop.i 999 ;;
+ addl rN = 0xFFBF - 63, rNJ // biased and shifted n-1,j
+ fnma.s1 fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
+ and rJ = rJ_mask, rNJ // bits of j
}
-
{ .mfi
-(p0) getf.sig r35 = coshf_FR_M_temp
- nop.f 999
- nop.i 999 ;;
-}
-
-// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
-// has a range of -32 thru 31.
-// r35 = M
-// r36 = j
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) and r36 = 0x3f, r35 ;;
+ sub rNJ_neg = r0, rNJ // bits of n, j for -x
+ nop.f 0
+ andcm rN_mask = -1, rJ_mask // 0xff...fc0 to mask N
}
-
-// Calculate R
-// f13 = f44 - f12*f10 = x - M*log2by64_hi
-// f14 = f13 - f8*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
+;;
{ .mfi
- nop.m 999
-(p0) fnma.s1 coshf_FR_R_temp = coshf_FR_M, coshf_FR_log2by64_hi, coshf_FR_X
- nop.i 999
+ shladd rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
+ nop.f 0
+ and rN = rN_mask, rN // biased, shifted n-1
}
-
{ .mfi
-(p0) ldfe coshf_FR_A2 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ addl rN_neg = 0xFFBF - 63, rNJ_neg // -x biased, shifted n-1,j
+ nop.f 0
+ and rJ_neg = rJ_mask, rNJ_neg // bits of j for -x
}
+;;
{ .mfi
- nop.m 999
-(p0) fnma.s1 coshf_FR_R = coshf_FR_M, coshf_FR_log2by64_lo, coshf_FR_R_temp
- nop.i 999
+ ld8 rJ = [rJ] // Table value
+ nop.f 0
+ shl rN = rN, 46 // 2^(n-1) bits in DP format
}
-
-// Get the B coefficients
-// f15 = B_1
-// f32 = B_2
-// f33 = B_3
-
-{ .mmi
-(p0) ldfe coshf_FR_A3 = [r34],16 ;;
-(p0) ldfe coshf_FR_B1 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfe coshf_FR_B2 = [r34],16 ;;
-(p0) ldfe coshf_FR_B3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) shl r34 = r36, 0x2 ;;
-(p0) sxt1 r37 = r34 ;;
-}
-
-// ******************************************************
-// STEP 2 (TBL and EXP)
-// ******************************************************
-// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
-// f12 = R*R*R
-// f13 = R*R
-// f14 = R <== from above
-
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_Rsq = coshf_FR_R, coshf_FR_R, f0
-(p0) shr r36 = r37, 0x2 ;;
-}
-
-// r34 = M-j = r35 - r36
-// r35 = N = (M-j)/64
-
-{ .mii
-(p0) sub r34 = r35, r36
- nop.i 999 ;;
-(p0) shr r35 = r34, 0x6 ;;
-}
-
-{ .mii
-(p0) sub r40 = r38, r35
-(p0) adds r37 = 0x1, r35
-(p0) add r39 = r38, r35 ;;
-}
-
-// Get the address of the J table, add the offset,
-// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
-// f32 = T(j)_hi
-// f33 = T(j)_lo
-// f34 = T(-j)_hi
-// f35 = T(-j)_lo
-
-{ .mmi
-(p0) sub r34 = r35, r32
-(p0) addl r37 = @ltoff(single_coshf_j_table), gp
- nop.i 999
+ shladd rJ_neg = rJ_neg, 3, rTblAddr // addr in 2^(j/64) table -x
+ nop.f 0
+ and rN_neg = rN_mask, rN_neg // biased, shifted n-1 for -x
}
;;
{ .mfi
- ld8 r37 = [r37]
-(p0) fma.s1 coshf_FR_Rcub = coshf_FR_Rsq, coshf_FR_R, f0
- nop.i 999
-}
-
-// ******************************************************
-// STEP 3 Now decide if we need to branch to EXP
-// ******************************************************
-// Put 32 in f9; p6 true if x < 32
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010004 ;;
+ ld8 rJ_neg = [rJ_neg] // Table value for -x
+ nop.f 0
+ shl rN_neg = rN_neg, 46 // 2^(n-1) bits in DP format for -x
}
-
-// Calculate p_even
-// f34 = B_2 + Rsq *B_3
-// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
-// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_peven_temp1 = coshf_FR_Rsq, coshf_FR_B3, coshf_FR_B2
- nop.i 999 ;;
+ or rN = rN, rJ // bits of 2^n * 2^(j/64) in DP format
+ nop.f 0
+ nop.i 0
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_peven_temp2 = coshf_FR_Rsq, coshf_FR_peven_temp1, coshf_FR_B1
- nop.i 999
+{ .mmf
+ setf.d fT = rN // 2^(n-1) * 2^(j/64)
+ or rN_neg = rN_neg, rJ_neg // -x bits of 2^n * 2^(j/64) in DP
+ fma.s1 fRSqr = fR, fR, f0 // R^2
}
-
-// Calculate p_odd
-// f34 = A_2 + Rsq *A_3
-// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
-// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_podd_temp1 = coshf_FR_Rsq, coshf_FR_A3, coshf_FR_A2
- nop.i 999 ;;
+ setf.d fT_neg = rN_neg // 2^(n-1) * 2^(j/64) for -x
+ fma.s1 fP = fA3, fR, fA2 // A3*R + A2
+ nop.i 0
}
-
{ .mfi
-(p0) setf.exp coshf_FR_N_temp1 = r39
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fnma.s1 fP_neg = fA3, fR, fA2 // A3*R + A2 for -x
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_peven = coshf_FR_Rsq, coshf_FR_peven_temp2, f0
- nop.i 999
+ nop.m 0
+ fma.s1 fP = fP, fRSqr, fR // P = (A3*R + A2)*R^2 + R
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_podd_temp2 = coshf_FR_Rsq, coshf_FR_podd_temp1, coshf_FR_A1
- nop.i 999 ;;
+ nop.m 0
+ fms.s1 fP_neg = fP_neg, fRSqr, fR // P = (A3*R + A2)*R^2 + R, -x
+ nop.i 0
}
+;;
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fmpy.s0 fTmp = fLn2Div64, fLn2Div64 // Force inexact
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_podd = coshf_FR_podd_temp2, coshf_FR_Rcub, coshf_FR_R
- nop.i 999
-}
-
-// sinh_GR_mj contains the table offset for -j
-// sinh_GR_j contains the table offset for +j
-// p6 is true when j <= 0
-
-{ .mlx
-(p0) setf.exp coshf_FR_N_temp2 = r40
-(p0) movl r40 = 0x0000000000000020 ;;
+ nop.m 0
+ fma.s1 fExp = fP, fT, fT // exp(x)/2
+ nop.i 0
}
-
-{ .mfi
-(p0) sub GR_mJ = r40, r36
-(p0) fmerge.se coshf_FR_spos = coshf_FR_N_temp1, f1
-(p0) adds GR_J = 0x20, r36 ;;
+{ .mfb
+ nop.m 0
+ fma.s1 fExp_neg = fP_neg, fT_neg, fT_neg // exp(-x)/2
+ // branch out if possible overflow result
+(p13) br.cond.spnt COSH_POSSIBLE_OVERFLOW
}
+;;
-{ .mii
- nop.m 999
-(p0) shl GR_mJ = GR_mJ, 5 ;;
-(p0) add AD_mJ = r37, GR_mJ ;;
+{ .mfb
+ nop.m 0
+ // final result in the absence of overflow
+ fma.s.s0 f8 = fExp, f1, fExp_neg // result = (exp(x)+exp(-x))/2
+ // exit here in the absence of overflow
+ br.ret.sptk b0 // Exit main path, 0.25 <= |x| < 89.41598
}
+;;
+// Here if 0 < |x| < 0.25. Evaluate 8th order polynomial.
+COSH_SMALL:
{ .mmi
- nop.m 999
-(p0) ldfe coshf_FR_Tmjhi = [AD_mJ],16
-(p0) shl GR_J = GR_J, 5 ;;
-}
-
-{ .mfi
-(p0) ldfs coshf_FR_Tmjlo = [AD_mJ],16
-(p0) fcmp.lt.unc.s1 p6,p7 = coshf_FR_X,f9
-(p0) add AD_J = r37, GR_J ;;
+ add rAd1 = 0x200, rTblAddr
+ add rAd2 = 0x210, rTblAddr
+ nop.i 0
}
+;;
{ .mmi
-(p0) ldfe coshf_FR_Tjhi = [AD_J],16 ;;
-(p0) ldfs coshf_FR_Tjlo = [AD_J],16
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p0) fmerge.se coshf_FR_sneg = coshf_FR_N_temp2, f1
-(p7) br.cond.spnt L(COSH_BY_EXP) ;;
-}
-
-// ******************************************************
-// If NOT branch to EXP
-// ******************************************************
-// Calculate C_hi
-// ******************************************************
-// coshf_FR_C_hi_temp = coshf_FR_sneg * coshf_FR_Tmjhi
-// coshf_FR_C_hi = coshf_FR_spos * coshf_FR_Tjhi + (coshf_FR_sneg * coshf_FR_Tmjhi)
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_C_hi_temp = coshf_FR_sneg, coshf_FR_Tmjhi, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_C_hi = coshf_FR_spos, coshf_FR_Tjhi, coshf_FR_C_hi_temp
- nop.i 999
-}
-
-// ******************************************************
-// Calculate S_hi
-// ******************************************************
-// coshf_FR_S_hi_temp1 = coshf_FR_sneg * coshf_FR_Tmjhi
-// coshf_FR_S_hi = coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_C_hi_temp1
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_S_hi_temp1 = coshf_FR_sneg, coshf_FR_Tmjhi, f0
- nop.i 999 ;;
-}
-
-// ******************************************************
-// Calculate C_lo
-// ******************************************************
-// coshf_FR_C_lo_temp1 = coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_C_hi
-// coshf_FR_C_lo_temp2 = coshf_FR_sneg * coshf_FR_Tmjlo + (coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_C_hi)
-// coshf_FR_C_lo_temp1 = coshf_FR_sneg * coshf_FR_Tmjlo
-// coshf_FR_C_lo_temp3 = coshf_FR_spos * coshf_FR_Tjlo + (coshf_FR_sneg * coshf_FR_Tmjlo)
-// coshf_FR_C_lo = coshf_FR_C_lo_temp3 + coshf_FR_C_lo_temp2
-
-{ .mfi
- nop.m 999
-(p0) fms.s1 coshf_FR_C_lo_temp1 = coshf_FR_spos, coshf_FR_Tjhi, coshf_FR_C_hi
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fms.s1 coshf_FR_S_hi = coshf_FR_spos, coshf_FR_Tjhi, coshf_FR_S_hi_temp1
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_C_lo_temp2 = coshf_FR_sneg, coshf_FR_Tmjhi, coshf_FR_C_lo_temp1
- nop.i 999
+ ldfpd fA4, fA3 = [rAd1]
+ ldfpd fA2, fA1 = [rAd2]
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_C_lo_temp1 = coshf_FR_sneg, coshf_FR_Tmjlo, f0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fX4 = fXsq, fXsq, f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_C_lo_temp3 = coshf_FR_spos, coshf_FR_Tjlo, coshf_FR_C_lo_temp1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA43 = fXsq, fA4, fA3
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_C_lo = coshf_FR_C_lo_temp3, f1, coshf_FR_C_lo_temp2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA21 = fXsq, fA2, fA1
+ nop.i 0
}
-
-// ******************************************************
-// coshf_FR_Y_lo_temp = coshf_FR_C_hi * coshf_FR_peven + coshf_FR_C_lo
-// coshf_FR_Y_lo = coshf_FR_S_hi * coshf_FR_podd + coshf_FR_Y_lo_temp
-// coshf_FR_COSH = Y_hi + Y_lo
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_Y_lo_temp = coshf_FR_C_hi, coshf_FR_peven, coshf_FR_C_lo
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA4321 = fX4, fA43, fA21
+ nop.i 0
}
+;;
+// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_Y_lo = coshf_FR_S_hi, coshf_FR_podd, coshf_FR_Y_lo_temp
- nop.i 999 ;;
+ nop.m 0
+ fmpy.s0 fTmp = fA4, fA4
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fma.s.s0 f8 = coshf_FR_C_hi, f1, coshf_FR_Y_lo
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ fma.s.s0 f8 = fA4321, fXsq, f1
+ br.ret.sptk b0 // Exit if 0 < |x| < 0.25
}
+;;
+COSH_POSSIBLE_OVERFLOW:
-L(COSH_BY_EXP):
+// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
+// This cannot happen if input is a single, only if input is higher precision.
+// Overflow is a possibility, not a certainty.
-// When p7 is true, we know that an overflow is not going to happen
-// When p7 is false, we must check for possible overflow
-// p7 is the over_SAFE flag
-// f44 = Scale * (Y_hi + Y_lo)
-// = coshf_FR_spos * (coshf_FR_Tjhi + coshf_FR_Y_lo)
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest single, then we have
+// overflow
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_Y_lo_temp = coshf_FR_peven, f1, coshf_FR_podd
- nop.i 999
-}
-
-// Now we are in EXP. This is the only path where an overflow is possible
-// but not for certain. So this is the only path where over_SAFE has any use.
-// r34 still has N-1
-// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
-// There is a danger of double overflow if N-1 > 0x3fe = 1022
-// There is a danger of single overflow if N-1 > 0x7e = 126
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000000007e ;;
-}
-
-{ .mfi
-(p0) cmp.gt.unc p0,p7 = r34, r32
- nop.f 999
- nop.i 999 ;;
+ mov rGt_ln = 0x1007f // Exponent for largest single + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_Y_lo = coshf_FR_Tjhi, coshf_FR_Y_lo_temp, coshf_FR_Tjlo
- nop.i 999 ;;
+ setf.exp fGt_pln = rGt_ln // Create largest single + 1 ulp
+ fma.s.s2 fWre_urm_f8 = fP, fT, fT // Result with wre set
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_COSH_temp = coshf_FR_Y_lo, f1, coshf_FR_Tjhi
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s.s0 f44 = coshf_FR_spos, coshf_FR_COSH_temp, f0
- nop.i 999 ;;
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
}
+;;
-// If over_SAFE is set, return
{ .mfb
- nop.m 999
-(p7) fmerge.s f8 = f44,f44
-(p7) br.ret.sptk b0 ;;
-}
-
-// Else see if we overflowed
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// If WRE is set then an overflow will not occur in EXP.
-// The input value that would cause a register (WRE) value to overflow is about 2^15
-// and this input would go into the HUGE path.
-// Answer with WRE is in f43.
-
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s.s2 f43 = coshf_FR_spos, coshf_FR_COSH_temp, f0
- nop.i 999 ;;
-}
-
-// 1 more that the exponent of the largest double (7FE) = 7FF
-// 7FF - 3FF = 400 (true); 400 + FFFF = 103FF (register-biased)
-// So 0 103FF 8000000000000000 is one ulp more than
-// largest double in register bias
-// 1 more that the exponent of the largest single (FE) = FF
-// FF - 7F = 80 (true); 80 + FFFF = 1007F (register-biased)
-// Now set p8 if the answer with WRE is greater than or equal this value
-// Also set p9 if the answer with WRE is less than or equal to negative this value
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000001007f ;;
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt COSH_CERTAIN_OVERFLOW // Branch if overflow
}
+;;
-{ .mmf
- nop.m 999
-(p0) setf.exp f41 = r32
-(p0) fsetc.s2 0x7F,0x40 ;;
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fT
+ br.ret.sptk b0 // Exit if really no overflow
}
+;;
-{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
- nop.i 999
+// here if overflow
+COSH_CERTAIN_OVERFLOW:
+{ .mmi
+ addl r17ones_m1 = 0x1FFFE, r0
+;;
+ setf.exp fTmp = r17ones_m1
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.ns f42 = f41, f41
- nop.i 999 ;;
-}
-
-// The error tag for overflow is 65
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p8) mov GR_Parameter_TAG = 65 ;;
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ fmerge.s FR_X = f8,f8
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
-(p8) br.cond.spnt __libm_error_region ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) mov GR_Parameter_TAG = 64
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt __libm_error_region ;;
+ mov GR_Parameter_TAG = 65
+ fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
}
+;;
+// Here if x unorm
+COSH_UNORM:
{ .mfb
- nop.m 999
-(p0) fmerge.s f8 = f44,f44
-(p0) br.ret.sptk b0 ;;
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk COSH_COMMON // Return to main path
}
+;;
+GLOBAL_IEEE754_END(coshf)
-L(COSH_HUGE):
-
-// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
-// SAFE: SAFE is always 0 for HUGE
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000015dbf ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 coshf_FR_hi_lo = f1, f9, f1
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s.s0 f44 = f9, coshf_FR_hi_lo, f0
-(p0) mov GR_Parameter_TAG = 65
-}
-.endp coshf
-ASM_SIZE_DIRECTIVE(coshf)
-
-
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
- nop.f 0
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
-.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
- stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
-{ .mib
- stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+{ .mfi
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ nop.f 0
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
}
{ .mib
- stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk.many b0=__libm_error_support# // Call error handling function
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
+
{ .mmi
- ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
-};;
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_coshl.S b/sysdeps/ia64/fpu/e_coshl.S
index daac20d9a3..cef8be0b1a 100644
--- a/sysdeps/ia64/fpu/e_coshl.S
+++ b/sysdeps/ia64/fpu/e_coshl.S
@@ -1,10 +1,10 @@
.file "coshl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,1129 +35,1060 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 1/23/01 Set inexact flag for large args.
+// 01/23/01 Set inexact flag for large args.
+// 05/07/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 12/06/02 Improved performance
//
// API
//==============================================================
-// float = cosh(float)
-// double = cosh(double)
// long double = coshl(long double)
// input floating point f8
// output floating point f8
-
-
+//
+// Registers used
+//==============================================================
+// general registers:
+// r14 -> r40
+// predicate registers used:
+// p6 -> p11
+// floating-point registers used:
+// f9 -> f15; f32 -> f90;
+// f8 has input, then output
+//
// Overview of operation
//==============================================================
-// There are four paths
-
-// 1. |x| < 0.25 COSH_BY_POLY
-// 2. |x| < 32 COSH_BY_TBL
-// 3. |x| < 2^14 COSH_BY_EXP
-// 4. |x| >= 2^14 COSH_HUGE
-
-// For paths 1, and 2 SAFE is always 1.
-// For path 4, Safe is always 0.
-// SAFE = 1 means we cannot overflow.
-
-#include "libm_support.h"
-
+// There are seven paths
+// 1. 0 < |x| < 0.25 COSH_BY_POLY
+// 2. 0.25 <=|x| < 32 COSH_BY_TBL
+// 3. 32 <= |x| < 11357.21655 COSH_BY_EXP (merged path with COSH_BY_TBL)
+// 4. |x| >= 11357.21655 COSH_HUGE
+// 5. x=0 Done with early exit
+// 6. x=inf,nan Done with early exit
+// 7. x=denormal COSH_DENORM
+//
+// For double extended we get overflow for x >= 400c b174 ddc0 31ae c0ea
+// >= 11357.21655
+//
+//
+// 1. COSH_BY_POLY 0 < |x| < 0.25
+// ===============
+// Evaluate cosh(x) by a 12th order polynomial
+// Care is taken for the order of multiplication; and P2 is not exactly 1/4!,
+// P3 is not exactly 1/6!, etc.
+// cosh(x) = 1 + (P1*x^2 + P2*x^4 + P3*x^6 + P4*x^8 + P5*x^10 + P6*x^12)
+//
+// 2. COSH_BY_TBL 0.25 <= |x| < 32.0
+// =============
+// cosh(x) = cosh(B+R)
+// = cosh(B)cosh(R) + sinh(B)sinh(R)
+//
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j
+// We will calculate M and get N as (M-j)/64
+// The division is a shift.
+// exp(B) = exp(N*log2 + j*log2/64)
+// = 2^N * 2^(j/64)
+// cosh(B) = 1/2(e^B + e^-B)
+// = 1/2(2^N * 2^(j/64) + 2^-N * 2^(-j/64))
+// cosh(B) = (2^(N-1) * 2^(j/64) + 2^(-N-1) * 2^(-j/64))
+// sinh(B) = (2^(N-1) * 2^(j/64) - 2^(-N-1) * 2^(-j/64))
+// 2^(j/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+//
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+// = 1 + p_odd + p_even
+// where p_odd uses the A coefficients and p_even uses
+// the B coefficients
+//
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+// cosh(R) = 1 + p_even
+// cosh(B) = C_hi + C_lo
+// sinh(B) = S_hi
+// cosh(x) = cosh(B)cosh(R) + sinh(B)sinh(R)
+//
+// 3. COSH_BY_EXP 32.0 <= |x| < 11357.21655 ( 400c b174 ddc0 31ae c0ea )
+// ==============
+// Can approximate result by exp(x)/2 in this region.
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) + Tjlo
+// cosh(x) = Y_hi + Y_lo
+//
+// 4. COSH_HUGE |x| >= 11357.21655 ( 400c b174 ddc0 31ae c0ea )
+// ============
+// Set error tag and call error support
+//
+//
// Assembly macros
//==============================================================
-cosh_FR_X = f44
-FR_RESULT = f44
-cosh_FR_SGNX = f40
-cosh_FR_all_ones = f45
-
-FR_X = f8
-FR_Y = f0
-cosh_FR_Inv_log2by64 = f9
-cosh_FR_log2by64_lo = f11
-cosh_FR_log2by64_hi = f10
-
-cosh_FR_A1 = f9
-cosh_FR_A2 = f10
-cosh_FR_A3 = f11
-
-cosh_FR_Rcub = f12
-cosh_FR_M_temp = f13
-cosh_FR_R_temp = f13
-cosh_FR_Rsq = f13
-cosh_FR_R = f14
-
-cosh_FR_M = f38
-
-cosh_FR_tmp = f15
-cosh_FR_B1 = f15
-cosh_FR_B2 = f32
-cosh_FR_B3 = f33
-
-cosh_FR_peven_temp1 = f34
-cosh_FR_peven_temp2 = f35
-cosh_FR_peven = f36
-
-cosh_FR_podd_temp1 = f34
-cosh_FR_podd_temp2 = f35
-cosh_FR_podd = f37
-
-cosh_FR_J_temp = f9
-cosh_FR_J = f10
-
-cosh_FR_Mmj = f39
-
-cosh_FR_N_temp1 = f11
-cosh_FR_N_temp2 = f12
-cosh_FR_N = f13
-
-cosh_FR_spos = f14
-cosh_FR_sneg = f15
-
-cosh_FR_Tjhi = f32
-cosh_FR_Tjlo = f33
-cosh_FR_Tmjhi = f34
-cosh_FR_Tmjlo = f35
-
-GR_mJ = r35
-GR_J = r36
-
-AD_mJ = r38
-AD_J = r39
-
-cosh_GR_all_ones = r40
-
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
-GR_Parameter_X = r44
-GR_Parameter_Y = r45
-GR_Parameter_RESULT = r46
-GR_Parameter_TAG = r47
+r_ad5 = r14
+r_rshf_2to57 = r15
+r_exp_denorm = r15
+r_ad_mJ_lo = r15
+r_ad_J_lo = r16
+r_2Nm1 = r17
+r_2mNm1 = r18
+r_exp_x = r18
+r_ad_J_hi = r19
+r_ad2o = r19
+r_ad_mJ_hi = r20
+r_mj = r21
+r_ad2e = r22
+r_ad3 = r23
+r_ad1 = r24
+r_Mmj = r24
+r_rshf = r25
+r_M = r25
+r_N = r25
+r_jshf = r26
+r_exp_2tom57 = r26
+r_j = r26
+r_exp_mask = r27
+r_signexp_x = r28
+r_signexp_0_5 = r28
+r_exp_0_25 = r29
+r_sig_inv_ln2 = r30
+r_exp_32 = r30
+r_exp_huge = r30
+r_ad4 = r31
+
+GR_SAVE_PFS = r34
+GR_SAVE_B0 = r35
+GR_SAVE_GP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+f_ABS_X = f9
+f_X2 = f10
+f_X4 = f11
+f_tmp = f14
+f_RSHF = f15
+
+f_Inv_log2by64 = f32
+f_log2by64_lo = f33
+f_log2by64_hi = f34
+f_A1 = f35
+
+f_A2 = f36
+f_A3 = f37
+f_Rcub = f38
+f_M_temp = f39
+f_R_temp = f40
+
+f_Rsq = f41
+f_R = f42
+f_M = f43
+f_B1 = f44
+f_B2 = f45
+
+f_B3 = f46
+f_peven_temp1 = f47
+f_peven_temp2 = f48
+f_peven = f49
+f_podd_temp1 = f50
+
+f_podd_temp2 = f51
+f_podd = f52
+f_poly65 = f53
+f_poly6543 = f53
+f_poly6to1 = f53
+f_poly43 = f54
+f_poly21 = f55
+
+f_X3 = f56
+f_INV_LN2_2TO63 = f57
+f_RSHF_2TO57 = f58
+f_2TOM57 = f59
+f_smlst_oflow_input = f60
+
+f_pre_result = f61
+f_huge = f62
+f_spos = f63
+f_sneg = f64
+f_Tjhi = f65
+
+f_Tjlo = f66
+f_Tmjhi = f67
+f_Tmjlo = f68
+f_S_hi = f69
+f_SC_hi_temp = f70
+
+f_C_lo_temp1 = f71
+f_C_lo_temp2 = f72
+f_C_lo_temp3 = f73
+f_C_lo_temp4 = f73
+f_C_lo = f74
+f_C_hi = f75
+
+f_Y_hi = f77
+f_Y_lo_temp = f78
+f_Y_lo = f79
+f_NORM_X = f80
+
+f_P1 = f81
+f_P2 = f82
+f_P3 = f83
+f_P4 = f84
+f_P5 = f85
+
+f_P6 = f86
+f_Tjhi_spos = f87
+f_Tjlo_spos = f88
+f_huge = f89
+f_signed_hi_lo = f90
-cosh_FR_C_hi = f9
-cosh_FR_C_hi_temp = f10
-cosh_FR_C_lo_temp1 = f11
-cosh_FR_C_lo_temp2 = f12
-cosh_FR_C_lo_temp3 = f13
-
-cosh_FR_C_lo = f38
-cosh_FR_S_hi = f39
-
-cosh_FR_S_hi_temp1 = f10
-cosh_FR_Y_hi = f11
-cosh_FR_Y_lo_temp = f12
-cosh_FR_Y_lo = f13
-cosh_FR_COSH = f9
-
-cosh_FR_X2 = f9
-cosh_FR_X4 = f10
-
-cosh_FR_P1 = f14
-cosh_FR_P2 = f15
-cosh_FR_P3 = f32
-cosh_FR_P4 = f33
-cosh_FR_P5 = f34
-cosh_FR_P6 = f35
-
-cosh_FR_TINY_THRESH = f9
-
-cosh_FR_COSH_temp = f10
-cosh_FR_SCALE = f11
-
-cosh_FR_hi_lo = f10
-
-cosh_FR_poly_podd_temp1 = f11
-cosh_FR_poly_podd_temp2 = f13
-cosh_FR_poly_peven_temp1 = f11
-cosh_FR_poly_peven_temp2 = f13
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// DO NOT CHANGE ORDER OF THESE TABLES
+RODATA
.align 16
-double_cosh_arg_reduction:
-ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
- data8 0xB8AA3B295C17F0BC, 0x00004005
- data8 0xB17217F7D1000000, 0x00003FF8
- data8 0xCF79ABC9E3B39804, 0x00003FD0
-ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
-
-double_cosh_p_table:
-ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
- data8 0x8000000000000000, 0x00003FFE
- data8 0xAAAAAAAAAAAAAB80, 0x00003FFA
- data8 0xB60B60B60B4FE884, 0x00003FF5
- data8 0xD00D00D1021D7370, 0x00003FEF
- data8 0x93F27740C0C2F1CC, 0x00003FE9
- data8 0x8FA02AC65BCBD5BC, 0x00003FE2
-ASM_SIZE_DIRECTIVE(double_cosh_p_table)
-
-double_cosh_ab_table:
-ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
- data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
- data8 0x88888888884ECDD5, 0x00003FF8
- data8 0xD00D0C6DCC26A86B, 0x00003FF2
- data8 0x8000000000000002, 0x00003FFE
- data8 0xAAAAAAAAAA402C77, 0x00003FFA
- data8 0xB60B6CC96BDB144D, 0x00003FF5
-ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
-
-double_cosh_j_table:
-ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
- data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
- data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
- data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
- data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
- data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
- data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
- data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
- data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
- data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
- data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
- data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
- data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
- data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
- data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
- data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
- data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
- data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
- data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
- data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
- data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
- data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
- data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
- data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
- data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
- data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
- data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
- data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
- data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
- data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
- data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
- data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
- data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
- data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
- data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
- data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
- data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
- data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
- data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
- data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
- data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
- data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
- data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
- data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
- data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
- data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
- data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
- data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
- data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
- data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
- data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
- data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
- data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
- data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
- data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
- data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
- data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
- data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
- data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
- data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
- data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
- data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
- data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
- data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
- data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
- data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
-ASM_SIZE_DIRECTIVE(double_cosh_j_table)
-
-.align 32
-.global coshl#
-
-.section .text
-.proc coshl#
-.align 32
-
-coshl:
-
-#ifdef _LIBC
-.global __ieee754_coshl#
-.proc __ieee754_coshl#
-__ieee754_coshl:
-#endif
-
-// X NAN?
-
-{ .mfi
- alloc r32 = ar.pfs,0,12,4,0
-(p0) fclass.m.unc p6,p7 = f8, 0xc3
- mov cosh_GR_all_ones = -1
-};;
-
-// This is more than we need but it is in preparation
-// for the values we add for error support. We push three
-// addresses on the stack (3*8) = 24 bytes and one tag
-
-{ .mfb
- nop.m 999
-(p6) fma.s0 f8 = f8,f1,f8
-(p6) br.ret.spnt b0 ;;
-}
-
-
-// Make constant that will generate inexact when squared
-// X infinity
-{ .mfi
- setf.sig cosh_FR_all_ones = cosh_GR_all_ones
-(p0) fclass.m.unc p6,p0 = f8, 0x23
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p6) fmerge.s f8 = f0,f8
-(p6) br.ret.spnt b0 ;;
-}
+LOCAL_OBJECT_START(cosh_arg_reduction)
+// data8 0xB8AA3B295C17F0BC, 0x00004005 // 64/log2 -- signif loaded with setf
+ data8 0xB17217F7D1000000, 0x00003FF8 // log2/64 high part
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // log2/64 low part
+ data8 0xb174ddc031aec0ea, 0x0000400c // Smallest x to overflow (11357.21655)
+LOCAL_OBJECT_END(cosh_arg_reduction)
+
+LOCAL_OBJECT_START(cosh_p_table)
+ data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // P6
+ data8 0xD00D00D1021D7370, 0x00003FEF // P4
+ data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // P2
+ data8 0x93F27740C0C2F1CC, 0x00003FE9 // P5
+ data8 0xB60B60B60B4FE884, 0x00003FF5 // P3
+ data8 0x8000000000000000, 0x00003FFE // P1
+LOCAL_OBJECT_END(cosh_p_table)
+
+LOCAL_OBJECT_START(cosh_ab_table)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1
+ data8 0x88888888884ECDD5, 0x00003FF8 // A2
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // A3
+ data8 0x8000000000000002, 0x00003FFE // B1
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // B2
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // B3
+LOCAL_OBJECT_END(cosh_ab_table)
+
+LOCAL_OBJECT_START(cosh_j_hi_table)
+ data8 0xB504F333F9DE6484, 0x00003FFE
+ data8 0xB6FD91E328D17791, 0x00003FFE
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE
+ data8 0xBD08A39F580C36BF, 0x00003FFE
+ data8 0xBF1799B67A731083, 0x00003FFE
+ data8 0xC12C4CCA66709456, 0x00003FFE
+ data8 0xC346CCDA24976407, 0x00003FFE
+ data8 0xC5672A115506DADD, 0x00003FFE
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE
+ data8 0xCE248C151F8480E4, 0x00003FFE
+ data8 0xD06333DAEF2B2595, 0x00003FFE
+ data8 0xD2A81D91F12AE45A, 0x00003FFE
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE
+ data8 0xD99D15C278AFD7B6, 0x00003FFE
+ data8 0xDBFBB797DAF23755, 0x00003FFE
+ data8 0xDE60F4825E0E9124, 0x00003FFE
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE
+ data8 0xE33F8972BE8A5A51, 0x00003FFE
+ data8 0xE5B906E77C8348A8, 0x00003FFE
+ data8 0xE8396A503C4BDC68, 0x00003FFE
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE
+ data8 0xED4F301ED9942B84, 0x00003FFE
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE
+ data8 0xF281773C59FFB13A, 0x00003FFE
+ data8 0xF5257D152486CC2C, 0x00003FFE
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE
+ data8 0xFA83B2DB722A033A, 0x00003FFE
+ data8 0xFD3E0C0CF486C175, 0x00003FFE
+ data8 0x8000000000000000, 0x00003FFF // Center of table
+ data8 0x8164D1F3BC030773, 0x00003FFF
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF
+ data8 0x843A28C3ACDE4046, 0x00003FFF
+ data8 0x85AAC367CC487B15, 0x00003FFF
+ data8 0x871F61969E8D1010, 0x00003FFF
+ data8 0x88980E8092DA8527, 0x00003FFF
+ data8 0x8A14D575496EFD9A, 0x00003FFF
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF
+ data8 0x8EA4398B45CD53C0, 0x00003FFF
+ data8 0x9031DC431466B1DC, 0x00003FFF
+ data8 0x91C3D373AB11C336, 0x00003FFF
+ data8 0x935A2B2F13E6E92C, 0x00003FFF
+ data8 0x94F4EFA8FEF70961, 0x00003FFF
+ data8 0x96942D3720185A00, 0x00003FFF
+ data8 0x9837F0518DB8A96F, 0x00003FFF
+ data8 0x99E0459320B7FA65, 0x00003FFF
+ data8 0x9B8D39B9D54E5539, 0x00003FFF
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF
+ data8 0x9EF5326091A111AE, 0x00003FFF
+ data8 0xA0B0510FB9714FC2, 0x00003FFF
+ data8 0xA27043030C496819, 0x00003FFF
+ data8 0xA43515AE09E6809E, 0x00003FFF
+ data8 0xA5FED6A9B15138EA, 0x00003FFF
+ data8 0xA7CD93B4E965356A, 0x00003FFF
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF
+ data8 0xAB7A39B5A93ED337, 0x00003FFF
+ data8 0xAD583EEA42A14AC6, 0x00003FFF
+ data8 0xAF3B78AD690A4375, 0x00003FFF
+ data8 0xB123F581D2AC2590, 0x00003FFF
+ data8 0xB311C412A9112489, 0x00003FFF
+ data8 0xB504F333F9DE6484, 0x00003FFF
+LOCAL_OBJECT_END(cosh_j_hi_table)
+
+LOCAL_OBJECT_START(cosh_j_lo_table)
+ data4 0x1EB2FB13
+ data4 0x1CE2CBE2
+ data4 0x1DDC3CBC
+ data4 0x1EE9AA34
+ data4 0x9EAEFDC1
+ data4 0x9DBF517B
+ data4 0x1EF88AFB
+ data4 0x1E03B216
+ data4 0x1E78AB43
+ data4 0x9E7B1747
+ data4 0x9EFE3C0E
+ data4 0x9D36F837
+ data4 0x9DEE53E4
+ data4 0x9E24AE8E
+ data4 0x1D912473
+ data4 0x1EB243BE
+ data4 0x1E669A2F
+ data4 0x9BBC610A
+ data4 0x1E761035
+ data4 0x9E0BE175
+ data4 0x1CCB12A1
+ data4 0x1D1BFE90
+ data4 0x1DF2F47A
+ data4 0x1EF22F22
+ data4 0x9E3F4A29
+ data4 0x1EC01A5B
+ data4 0x1E8CAC3A
+ data4 0x9DBB3FAB
+ data4 0x1EF73A19
+ data4 0x9BB795B5
+ data4 0x1EF84B76
+ data4 0x9EF5818B
+ data4 0x00000000 // Center of table
+ data4 0x1F77CACA
+ data4 0x1EF8A91D
+ data4 0x1E57C976
+ data4 0x9EE8DA92
+ data4 0x1EE85C9F
+ data4 0x1F3BF1AF
+ data4 0x1D80CA1E
+ data4 0x9D0373AF
+ data4 0x9F167097
+ data4 0x1EB70051
+ data4 0x1F6EB029
+ data4 0x1DFD6D8E
+ data4 0x9EB319B0
+ data4 0x1EBA2BEB
+ data4 0x1F11D537
+ data4 0x1F0D5A46
+ data4 0x9E5E7BCA
+ data4 0x9F3AAFD1
+ data4 0x9E86DACC
+ data4 0x9F3EDDC2
+ data4 0x1E496E3D
+ data4 0x9F490BF6
+ data4 0x1DD1DB48
+ data4 0x1E65EBFB
+ data4 0x9F427496
+ data4 0x1F283C4A
+ data4 0x1F4B0047
+ data4 0x1F130152
+ data4 0x9E8367C0
+ data4 0x9F705F90
+ data4 0x1EFB3C53
+ data4 0x1F32FB13
+LOCAL_OBJECT_END(cosh_j_lo_table)
+.section .text
+GLOBAL_IEEE754_ENTRY(coshl)
-// Put 0.25 in f9; p6 true if x < 0.25
{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000000fffd ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ getf.exp r_signexp_x = f8 // Get signexp of x, must redo if unorm
+ movl r_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
-
-{ .mfi
- nop.m 999
-(p0) fmerge.s cosh_FR_X = f0,f8
- nop.i 999
+{ .mlx
+ addl r_ad1 = @ltoff(cosh_arg_reduction), gp
+ movl r_rshf_2to57 = 0x4778000000000000 // 1.10000 2^(63+57)
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.s cosh_FR_SGNX = f8,f1
- nop.i 999 ;;
+ ld8 r_ad1 = [r_ad1]
+ fmerge.s f_ABS_X = f0,f8
+ mov r_exp_0_25 = 0x0fffd // Form exponent for 0.25
}
-
{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc p0,p7 = cosh_FR_X,f9
- nop.i 999 ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.sptk L(COSH_BY_TBL)
+ nop.m 0
+ fnorm.s1 f_NORM_X = f8
+ mov r_exp_2tom57 = 0xffff-57
}
;;
-
-// COSH_BY_POLY:
-// POLY cannot overflow so there is no need to call __libm_error_support
-// Get the values of P_x from the table
-
-{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(double_cosh_p_table), gp
- nop.i 999
+{ .mfi
+ setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
+ fclass.m p10,p0 = f8, 0x0b // Test for denorm
+ mov r_exp_mask = 0x1ffff
}
-;;
-
-{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+{ .mlx
+ setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
+ movl r_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
}
;;
-
-// Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax
-{ .mmf
- nop.m 999
-(p0) ldfe cosh_FR_P1 = [r34],16
-(p0) fma.s1 cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;
-}
-
-{ .mmi
-(p0) ldfe cosh_FR_P2 = [r34],16 ;;
-(p0) ldfe cosh_FR_P3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfe cosh_FR_P4 = [r34],16 ;;
-(p0) ldfe cosh_FR_P5 = [r34],16
- nop.i 999 ;;
-}
-
{ .mfi
-(p0) ldfe cosh_FR_P6 = [r34],16
-(p0) fma.s1 cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0
- nop.i 999 ;;
+ nop.m 0
+ fclass.m p7,p0 = f8, 0x07 // Test if x=0
+ nop.i 0
}
-
-// Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3
- nop.i 999 ;;
+ setf.exp f_2TOM57 = r_exp_2tom57 // Form 2^-57 for scaling
+ nop.f 0
+ add r_ad3 = 0x90, r_ad1 // Point to ab_table
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_podd = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1
- nop.i 999
+ setf.d f_RSHF = r_rshf // Form right shift const 1.100 * 2^63
+ fclass.m p6,p0 = f8, 0xe3 // Test if x nan, inf
+ add r_ad4 = 0x2f0, r_ad1 // Point to j_hi_table midpoint
}
-
-// Calculate cosh_FR_peven = p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4
- nop.i 999 ;;
+{ .mib
+ add r_ad2e = 0x20, r_ad1 // Point to p_table
+ nop.i 0
+(p10) br.cond.spnt COSH_DENORM // Branch if x denorm
}
+;;
+// Common path -- return here from COSH_DENORM if x is unnorm
+COSH_COMMON:
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2
- nop.i 999 ;;
+ ldfe f_smlst_oflow_input = [r_ad2e],16
+(p7) fma.s0 f8 = f1, f1, f0 // Result = 1.0 if x=0
+ add r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_peven = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0
- nop.i 999 ;;
+{ .mib
+ ldfe f_log2by64_hi = [r_ad1],16
+ and r_exp_x = r_exp_mask, r_signexp_x
+(p7) br.ret.spnt b0 // Exit if x=0
}
+;;
-// Y_lo = x2*p_odd + p_even
-// Calculate f8 = Y_hi + Y_lo
+// Get the A coefficients for COSH_BY_TBL
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_X2, cosh_FR_podd, cosh_FR_peven
- nop.i 999 ;;
+ ldfe f_A1 = [r_ad3],16
+ fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0
+ cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test x < 0.25
}
-
{ .mfb
- nop.m 999
-(p0) fma.s0 f8 = f1, f1, cosh_FR_Y_lo
-(p0) br.ret.sptk b0 ;;
-}
-
-
-L(COSH_BY_TBL):
-
-// Now that we are at TBL; so far all we know is that |x| >= 0.25.
-// The first two steps are the same for TBL and EXP, but if we are HUGE
-// Double Extended
-// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
-// Double
-// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
-// Single
-// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
-// we want to leave now. Go to HUGE if |x| >= 2^14
-// 1000d (register-biased) is e = 14 (true)
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000001000d ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs
+(p6) fma.s0 f8 = f8,f8,f0 // Result for x nan, inf
+(p6) br.ret.spnt b0 // Exit for x nan, inf
}
+;;
+// Calculate X2 = ax*ax for COSH_BY_POLY
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc p6,p7 = cosh_FR_X,f9
- nop.i 999 ;;
+ ldfe f_log2by64_lo = [r_ad1],16
+ nop.f 0
+ nop.i 0
}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(COSH_HUGE) ;;
+{ .mfb
+ ldfe f_A2 = [r_ad3],16
+ fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0
+(p7) br.cond.spnt COSH_BY_POLY
}
+;;
-// r32 = 1
-// r34 = N-1
-// r35 = N
-// r36 = j
-// r37 = N+1
-
-// TBL can never overflow
-// cosh(x) = cosh(B+R)
-// = cosh(B) cosh(R) + sinh(B) sinh(R)
-// cosh(R) can be approximated by 1 + p_even
-// sinh(R) can be approximated by p_odd
-
+// Here if |x| >= 0.25
+COSH_BY_TBL:
// ******************************************************
-// STEP 1 (TBL and EXP)
+// STEP 1 (TBL and EXP) - Argument reduction
// ******************************************************
-// Get the following constants.
-// f9 = Inv_log2by64
-// f10 = log2by64_hi
-// f11 = log2by64_lo
+// Get the following constants.
+// Inv_log2by64
+// log2by64_hi
+// log2by64_lo
-{ .mmi
-(p0) adds r32 = 0x1,r0
-(p0) addl r34 = @ltoff(double_cosh_arg_reduction), gp
- nop.i 999
-}
-;;
// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
// put them in an exponent.
-// cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
-// r39 = 0xffff + (N-1) = 0xffff +N -1
-// r40 = 0xffff - (N +1) = 0xffff -N -1
-
-{ .mlx
- ld8 r34 = [r34]
-(p0) movl r38 = 0x000000000000fffe ;;
-}
+// f_spos = 2^(N-1) and f_sneg = 2^(-N-1)
+// 0xffff + (N-1) = 0xffff +N -1
+// 0xffff - (N +1) = 0xffff -N -1
-{ .mmi
-(p0) ldfe cosh_FR_Inv_log2by64 = [r34],16 ;;
-(p0) ldfe cosh_FR_log2by64_hi = [r34],16
- nop.i 999 ;;
-}
-
-{ .mbb
-(p0) ldfe cosh_FR_log2by64_lo = [r34],16
- nop.b 999
- nop.b 999 ;;
-}
-
-// Get the A coefficients
-// f9 = A_1
-// f10 = A_2
-// f11 = A_3
-{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(double_cosh_ab_table), gp
- nop.i 999
-}
-;;
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(ax*Inv_log2by64)
+// f_M = M = round-to-integer(ax/(log2/64))
+// Put the integer representation of M in r_M
+// and the floating point representation of M in f_M
+// Get the remaining A,B coefficients
{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+ ldfe f_A3 = [r_ad3],16
+ nop.m 0
+ nop.i 0
}
;;
-
-// Calculate M and keep it as integer and floating point.
-// M = round-to-integer(x*Inv_log2by64)
-// cosh_FR_M = M = truncate(ax/(log2/64))
-// Put the significand of M in r35
-// and the floating point representation of M in cosh_FR_M
-
+// Use constant (1.100*2^(63-6)) to get rounded M into rightmost significand
+// |x| * 64 * 1/ln2 * 2^(63-6) + 1.1000 * 2^(63+(63-6))
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_M = cosh_FR_X, cosh_FR_Inv_log2by64, f0
- nop.i 999
+ nop.m 0
+ fma.s1 f_M_temp = f_ABS_X, f_INV_LN2_2TO63, f_RSHF_2TO57
+ mov r_signexp_0_5 = 0x0fffe // signexp of +0.5
}
+;;
+// Test for |x| >= overflow limit
{ .mfi
-(p0) ldfe cosh_FR_A1 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ ldfe f_B1 = [r_ad3],16
+ fcmp.ge.s1 p6,p0 = f_ABS_X, f_smlst_oflow_input
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fcvt.fx.s1 cosh_FR_M_temp = cosh_FR_M
- nop.i 999 ;;
+ ldfe f_B2 = [r_ad3],16
+ nop.f 0
+ mov r_exp_32 = 0x10004
}
+;;
-{ .mfi
- nop.m 999
-(p0) fnorm.s1 cosh_FR_M = cosh_FR_M_temp
- nop.i 999 ;;
+// Subtract RSHF constant to get rounded M as a floating point value
+// M_temp * 2^-(63-6) - 1.1000 * 2^63
+{ .mfb
+ ldfe f_B3 = [r_ad3],16
+ fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF
+(p6) br.cond.spnt COSH_HUGE // Branch if result will overflow
}
+;;
{ .mfi
-(p0) getf.sig r35 = cosh_FR_M_temp
- nop.f 999
- nop.i 999 ;;
+ getf.sig r_M = f_M_temp
+ nop.f 0
+ cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
}
+;;
-// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// Calculate j. j is the signed extension of the six lsb of M. It
// has a range of -32 thru 31.
-// r35 = M
-// r36 = j
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) and r36 = 0x3f, r35 ;;
-}
// Calculate R
-// f13 = f44 - f12*f10 = x - M*log2by64_hi
-// f14 = f13 - f8*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
-
-{ .mfi
- nop.m 999
-(p0) fnma.s1 cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X
- nop.i 999
-}
+// ax - M*log2by64_hi
+// R = (ax - M*log2by64_hi) - M*log2by64_lo
{ .mfi
-(p0) ldfe cosh_FR_A2 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fnma.s1 f_R_temp = f_M, f_log2by64_hi, f_ABS_X
+ and r_j = 0x3f, r_M
}
+;;
-{ .mfi
- nop.m 999
-(p0) fnma.s1 cosh_FR_R = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp
- nop.i 999
+{ .mii
+ nop.m 0
+ shl r_jshf = r_j, 0x2 // Shift j so can sign extend it
+;;
+ sxt1 r_jshf = r_jshf
}
+;;
-// Get the B coefficients
-// f15 = B_1
-// f32 = B_2
-// f33 = B_3
-
-{ .mmi
-(p0) ldfe cosh_FR_A3 = [r34],16 ;;
-(p0) ldfe cosh_FR_B1 = [r34],16
- nop.i 999 ;;
+{ .mii
+ nop.m 0
+ shr r_j = r_jshf, 0x2 // Now j has range -32 to 31
+ nop.i 0
}
+;;
{ .mmi
-(p0) ldfe cosh_FR_B2 = [r34],16 ;;
-(p0) ldfe cosh_FR_B3 = [r34],16
- nop.i 999 ;;
+ shladd r_ad_J_hi = r_j, 4, r_ad4 // pointer to Tjhi
+ sub r_Mmj = r_M, r_j // M-j
+ sub r_mj = r0, r_j // Form -j
}
+;;
-{ .mii
- nop.m 999
-(p0) shl r34 = r36, 0x2 ;;
-(p0) sxt1 r37 = r34 ;;
+// The TBL and EXP branches are merged and predicated
+// If TBL, p6 true, 0.25 <= |x| < 32
+// If EXP, p7 true, 32 <= |x| < overflow_limit
+//
+// N = (M-j)/64
+{ .mfi
+ ldfe f_Tjhi = [r_ad_J_hi]
+ fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp
+ shr r_N = r_Mmj, 0x6 // N = (M-j)/64
}
-
-// ******************************************************
-// STEP 2 (TBL and EXP)
-// ******************************************************
-// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
-// f12 = R*R*R
-// f13 = R*R
-// f14 = R <== from above
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Rsq = cosh_FR_R, cosh_FR_R, f0
-(p0) shr r36 = r37, 0x2 ;;
+ shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
+ nop.f 0
+ shladd r_ad_mJ_lo = r_mj, 2, r_ad5 // pointer to Tmjlo
}
+;;
-// r34 = M-j = r35 - r36
-// r35 = N = (M-j)/64
-
-{ .mii
-(p0) sub r34 = r35, r36
- nop.i 999 ;;
-(p0) shr r35 = r34, 0x6 ;;
+{ .mfi
+ sub r_2mNm1 = r_signexp_0_5, r_N // signexp 2^(-N-1)
+ nop.f 0
+ shladd r_ad_J_lo = r_j, 2, r_ad5 // pointer to Tjlo
}
-
-{ .mii
-(p0) sub r40 = r38, r35
-(p0) adds r37 = 0x1, r35
-(p0) add r39 = r38, r35 ;;
+{ .mfi
+ ldfe f_Tmjhi = [r_ad_mJ_hi]
+ nop.f 0
+ add r_2Nm1 = r_signexp_0_5, r_N // signexp 2^(N-1)
}
+;;
-// Get the address of the J table, add the offset,
-// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
-// f32 = T(j)_hi
-// f33 = T(j)_lo
-// f34 = T(-j)_hi
-// f35 = T(-j)_lo
-
-{ .mmi
-(p0) sub r34 = r35, r32
-(p0) addl r37 = @ltoff(double_cosh_j_table), gp
- nop.i 999
+{ .mmf
+ ldfs f_Tmjlo = [r_ad_mJ_lo]
+ setf.exp f_sneg = r_2mNm1 // Form 2^(-N-1)
+ nop.f 0
}
;;
-{ .mfi
- ld8 r37 = [r37]
-(p0) fma.s1 cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0
- nop.i 999
+{ .mmf
+ ldfs f_Tjlo = [r_ad_J_lo]
+ setf.exp f_spos = r_2Nm1 // Form 2^(N-1)
+ nop.f 0
}
+;;
// ******************************************************
-// STEP 3 Now decide if we need to branch to EXP
+// STEP 2 (TBL and EXP)
// ******************************************************
-// Put 32 in f9; p6 true if x < 32
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010004 ;;
+{ .mmf
+ nop.m 0
+ nop.m 0
+ fma.s1 f_Rsq = f_R, f_R, f0
}
+;;
-// Calculate p_even
-// f34 = B_2 + Rsq *B_3
-// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
-// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3, cosh_FR_B2
- nop.i 999 ;;
-}
+// Calculate p_even
+// B_2 + Rsq *B_3
+// B_1 + Rsq * (B_2 + Rsq *B_3)
+// p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1
- nop.i 999
+ nop.m 0
+ fma.s1 f_peven_temp1 = f_Rsq, f_B3, f_B2
+ nop.i 0
}
-
// Calculate p_odd
-// f34 = A_2 + Rsq *A_3
-// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
-// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
-
+// A_2 + Rsq *A_3
+// A_1 + Rsq * (A_2 + Rsq *A_3)
+// podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_podd_temp1 = cosh_FR_Rsq, cosh_FR_A3, cosh_FR_A2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_podd_temp1 = f_Rsq, f_A3, f_A2
+ nop.i 0
}
+;;
{ .mfi
-(p0) setf.exp cosh_FR_N_temp1 = r39
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_Rcub = f_Rsq, f_R, f0
+ nop.i 0
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_peven = cosh_FR_Rsq, cosh_FR_peven_temp2, f0
- nop.i 999
-}
+//
+// If TBL,
+// Calculate S_hi and S_lo, and C_hi
+// SC_hi_temp = sneg * Tmjhi
+// S_hi = spos * Tjhi - SC_hi_temp
+// S_hi = spos * Tjhi - (sneg * Tmjhi)
+// C_hi = spos * Tjhi + SC_hi_temp
+// C_hi = spos * Tjhi + (sneg * Tmjhi)
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_podd_temp2 = cosh_FR_Rsq, cosh_FR_podd_temp1, cosh_FR_A1
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
+ nop.i 0
}
+;;
+// If TBL,
+// C_lo_temp3 = sneg * Tmjlo
+// C_lo_temp4 = spos * Tjlo + C_lo_temp3
+// C_lo_temp4 = spos * Tjlo + (sneg * Tmjlo)
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s1 f_C_lo_temp3 = f_sneg, f_Tmjlo, f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_podd = cosh_FR_podd_temp2, cosh_FR_Rcub, cosh_FR_R
- nop.i 999
-}
-
-// sinh_GR_mj contains the table offset for -j
-// sinh_GR_j contains the table offset for +j
-// p6 is true when j <= 0
-
-{ .mlx
-(p0) setf.exp cosh_FR_N_temp2 = r40
-(p0) movl r40 = 0x0000000000000020 ;;
+ nop.m 0
+ fma.s1 f_peven_temp2 = f_Rsq, f_peven_temp1, f_B1
+ nop.i 0
}
-
{ .mfi
-(p0) sub GR_mJ = r40, r36
-(p0) fmerge.se cosh_FR_spos = cosh_FR_N_temp1, f1
-(p0) adds GR_J = 0x20, r36 ;;
+ nop.m 0
+ fma.s1 f_podd_temp2 = f_Rsq, f_podd_temp1, f_A1
+ nop.i 0
}
+;;
-{ .mii
- nop.m 999
-(p0) shl GR_mJ = GR_mJ, 5 ;;
-(p0) add AD_mJ = r37, GR_mJ ;;
+// If EXP,
+// Compute 2^(N-1) * Tjhi and 2^(N-1) * Tjlo
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Tjhi_spos = f_Tjhi, f_spos, f0
+ nop.i 0
}
-
-{ .mmi
- nop.m 999
-(p0) ldfe cosh_FR_Tmjhi = [AD_mJ],16
-(p0) shl GR_J = GR_J, 5 ;;
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Tjlo_spos = f_Tjlo, f_spos, f0
+ nop.i 0
}
+;;
{ .mfi
-(p0) ldfs cosh_FR_Tmjlo = [AD_mJ],16
-(p0) fcmp.lt.unc.s1 p6,p7 = cosh_FR_X,f9
-(p0) add AD_J = r37, GR_J ;;
+ nop.m 0
+(p6) fma.s1 f_C_hi = f_spos, f_Tjhi, f_SC_hi_temp
+ nop.i 0
}
+;;
-{ .mmi
-(p0) ldfe cosh_FR_Tjhi = [AD_J],16 ;;
-(p0) ldfs cosh_FR_Tjlo = [AD_J],16
- nop.i 999 ;;
+{ .mfi
+ nop.m 0
+(p6) fms.s1 f_S_hi = f_spos, f_Tjhi, f_SC_hi_temp
+ nop.i 0
}
-
-{ .mfb
- nop.m 999
-(p0) fmerge.se cosh_FR_sneg = cosh_FR_N_temp2, f1
-(p7) br.cond.spnt L(COSH_BY_EXP) ;;
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_C_lo_temp4 = f_spos, f_Tjlo, f_C_lo_temp3
+ nop.i 0
}
-
-// ******************************************************
-// If NOT branch to EXP
-// ******************************************************
-// Calculate C_hi
-// ******************************************************
-// cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
-// cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_peven = f_Rsq, f_peven_temp2, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi_temp
- nop.i 999
+ nop.m 0
+ fma.s1 f_podd = f_podd_temp2, f_Rcub, f_R
+ nop.i 0
}
+;;
-// ******************************************************
-// Calculate S_hi
-// ******************************************************
-// cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
-// cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi_temp1
+// If TBL,
+// C_lo_temp1 = spos * Tjhi - C_hi
+// C_lo_temp2 = sneg * Tmjhi + C_lo_temp1
+// C_lo_temp2 = sneg * Tmjhi + (spos * Tjhi - C_hi)
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_S_hi_temp1 = cosh_FR_sneg, cosh_FR_Tmjhi, f0
- nop.i 999 ;;
+ nop.m 0
+(p6) fms.s1 f_C_lo_temp1 = f_spos, f_Tjhi, f_C_hi
+ nop.i 0
}
-
-// ******************************************************
-// Calculate C_lo
-// ******************************************************
-// cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
-// cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjlo + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
-// cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
-// cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
-// cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
+;;
{ .mfi
- nop.m 999
-(p0) fms.s1 cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi
- nop.i 999
+ nop.m 0
+(p6) fma.s1 f_C_lo_temp2 = f_sneg, f_Tmjhi, f_C_lo_temp1
+ nop.i 0
}
+;;
+// If EXP,
+// Y_hi = 2^(N-1) * Tjhi
+// Y_lo = 2^(N-1) * Tjhi * (p_odd + p_even) + 2^(N-1) * Tjlo
{ .mfi
- nop.m 999
-(p0) fms.s1 cosh_FR_S_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s1 f_Y_lo_temp = f_peven, f1, f_podd
+ nop.i 0
}
+;;
+// If TBL,
+// C_lo = C_lo_temp4 + C_lo_temp2
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1
- nop.i 999
+ nop.m 0
+(p6) fma.s1 f_C_lo = f_C_lo_temp4, f1, f_C_lo_temp2
+ nop.i 0
}
+;;
+// If TBL,
+// Y_hi = C_hi
+// Y_lo = S_hi*p_odd + (C_hi*p_even + C_lo)
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s1 f_Y_lo_temp = f_C_hi, f_peven, f_C_lo
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_lo_temp3 = cosh_FR_spos, cosh_FR_Tjlo, cosh_FR_C_lo_temp1
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s1 f_Y_lo = f_Tjhi_spos, f_Y_lo_temp, f_Tjlo_spos
+ nop.i 0
}
+;;
+// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_C_lo = cosh_FR_C_lo_temp3, f1, cosh_FR_C_lo_temp2
- nop.i 999 ;;
+ nop.m 0
+ fmpy.s0 f_tmp = f_B2, f_B2
+ nop.i 0
}
-
-// ******************************************************
-// cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
-// cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
-// cosh_FR_COSH = Y_hi + Y_lo
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo_temp = cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s1 f_Y_lo = f_S_hi, f_podd, f_Y_lo_temp
+ nop.i 0
}
+;;
+// f8 = answer = Y_hi + Y_lo
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s0 f8 = f_Y_lo, f1, f_Tjhi_spos
+ nop.i 0
}
+;;
+// f8 = answer = Y_hi + Y_lo
{ .mfb
- nop.m 999
-(p0) fma.s0 f8 = cosh_FR_C_hi, f1, cosh_FR_Y_lo
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+(p6) fma.s0 f8 = f_Y_lo, f1, f_C_hi
+ br.ret.sptk b0 // Exit for COSH_BY_TBL and COSH_BY_EXP
}
+;;
-L(COSH_BY_EXP):
-
-// When p7 is true, we know that an overflow is not going to happen
-// When p7 is false, we must check for possible overflow
-// p7 is the over_SAFE flag
-// f44 = Scale * (Y_hi + Y_lo)
-// = cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
-{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo_temp = cosh_FR_peven, f1, cosh_FR_podd
- nop.i 999
+// Here if 0 < |x| < 0.25
+COSH_BY_POLY:
+{ .mmf
+ ldfe f_P6 = [r_ad2e],16
+ ldfe f_P5 = [r_ad2o],16
+ nop.f 0
}
+;;
-// Now we are in EXP. This is the only path where an overflow is possible
-// but not for certain. So this is the only path where over_SAFE has any use.
-// r34 still has N-1
-// There is a danger of double-extended overflow if N-1 > 0x3ffe = 16382
-// There is a danger of double overflow if N-1 > 0x3fe = 1022
-// There is a danger of single overflow if N-1 > 0x7e = 126
+{ .mmi
+ ldfe f_P4 = [r_ad2e],16
+ ldfe f_P3 = [r_ad2o],16
+ nop.i 0
+}
+;;
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000003ffe ;;
+{ .mmi
+ ldfe f_P2 = [r_ad2e],16
+ ldfe f_P1 = [r_ad2o],16
+ nop.i 0
}
+;;
{ .mfi
-(p0) cmp.gt.unc p0,p7 = r34, r32
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_X3 = f_NORM_X, f_X2, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_Tjhi, cosh_FR_Y_lo_temp, cosh_FR_Tjlo
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_X4 = f_X2, f_X2, f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_COSH_temp = cosh_FR_Y_lo, f1, cosh_FR_Tjhi
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_poly65 = f_X2, f_P6, f_P5
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s0 f44 = cosh_FR_spos, cosh_FR_COSH_temp, f0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_poly43 = f_X2, f_P4, f_P3
+ nop.i 0
}
+;;
-// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p7) fmpy.s0 cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_poly21 = f_X2, f_P2, f_P1
+ nop.i 0
}
+;;
-// If over_SAFE is set, return
-{ .mfb
- nop.m 999
-(p7) fmerge.s f8 = f44,f44
-(p7) br.ret.sptk b0 ;;
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly6543 = f_X4, f_poly65, f_poly43
+ nop.i 0
}
-
-// Else see if we overflowed
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// If WRE is set then an overflow will not occur in EXP.
-// The input value that would cause a register (WRE) value to overflow is about 2^15
-// and this input would go into the HUGE path.
-// Answer with WRE is in f43.
+;;
{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
+ nop.m 0
+ fma.s1 f_poly6to1 = f_X4, f_poly6543, f_poly21
+ nop.i 0
}
+;;
+// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fma.s2 f43 = cosh_FR_spos, cosh_FR_COSH_temp, f0
- nop.i 999 ;;
+ nop.m 0
+ fmpy.s0 f_tmp = f_P6, f_P6
+ nop.i 0
}
-
-// 103FF => 103FF -FFFF = 400(true)
-// 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
-// double (7FE). So 0 103FF 8000000000000000 is one ulp more than
-// largest double in register bias
-
-// 13FFF => 13FFF -FFFF = 4000(true)
-
-// Now set p8 if the answer with WRE is greater than or equal this value
-// Also set p9 if the answer with WRE is less than or equal to negative this value
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000013fff ;;
+{ .mfb
+ nop.m 0
+ fma.s0 f8 = f_poly6to1, f_X2, f1
+ br.ret.sptk b0 // Exit COSH_BY_POLY
}
+;;
-{ .mmf
- nop.m 999
-(p0) setf.exp f41 = r32
-(p0) fsetc.s2 0x7F,0x40 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
- nop.i 999
+// Here if x denorm or unorm
+COSH_DENORM:
+// Determine if x is really a denorm and not a unorm
+{ .mmf
+ getf.exp r_signexp_x = f_NORM_X
+ mov r_exp_denorm = 0x0c001 // Real denorms have exp < this
+ fmerge.s f_ABS_X = f0, f_NORM_X
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.ns f42 = f41, f41
- nop.i 999 ;;
+ nop.m 0
+ fcmp.eq.s0 p10,p0 = f8, f0 // Set denorm flag
+ nop.i 0
}
+;;
-// The error tag for overflow is 63
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p8) mov GR_Parameter_TAG = 63 ;;
+// Set p8 if really a denorm
+{ .mmi
+ and r_exp_x = r_exp_mask, r_signexp_x
+;;
+ cmp.lt p8,p9 = r_exp_x, r_exp_denorm
+ nop.i 0
}
+;;
+// Identify denormal operands.
{ .mfb
- nop.m 999
-(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
-(p8) br.cond.spnt __libm_error_region ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) mov GR_Parameter_TAG = 63
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt __libm_error_region ;;
-}
-
-// Dummy multiply to generate inexact
-{ .mfi
- nop.m 999
-(p0) fmpy.s0 cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
- nop.i 999 ;;
+ nop.m 0
+(p8) fma.s0 f8 = f8,f8,f1 // If x denorm, result=1+x^2
+(p9) br.cond.sptk COSH_COMMON // Return to main path if x unorm
}
+;;
{ .mfb
- nop.m 999
-(p0) fmerge.s f8 = f44,f44
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ nop.f 0
+ br.ret.sptk b0 // Exit if x denorm
}
+;;
-// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
-// SAFE: SAFE is always 0 for HUGE
-
-L(COSH_HUGE):
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000015dbf ;;
+// Here if |x| >= overflow limit
+COSH_HUGE:
+// for COSH_HUGE, put 24000 in exponent; take sign from input
+{ .mmi
+ mov r_exp_huge = 0x15dbf
+;;
+ setf.exp f_huge = r_exp_huge
+ nop.i 0
}
+;;
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ alloc r32 = ar.pfs,0,5,4,0
+ fma.s1 f_signed_hi_lo = f_huge, f1, f1
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 cosh_FR_hi_lo = f1, f9, f1
- nop.i 999 ;;
+ nop.m 0
+ fma.s0 f_pre_result = f_signed_hi_lo, f_huge, f0
+ mov GR_Parameter_TAG = 63
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s0 f44 = f9, cosh_FR_hi_lo, f0
-(p0) mov GR_Parameter_TAG = 63
-}
-.endp coshl
-ASM_SIZE_DIRECTIVE(coshl)
+GLOBAL_IEEE754_END(coshl)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
+
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
+
{ .mmi
- stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
{ .mib
- stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
+
{ .mmi
- ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_exp.S b/sysdeps/ia64/fpu/e_exp.S
index db02336ecf..5ae8afeb99 100644
--- a/sysdeps/ia64/fpu/e_exp.S
+++ b/sysdeps/ia64/fpu/e_exp.S
@@ -1,10 +1,10 @@
.file "exp.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,26 +20,26 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
+// 2/02/00 Initial version
// 3/07/00 exp(inf) = inf but now does NOT call error support
// exp(-inf) = 0 but now does NOT call error support
// 4/04/00 Unwind support added
@@ -48,6 +48,10 @@
// 11/30/00 Reworked to shorten main path, widen main path to include all
// args in normal range, and add quick exit for 0, nan, inf.
// 12/05/00 Loaded constants earlier with setf to save 2 cycles.
+// 02/05/02 Corrected uninitialized predicate in POSSIBLE_UNDERFLOW path
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 09/07/02 Force inexact flag
+// 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path
// API
//==============================================================
@@ -67,187 +71,167 @@
// Construct 2^M
// Get 2^(index_1/128) from table_1;
// Get 2^(index_2/8) from table_2;
-// Calculate exp(r) by series
+// Calculate exp(r) by 5th order polynomial
// r = x - n (log2/128)_high
// delta = - n (log2/128)_low
// Calculate exp(delta) as 1 + delta
-// Special values
+// Special values
//==============================================================
// exp(+0) = 1.0
// exp(-0) = 1.0
-// exp(+qnan) = +qnan
-// exp(-qnan) = -qnan
-// exp(+snan) = +qnan
-// exp(-snan) = -qnan
+// exp(+qnan) = +qnan
+// exp(-qnan) = -qnan
+// exp(+snan) = +qnan
+// exp(-snan) = -qnan
-// exp(-inf) = +0
+// exp(-inf) = +0
// exp(+inf) = +inf
-// Overfow and Underfow
+// Overflow and Underflow
//=======================
-// exp(-x) = smallest double normal when
-// x = -708.396 = c086232bdd7abcd2
-
// exp(x) = largest double normal when
-// x = 709.7827 = 40862e42fefa39ef
+// x = 709.7827 = 0x40862e42fefa39ef
+
+// exp(x) = smallest double normal when
+// x = -708.396 = 0xc086232bdd7abcd2
+// exp(x) = largest round-to-nearest double zero when
+// x = -745.1332 = 0xc0874910d52d3052
// Registers used
//==============================================================
-// Floating Point registers used:
-// f8, input
-// f9 -> f15, f32 -> f60
+// Floating Point registers used:
+// f8, input, output
+// f6 -> f15, f32 -> f49
-// General registers used:
-// r32 -> r60
+// General registers used:
+// r14 -> r40
// Predicate registers used:
// p6 -> p15
-#include "libm_support.h"
-
// Assembly macros
//==============================================================
-exp_GR_rshf = r33
-EXP_AD_TB1 = r34
-EXP_AD_TB2 = r35
-EXP_AD_P = r36
-
-exp_GR_N = r37
-exp_GR_index_1 = r38
-exp_GR_index_2_16 = r39
-
-exp_GR_biased_M = r40
-exp_GR_index_1_16 = r41
-EXP_AD_T1 = r42
-EXP_AD_T2 = r43
-exp_GR_sig_inv_ln2 = r44
-
-exp_GR_17ones = r45
-exp_GR_one = r46
-exp_TB1_size = r47
-exp_TB2_size = r48
-exp_GR_rshf_2to56 = r49
-
-exp_GR_gt_ln = r50
-exp_GR_exp_2tom56 = r51
-
-exp_GR_17ones_m1 = r52
-
-GR_SAVE_B0 = r53
-GR_SAVE_PFS = r54
-GR_SAVE_GP = r55
-GR_SAVE_SP = r56
-
-GR_Parameter_X = r57
-GR_Parameter_Y = r58
-GR_Parameter_RESULT = r59
-GR_Parameter_TAG = r60
-
-
-FR_X = f10
-FR_Y = f1
-FR_RESULT = f8
-
-EXP_RSHF_2TO56 = f6
-EXP_INV_LN2_2TO63 = f7
-EXP_W_2TO56_RSH = f9
-EXP_2TOM56 = f11
-exp_P4 = f12
-exp_P3 = f13
-exp_P2 = f14
-exp_P1 = f15
-
-exp_ln2_by_128_hi = f33
-exp_ln2_by_128_lo = f34
-
-EXP_RSHF = f35
-EXP_Nfloat = f36
-exp_W = f37
-exp_r = f38
-exp_f = f39
-
-exp_rsq = f40
-exp_rcube = f41
-
-EXP_2M = f42
-exp_S1 = f43
-exp_T1 = f44
-
-EXP_MIN_DBL_OFLOW_ARG = f45
-EXP_MAX_DBL_ZERO_ARG = f46
-EXP_MAX_DBL_NORM_ARG = f47
-EXP_MAX_DBL_UFLOW_ARG = f48
-EXP_MIN_DBL_NORM_ARG = f49
-exp_rP4pP3 = f50
-exp_P_lo = f51
-exp_P_hi = f52
-exp_P = f53
-exp_S = f54
-
-EXP_NORM_f8 = f56
-
-exp_wre_urm_f8 = f57
-exp_ftz_urm_f8 = f57
-
-exp_gt_pln = f58
-
-exp_S2 = f59
-exp_T2 = f60
+rRshf = r14
+rAD_TB1 = r15
+rAD_T1 = r15
+rAD_TB2 = r16
+rAD_T2 = r16
+rAD_P = r17
+rN = r18
+rIndex_1 = r19
+rIndex_2_16 = r20
+rM = r21
+rBiased_M = r21
+rIndex_1_16 = r21
+rSig_inv_ln2 = r22
+rExp_bias = r23
+rExp_mask = r24
+rTmp = r25
+rRshf_2to56 = r26
+rGt_ln = r27
+rExp_2tom56 = r28
+
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+fRSHF_2TO56 = f6
+fINV_LN2_2TO63 = f7
+fW_2TO56_RSH = f9
+f2TOM56 = f11
+fP5 = f12
+fP54 = f12
+fP5432 = f12
+fP4 = f13
+fP3 = f14
+fP32 = f14
+fP2 = f15
+fP = f15
+
+fLn2_by_128_hi = f33
+fLn2_by_128_lo = f34
+
+fRSHF = f35
+fNfloat = f36
+fNormX = f37
+fR = f38
+fF = f39
+
+fRsq = f40
+f2M = f41
+fS1 = f42
+fT1 = f42
+fS2 = f43
+fT2 = f43
+fS = f43
+fWre_urm_f8 = f44
+fFtz_urm_f8 = f44
+
+fMIN_DBL_OFLOW_ARG = f45
+fMAX_DBL_ZERO_ARG = f46
+fMAX_DBL_NORM_ARG = f47
+fMIN_DBL_NORM_ARG = f48
+fGt_pln = f49
+fTmp = f49
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
+RODATA
.align 16
// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
// double-extended 1/ln(2)
// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
-// 3fff b8aa 3b29 5c17 f0bc
+// 3fff b8aa 3b29 5c17 f0bc
// For speed the significand will be loaded directly with a movl and setf.sig
// and the exponent will be bias+63 instead of bias+0. Thus subsequent
// computations need to scale appropriately.
-// The constant 128/ln(2) is needed for the computation of w. This is also
+// The constant 128/ln(2) is needed for the computation of w. This is also
// obtained by scaling the computations.
//
-// Two shifting constants are loaded directly with movl and setf.d.
-// 1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7)
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
// This constant is added to x*1/ln2 to shift the integer part of
// x*128/ln2 into the rightmost bits of the significand.
-// The result of this fma is EXP_W_2TO56_RSH.
-// 2. EXP_RSHF = 1.1000..00 * 2^(63)
-// This constant is subtracted from EXP_W_2TO56_RSH * 2^(-56) to give
+// The result of this fma is fW_2TO56_RSH.
+// 2. fRSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
// the integer part of w, n, as a floating-point number.
-// The result of this fms is EXP_Nfloat.
+// The result of this fms is fNfloat.
-exp_table_1:
-ASM_TYPE_DIRECTIVE(exp_table_1,@object)
-data8 0x40862e42fefa39f0 // smallest dbl overflow arg
-data8 0xc0874c0000000000 // approx largest arg for zero result
-data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result
-data8 0xc086232bdd7abcd3 // largest dbl underflow arg
-data8 0xc086232bdd7abcd2 // smallest dbl arg to give normal dbl result
-data8 0x0 // pad
+LOCAL_OBJECT_START(exp_table_1)
+data8 0x40862e42fefa39f0 // smallest dbl overflow arg, +709.7827
+data8 0xc0874910d52d3052 // largest arg for rnd-to-nearest 0 result, -745.133
+data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result, +709.7827
+data8 0xc086232bdd7abcd2 // smallest dbl arg to give normal dbl result, -708.396
data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
-
+//
// Table 1 is 2^(index_1/128) where
// index_1 goes from 0 to 15
-
+//
data8 0x8000000000000000 , 0x00003FFF
data8 0x80B1ED4FD999AB6C , 0x00003FFF
data8 0x8164D1F3BC030773 , 0x00003FFF
@@ -264,12 +248,11 @@ data8 0x88980E8092DA8527 , 0x00003FFF
data8 0x8955EE03618E5FDD , 0x00003FFF
data8 0x8A14D575496EFD9A , 0x00003FFF
data8 0x8AD4C6452C728924 , 0x00003FFF
-ASM_SIZE_DIRECTIVE(exp_table_1)
+LOCAL_OBJECT_END(exp_table_1)
// Table 2 is 2^(index_1/8) where
// index_2 goes from 0 to 7
-exp_table_2:
-ASM_TYPE_DIRECTIVE(exp_table_2,@object)
+LOCAL_OBJECT_START(exp_table_2)
data8 0x8000000000000000 , 0x00003FFF
data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
data8 0x9837F0518DB8A96F , 0x00003FFF
@@ -278,413 +261,356 @@ data8 0xB504F333F9DE6484 , 0x00003FFF
data8 0xC5672A115506DADD , 0x00003FFF
data8 0xD744FCCAD69D6AF4 , 0x00003FFF
data8 0xEAC0C6E7DD24392F , 0x00003FFF
-ASM_SIZE_DIRECTIVE (exp_table_2)
-
+LOCAL_OBJECT_END(exp_table_2)
-exp_p_table:
-ASM_TYPE_DIRECTIVE(exp_p_table,@object)
-data8 0x3f8111116da21757 //P_4
-data8 0x3fa55555d787761c //P_3
-data8 0x3fc5555555555414 //P_2
-data8 0x3fdffffffffffd6a //P_1
-ASM_SIZE_DIRECTIVE(exp_p_table)
+LOCAL_OBJECT_START(exp_p_table)
+data8 0x3f8111116da21757 //P5
+data8 0x3fa55555d787761c //P4
+data8 0x3fc5555555555414 //P3
+data8 0x3fdffffffffffd6a //P2
+LOCAL_OBJECT_END(exp_p_table)
-.align 32
-.global exp#
.section .text
-.proc exp#
-.align 32
-exp:
-#ifdef _LIBC
-.global __ieee754_exp#
-__ieee754_exp:
-#endif
+GLOBAL_IEEE754_ENTRY(exp)
{ .mlx
- alloc r32=ar.pfs,1,24,4,0
- movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+ nop.m 0
+ movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
{ .mlx
- addl EXP_AD_TB1 = @ltoff(exp_table_1), gp
- movl exp_GR_rshf_2to56 = 0x4768000000000000 ;; // 1.10000 2^(63+56)
+ addl rAD_TB1 = @ltoff(exp_table_1), gp
+ movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56)
}
;;
-// We do this fnorm right at the beginning to take any enabled
-// faults and to normalize any input unnormals so that SWA is not taken.
{ .mfi
- ld8 EXP_AD_TB1 = [EXP_AD_TB1]
- fclass.m p8,p0 = f8,0x07 // Test for x=0
- mov exp_GR_17ones = 0x1FFFF
+ ld8 rAD_TB1 = [rAD_TB1]
+ fclass.m p8,p0 = f8,0x07 // Test for x=0
+ mov rExp_mask = 0x1ffff
}
{ .mfi
- mov exp_TB1_size = 0x100
- fnorm EXP_NORM_f8 = f8
- mov exp_GR_exp_2tom56 = 0xffff-56
+ mov rExp_bias = 0xffff
+ fnorm.s1 fNormX = f8
+ mov rExp_2tom56 = 0xffff-56
}
;;
// Form two constants we need
-// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
-{ .mmf
- setf.sig EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // form 1/ln2 * 2^63
- setf.d EXP_RSHF_2TO56 = exp_GR_rshf_2to56 // Form const 1.100 * 2^(63+56)
- fclass.m p9,p0 = f8,0x22 // Test for x=-inf
+{ .mfi
+ setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
+ fclass.m p9,p0 = f8,0x22 // Test for x=-inf
+ nop.i 0
+}
+{ .mlx
+ setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
+ movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
}
;;
-{ .mlx
- setf.exp EXP_2TOM56 = exp_GR_exp_2tom56 // form 2^-56 for scaling Nfloat
- movl exp_GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+{ .mfi
+ ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_ZERO_ARG = [rAD_TB1],16
+ fclass.m p10,p0 = f8,0x1e1 // Test for x=+inf, nan, NaT
+ nop.i 0
}
{ .mfb
- mov exp_TB2_size = 0x80
-(p8) fma.d f8 = f1,f1,f0 // quick exit for x=0
-(p8) br.ret.spnt b0
-;;
+ setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
+(p9) fma.d.s0 f8 = f0,f0,f0 // quick exit for x=-inf
+(p9) br.ret.spnt b0
}
+;;
{ .mfi
- ldfpd EXP_MIN_DBL_OFLOW_ARG, EXP_MAX_DBL_ZERO_ARG = [EXP_AD_TB1],16
- fclass.m p10,p0 = f8,0x21 // Test for x=+inf
- nop.i 999
+ ldfpd fMAX_DBL_NORM_ARG, fMIN_DBL_NORM_ARG = [rAD_TB1],16
+ nop.f 0
+ nop.i 0
}
{ .mfb
- nop.m 999
-(p9) fma.d f8 = f0,f0,f0 // quick exit for x=-inf
-(p9) br.ret.spnt b0
-;;
+ setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63
+(p8) fma.d.s0 f8 = f1,f1,f0 // quick exit for x=0
+(p8) br.ret.spnt b0
}
-
-{ .mmf
- ldfpd EXP_MAX_DBL_NORM_ARG, EXP_MAX_DBL_UFLOW_ARG = [EXP_AD_TB1],16
- setf.d EXP_RSHF = exp_GR_rshf // Form right shift const 1.100 * 2^63
- fclass.m p11,p0 = f8,0xc3 // Test for x=nan
;;
-}
{ .mfb
- ldfd EXP_MIN_DBL_NORM_ARG = [EXP_AD_TB1],16
- nop.f 999
-(p10) br.ret.spnt b0 // quick exit for x=+inf
-;;
+ ldfe fLn2_by_128_hi = [rAD_TB1],16
+(p10) fma.d.s0 f8 = f8,f8,f0 // Result if x=+inf, nan, NaT
+(p10) br.ret.spnt b0 // quick exit for x=+inf, nan, NaT
}
+;;
{ .mfi
- ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16
- nop.f 999
- nop.i 999
-;;
+ ldfe fLn2_by_128_lo = [rAD_TB1],16
+ fcmp.eq.s0 p6,p0 = f8, f0 // Dummy to set D
+ nop.i 0
}
-
-
-{ .mfb
- ldfe exp_ln2_by_128_lo = [EXP_AD_TB1],16
-(p11) fmerge.s f8 = EXP_NORM_f8, EXP_NORM_f8
-(p11) br.ret.spnt b0 // quick exit for x=nan
;;
-}
-// After that last load, EXP_AD_TB1 points to the beginning of table 1
+// After that last load, rAD_TB1 points to the beginning of table 1
// W = X * Inv_log2_by_128
// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
{ .mfi
- nop.m 999
- fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8, EXP_INV_LN2_2TO63, EXP_RSHF_2TO56
- nop.i 999
-;;
+ nop.m 0
+ fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
+ nop.i 0
}
-
+;;
// Divide arguments into the following categories:
-// Certain Underflow/zero p11 - -inf < x <= MAX_DBL_ZERO_ARG
-// Certain Underflow p12 - MAX_DBL_ZERO_ARG < x <= MAX_DBL_UFLOW_ARG
-// Possible Underflow p13 - MAX_DBL_UFLOW_ARG < x < MIN_DBL_NORM_ARG
+// Certain Underflow p11 - -inf < x <= MAX_DBL_ZERO_ARG
+// Possible Underflow p13 - MAX_DBL_ZERO_ARG < x < MIN_DBL_NORM_ARG
// Certain Safe - MIN_DBL_NORM_ARG <= x <= MAX_DBL_NORM_ARG
// Possible Overflow p14 - MAX_DBL_NORM_ARG < x < MIN_DBL_OFLOW_ARG
// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= x < +inf
//
-// If the input is really a double arg, then there will never be "Possible
-// Underflow" or "Possible Overflow" arguments.
+// If the input is really a double arg, then there will never be
+// "Possible Overflow" arguments.
//
{ .mfi
- add EXP_AD_TB2 = exp_TB1_size, EXP_AD_TB1
- fcmp.ge.s1 p15,p14 = EXP_NORM_f8,EXP_MIN_DBL_OFLOW_ARG
- nop.i 999
-;;
+ add rAD_TB2 = 0x100, rAD_TB1
+ fcmp.ge.s1 p15,p0 = fNormX,fMIN_DBL_OFLOW_ARG
+ nop.i 0
}
+;;
{ .mfi
- add EXP_AD_P = exp_TB2_size, EXP_AD_TB2
- fcmp.le.s1 p11,p12 = EXP_NORM_f8,EXP_MAX_DBL_ZERO_ARG
- nop.i 999
-;;
+ add rAD_P = 0x80, rAD_TB2
+ fcmp.le.s1 p11,p0 = fNormX,fMAX_DBL_ZERO_ARG
+ nop.i 0
}
+;;
{ .mfb
- ldfpd exp_P4, exp_P3 = [EXP_AD_P] ,16
-(p14) fcmp.gt.unc.s1 p14,p0 = EXP_NORM_f8,EXP_MAX_DBL_NORM_ARG
-(p15) br.cond.spnt L(EXP_CERTAIN_OVERFLOW)
-;;
+ ldfpd fP5, fP4 = [rAD_P] ,16
+ fcmp.gt.s1 p14,p0 = fNormX,fMAX_DBL_NORM_ARG
+(p15) br.cond.spnt EXP_CERTAIN_OVERFLOW
}
+;;
-
-// Nfloat = round_int(W)
-// The signficand of EXP_W_2TO56_RSH contains the rounded integer part of W,
+// Nfloat = round_int(W)
+// The significand of fW_2TO56_RSH contains the rounded integer part of W,
// as a twos complement number in the lower bits (that is, it may be negative).
-// That twos complement number (called N) is put into exp_GR_N.
+// That twos complement number (called N) is put into rN.
-// Since EXP_W_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
-// before the shift constant 1.10000 * 2^63 is subtracted to yield EXP_Nfloat.
-// Thus, EXP_Nfloat contains the floating point version of N
+// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
+// Thus, fNfloat contains the floating point version of N
-
-{ .mfi
- nop.m 999
-(p12) fcmp.le.unc p12,p0 = EXP_NORM_f8,EXP_MAX_DBL_UFLOW_ARG
- nop.i 999
-}
{ .mfb
- ldfpd exp_P2, exp_P1 = [EXP_AD_P]
- fms.s1 EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF
-(p11) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW_ZERO)
-;;
+ ldfpd fP3, fP2 = [rAD_P]
+ fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
+(p11) br.cond.spnt EXP_CERTAIN_UNDERFLOW
}
+;;
{ .mfi
- getf.sig exp_GR_N = EXP_W_2TO56_RSH
-(p13) fcmp.lt.unc p13,p0 = EXP_NORM_f8,EXP_MIN_DBL_NORM_ARG
- nop.i 999
-;;
+ getf.sig rN = fW_2TO56_RSH
+ nop.f 0
+ nop.i 0
}
+;;
+// rIndex_1 has index_1
+// rIndex_2_16 has index_2 * 16
+// rBiased_M has M
+// rIndex_1_16 has index_1 * 16
-// exp_GR_index_1 has index_1
-// exp_GR_index_2_16 has index_2 * 16
-// exp_GR_biased_M has M
-// exp_GR_index_1_16 has index_1 * 16
-
-// r2 has true M
+// rM has true M
+// r = x - Nfloat * ln2_by_128_hi
+// f = 1 - Nfloat * ln2_by_128_lo
{ .mfi
- and exp_GR_index_1 = 0x0f, exp_GR_N
- fnma.s1 exp_r = EXP_Nfloat, exp_ln2_by_128_hi, EXP_NORM_f8
- shr r2 = exp_GR_N, 0x7
+ and rIndex_1 = 0x0f, rN
+ fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX
+ shr rM = rN, 0x7
}
{ .mfi
- and exp_GR_index_2_16 = 0x70, exp_GR_N
- fnma.s1 exp_f = EXP_Nfloat, exp_ln2_by_128_lo, f1
- nop.i 999
-;;
+ and rIndex_2_16 = 0x70, rN
+ fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1
+ nop.i 0
}
+;;
-
-// EXP_AD_T1 has address of T1
-// EXP_AD_T2 has address if T2
+// rAD_T1 has address of T1
+// rAD_T2 has address of T2
{ .mmi
- addl exp_GR_biased_M = 0xffff, r2
- add EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16
- shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1
-;;
+ add rBiased_M = rExp_bias, rM
+ add rAD_T2 = rAD_TB2, rIndex_2_16
+ shladd rAD_T1 = rIndex_1, 4, rAD_TB1
}
-
+;;
// Create Scale = 2^M
-// r = x - Nfloat * ln2_by_128_hi
-// f = 1 - Nfloat * ln2_by_128_lo
-
{ .mmi
- setf.exp EXP_2M = exp_GR_biased_M
- ldfe exp_T2 = [EXP_AD_T2]
- nop.i 999
-;;
+ setf.exp f2M = rBiased_M
+ ldfe fT2 = [rAD_T2]
+ nop.i 0
}
+;;
// Load T1 and T2
{ .mfi
- ldfe exp_T1 = [EXP_AD_T1]
- nop.f 999
- nop.i 999
-;;
+ ldfe fT1 = [rAD_T1]
+ fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
+ nop.i 0
}
-
+;;
{ .mfi
- nop.m 999
- fma.s1 exp_rsq = exp_r, exp_r, f0
- nop.i 999
+ nop.m 0
+ fma.s1 fRsq = fR, fR, f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s1 exp_rP4pP3 = exp_r, exp_P4, exp_P3
- nop.i 999
-;;
+ nop.m 0
+ fma.s1 fP54 = fR, fP5, fP4
+ nop.i 0
}
-
-
+;;
{ .mfi
- nop.m 999
- fma.s1 exp_rcube = exp_r, exp_rsq, f0
- nop.i 999
+ nop.m 0
+ fcmp.lt.s1 p13,p0 = fNormX,fMIN_DBL_NORM_ARG
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s1 exp_P_lo = exp_r, exp_rP4pP3, exp_P2
- nop.i 999
-;;
+ nop.m 0
+ fma.s1 fP32 = fR, fP3, fP2
+ nop.i 0
}
-
+;;
{ .mfi
- nop.m 999
- fma.s1 exp_P_hi = exp_rsq, exp_P1, exp_r
- nop.i 999
+ nop.m 0
+ fma.s1 fP5432 = fRsq, fP54, fP32
+ nop.i 0
}
-{ .mfi
- nop.m 999
- fma.s1 exp_S2 = exp_f,exp_T2,f0
- nop.i 999
;;
-}
{ .mfi
- nop.m 999
- fma.s1 exp_S1 = EXP_2M,exp_T1,f0
- nop.i 999
-;;
+ nop.m 0
+ fma.s1 fS1 = f2M,fT1,f0
+ nop.i 0
}
-
-
{ .mfi
- nop.m 999
- fma.s1 exp_P = exp_rcube, exp_P_lo, exp_P_hi
- nop.i 999
-;;
+ nop.m 0
+ fma.s1 fS2 = fF,fT2,f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fma.s1 exp_S = exp_S1,exp_S2,f0
- nop.i 999
-;;
+ nop.m 0
+ fma.s1 fP = fRsq, fP5432, fR
+ nop.i 0
}
-
-{ .bbb
-(p12) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW)
-(p13) br.cond.spnt L(EXP_POSSIBLE_UNDERFLOW)
-(p14) br.cond.spnt L(EXP_POSSIBLE_OVERFLOW)
-;;
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS1,fS2,f0
+ nop.i 0
}
+;;
+{ .mbb
+ nop.m 0
+(p13) br.cond.spnt EXP_POSSIBLE_UNDERFLOW
+(p14) br.cond.spnt EXP_POSSIBLE_OVERFLOW
+}
+;;
{ .mfb
- nop.m 999
- fma.d f8 = exp_S, exp_P, exp_S
- br.ret.sptk b0 ;; // Normal path exit
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS
+ br.ret.sptk b0 // Normal path exit
}
+;;
-L(EXP_POSSIBLE_OVERFLOW):
+EXP_POSSIBLE_OVERFLOW:
-// We got an answer. EXP_MAX_DBL_NORM_ARG < x < EXP_MIN_DBL_OFLOW_ARG
-// overflow is a possibility, not a certainty
+// Here if fMAX_DBL_NORM_ARG < x < fMIN_DBL_OFLOW_ARG
+// This cannot happen if input is a double, only if input is higher precision.
+// Overflow is a possibility, not a certainty.
-{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x42
- nop.i 999 ;;
-}
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest double, then we have
+// overflow
{ .mfi
- nop.m 999
- fma.d.s2 exp_wre_urm_f8 = exp_S, exp_P, exp_S
- nop.i 999 ;;
+ mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
}
-
-// We define an overflow when the answer with
-// WRE set
-// user-defined rounding mode
-// is ldn +1
-
-// Is the exponent 1 more than the largest double?
-// If so, go to ERROR RETURN, else get the answer and
-// leave.
-
-// Largest double is 7FE (biased double)
-// 7FE - 3FF + FFFF = 103FE
-// Create + largest_double_plus_ulp
-// Create - largest_double_plus_ulp
-// Calculate answer with WRE set.
-
-// Cases when answer is ldn+1 are as follows:
-// ldn ldn+1
-// --+----------|----------+------------
-// |
-// +inf +inf -inf
-// RN RN
-// RZ
+;;
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x40
- mov exp_GR_gt_ln = 0x103ff ;;
+ setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp
+ fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set
+ nop.i 0
}
+;;
{ .mfi
- setf.exp exp_gt_pln = exp_GR_gt_ln
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcmp.ge.unc.s1 p6, p0 = exp_wre_urm_f8, exp_gt_pln
- nop.i 999 ;;
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
- nop.f 999
-(p6) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) ;; // Branch if really overflow
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt EXP_CERTAIN_OVERFLOW // Branch if overflow
}
+;;
{ .mfb
- nop.m 999
- fma.d f8 = exp_S, exp_P, exp_S
- br.ret.sptk b0 ;; // Exit if really no overflow
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS
+ br.ret.sptk b0 // Exit if really no overflow
}
+;;
-L(EXP_CERTAIN_OVERFLOW):
+EXP_CERTAIN_OVERFLOW:
{ .mmi
- sub exp_GR_17ones_m1 = exp_GR_17ones, r0, 1 ;;
- setf.exp f9 = exp_GR_17ones_m1
- nop.i 999 ;;
+ sub rTmp = rExp_mask, r0, 1
+;;
+ setf.exp fTmp = rTmp
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fmerge.s FR_X = f8,f8
- nop.i 999
+ alloc r32=ar.pfs,1,4,4,0
+ fmerge.s FR_X = f8,f8
+ nop.i 0
}
{ .mfb
- mov GR_Parameter_TAG = 14
- fma.d FR_RESULT = f9, f9, f0 // Set I,O and +INF result
- br.cond.sptk __libm_error_region ;;
+ mov GR_Parameter_TAG = 14
+ fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
}
+;;
-L(EXP_POSSIBLE_UNDERFLOW):
+EXP_POSSIBLE_UNDERFLOW:
-// We got an answer. EXP_MAX_DBL_UFLOW_ARG < x < EXP_MIN_DBL_NORM_ARG
-// underflow is a possibility, not a certainty
+// Here if fMAX_DBL_ZERO_ARG < x < fMIN_DBL_NORM_ARG
+// Underflow is a possibility, not a certainty
// We define an underflow when the answer with
// ftz set
@@ -709,81 +635,111 @@ L(EXP_POSSIBLE_UNDERFLOW):
// largest dn smallest normal
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x41
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x41 // Get user's round mode, set ftz
+ nop.i 0
}
+;;
+
{ .mfi
- nop.m 999
- fma.d.s2 exp_ftz_urm_f8 = exp_S, exp_P, exp_S
- nop.i 999 ;;
+ nop.m 0
+ fma.d.s2 fFtz_urm_f8 = fS, fP, fS // Result with ftz set
+ nop.i 0
}
+;;
+
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x40
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off ftz in sf2
+ nop.i 0
}
+;;
+
{ .mfi
- nop.m 999
- fcmp.eq.unc.s1 p6, p0 = exp_ftz_urm_f8, f0
- nop.i 999 ;;
+ nop.m 0
+ fcmp.eq.s1 p6, p7 = fFtz_urm_f8, f0 // Test for underflow
+ nop.i 0
}
-{ .mfb
- nop.m 999
- nop.f 999
-(p6) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) ;; // Branch if really underflow
+{ .mfi
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS // Compute result, set I, maybe U
+ nop.i 0
}
-{ .mfb
- nop.m 999
- fma.d f8 = exp_S, exp_P, exp_S
- br.ret.sptk b0 ;; // Exit if really no underflow
+;;
+
+{ .mbb
+ nop.m 0
+(p6) br.cond.spnt EXP_UNDERFLOW_COMMON // Branch if really underflow
+(p7) br.ret.sptk b0 // Exit if really no underflow
}
+;;
-L(EXP_CERTAIN_UNDERFLOW):
-{ .mfi
- nop.m 999
- fmerge.s FR_X = f8,f8
- nop.i 999
+EXP_CERTAIN_UNDERFLOW:
+// Here if x <= fMAX_DBL_ZERO_ARG
+// Result will be zero (or smallest denorm if round to +inf) with I, U set
+{ .mmi
+ mov rTmp = 1
+;;
+ setf.exp fTmp = rTmp // Form small normal
+ nop.i 0
}
+;;
+
{ .mfb
- mov GR_Parameter_TAG = 15
- fma.d FR_RESULT = exp_S, exp_P, exp_S // Set I,U and tiny result
- br.cond.sptk __libm_error_region ;;
+ nop.m 0
+ fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
+ br.cond.sptk EXP_UNDERFLOW_COMMON
}
+;;
-L(EXP_CERTAIN_UNDERFLOW_ZERO):
-{ .mmi
- mov exp_GR_one = 1 ;;
- setf.exp f9 = exp_GR_one
- nop.i 999 ;;
+EXP_UNDERFLOW_COMMON:
+// Determine if underflow result is zero or nonzero
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ fcmp.eq.s1 p6, p0 = f8, f0
+ nop.i 0
}
+;;
-{ .mfi
- nop.m 999
- fmerge.s FR_X = f8,f8
- nop.i 999
+{ .mfb
+ nop.m 0
+ fmerge.s FR_X = fNormX,fNormX
+(p6) br.cond.spnt EXP_UNDERFLOW_ZERO
}
+;;
+
+EXP_UNDERFLOW_NONZERO:
+// Here if x < fMIN_DBL_NORM_ARG and result nonzero;
+// I, U are set
{ .mfb
- mov GR_Parameter_TAG = 15
- fma.d FR_RESULT = f9, f9, f0 // Set I,U and tiny (+0.0) result
- br.cond.sptk __libm_error_region ;;
+ mov GR_Parameter_TAG = 15
+ nop.f 0 // FR_RESULT already set
+ br.cond.sptk __libm_error_region
}
+;;
-.endp exp
-ASM_SIZE_DIRECTIVE(exp)
+EXP_UNDERFLOW_ZERO:
+// Here if x < fMIN_DBL_NORM_ARG and result zero;
+// I, U are set
+{ .mfb
+ mov GR_Parameter_TAG = 15
+ nop.f 0 // FR_RESULT already set
+ br.cond.sptk __libm_error_region
+}
+;;
+GLOBAL_IEEE754_END(exp)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
-.fframe 64
+.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@@ -791,24 +747,24 @@ __libm_error_region:
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
- stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
@@ -817,12 +773,11 @@ __libm_error_region:
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
+ mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
-};;
+};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_expf.S b/sysdeps/ia64/fpu/e_expf.S
index 2aad021335..8d620b6ffa 100644
--- a/sysdeps/ia64/fpu/e_expf.S
+++ b/sysdeps/ia64/fpu/e_expf.S
@@ -1,10 +1,10 @@
.file "expf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,589 +35,501 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
-//==============================================================
-// 4/04/00 Unwind update
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+//*********************************************************************
+// 02/02/00 Original version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 8/21/00 Improvements to save 2 cycles on main path, and shorten x=0 case
+// 08/21/00 Improvements to save 2 cycles on main path, and shorten x=0 case
// 12/07/00 Widen main path, shorten x=inf, nan paths
+// 03/15/01 Fix monotonicity problem around x=0 for round to +inf
+// 02/05/02 Corrected uninitialized predicate in POSSIBLE_UNDERFLOW path
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 07/26/02 Algorithm changed, accuracy improved
+// 09/26/02 support of higher precision inputs added, underflow threshold
+// corrected
+// 11/15/02 Improved performance on Itanium 2, added possible over/under paths
+//
+//
+// API
+//*********************************************************************
+// float expf(float)
+//
+// Overview of operation
+//*********************************************************************
+// Take the input x. w is "how many log2/64 in x?"
+// w = x * 64/log2
+// NJ = int(w)
+// x = NJ*log2/64 + R
+
+// NJ = 64*n + j
+// x = n*log2 + (log2/64)*j + R
+//
+// So, exp(x) = 2^n * 2^(j/64)* exp(R)
+//
+// T = 2^n * 2^(j/64)
+// Construct 2^n
+// Get 2^(j/64) table
+// actually all the entries of 2^(j/64) table are stored in DP and
+// with exponent bits set to 0 -> multiplication by 2^n can be
+// performed by doing logical "or" operation with bits representing 2^n
+
+// exp(R) = 1 + (exp(R) - 1)
+// P = exp(R) - 1 approximated by Taylor series of 3rd degree
+// P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
//
-#include "libm_support.h"
-
-// Assembly macros
-//==============================================================
-// integer registers used
-
- exp_GR_0x0f = r33
- exp_GR_0xf0 = r34
+// The final result is reconstructed as follows
+// exp(x) = T + T*P
- EXP_AD_P_1 = r36
- EXP_AD_P_2 = r37
- EXP_AD_T1 = r38
- EXP_AD_T2 = r39
- exp_GR_Mint = r40
+// Special values
+//*********************************************************************
+// expf(+0) = 1.0
+// expf(-0) = 1.0
- exp_GR_Mint_p_128 = r41
- exp_GR_Ind1 = r42
- EXP_AD_M1 = r43
- exp_GR_Ind2 = r44
- EXP_AD_M2 = r45
+// expf(+qnan) = +qnan
+// expf(-qnan) = -qnan
+// expf(+snan) = +qnan
+// expf(-snan) = -qnan
- exp_GR_min_oflow = r46
- exp_GR_max_zero = r47
- exp_GR_max_norm = r48
- exp_GR_max_uflow = r49
- exp_GR_min_norm = r50
+// expf(-inf) = +0
+// expf(+inf) = +inf
- exp_GR_17ones = r51
- exp_GR_gt_ln = r52
- exp_GR_T2_size = r53
+// Overflow and Underflow
+//*********************************************************************
+// expf(x) = largest single normal when
+// x = 88.72283 = 0x42b17217
- exp_GR_17ones_m1 = r56
- exp_GR_one = r57
+// expf(x) = smallest single normal when
+// x = -87.33654 = 0xc2aeac4f
+// expf(x) = largest round-to-nearest single zero when
+// x = -103.97208 = 0xc2cff1b5
-GR_SAVE_B0 = r53
-GR_SAVE_PFS = r55
-GR_SAVE_GP = r54
+// Registers used
+//*********************************************************************
+// Floating Point registers used:
+// f8, input
+// f6,f7, f9 -> f15, f32 -> f40
-GR_Parameter_X = r59
-GR_Parameter_Y = r60
-GR_Parameter_RESULT = r61
-GR_Parameter_TAG = r62
+// General registers used:
+// r3, r23 -> r38
-FR_X = f10
-FR_Y = f1
-FR_RESULT = f8
+// Predicate registers used:
+// p10 -> p15
+// Assembly macros
+//*********************************************************************
+// integer registers used
+// scratch
+rNJ = r3
+
+rTmp = r23
+rJ = r23
+rN = r24
+rTblAddr = r25
+rA3 = r26
+rExpHalf = r27
+rLn2Div64 = r28
+r17ones_m1 = r29
+rGt_ln = r29
+rRightShifter = r30
+r64DivLn2 = r31
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
// floating point registers used
-
- EXP_MIN_SGL_OFLOW_ARG = f11
- EXP_MAX_SGL_ZERO_ARG = f12
- EXP_MAX_SGL_NORM_ARG = f13
- EXP_MAX_SGL_UFLOW_ARG = f14
- EXP_MIN_SGL_NORM_ARG = f15
-
- exp_coeff_P5 = f32
- exp_coeff_P6 = f33
- exp_coeff_P3 = f34
- exp_coeff_P4 = f35
-
- exp_coeff_P1 = f36
- exp_coeff_P2 = f37
- exp_Mx = f38
- exp_Mfloat = f39
- exp_R = f40
-
- exp_P1 = f41
- exp_P2 = f42
- exp_P3 = f43
- exp_Rsq = f44
- exp_R4 = f45
-
- exp_P4 = f46
- exp_P5 = f47
- exp_P6 = f48
- exp_P7 = f49
- exp_T1 = f50
-
- exp_T2 = f51
- exp_T = f52
- exp_A = f53
- exp_norm_f8 = f54
- exp_wre_urm_f8 = f55
-
- exp_ftz_urm_f8 = f56
- exp_gt_pln = f57
-
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+// scratch
+fRightShifter = f6
+f64DivLn2 = f7
+fNormX = f9
+fNint = f10
+fN = f11
+fR = f12
+fLn2Div64 = f13
+fA2 = f14
+fA3 = f15
+// stacked
+fP = f32
+fT = f33
+fMIN_SGL_OFLOW_ARG = f34
+fMAX_SGL_ZERO_ARG = f35
+fMAX_SGL_NORM_ARG = f36
+fMIN_SGL_NORM_ARG = f37
+fRSqr = f38
+fTmp = f39
+fGt_pln = f39
+fWre_urm_f8 = f40
+fFtz_urm_f8 = f40
+
+
+RODATA
.align 16
-exp_coeff_1_table:
-ASM_TYPE_DIRECTIVE(exp_coeff_1_table,@object)
-data8 0x3F56F35FDE4F8563 // p5
-data8 0x3F2A378BEFECCFDD // p6
-data8 0x3FE00000258C581D // p1
-data8 0x3FC555557AE7B3D4 // p2
-ASM_SIZE_DIRECTIVE(exp_coeff_1_table)
-
-
-exp_coeff_2_table:
-ASM_TYPE_DIRECTIVE(exp_coeff_2_table,@object)
-data8 0x3FA5551BB6592FAE // p3
-data8 0x3F8110E8EBFFD485 // p4
-ASM_SIZE_DIRECTIVE(exp_coeff_2_table)
-
-
-exp_T2_table:
-ASM_TYPE_DIRECTIVE(exp_T2_table,@object)
-data8 0xa175cf9cd7d85844 , 0x00003f46 // exp(-128)
-data8 0xdb7279415a1f9eed , 0x00003f47 // exp(-127)
-data8 0x95213b242bd8ca5f , 0x00003f49 // exp(-126)
-data8 0xcab03c968c989f83 , 0x00003f4a // exp(-125)
-data8 0x89bdb674702961ad , 0x00003f4c // exp(-124)
-data8 0xbb35a2eec278be35 , 0x00003f4d // exp(-123)
-data8 0xfe71b17f373e7e7a , 0x00003f4e // exp(-122)
-data8 0xace9a6ec52a39b63 , 0x00003f50 // exp(-121)
-data8 0xeb03423fe393cf1c , 0x00003f51 // exp(-120)
-data8 0x9fb52c5bcaef1693 , 0x00003f53 // exp(-119)
-data8 0xd910b6377ed60bf1 , 0x00003f54 // exp(-118)
-data8 0x9382dad8a9fdbfe4 , 0x00003f56 // exp(-117)
-data8 0xc87d0a84dea869a3 , 0x00003f57 // exp(-116)
-data8 0x883efb4c6d1087b0 , 0x00003f59 // exp(-115)
-data8 0xb92d7373dce9a502 , 0x00003f5a // exp(-114)
-data8 0xfbaeb020577fb0cb , 0x00003f5b // exp(-113)
-ASM_SIZE_DIRECTIVE(exp_T2_table)
-
-
-exp_T1_table:
-ASM_TYPE_DIRECTIVE(exp_T1_table,@object)
-data8 0x8000000000000000 , 0x00003fff // exp(16 * 0)
-data8 0x87975e8540010249 , 0x00004016 // exp(16 * 1)
-data8 0x8fa1fe625b3163ec , 0x0000402d // exp(16 * 2)
-data8 0x9826b576512a59d7 , 0x00004044 // exp(16 * 3)
-data8 0xa12cc167acbe6902 , 0x0000405b // exp(16 * 4)
-data8 0xaabbcdcc279f59e4 , 0x00004072 // exp(16 * 5)
-data8 0xb4dbfaadc045d16f , 0x00004089 // exp(16 * 6)
-data8 0xbf95e372ccdbf146 , 0x000040a0 // exp(16 * 7)
-data8 0xcaf2a62eea10bbfb , 0x000040b7 // exp(16 * 8)
-data8 0xd6fbeb62fddbd340 , 0x000040ce // exp(16 * 9)
-data8 0xe3bbee32e4a440ea , 0x000040e5 // exp(16 * 10)
-data8 0xf13d8517c34199a8 , 0x000040fc // exp(16 * 11)
-data8 0xff8c2b166241eedd , 0x00004113 // exp(16 * 12)
-data8 0x875a04c0b38d6129 , 0x0000412b // exp(16 * 13)
-data8 0x8f610127db6774d7 , 0x00004142 // exp(16 * 14)
-data8 0x97e1dd87e5c20bb6 , 0x00004159 // exp(16 * 15)
-ASM_SIZE_DIRECTIVE(exp_T1_table)
-
-// Argument Reduction
-// exp_Mx = (int)f8 ==> The value of f8 rounded to int is placed into the
-// significand of exp_Mx as a two's
-// complement number.
-
-// Later we want to have exp_Mx in a general register. Do this with a getf.sig
-// and call the general register exp_GR_Mint
-
-// exp_Mfloat = (float)(int)f8 ==> the two's complement number in
-// significand of exp_Mx is turned
-// into a floating point number.
-// R = 1 - exp_Mfloat ==> reduced argument
-
-// Core Approximation
-// Calculate a series in R
-// R * p6 + p5
-// R * p4 + p3
-// R * p2 + p1
-// R^2
-// R^4
-// R^2(R * p6 + p5) + (R * p4 + p3)
-// R^2(R * p2 + p1)
-// R^4(R^2(R * p6 + p5) + (R * p4 + p3)) + (R^2(R * p2 + p1))
-// R + 1
-// exp(R) = (1 + R) + R^4(R^2(R * p6 + p5) + (R * p4 + p3)) + (R^2(R * p2 + p1))
-// exp(R) = 1 + R + R^2 * p1 + R^3 * p2 + R^4 * p3 + R^5 * p4 + R^6 * p5 + R^7 * p6
-
-// Reconstruction
-// signficand of exp_Mx is two's complement,
-// -103 < x < 89
-// The smallest single denormal is 2^-149 = ssdn
-// For e^x = ssdn
-// x = log(ssdn) = -103.279
-// But with rounding result goes to ssdn until -103.972079
-// The largest single normal is 1.<23 1's> 2^126 ~ 2^127 = lsn
-// For e^x = lsn
-// x = log(lsn) = 88.7228
+LOCAL_OBJECT_START(_expf_table)
+data4 0x42b17218 // Smallest sgl arg to overflow sgl result, +88.7228
+data4 0xc2cff1b5 // Largest sgl for rnd-to-nearest 0 result, -103.9720
+data4 0x42b17217 // Largest sgl arg to give normal sgl result, +88.7228
+data4 0xc2aeac4f // Smallest sgl arg to give normal sgl result, -87.3365
//
-// expf overflows when x > 42b17218 = 88.7228
-// expf returns largest single denormal when x = c2aeac50
-// expf goes to zero when x < c2cff1b5
-
-// Consider range of 8-bit two's complement, -128 ---> 127
-// Add 128; range becomes 0 ---> 255
-
-// The number (=i) in 0 ---> 255 is used as offset into two tables.
-
-// i = abcd efgh = abcd * 16 + efgh = i1 * 16 + i2
-
-// i1 = (exp_GR_Mint + 128) & 0xf0 (show 0xf0 as -0x10 to avoid assembler error)
-// (The immediate in the AND is an 8-bit two's complement)
-// i1 = i1 + start of T1 table (EXP_AD_T1)
-// Note that the entries in T1 are double-extended numbers on 16-byte boundaries
-// and that i1 is already shifted left by 16 after the AND.
-
-// i2 must be shifted left by 4 before adding to the start of the table.
-// i2 = ((exp_GR_Mint + 128) & 0x0f) << 4
-// i2 = i2 + start of T2 table (EXP_AD_T2)
-
-// T = T1 * T2
-// A = T * (1 + R)
-// answer = T * (R^2 * p1 + R^3 * p2 + R^4 * p3 + R^5 * p4 + R^6 * p5 + R^7 * p6) +
-// T * (1 + R)
-// = T * exp(R)
-
+// 2^(j/64) table, j goes from 0 to 63
+data8 0x0000000000000000 // 2^(0/64)
+data8 0x00002C9A3E778061 // 2^(1/64)
+data8 0x000059B0D3158574 // 2^(2/64)
+data8 0x0000874518759BC8 // 2^(3/64)
+data8 0x0000B5586CF9890F // 2^(4/64)
+data8 0x0000E3EC32D3D1A2 // 2^(5/64)
+data8 0x00011301D0125B51 // 2^(6/64)
+data8 0x0001429AAEA92DE0 // 2^(7/64)
+data8 0x000172B83C7D517B // 2^(8/64)
+data8 0x0001A35BEB6FCB75 // 2^(9/64)
+data8 0x0001D4873168B9AA // 2^(10/64)
+data8 0x0002063B88628CD6 // 2^(11/64)
+data8 0x0002387A6E756238 // 2^(12/64)
+data8 0x00026B4565E27CDD // 2^(13/64)
+data8 0x00029E9DF51FDEE1 // 2^(14/64)
+data8 0x0002D285A6E4030B // 2^(15/64)
+data8 0x000306FE0A31B715 // 2^(16/64)
+data8 0x00033C08B26416FF // 2^(17/64)
+data8 0x000371A7373AA9CB // 2^(18/64)
+data8 0x0003A7DB34E59FF7 // 2^(19/64)
+data8 0x0003DEA64C123422 // 2^(20/64)
+data8 0x0004160A21F72E2A // 2^(21/64)
+data8 0x00044E086061892D // 2^(22/64)
+data8 0x000486A2B5C13CD0 // 2^(23/64)
+data8 0x0004BFDAD5362A27 // 2^(24/64)
+data8 0x0004F9B2769D2CA7 // 2^(25/64)
+data8 0x0005342B569D4F82 // 2^(26/64)
+data8 0x00056F4736B527DA // 2^(27/64)
+data8 0x0005AB07DD485429 // 2^(28/64)
+data8 0x0005E76F15AD2148 // 2^(29/64)
+data8 0x0006247EB03A5585 // 2^(30/64)
+data8 0x0006623882552225 // 2^(31/64)
+data8 0x0006A09E667F3BCD // 2^(32/64)
+data8 0x0006DFB23C651A2F // 2^(33/64)
+data8 0x00071F75E8EC5F74 // 2^(34/64)
+data8 0x00075FEB564267C9 // 2^(35/64)
+data8 0x0007A11473EB0187 // 2^(36/64)
+data8 0x0007E2F336CF4E62 // 2^(37/64)
+data8 0x00082589994CCE13 // 2^(38/64)
+data8 0x000868D99B4492ED // 2^(39/64)
+data8 0x0008ACE5422AA0DB // 2^(40/64)
+data8 0x0008F1AE99157736 // 2^(41/64)
+data8 0x00093737B0CDC5E5 // 2^(42/64)
+data8 0x00097D829FDE4E50 // 2^(43/64)
+data8 0x0009C49182A3F090 // 2^(44/64)
+data8 0x000A0C667B5DE565 // 2^(45/64)
+data8 0x000A5503B23E255D // 2^(46/64)
+data8 0x000A9E6B5579FDBF // 2^(47/64)
+data8 0x000AE89F995AD3AD // 2^(48/64)
+data8 0x000B33A2B84F15FB // 2^(49/64)
+data8 0x000B7F76F2FB5E47 // 2^(50/64)
+data8 0x000BCC1E904BC1D2 // 2^(51/64)
+data8 0x000C199BDD85529C // 2^(52/64)
+data8 0x000C67F12E57D14B // 2^(53/64)
+data8 0x000CB720DCEF9069 // 2^(54/64)
+data8 0x000D072D4A07897C // 2^(55/64)
+data8 0x000D5818DCFBA487 // 2^(56/64)
+data8 0x000DA9E603DB3285 // 2^(57/64)
+data8 0x000DFC97337B9B5F // 2^(58/64)
+data8 0x000E502EE78B3FF6 // 2^(59/64)
+data8 0x000EA4AFA2A490DA // 2^(60/64)
+data8 0x000EFA1BEE615A27 // 2^(61/64)
+data8 0x000F50765B6E4540 // 2^(62/64)
+data8 0x000FA7C1819E90D8 // 2^(63/64)
+LOCAL_OBJECT_END(_expf_table)
-.global expf#
.section .text
-.proc expf#
-.align 32
-expf:
-#ifdef _LIBC
-.global __ieee754_expf#
-__ieee754_expf:
-#endif
-
-{ .mfi
- alloc r32 = ar.pfs,1,26,4,0
- fcvt.fx.s1 exp_Mx = f8
- mov exp_GR_17ones = 0x1FFFF
+GLOBAL_IEEE754_ENTRY(expf)
+
+{ .mlx
+ addl rTblAddr = @ltoff(_expf_table),gp
+ movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
}
{ .mlx
- addl EXP_AD_P_1 = @ltoff(exp_coeff_1_table),gp
- movl exp_GR_min_oflow = 0x42b17218
+ addl rA3 = 0x3E2AA, r0 // high bits of 1.0/6.0 rounded to SP
+ movl rRightShifter = 0x43E8000000000000 // DP Right Shifter
}
;;
-// Fnorm done to take any enabled faults
{ .mfi
- ld8 EXP_AD_P_1 = [EXP_AD_P_1]
- fclass.m p6,p0 = f8, 0x07 //@zero
- nop.i 999
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ fclass.m p14, p0 = f8, 0x22 // test for -INF
+ shl rA3 = rA3, 12 // 0x3E2AA000, approx to 1.0/6.0 in SP
}
{ .mfi
- add exp_GR_max_norm = -1, exp_GR_min_oflow // 0x42b17217
- fnorm exp_norm_f8 = f8
- nop.i 999
+ nop.m 0
+ fnorm.s1 fNormX = f8 // normalized x
+ addl rExpHalf = 0xFFFE, r0 // exponent of 1/2
}
;;
{ .mfi
- setf.s EXP_MIN_SGL_OFLOW_ARG = exp_GR_min_oflow // 0x42b17218
- fclass.m p7,p0 = f8, 0x22 // Test for x=-inf
- mov exp_GR_0xf0 = 0x0f0
+ setf.d f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
+ fclass.m p15, p0 = f8, 0x1e1 // test for NaT,NaN,+Inf
+ nop.i 0
}
{ .mlx
- setf.s EXP_MAX_SGL_NORM_ARG = exp_GR_max_norm
- movl exp_GR_max_zero = 0xc2cff1b5
+ // load Right Shifter to FP reg
+ setf.d fRightShifter = rRightShifter
+ movl rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
}
;;
-
-{ .mlx
- mov exp_GR_0x0f = 0x00f
- movl exp_GR_max_uflow = 0xc2aeac50
+{ .mfi
+ nop.m 0
+ fcmp.eq.s1 p13, p0 = f0, f8 // test for x = 0.0
+ nop.i 0
}
{ .mfb
- nop.m 999
-(p6) fma.s f8 = f1,f1,f0
-(p6) br.ret.spnt b0 // quick exit for x=0
+ setf.s fA3 = rA3 // load A3 to FP reg
+(p14) fma.s.s0 f8 = f0, f1, f0 // result if x = -inf
+(p14) br.ret.spnt b0 // exit here if x = -inf
}
;;
{ .mfi
- setf.s EXP_MAX_SGL_ZERO_ARG = exp_GR_max_zero
- fclass.m p8,p0 = f8, 0x21 // Test for x=+inf
- adds exp_GR_min_norm = 1, exp_GR_max_uflow // 0xc2aeac51
+ setf.exp fA2 = rExpHalf // load A2 to FP reg
+ fcmp.eq.s0 p6, p0 = f8, f0 // Dummy to flag denorm
+ nop.i 0
}
{ .mfb
- ldfpd exp_coeff_P5,exp_coeff_P6 = [EXP_AD_P_1],16
-(p7) fma.s f8 = f0,f0,f0
-(p7) br.ret.spnt b0 // quick exit for x=-inf
+ setf.d fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
+(p15) fma.s.s0 f8 = f8, f1, f0 // result if x = NaT,NaN,+Inf
+(p15) br.ret.spnt b0 // exit here if x = NaT,NaN,+Inf
}
;;
-{ .mmf
- ldfpd exp_coeff_P1,exp_coeff_P2 = [EXP_AD_P_1],16
- setf.s EXP_MAX_SGL_UFLOW_ARG = exp_GR_max_uflow
- fclass.m p9,p0 = f8, 0xc3 // Test for x=nan
-}
-;;
-
-{ .mmb
- ldfpd exp_coeff_P3,exp_coeff_P4 = [EXP_AD_P_1],16
- setf.s EXP_MIN_SGL_NORM_ARG = exp_GR_min_norm
-(p8) br.ret.spnt b0 // quick exit for x=+inf
+{ .mfb
+ // overflow and underflow_zero threshold
+ ldfps fMIN_SGL_OFLOW_ARG, fMAX_SGL_ZERO_ARG = [rTblAddr], 8
+(p13) fma.s.s0 f8 = f1, f1, f0 // result if x = 0.0
+(p13) br.ret.spnt b0 // exit here if x =0.0
}
;;
-// EXP_AD_P_1 now points to exp_T2_table
+ // max normal and underflow_denorm threshold
{ .mfi
- mov exp_GR_T2_size = 0x100
- fcvt.xf exp_Mfloat = exp_Mx
- nop.i 999
+ ldfps fMAX_SGL_NORM_ARG, fMIN_SGL_NORM_ARG = [rTblAddr], 8
+ nop.f 0
+ nop.i 0
}
;;
-{ .mfb
- getf.sig exp_GR_Mint = exp_Mx
-(p9) fmerge.s f8 = exp_norm_f8, exp_norm_f8
-(p9) br.ret.spnt b0 // quick exit for x=nan
+{ .mfi
+ nop.m 0
+ // x*(64/ln(2)) + Right Shifter
+ fma.s1 fNint = fNormX, f64DivLn2, fRightShifter
+ nop.i 0
}
;;
-{ .mmi
- nop.m 999
- mov EXP_AD_T2 = EXP_AD_P_1
- add EXP_AD_T1 = exp_GR_T2_size,EXP_AD_P_1 ;;
-}
-
-
-{ .mmi
- adds exp_GR_Mint_p_128 = 0x80,exp_GR_Mint ;;
- and exp_GR_Ind1 = exp_GR_Mint_p_128, exp_GR_0xf0
- and exp_GR_Ind2 = exp_GR_Mint_p_128, exp_GR_0x0f ;;
-}
-
// Divide arguments into the following categories:
-// Certain Underflow/zero p11 - -inf < x <= MAX_SGL_ZERO_ARG
-// Certain Underflow p12 - MAX_SGL_ZERO_ARG < x <= MAX_SGL_UFLOW_ARG
-// Possible Underflow p13 - MAX_SGL_UFLOW_ARG < x < MIN_SGL_NORM_ARG
+// Certain Underflow p11 - -inf < x <= MAX_SGL_ZERO_ARG
+// Possible Underflow p13 - MAX_SGL_ZERO_ARG < x < MIN_SGL_NORM_ARG
// Certain Safe - MIN_SGL_NORM_ARG <= x <= MAX_SGL_NORM_ARG
// Possible Overflow p14 - MAX_SGL_NORM_ARG < x < MIN_SGL_OFLOW_ARG
// Certain Overflow p15 - MIN_SGL_OFLOW_ARG <= x < +inf
//
-// If the input is really a single arg, then there will never be "Possible
-// Underflow" or "Possible Overflow" arguments.
+// If the input is really a single arg, then there will never be
+// "Possible Overflow" arguments.
//
{ .mfi
- add EXP_AD_M1 = exp_GR_Ind1,EXP_AD_T1
- fcmp.ge.s1 p15,p14 = exp_norm_f8,EXP_MIN_SGL_OFLOW_ARG
- nop.i 999
-}
-{ .mfi
- shladd EXP_AD_M2 = exp_GR_Ind2,4,EXP_AD_T2
- fms.s1 exp_R = f1,f8,exp_Mfloat
- nop.i 999 ;;
+ nop.m 0
+ // check for overflow
+ fcmp.ge.s1 p15, p0 = fNormX, fMIN_SGL_OFLOW_ARG
+ nop.i 0
}
+;;
{ .mfi
- ldfe exp_T1 = [EXP_AD_M1]
- fcmp.le.s1 p11,p12 = exp_norm_f8,EXP_MAX_SGL_ZERO_ARG
- nop.i 999 ;;
+ nop.m 0
+ // check for underflow and tiny (+0) result
+ fcmp.le.s1 p11, p0 = fNormX, fMAX_SGL_ZERO_ARG
+ nop.i 0
}
-
{ .mfb
- ldfe exp_T2 = [EXP_AD_M2]
-(p14) fcmp.gt.s1 p14,p0 = exp_norm_f8,EXP_MAX_SGL_NORM_ARG
-(p15) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) ;;
-}
-
-{ .mfb
- nop.m 999
-(p12) fcmp.le.s1 p12,p0 = exp_norm_f8,EXP_MAX_SGL_UFLOW_ARG
-(p11) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW_ZERO)
+ nop.m 0
+ fms.s1 fN = fNint, f1, fRightShifter // n in FP register
+ // branch out if overflow
+(p15) br.cond.spnt EXP_CERTAIN_OVERFLOW
}
;;
-{ .mfi
- nop.m 999
-(p13) fcmp.lt.s1 p13,p0 = exp_norm_f8,EXP_MIN_SGL_NORM_ARG
- nop.i 999
+{ .mfb
+ getf.sig rNJ = fNint // bits of n, j
+ // check for underflow and denormal result
+ fcmp.lt.s1 p13, p0 = fNormX, fMIN_SGL_NORM_ARG
+ // branch out if underflow and tiny (+0) result
+(p11) br.cond.spnt EXP_CERTAIN_UNDERFLOW
}
;;
-
{ .mfi
- nop.m 999
- fma.s1 exp_Rsq = exp_R,exp_R,f0
- nop.i 999
+ nop.m 0
+ // check for possible overflow
+ fcmp.gt.s1 p14, p0 = fNormX, fMAX_SGL_NORM_ARG
+ extr.u rJ = rNJ, 0, 6 // bits of j
}
{ .mfi
- nop.m 999
- fma.s1 exp_P3 = exp_R,exp_coeff_P2,exp_coeff_P1
- nop.i 999
+ addl rN = 0xFFFF - 63, rNJ // biased and shifted n
+ fnma.s1 fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
+ nop.i 0
}
;;
{ .mfi
- nop.m 999
- fma.s1 exp_P1 = exp_R,exp_coeff_P6,exp_coeff_P5
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 exp_P2 = exp_R,exp_coeff_P4,exp_coeff_P3
- nop.i 999
+ shladd rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
+ nop.f 0
+ shr rN = rN, 6 // biased n
}
;;
-
{ .mfi
- nop.m 999
- fma.s1 exp_P7 = f1,exp_R,f1
- nop.i 999
+ ld8 rJ = [rJ]
+ nop.f 0
+ shl rN = rN, 52 // 2^n bits in DP format
}
;;
-
-{ .mfi
- nop.m 999
- fma.s1 exp_P5 = exp_Rsq,exp_P3,f0
- nop.i 999
-}
{ .mfi
- nop.m 999
- fma.s1 exp_R4 = exp_Rsq,exp_Rsq,f0
- nop.i 999
+ or rN = rN, rJ // bits of 2^n * 2^(j/64) in DP format
+ nop.f 0
+ nop.i 0
}
;;
{ .mfi
- nop.m 999
- fma.s1 exp_T = exp_T1,exp_T2,f0
- nop.i 999
+ setf.d fT = rN // 2^n * 2^(j/64)
+ fma.s1 fP = fA3, fR, fA2 // A3*R + A2
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s1 exp_P4 = exp_Rsq,exp_P1,exp_P2
- nop.i 999
+ nop.m 0
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+ nop.i 0
}
;;
{ .mfi
- nop.m 999
- fma.s1 exp_A = exp_T,exp_P7,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 exp_P6 = exp_R4,exp_P4,exp_P5
- nop.i 999
+ nop.m 0
+ fma.s1 fP = fP, fRSqr, fR // P = (A3*R + A2)*R^2 + R
+ nop.i 0
}
;;
-{ .bbb
-(p12) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW)
-(p13) br.cond.spnt L(EXP_POSSIBLE_UNDERFLOW)
-(p14) br.cond.spnt L(EXP_POSSIBLE_OVERFLOW)
+{ .mbb
+ nop.m 0
+ // branch out if possible underflow
+(p13) br.cond.spnt EXP_POSSIBLE_UNDERFLOW
+ // branch out if possible overflow result
+(p14) br.cond.spnt EXP_POSSIBLE_OVERFLOW
}
;;
{ .mfb
- nop.m 999
- fma.s f8 = exp_T,exp_P6,exp_A
- br.ret.sptk b0
+ nop.m 0
+ // final result in the absence of over- and underflow
+ fma.s.s0 f8 = fP, fT, fT
+ // exit here in the absence of over- and underflow
+ br.ret.sptk b0
}
;;
-L(EXP_POSSIBLE_OVERFLOW):
-
-// We got an answer. EXP_MAX_SGL_NORM_ARG < x < EXP_MIN_SGL_OFLOW_ARG
-// overflow is a possibility, not a certainty
-// Set wre in s2 and perform the last operation with s2
-
-// We define an overflow when the answer with
-// WRE set
-// user-defined rounding mode
-// is lsn +1
-
-// Is the exponent 1 more than the largest single?
-// If so, go to ERROR RETURN, else (no overflow) get the answer and
-// leave.
-
-// Largest single is FE (biased single)
-// FE - 7F + FFFF = 1007E
+EXP_POSSIBLE_OVERFLOW:
-// Create + largest_single_plus_ulp
-// Create - largest_single_plus_ulp
+// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
+// This cannot happen if input is a single, only if input is higher precision.
+// Overflow is a possibility, not a certainty.
-// Calculate answer with WRE set.
-
-// Cases when answer is lsn+1 are as follows:
-
-// midpoint
-// |
-// lsn | lsn+1
-// --+----------|----------+------------
-// |
-// +inf +inf -inf
-// RN RN
-// RZ
-// exp_gt_pln contains the floating point number lsn+1.
-// The setf.exp puts 0x1007f in the exponent and 0x800... in the significand.
-
-// If the answer is >= lsn+1, we have overflowed.
-// Then p6 is TRUE. Set the overflow tag, save input in FR_X,
-// do the final calculation for IEEE result, and branch to error return.
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest single, then we have
+// overflow
{ .mfi
- mov exp_GR_gt_ln = 0x1007F
- fsetc.s2 0x7F,0x42
- nop.i 999
+ mov rGt_ln = 0x1007f // Exponent for largest single + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
}
;;
{ .mfi
- setf.exp exp_gt_pln = exp_GR_gt_ln
- fma.s.s2 exp_wre_urm_f8 = exp_T, exp_P6, exp_A
- nop.i 999
+ setf.exp fGt_pln = rGt_ln // Create largest single + 1 ulp
+ fma.s.s2 fWre_urm_f8 = fP, fT, fT // Result with wre set
+ nop.i 0
}
;;
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x40
- nop.i 999
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
}
;;
{ .mfi
- nop.m 999
- fcmp.ge.unc.s1 p6, p0 = exp_wre_urm_f8, exp_gt_pln
- nop.i 999
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
}
;;
{ .mfb
- nop.m 999
- nop.f 999
-(p6) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) // Branch if really overflow
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt EXP_CERTAIN_OVERFLOW // Branch if overflow
}
;;
{ .mfb
- nop.m 999
- fma.s f8 = exp_T, exp_P6, exp_A
- br.ret.sptk b0 // Exit if really no overflow
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fT
+ br.ret.sptk b0 // Exit if really no overflow
}
;;
-L(EXP_CERTAIN_OVERFLOW):
+// here if overflow
+EXP_CERTAIN_OVERFLOW:
{ .mmi
- sub exp_GR_17ones_m1 = exp_GR_17ones, r0, 1 ;;
- setf.exp f9 = exp_GR_17ones_m1
- nop.i 999 ;;
+ addl r17ones_m1 = 0x1FFFE, r0
+;;
+ setf.exp fTmp = r17ones_m1
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fmerge.s FR_X = f8,f8
- nop.i 999
+ alloc r32=ar.pfs,0,3,4,0
+ fmerge.s FR_X = f8,f8
+ nop.i 0
}
{ .mfb
- mov GR_Parameter_TAG = 16
- fma.s FR_RESULT = f9, f9, f0 // Set I,O and +INF result
- br.cond.sptk __libm_error_region ;;
+ mov GR_Parameter_TAG = 16
+ fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
}
+;;
-L(EXP_POSSIBLE_UNDERFLOW):
+EXP_POSSIBLE_UNDERFLOW:
-// We got an answer. EXP_MAX_SGL_UFLOW_ARG < x < EXP_MIN_SGL_NORM_ARG
-// underflow is a possibility, not a certainty
+// Here if fMAX_SGL_ZERO_ARG < x < fMIN_SGL_NORM_ARG
+// Underflow is a possibility, not a certainty
// We define an underflow when the answer with
// ftz set
@@ -637,144 +549,157 @@ L(EXP_POSSIBLE_UNDERFLOW):
// E
// -----+--------------------+--------------------+-----
// | | |
-// 1.1...10 2^-7f 1.1...11 2^-7f 1.0...00 2^-7e
-// 0.1...11 2^-7e (biased, 1)
+// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
+// 0.1...11 2^-3ffe (biased, 1)
// largest dn smallest normal
-// If the answer is = 0, we have underflowed.
-// Then p6 is TRUE. Set the underflow tag, save input in FR_X,
-// do the final calculation for IEEE result, and branch to error return.
-
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x41
- nop.i 999
+ nop.m 0
+ fsetc.s2 0x7F,0x41 // Get user's round mode, set ftz
+ nop.i 0
}
;;
{ .mfi
- nop.m 999
- fma.s.s2 exp_ftz_urm_f8 = exp_T, exp_P6, exp_A
- nop.i 999
+ nop.m 0
+ fma.s.s2 fFtz_urm_f8 = fP, fT, fT // Result with ftz set
+ nop.i 0
}
;;
-
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x40
- nop.i 999
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off ftz in sf2
+ nop.i 0
}
;;
{ .mfi
- nop.m 999
- fcmp.eq.unc.s1 p6, p0 = exp_ftz_urm_f8, f0
- nop.i 999
+ nop.m 0
+ fcmp.eq.s1 p6, p7 = fFtz_urm_f8, f0 // Test for underflow
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fT // Compute result, set I, maybe U
+ nop.i 0
}
;;
-{ .mfb
- nop.m 999
- nop.f 999
-(p6) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) // Branch if really underflow
+{ .mbb
+ nop.m 0
+(p6) br.cond.spnt EXP_UNDERFLOW_COMMON // Branch if really underflow
+(p7) br.ret.sptk b0 // Exit if really no underflow
+}
+;;
+
+EXP_CERTAIN_UNDERFLOW:
+// Here if x <= fMAX_SGL_ZERO_ARG
+// Result will be zero (or smallest denorm if round to +inf) with I, U set
+{ .mmi
+ mov rTmp = 1
+;;
+ setf.exp fTmp = rTmp // Form small normal
+ nop.i 0
}
;;
{ .mfb
- nop.m 999
- fma.s f8 = exp_T, exp_P6, exp_A
- br.ret.sptk b0 // Exit if really no underflow
+ nop.m 0
+ fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
+ br.cond.sptk EXP_UNDERFLOW_COMMON
}
;;
-L(EXP_CERTAIN_UNDERFLOW):
+EXP_UNDERFLOW_COMMON:
+// Determine if underflow result is zero or nonzero
{ .mfi
- nop.m 999
- fmerge.s FR_X = f8,f8
- nop.i 999
+ alloc r32=ar.pfs,0,3,4,0
+ fcmp.eq.s1 p6, p0 = f8, f0
+ nop.i 0
}
+;;
+
{ .mfb
- mov GR_Parameter_TAG = 17
- fma.s FR_RESULT = exp_T, exp_P6, exp_A // Set I,U and tiny result
- br.cond.sptk __libm_error_region ;;
+ nop.m 0
+ fmerge.s FR_X = fNormX,fNormX
+(p6) br.cond.spnt EXP_UNDERFLOW_ZERO
}
+;;
-L(EXP_CERTAIN_UNDERFLOW_ZERO):
-{ .mmi
- mov exp_GR_one = 1 ;;
- setf.exp f9 = exp_GR_one
- nop.i 999 ;;
+EXP_UNDERFLOW_NONZERO:
+// Here if x < fMIN_SGL_NORM_ARG and result nonzero;
+// I, U are set
+{ .mfb
+ mov GR_Parameter_TAG = 17
+ nop.f 0 // FR_RESULT already set
+ br.cond.sptk __libm_error_region
}
+;;
-{ .mfi
- nop.m 999
- fmerge.s FR_X = f8,f8
- nop.i 999
-}
+EXP_UNDERFLOW_ZERO:
+// Here if x < fMIN_SGL_NORM_ARG and result zero;
+// I, U are set
{ .mfb
- mov GR_Parameter_TAG = 17
- fma.s FR_RESULT = f9, f9, f0 // Set I,U and tiny (+0.0) result
- br.cond.sptk __libm_error_region ;;
+ mov GR_Parameter_TAG = 17
+ nop.f 0 // FR_RESULT already set
+ br.cond.sptk __libm_error_region
}
+;;
-.endp expf
-ASM_SIZE_DIRECTIVE(expf)
-
+GLOBAL_IEEE754_END(expf)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
- nop.f 999
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
- stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mfi
- stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- nop.f 0
- add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ nop.f 0
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
}
{ .mib
- stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
{ .mmi
- ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
-};;
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/e_fmod.S b/sysdeps/ia64/fpu/e_fmod.S
index 2b3ee9610f..d801e0c128 100644
--- a/sysdeps/ia64/fpu/e_fmod.S
+++ b/sysdeps/ia64/fpu/e_fmod.S
@@ -1,11 +1,10 @@
.file "fmod.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
-// Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,38 +35,42 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
-// 2/02/00 Initial version
-// 3/02/00 New Algorithm
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 03/02/00 New Algorithm
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-//11/28/00 Set FR_Y to f9
+// 11/28/00 Set FR_Y to f9
+// 03/11/02 Fixed flags for fmod(qnan,zero)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
//
// API
//====================================================================
-// double fmod(double,double);
+// double fmod(double,double);
//
// Overview of operation
//====================================================================
// fmod(a,b)=a-i*b,
-// where i is an integer such that, if b!=0,
+// where i is an integer such that, if b!=0,
// |i|<|a/b| and |a/b-i|<1
//
// Algorithm
//====================================================================
// a). if |a|<|b|, return a
-// b). get quotient and reciprocal overestimates accurate to
+// b). get quotient and reciprocal overestimates accurate to
// 33 bits (q2,y2)
// c). if the exponent difference (exponent(a)-exponent(b))
// is less than 32, truncate quotient to integer and
// finish in one iteration
// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
// round quotient estimate to single precision (k=RN(q2)),
-// calculate partial remainder (a'=a-k*b),
+// calculate partial remainder (a'=a-k*b),
// get quotient estimate (a'*y2), and repeat from c).
//
// Special cases
@@ -81,14 +84,9 @@
// General registers: r2,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15
-#include "libm_support.h"
-
-.section .text
-
-
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
-GR_SAVE_GP = r35
+GR_SAVE_GP = r35
GR_SAVE_SP = r36
GR_Parameter_X = r37
@@ -101,17 +99,9 @@ FR_Y = f9
FR_RESULT = f8
-.proc fmod#
-.align 32
-.global fmod#
-.align 32
+.section .text
+GLOBAL_IEEE754_ENTRY(fmod)
-fmod:
-#ifdef _LIBC
-.global __ieee754_fmod
-.type __ieee754_fmod,@function
-__ieee754_fmod:
-#endif
// inputs in f8, f9
// result in f8
@@ -133,12 +123,12 @@ __ieee754_fmod:
// (1) y0
frcpa.s1 f10,p6=f6,f7
nop.i 0
-}
+}
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0xe7
+ fclass.m.unc p7,p0 = f9, 0xe7
nop.i 999;;
}
@@ -149,14 +139,14 @@ __ieee754_fmod:
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p0 = f8, 0xe3
- nop.i 999
+ fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
-(p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
+ fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
@@ -172,33 +162,33 @@ __ieee754_fmod:
// (2) q0=a*y0
(p6) fma.s1 f13=f6,f10,f0
nop.i 0
-}
+}
{ .mfi
nop.m 0
// (3) e0 = 1 - b * y0
(p6) fnma.s1 f12=f7,f10,f1
nop.i 0;;
-}
+}
{.mfi
nop.m 0
// normalize x (if |x|<|y|)
(p8) fma.d.s0 f8=f8,f1,f0
nop.i 0
-}
+}
{.bbb
- (p9) br.cond.spnt L(FMOD_X_NAN_INF)
- (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
+ (p9) br.cond.spnt FMOD_X_NAN_INF
+ (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
- {.mfi
+ {.mfi
nop.m 0
// normalize x
fma.s0 f6=f6,f1,f0
nop.i 0
-}
+}
{.mfi
nop.m 0
// normalize y
@@ -212,45 +202,45 @@ __ieee754_fmod:
// (4) q1=q0+e0*q0
(p6) fma.s1 f13=f12,f13,f13
nop.i 0
-}
+}
{ .mfi
nop.m 0
// (5) e1 = e0 * e0 + 2^-34
(p6) fma.s1 f14=f12,f12,f11
nop.i 0;;
-}
+}
{.mlx
nop.m 0
movl r2=0x33a00000;;
-}
+}
{ .mfi
nop.m 0
// (6) y1 = y0 + e0 * y0
(p6) fma.s1 f10=f12,f10,f10
nop.i 0;;
-}
+}
{.mfi
// set f12=1.25*2^{-24}
setf.s f12=r2
// (7) q2=q1+e1*q1
(p6) fma.s1 f13=f13,f14,f13
nop.i 0;;
-}
+}
{.mfi
nop.m 0
fmerge.s f9=f8,f9
nop.i 0
-}
+}
{ .mfi
nop.m 0
// (8) y2 = y1 + e1 * y1
(p6) fma.s1 f10=f14,f10,f10
// set p6=0, p10=0
cmp.ne.and p6,p10=r0,r0;;
-}
+}
.align 32
-L(loop53):
+loop53:
{.mfi
nop.m 0
// compare q2, 2^32
@@ -280,7 +270,7 @@ L(loop53):
// normalize truncated quotient
(p8) fcvt.xf f13=f11
nop.i 0;;
-}
+}
{ .mfi
nop.m 0
// calculate remainder (assuming f13=RZ(Q))
@@ -289,7 +279,7 @@ L(loop53):
}
{.mfi
nop.m 0
- // also if exponent>32, round quotient to single precision
+ // also if exponent>32, round quotient to single precision
// and subtract 1 ulp: q=q-q*(1.25*2^{-24})
(p7) fnma.s.s1 f11=f13,f12,f13
nop.i 0;;
@@ -332,7 +322,7 @@ L(loop53):
.pred.rel "mutex",p6,p10
{.mfb
nop.m 0
- // add b to estimated remainder (to cover the case when the quotient was overestimated)
+ // add b to estimated remainder (to cover the case when the quotient was overestimated)
// also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
(p6) fma.d.s0 f8=f11,f12,f9
nop.b 0
@@ -354,97 +344,114 @@ L(loop53):
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
- br.cond.sptk L(loop53);;
+ br.cond.sptk loop53;;
}
-L(FMOD_X_NAN_INF):
+FMOD_X_NAN_INF:
// Y zero ?
-{.mfi
+{.mfi
+ nop.m 0
+ fclass.m p10,p0=f8,0xc3 // Test x=nan
+ nop.i 0
+}
+{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
+
{.mfi
+ nop.m 0
+ fma.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
+ nop.i 0;;
+}
+
+{.mfb
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
- nop.i 0;;
+(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
}
{.mib
nop.m 0
nop.i 0
// if Y zero
- (p11) br.cond.spnt L(FMOD_Y_ZERO);;
+ (p11) br.cond.spnt FMOD_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
-(p0) fclass.m.unc p8,p9 = f8, 0x23
- nop.i 999;;
+ fclass.m.unc p8,p9 = f8, 0x23
+ nop.i 999;;
}
// Y NaN ?
{.mfi
- nop.m 999
+ nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
- nop.i 0;;
+ nop.i 0;;
}
{.mfi
- nop.m 999
-(p8) frcpa.s0 f8,p0 = f8,f8
+ nop.m 999
+(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
-}
+}
{ .mfi
nop.m 999
- // also set Denormal flag if necessary
+ // also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p8) fma.d f8=f8,f1,f0
- nop.b 999 ;;
+(p8) fma.d.s0 f8=f8,f1,f0
+ nop.b 999 ;;
}
{ .mfb
nop.m 999
-(p9) frcpa.s0 f8,p7=f8,f9
- br.ret.sptk b0 ;;
+(p9) frcpa.s0 f8,p7=f8,f9
+ br.ret.sptk b0 ;;
}
-L(FMOD_Y_NAN_INF_ZERO):
+FMOD_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x23
+ fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p7) fma.d f8=f8,f1,f0
-(p7) br.ret.spnt b0 ;;
+(p7) fma.d.s0 f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p0 = f9, 0xc3
+ fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p9) fma.d f8=f9,f1,f0
-(p9) br.ret.spnt b0 ;;
+(p9) fma.d.s0 f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
}
-L(FMOD_Y_ZERO):
+FMOD_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@@ -452,60 +459,56 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 0
// set Invalid
- frcpa f12,p0=f0,f0
+ frcpa.s0 f12,p0=f0,f0
nop.i 0
}
// X NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p10) fclass.nm p9,p10 = f8, 0xff
+(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
{.mfi
nop.m 999
- (p9) frcpa f11,p7=f8,f0
+ (p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
-(p10) frcpa f11,p7 = f9,f9
-(p0) mov GR_Parameter_TAG = 121 ;;
+(p10) frcpa.s0 f11,p7 = f9,f9
+ mov GR_Parameter_TAG = 121 ;;
}
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8, f8
+ fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfb
nop.m 999
-(p0) fma.d f8=f11,f1,f0
-(p0) br.sptk __libm_error_region;;
+ fma.d.s0 f8=f11,f1,f0
+ br.sptk __libm_error_region;;
}
-.endp fmod
-ASM_SIZE_DIRECTIVE(fmod)
-ASM_SIZE_DIRECTIVE(__ieee754_fmod)
-
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(fmod)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
-.fframe 64
+.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@@ -513,18 +516,18 @@ __libm_error_region:
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
- stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0 // Parameter 3 address
}
{ .mib
stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
+ add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@@ -539,13 +542,17 @@ __libm_error_region:
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
+ mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
-};;
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
+
+
+
diff --git a/sysdeps/ia64/fpu/e_fmodf.S b/sysdeps/ia64/fpu/e_fmodf.S
index 5b6390eeec..fe1ec0304d 100644
--- a/sysdeps/ia64/fpu/e_fmodf.S
+++ b/sysdeps/ia64/fpu/e_fmodf.S
@@ -1,10 +1,10 @@
.file "fmodf.s"
-// Copyright (c) 2000, 2001, Intel Corporation
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
-// Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,9 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// WARRANTY DISCLAIMER
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -37,38 +35,42 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
-// 2/02/00 Initial version
-// 3/02/00 New Algorithm
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 03/02/00 New Algorithm
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-//11/28/00 Set FR_Y to f9
+// 11/28/00 Set FR_Y to f9
+// 03/11/02 Fixed flags for fmodf(qnan,zero)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
//
// API
//====================================================================
-// float fmodf(float,float);
+// float fmodf(float,float);
//
// Overview of operation
//====================================================================
// fmod(a,b)=a-i*b,
-// where i is an integer such that, if b!=0,
+// where i is an integer such that, if b!=0,
// |i|<|a/b| and |a/b-i|<1
// Algorithm
//====================================================================
// a). if |a|<|b|, return a
-// b). get quotient and reciprocal overestimates accurate to
+// b). get quotient and reciprocal overestimates accurate to
// 33 bits (q2,y2)
// c). if the exponent difference (exponent(a)-exponent(b))
// is less than 32, truncate quotient to integer and
// finish in one iteration
// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
// round quotient estimate to single precision (k=RN(q2)),
-// calculate partial remainder (a'=a-k*b),
+// calculate partial remainder (a'=a-k*b),
// get quotient estimate (a'*y2), and repeat from c).
// Special cases
@@ -82,13 +84,9 @@
// General registers: r2,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15
-#include "libm_support.h"
-
-.section .text
-
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
-GR_SAVE_GP = r35
+GR_SAVE_GP = r35
GR_SAVE_SP = r36
GR_Parameter_X = r37
@@ -101,18 +99,9 @@ FR_Y = f9
FR_RESULT = f8
+.section .text
+GLOBAL_IEEE754_ENTRY(fmodf)
-.proc fmodf#
-.align 32
-.global fmodf#
-.align 32
-
-fmodf:
-#ifdef _LIBC
-.global __ieee754_fmodf
-.type __ieee754_fmodf,@function
-__ieee754_fmodf:
-#endif
// inputs in f8, f9
// result in f8
@@ -134,13 +123,13 @@ __ieee754_fmodf:
// (1) y0
frcpa.s1 f10,p6=f6,f7
nop.i 0
-}
+}
// eliminate special cases
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0xe7
+ fclass.m.unc p7,p0 = f9, 0xe7
nop.i 999;;
}
@@ -151,14 +140,14 @@ __ieee754_fmodf:
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p0 = f8, 0xe3
- nop.i 999
+ fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
-(p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
+ fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
@@ -174,33 +163,33 @@ __ieee754_fmodf:
// (2) q0=a*y0
(p6) fma.s1 f13=f6,f10,f0
nop.i 0
-}
+}
{ .mfi
nop.m 0
// (3) e0 = 1 - b * y0
(p6) fnma.s1 f12=f7,f10,f1
nop.i 0;;
-}
+}
{.mfi
nop.m 0
// normalize x (if |x|<|y|)
(p8) fma.s.s0 f8=f8,f1,f0
nop.i 0
-}
+}
{.bbb
- (p9) br.cond.spnt L(FMOD_X_NAN_INF)
- (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
+ (p9) br.cond.spnt FMOD_X_NAN_INF
+ (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
- {.mfi
+ {.mfi
nop.m 0
// normalize x
fma.s0 f6=f6,f1,f0
nop.i 0
-}
+}
{.mfi
nop.m 0
// normalize y
@@ -215,45 +204,45 @@ __ieee754_fmodf:
// (4) q1=q0+e0*q0
(p6) fma.s1 f13=f12,f13,f13
nop.i 0
-}
+}
{ .mfi
nop.m 0
// (5) e1 = e0 * e0 + 2^-34
(p6) fma.s1 f14=f12,f12,f11
nop.i 0;;
-}
+}
{.mlx
nop.m 0
movl r2=0x33a00000;;
-}
+}
{ .mfi
nop.m 0
// (6) y1 = y0 + e0 * y0
(p6) fma.s1 f10=f12,f10,f10
nop.i 0;;
-}
+}
{.mfi
// set f12=1.25*2^{-24}
setf.s f12=r2
// (7) q2=q1+e1*q1
(p6) fma.s1 f13=f13,f14,f13
nop.i 0;;
-}
+}
{.mfi
nop.m 0
fmerge.s f9=f8,f9
nop.i 0
-}
+}
{ .mfi
nop.m 0
// (8) y2 = y1 + e1 * y1
(p6) fma.s1 f10=f14,f10,f10
// set p6=0, p10=0
cmp.ne.and p6,p10=r0,r0;;
-}
+}
.align 32
-L(loop24):
+loop24:
{.mfi
nop.m 0
// compare q2, 2^32
@@ -283,7 +272,7 @@ L(loop24):
// normalize truncated quotient
(p8) fcvt.xf f13=f11
nop.i 0;;
-}
+}
{ .mfi
nop.m 0
// calculate remainder (assuming f13=RZ(Q))
@@ -292,7 +281,7 @@ L(loop24):
}
{.mfi
nop.m 0
- // also if exponent>32, round quotient to single precision
+ // also if exponent>32, round quotient to single precision
// and subtract 1 ulp: q=q-q*(1.25*2^{-24})
(p7) fnma.s.s1 f11=f13,f12,f13
nop.i 0;;
@@ -335,7 +324,7 @@ L(loop24):
.pred.rel "mutex",p6,p10
{.mfb
nop.m 0
- // add b to estimated remainder (to cover the case when the quotient was overestimated)
+ // add b to estimated remainder (to cover the case when the quotient was overestimated)
// also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
(p6) fma.s.s0 f8=f11,f12,f9
nop.b 0
@@ -357,102 +346,118 @@ L(loop24):
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
- br.cond.sptk L(loop24);;
+ br.cond.sptk loop24;;
}
{ .mmb
- nop.m 0
- nop.m 0
- br.ret.sptk b0;;
+ nop.m 0
+ nop.m 0
+ br.ret.sptk b0;;
}
-L(FMOD_X_NAN_INF):
+FMOD_X_NAN_INF:
// Y zero ?
-{.mfi
+{.mfi
+ nop.m 0
+ fclass.m p10,p0=f8,0xc3 // Test x=nan
+ nop.i 0
+}
+{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
+
{.mfi
+ nop.m 0
+ fma.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
+ nop.i 0;;
+}
+{.mfb
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
- nop.i 0;;
+(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
}
{.mib
nop.m 0
nop.i 0
// if Y zero
- (p11) br.cond.spnt L(FMOD_Y_ZERO);;
+ (p11) br.cond.spnt FMOD_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
-(p0) fclass.m.unc p8,p9 = f8, 0x23
- nop.i 999;;
+ fclass.m.unc p8,p9 = f8, 0x23
+ nop.i 999;;
}
// Y NaN ?
{.mfi
- nop.m 999
+ nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
- nop.i 0;;
+ nop.i 0;;
}
{.mfi
- nop.m 999
-(p8) frcpa.s0 f8,p0 = f8,f8
+ nop.m 999
+(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
-}
+}
{ .mfi
nop.m 999
- // also set Denormal flag if necessary
+ // also set Denormal flag if necessary
(p8) fma.s0 f9=f9,f1,f0
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p8) fma.s f8=f8,f1,f0
- nop.b 999 ;;
+(p8) fma.s.s0 f8=f8,f1,f0
+ nop.b 999 ;;
}
{ .mfb
nop.m 999
-(p9) frcpa.s0 f8,p7=f8,f9
- br.ret.sptk b0 ;;
+(p9) frcpa.s0 f8,p7=f8,f9
+ br.ret.sptk b0 ;;
}
-L(FMOD_Y_NAN_INF_ZERO):
+FMOD_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x23
+ fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p7) fma.s f8=f8,f1,f0
-(p7) br.ret.spnt b0 ;;
+(p7) fma.s.s0 f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p0 = f9, 0xc3
+ fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p9) fma.s f8=f9,f1,f0
-(p9) br.ret.spnt b0 ;;
+(p9) fma.s.s0 f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
}
-L(FMOD_Y_ZERO):
+FMOD_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@@ -460,69 +465,65 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 0
// set Invalid
- frcpa f12,p0=f0,f0
+ frcpa.s0 f12,p0=f0,f0
nop.i 999
}
// X NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p10) fclass.nm p9,p10 = f8, 0xff
+(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
{.mfi
nop.m 999
- (p9) frcpa f11,p7=f8,f0
+ (p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
-(p10) frcpa f11,p7 = f0,f0
+(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8, f8
+ fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fma.s f8=f11,f1,f0
+ fma.s.s0 f8=f11,f1,f0
nop.i 999;;
}
-L(EXP_ERROR_RETURN):
+EXP_ERROR_RETURN:
{ .mib
nop.m 0
-(p0) mov GR_Parameter_TAG=122
-(p0) br.sptk __libm_error_region;;
+ mov GR_Parameter_TAG=122
+ br.sptk __libm_error_region;;
}
-.endp fmodf
-ASM_SIZE_DIRECTIVE(fmodf)
-ASM_SIZE_DIRECTIVE(__ieee754_fmodf)
-
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(fmodf)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
-.fframe 64
+.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@@ -530,18 +531,18 @@ __libm_error_region:
{ .mmi
stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
- stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0 // Parameter 3 address
}
{ .mib
stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
+ add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support#;; // Call error handling function
}
{ .mmi
@@ -556,13 +557,14 @@ __libm_error_region:
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
+ mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
-};;
+};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
+
diff --git a/sysdeps/ia64/fpu/e_fmodl.S b/sysdeps/ia64/fpu/e_fmodl.S
index 85c9f6ef82..da08ae3f5c 100644
--- a/sysdeps/ia64/fpu/e_fmodl.S
+++ b/sysdeps/ia64/fpu/e_fmodl.S
@@ -1,11 +1,10 @@
.file "fmodl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
-// Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,38 +35,42 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
-// 2/02/00 Initial version
-// 3/02/00 New Algorithm
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 03/02/00 New Algorithm
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-//11/28/00 Set FR_Y to f9
+// 11/28/00 Set FR_Y to f9
+// 03/11/02 Fixed flags for fmodl(qnan,zero)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
//
// API
//====================================================================
-// long double fmodl(long double,long double);
+// long double fmodl(long double,long double);
//
// Overview of operation
//====================================================================
// fmod(a,b)=a-i*b,
-// where i is an integer such that, if b!=0,
+// where i is an integer such that, if b!=0,
// |i|<|a/b| and |a/b-i|<1
//
// Algorithm
//====================================================================
// a). if |a|<|b|, return a
-// b). get quotient and reciprocal overestimates accurate to
+// b). get quotient and reciprocal overestimates accurate to
// 33 bits (q2,y2)
// c). if the exponent difference (exponent(a)-exponent(b))
// is less than 32, truncate quotient to integer and
// finish in one iteration
// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
// round quotient estimate to single precision (k=RN(q2)),
-// calculate partial remainder (a'=a-k*b),
+// calculate partial remainder (a'=a-k*b),
// get quotient estimate (a'*y2), and repeat from c).
//
// Registers used
@@ -76,13 +79,9 @@
// General registers: r2,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15
-#include "libm_support.h"
-
-.section .text
-
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
-GR_SAVE_GP = r35
+GR_SAVE_GP = r35
GR_SAVE_SP = r36
GR_Parameter_X = r37
@@ -95,18 +94,9 @@ FR_Y = f9
FR_RESULT = f8
+.section .text
+GLOBAL_IEEE754_ENTRY(fmodl)
-.proc fmodl#
-.align 32
-.global fmodl#
-.align 32
-
-fmodl:
-#ifdef _LIBC
-.global __ieee754_fmodl
-.type __ieee754_fmodl,@function
-__ieee754_fmodl:
-#endif
// inputs in f8, f9
// result in f8
@@ -128,7 +118,7 @@ __ieee754_fmodl:
// (1) y0
frcpa.s1 f10,p6=f6,f7
nop.i 0;;
-}
+}
// eliminate special cases
{.mmi
@@ -141,7 +131,7 @@ cmp.eq p7,p10=r29,r0;;
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
-(p10) fclass.m p7,p10 = f9, 0xe7
+(p10) fclass.m p7,p10 = f9, 0xe7
nop.i 999;;
}
@@ -152,14 +142,14 @@ cmp.eq p7,p10=r29,r0;;
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p11 = f8, 0xe3
- nop.i 999
+ fclass.m.unc p9,p11 = f8, 0xe3
+ nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
-(p10) fcmp.lt.unc.s1 p8,p0 = f6,f7
+(p10) fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
@@ -173,13 +163,13 @@ cmp.eq p7,p10=r29,r0;;
// (3) e0 = 1 - b * y0
(p6) fnma.s1 f12=f7,f10,f1
nop.i 0;;
-}
+}
// Y +-NAN, +-inf, +-0? p7
{ .mfi
nop.m 999
- // pseudo-NaN ?
-(p10) fclass.nm p7,p0 = f9, 0xff
+ // pseudo-NaN ?
+(p10) fclass.nm p7,p0 = f9, 0xff
nop.i 999
}
@@ -190,7 +180,7 @@ cmp.eq p7,p10=r29,r0;;
{ .mfi
nop.m 999
-(p11) fclass.nm p9,p0 = f8, 0xff
+(p11) fclass.nm p9,p0 = f8, 0xff
nop.i 999;;
}
@@ -209,18 +199,18 @@ cmp.eq p7,p10=r29,r0;;
nop.i 0
}
{.bbb
- (p9) br.cond.spnt L(FMOD_X_NAN_INF)
- (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
+ (p9) br.cond.spnt FMOD_X_NAN_INF
+ (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
- {.mfi
+ {.mfi
nop.m 0
// x denormal ? set D flag
fnma.s0 f32=f6,f1,f6
nop.i 0
-}
+}
{.mfi
nop.m 0
// y denormal ? set D flag
@@ -234,46 +224,46 @@ cmp.eq p7,p10=r29,r0;;
// (4) q1=q0+e0*q0
(p6) fma.s1 f13=f12,f13,f13
nop.i 0
-}
+}
{ .mfi
nop.m 0
// (5) e1 = e0 * e0 + 2^-34
(p6) fma.s1 f14=f12,f12,f11
nop.i 0;;
-}
+}
{.mlx
nop.m 0
movl r2=0x33a00000;;
-}
+}
{ .mfi
nop.m 0
// (6) y1 = y0 + e0 * y0
(p6) fma.s1 f10=f12,f10,f10
nop.i 0;;
-}
+}
{.mfi
// set f12=1.25*2^{-24}
setf.s f12=r2
// (7) q2=q1+e1*q1
(p6) fma.s1 f13=f13,f14,f13
nop.i 0;;
-}
+}
{.mfi
nop.m 0
fmerge.s f9=f8,f9
nop.i 0
-}
+}
{ .mfi
nop.m 0
// (8) y2 = y1 + e1 * y1
(p6) fma.s1 f10=f14,f10,f10
// set p6=0, p10=0
cmp.ne.and p6,p10=r0,r0;;
-}
+}
.align 32
-L(loop64):
+loop64:
{.mfi
nop.m 0
// compare q2, 2^32
@@ -305,7 +295,7 @@ L(loop64):
// normalize truncated quotient
(p8) fcvt.xf f13=f11
nop.i 0;;
-}
+}
{ .mfi
nop.m 0
// calculate remainder (assuming f13=RZ(Q))
@@ -314,7 +304,7 @@ L(loop64):
}
{.mfi
nop.m 0
- // also if exponent>32, round quotient to single precision
+ // also if exponent>32, round quotient to single precision
// and subtract 1 ulp: q=q-q*(1.25*2^{-24})
(p7) fnma.s.s1 f11=f13,f12,f13
nop.i 0;;
@@ -357,7 +347,7 @@ L(loop64):
.pred.rel "mutex",p6,p10
{.mfb
nop.m 0
- // add b to estimated remainder (to cover the case when the quotient was overestimated)
+ // add b to estimated remainder (to cover the case when the quotient was overestimated)
// also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
(p6) fma.s0 f8=f11,f12,f9
nop.b 0
@@ -378,43 +368,59 @@ L(loop64):
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
- br.cond.sptk L(loop64);;
+ br.cond.sptk loop64;;
}
-L(FMOD_X_NAN_INF):
+FMOD_X_NAN_INF:
// Y zero ?
-{.mfi
+{.mfi
+ nop.m 0
+ fclass.m p10,p0=f8,0xc3 // Test x=nan
+ nop.i 0
+}
+{.mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
}
+
+{.mfi
+ nop.m 0
+ fma.s0 f8=f8,f1,f0
+ nop.i 0
+}
{.mfi
+ nop.m 0
+(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
+ nop.i 0;;
+}
+{.mfb
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
- nop.i 0;;
+(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
}
{.mib
nop.m 0
nop.i 0
// if Y zero
- (p11) br.cond.spnt L(FMOD_Y_ZERO);;
+ (p11) br.cond.spnt FMOD_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
- // set p7 t0 0
- cmp.ne p7,p0=r0,r0
-(p0) fclass.m.unc p8,p9 = f8, 0x23
- nop.i 999;;
+ // set p7 t0 0
+ cmp.ne p7,p0=r0,r0
+ fclass.m.unc p8,p9 = f8, 0x23
+ nop.i 999;;
}
// Y NaN ?
{.mfi
nop.m 999
(p8) fclass.m p9,p8=f9,0xc3
- nop.i 0;;
+ nop.i 0;;
}
// Y not pseudo-zero ? (r29 holds significand)
{.mii
@@ -423,63 +429,63 @@ L(FMOD_X_NAN_INF):
nop.i 0;;
}
{.mfi
- nop.m 999
-(p8) frcpa.s0 f8,p0 = f8,f8
+ nop.m 999
+(p8) frcpa.s0 f8,p0 = f8,f8
nop.i 0
-}
+}
{ .mfi
nop.m 999
- // also set Denormal flag if necessary
+ // also set Denormal flag if necessary
(p7) fnma.s0 f9=f9,f1,f9
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p8) fma.s0 f8=f8,f1,f0
- nop.b 999 ;;
+(p8) fma.s0 f8=f8,f1,f0
+ nop.b 999 ;;
}
{ .mfb
nop.m 999
-(p9) frcpa.s0 f8,p7=f8,f9
- br.ret.sptk b0 ;;
+(p9) frcpa.s0 f8,p7=f8,f9
+ br.ret.sptk b0 ;;
}
-L(FMOD_Y_NAN_INF_ZERO):
+FMOD_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x23
+ fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p7) fma f8=f8,f1,f0
-(p7) br.ret.spnt b0 ;;
+(p7) fma.s0 f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p10 = f9, 0xc3
+ fclass.m.unc p9,p10 = f9, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p10) fclass.nm p9,p0 = f9, 0xff
+(p10) fclass.nm p9,p0 = f9, 0xff
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p9) fma f8=f9,f1,f0
-(p9) br.ret.spnt b0 ;;
+(p9) fma.s0 f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
}
-L(FMOD_Y_ZERO):
+FMOD_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@@ -487,62 +493,59 @@ L(FMOD_Y_ZERO):
{.mfi
nop.m 0
// set Invalid
- frcpa f12,p0=f0,f0
+ frcpa.s0 f12,p0=f0,f0
nop.i 0
}
// X NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p10) fclass.nm p9,p10 = f8, 0xff
+(p10) fclass.nm p9,p10 = f8, 0xff
nop.i 999 ;;
}
{.mfi
nop.m 999
- (p9) frcpa f11,p7=f8,f0
+ (p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
-(p10) frcpa f11,p7 = f9,f9
-(p0) mov GR_Parameter_TAG = 120 ;;
+(p10) frcpa.s0 f11,p7 = f9,f9
+ mov GR_Parameter_TAG = 120 ;;
}
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8, f8
+ fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfb
nop.m 999
-(p0) fma f8=f11,f1,f0
-(p0) br.sptk __libm_error_region;;
+ fma.s0 f8=f11,f1,f0
+ br.sptk __libm_error_region;;
}
-.endp fmodl
-ASM_SIZE_DIRECTIVE(fmodl)
-ASM_SIZE_DIRECTIVE(__ieee754_fmodl)
+GLOBAL_IEEE754_END(fmodl)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
-.fframe 64
+.fframe 64
add sp=-64,sp // Create new stack
nop.f 0
mov GR_SAVE_GP=gp // Save gp
@@ -550,18 +553,18 @@ __libm_error_region:
{ .mmi
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
- stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0 // Parameter 3 address
}
{ .mib
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
+ add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
@@ -576,15 +579,17 @@ __libm_error_region:
mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
+ mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
-};;
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
+
diff --git a/sysdeps/ia64/fpu/e_hypot.S b/sysdeps/ia64/fpu/e_hypot.S
index 113aac3461..885c819326 100644
--- a/sysdeps/ia64/fpu/e_hypot.S
+++ b/sysdeps/ia64/fpu/e_hypot.S
@@ -1,11 +1,10 @@
-.file "hypot.asm"
+.file "hypot.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,24 +35,27 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// *********************************************************************
+//*********************************************************************
//
// History:
-// 2/02/00 hand-optimized
-// 4/04/00 Unwind support added
-// 6/20/00 new version
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 hand-optimized
+// 04/04/00 Unwind support added
+// 06/20/00 new version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/17/03 Added missing mutex directive
//
-// *********************************************************************
+//*********************************************************************
// ___________
// Function: hypot(x,y) = |(x^2 + y^2) = for double precision values
// x and y
// Also provides cabs functionality.
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
@@ -68,7 +70,7 @@
//
// Predicate Registers: p6 - p10
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
@@ -78,7 +80,7 @@
// hypot(QNaN and anything) = QNaN
// hypot(SNaN and anything ) = QNaN
//
-// *********************************************************************
+//*********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
@@ -86,9 +88,7 @@
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to double
//
-// *********************************************************************
-
-#include "libm_support.h"
+//*********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@@ -103,23 +103,11 @@ FR_Y = f33
FR_RESULT = f8
.section .text
-#ifndef _LIBC
-.proc cabs#
-.global cabs#
-cabs:
-.endp cabs
-#endif
-.proc hypot#
-.global hypot#
-.align 64
-hypot:
-#ifdef _LIBC
-.global __hypot
-__hypot:
-.global __ieee754_hypot
-__ieee754_hypot:
-#endif
+LOCAL_LIBM_ENTRY(cabs)
+LOCAL_LIBM_END(cabs)
+GLOBAL_IEEE754_ENTRY(hypot)
+
{.mfi
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
@@ -221,6 +209,7 @@ __ieee754_hypot:
mov r2=0x107fb;;
}
+.pred.rel "mutex",p7,p8
{.mfb
nop.m 0
// if f8=Infinity or f9=Zero, return |f8|
@@ -394,11 +383,8 @@ __ieee754_hypot:
// No overflow
(p9) br.ret.sptk b0;;
}
-.endp hypot
-ASM_SIZE_DIRECTIVE(hypot)
-
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(hypot)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -445,7 +431,8 @@ __libm_error_region:
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#
+
+
diff --git a/sysdeps/ia64/fpu/e_hypotf.S b/sysdeps/ia64/fpu/e_hypotf.S
index 0a11ec5b41..633bb67e59 100644
--- a/sysdeps/ia64/fpu/e_hypotf.S
+++ b/sysdeps/ia64/fpu/e_hypotf.S
@@ -1,11 +1,10 @@
-.file "hypotf.asm"
+.file "hypotf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,24 +35,27 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// *********************************************************************
+//*********************************************************************
//
// History:
-// 2/02/00 hand-optimized
-// 4/04/00 Unwind support added
-// 6/26/00 new version
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 hand-optimized
+// 04/04/00 Unwind support added
+// 06/26/00 new version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/17/03 Added missing mutex directive
//
-// *********************************************************************
+//*********************************************************************
// ___________
// Function: hypotf(x,y) = |(x^2 + y^2) = for single precision values
// x and y
// Also provides cabsf functionality.
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
@@ -68,7 +70,7 @@
//
// Predicate Registers: p6 - p10
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
@@ -78,7 +80,7 @@
// hypotf(QNaN and anything) = QNaN
// hypotf(SNaN and anything ) = QNaN
//
-// *********************************************************************
+//*********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
@@ -86,9 +88,7 @@
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to single precision
//
-// *********************************************************************
-
-#include "libm_support.h"
+//*********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@@ -103,23 +103,10 @@ FR_Y = f15
FR_RESULT = f8
.section .text
-#ifndef _LIBC
-.proc cabsf#
-.global cabsf#
-cabsf:
-.endp cabsf
-#endif
-.proc hypotf#
-.global hypotf#
-.align 64
-hypotf:
-#ifdef _LIBC
-.global __hypotf
-__hypotf:
-.global __ieee754_hypotf
-__ieee754_hypotf:
-#endif
+LOCAL_LIBM_ENTRY(cabsf)
+LOCAL_LIBM_END(cabsf)
+GLOBAL_IEEE754_ENTRY(hypotf)
{.mfi
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
@@ -207,6 +194,7 @@ __ieee754_hypotf:
nop.i 0;;
}
+.pred.rel "mutex",p7,p8
{.mfb
nop.m 0
// if f8=Infinity or f9=Zero, return |f8|
@@ -348,15 +336,12 @@ __ieee754_hypotf:
// No overflow
(p9) br.ret.sptk b0;;
}
-.endp hypotf
-ASM_SIZE_DIRECTIVE(hypotf)
-
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(hypotf)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii
add GR_Parameter_Y=-32,sp // Parameter 2 value
-(p0) mov GR_Parameter_TAG = 47
+ mov GR_Parameter_TAG = 47
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
@@ -400,8 +385,9 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
+
diff --git a/sysdeps/ia64/fpu/e_hypotl.S b/sysdeps/ia64/fpu/e_hypotl.S
index 986faf6fcc..0aa94b69b8 100644
--- a/sysdeps/ia64/fpu/e_hypotl.S
+++ b/sysdeps/ia64/fpu/e_hypotl.S
@@ -1,11 +1,10 @@
-.file "hypotl.asm"
+.file "hypotl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,24 +35,26 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// *********************************************************************
+//*********************************************************************
//
// History:
-// 2/02/00 hand-optimized
-// 4/04/00 Unwind support added
-// 6/20/00 new version
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 hand-optimized
+// 04/04/00 Unwind support added
+// 06/20/00 new version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
-// *********************************************************************
+//*********************************************************************
// ___________
// Function: hypotl(x,y) = |(x^2 + y^2) = for double extended values
// x and y
// Also provides cabsl functionality.
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
@@ -68,7 +69,7 @@
//
// Predicate Registers: p6 - p10
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
@@ -78,7 +79,7 @@
// hypotl(QNaN and anything) = QNaN
// hypotl(SNaN and anything ) = QNaN
//
-// *********************************************************************
+//*********************************************************************
//
// Implementation:
// x2 = x * x in double-extended
@@ -86,9 +87,7 @@
// temp = x2 + y2 in double-extended
// sqrt(temp) rounded to double extended
//
-// *********************************************************************
-
-#include "libm_support.h"
+//*********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@@ -103,23 +102,10 @@ FR_Y = f33
FR_RESULT = f8
.section .text
-#ifndef _LIBC
-.proc cabsl#
-.global cabsl#
-cabsl:
-.endp cabsl
-#endif
-.proc hypotl#
-.global hypotl#
-.align 64
-hypotl:
-#ifdef _LIBC
-.global __hypotl
-__hypotl:
-.global __ieee754_hypotl
-__ieee754_hypotl:
-#endif
+LOCAL_LIBM_ENTRY(cabsl)
+LOCAL_LIBM_END(cabsl)
+GLOBAL_IEEE754_ENTRY(hypotl)
{.mfi
alloc r32= ar.pfs,0,4,4,0
// Compute x*x
@@ -434,11 +420,8 @@ __ieee754_hypotl:
// No overflow
(p9) br.ret.sptk b0;;
}
-.endp hypotl
-ASM_SIZE_DIRECTIVE(hypotl)
-
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(hypotl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -485,7 +468,9 @@ __libm_error_region:
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#
+
+
+
diff --git a/sysdeps/ia64/fpu/e_log.S b/sysdeps/ia64/fpu/e_log.S
index 9ad1e5fe56..f80f153679 100644
--- a/sysdeps/ia64/fpu/e_log.S
+++ b/sysdeps/ia64/fpu/e_log.S
@@ -1,10 +1,10 @@
.file "log.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1085 +20,1707 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 6/16/00 Updated table to be rounded correctly
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 06/16/00 Updated table to be rounded correctly
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 8/17/00 Improved speed of main path by 5 cycles
+// 08/17/00 Improved speed of main path by 5 cycles
// Shortened path for x=1.0
-// 1/09/01 Improved speed, fixed flags for neg denormals
-//
+// 01/09/01 Improved speed, fixed flags for neg denormals
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 05/23/02 Modified algorithm. Now only one polynomial is used
+// for |x-1| >= 1/256 and for |x-1| < 1/256
+// 12/11/02 Improved performance for Itanium 2
//
// API
//==============================================================
// double log(double)
// double log10(double)
//
+//
// Overview of operation
//==============================================================
// Background
+// ----------
//
-// Consider x = 2^N 1.f1 f2 f3 f4...f63
-// Log(x) = log(frcpa(x) x/frcpa(x))
-// = log(1/frcpa(x)) + log(frcpa(x) x)
-// = -log(frcpa(x)) + log(frcpa(x) x)
+// This algorithm is based on the fact that
+// log(a b) = log(a) + log(b).
+// In our case we have x = 2^N f, where 1 <= f < 2.
+// So
+// log(x) = log(2^N f) = log(2^N) + log(f) = n*log(2) + log(f)
//
-// frcpa(x) = 2^-N frcpa((1.f1 f2 ... f63)
+// To calculate log(f) we do the following
+// log(f) = log(f * frcpa(f) / frcpa(f)) =
+// = log(f * frcpa(f)) + log(1/frcpa(f))
//
-// -log(frcpa(x)) = -log(C)
-// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
+// According to the definition of IA-64's frcpa instruction, its result is
+// a floating-point value that approximates 1/f using a lookup on the
+// top 8 bits of the input number's significand, with relative
+// error < 2^(-8.886). So we have the following
//
-// -log(frcpa(x)) = -log(C)
-// = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
+// |(1/f - frcpa(f)) / (1/f)| = |1 - f*frcpa(f)| < 1/256
//
-// -log(frcpa(x)) = -log(C)
-// = +Nlog2 + log(frcpa(1.f1 f2 ... f63))
+// and
//
-// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
-
-// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
-// Log(x) = +Nlog2 - log(/frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
-// Log(x) = +Nlog2 + T + log(frcpa(x) x)
+// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
+// = log(1 + r) + T
+//
+// The first value can be computed by polynomial P(r) approximating
+// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
+// value defined by the top 8 bits of f.
//
-// Log(x) = +Nlog2 + T + log(C x)
+// Finally we have that log(x) ~ (N*log(2) + T) + P(r)
//
-// Cx = 1 + r
+// Note that if input argument is close to 1.0 (in our case it means
+// that |1 - x| < 1/256) we can use just polynomial approximation
+// because x = 2^0 * f = f = 1 + r and
+// log(x) = log(1 + r) ~ P(r)
//
-// Log(x) = +Nlog2 + T + log(1+r)
-// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
//
-// 1.f1 f2 ... f8 has 256 entries.
-// They are 1 + k/2^8, k = 0 ... 255
-// These 256 values are the table entries.
+// To compute log10(x) we use the simple identity
+//
+// log10(x) = log(x)/log(10)
+//
+// so we have that
+//
+// log10(x) = (N*log(2) + T + log(1+r)) / log(10) =
+// = N*(log(2)/log(10)) + (T/log(10)) + log(1 + r)/log(10)
+//
//
// Implementation
-//===============
-// CASE 1: |x-1| >= 2^-6
-// C = frcpa(x)
-// r = C * x - 1
+// --------------
+// It can be seen that formulas for log and log10 differ from one another
+// only by coefficients and tabular values. Namely, both log and log10 are
+// calculated as (N*L1 + T) + L2*Series(r) where in case of log
+// L1 = log(2)
+// T = log(1/frcpa(x))
+// L2 = 1.0
+// and in case of log10
+// L1 = log(2)/log(10)
+// T = log(1/frcpa(x))/log(10)
+// L2 = 1.0/log(10)
//
-// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4 + P4*r^5 + P5*r^6
+// So there is common code with two different entry points, which set pointers
+// to the base address of the corresponding data sets containing values
+// of L2,T and prepare the integer representation of L1 needed for the following
+// setf instruction.
//
-// x = f * 2*n where f is 1.f_1f_2f_3....f_63
-// Nfloat = float(n) where n is the true unbiased exponent
-// pre-index = f_1f_2....f_8
-// index = pre_index * 16
-// get the dxt table entry at index + offset = T
+// Note that both log and log10 use a common approximation polynomial,
+// which means we need only one set of approximation coefficients.
//
-// result = (T + Nfloat * log(2)) + rseries
//
-// The T table is calculated as follows
-// Form x_k = 1 + k/2^8 where k goes from 0... 255
-// y_k = frcpa(x_k)
-// log(1/y_k) in quad and round to double-extended
-
-// CASE 2: |x-1| < 2^-6
-// w = x - 1
+// 1. |x-1| >= 1/256
+// InvX = frcpa(x)
+// r = InvX*x - 1
+// P(r) = r*((r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
+// all coefficients are calculated in quad and rounded to double
+// precision. A7,A6,A5,A4 are stored in memory whereas A3 and A2 are
+// created with setf.
+//
+// N = float(n) where n is the true unbiased exponent of x
+//
+// T is tabular value of log(1/frcpa(x)) calculated in quad precision
+// and represented by two floating-point numbers 64-bit Thi and 32-bit Tlo.
+// To load Thi,Tlo we get bits from 55 to 62 of register format significand
+// as index and calculate two addresses
+// ad_Thi = Thi_table_base_addr + 8 * index
+// ad_Tlo = Tlo_table_base_addr + 4 * index
+//
+// L2 (1.0 or 1.0/log(10) depending on function) is calculated in quad
+// precision and rounded to double extended; it's loaded from memory.
+//
+// L1 (log(2) or log10(2) depending on function) is calculated in quad
+// precision and represented by two floating-point 64-bit numbers L1hi,L1lo
+// stored in memory.
//
-// Form wseries = w + Q1*w^2 + Q2*w^3 + ... + Q7*w^8 + Q8*w^9
+// And final result = ((L1hi*N + Thi) + (N*L1lo + Tlo)) + L2*P(r)
+//
+//
+// 2. |x-1| < 1/256
+// r = x - 1
+// P(r) = r*((r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
+// A7,A6,A5,A4,A3,A2 are the same as in case |x-1| >= 1/256
+//
+// And final results
+// log(x) = P(r)
+// log10(x) = L2*P(r)
+//
+// 3. How we determine whether the input argument satisfies |x-1| < 1/256.
+//
+// To do it we analyze biased exponent and integer representation of
+// input argument
+//
+// a) First we test whether the biased exponent equals 0xFFFE or 0xFFFF
+// (i.e. whether 0.5 <= x < 2). This comparison can be performed using
+// the unsigned version of the cmp instruction in such a way
+// biased_exponent_of_x - 0xFFFE < 2
+//
+//
+// b) Second (when the result of a) is true) we need to compare x
+// with 1-1/256 and 1+1/256, or in double precision memory representation
+// with 0x3FEFE00000000000 and 0x3FF0100000000000 respectively.
+// This comparison can be made as in a), using the unsigned
+// version of cmp, i.e. ix - 0x3FEFE00000000000 < 0x0000300000000000.
+// 0x0000300000000000 is difference between 0x3FF0100000000000 and
+// 0x3FEFE00000000000
+//
+// Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
+// filtered and processed on special branches.
//
-// result = wseries
-// Special values
+//
+// Special values
//==============================================================
-
-
+//
// log(+0) = -inf
// log(-0) = -inf
-
-// log(+qnan) = +qnan
-// log(-qnan) = -qnan
-// log(+snan) = +qnan
-// log(-snan) = -qnan
-
+//
+// log(+qnan) = +qnan
+// log(-qnan) = -qnan
+// log(+snan) = +qnan
+// log(-snan) = -qnan
+//
// log(-n) = QNAN Indefinite
-// log(-inf) = QNAN Indefinite
-
+// log(-inf) = QNAN Indefinite
+//
// log(+inf) = +inf
-
+//
+//
// Registers used
//==============================================================
-// Floating Point registers used:
+// Floating Point registers used:
// f8, input
-// f9 -> f15, f32 -> f68
-
-// General registers used:
-// r32 -> r51
-
+// f7 -> f15, f32 -> f42
+//
+// General registers used:
+// r8 -> r11
+// r14 -> r23
+//
// Predicate registers used:
// p6 -> p15
-// p8 log base e
-// p6 log base e special
-// p9 used in the frcpa
-// p13 log base e large W
-// p14 log base e small w
-
-// p7 log base 10
-// p10 log base 10 large W
-// p11 log base 10 small w
-// p12 log base 10 special
-
-#include "libm_support.h"
-
// Assembly macros
//==============================================================
-
-log_int_Nfloat = f9
-log_Nfloat = f10
-
-log_P5 = f11
-log_P4 = f12
-log_P3 = f13
-log_P2 = f14
-log_half = f15
-
-log_log2 = f32
-log_T = f33
-
-log_rp_p4 = f34
-log_rp_p32 = f35
-log_rp_p2 = f36
-log_w6 = f37
-log_rp_p10 = f38
-log_rcube = f39
-log_rsq = f40
-
-log_T_plus_Nlog2 = f41
-log_w3 = f42
-
-log_r = f43
-log_C = f44
-
-log_w = f45
-log_Q8 = f46
-log_Q7 = f47
-log_Q4 = f48
-log_Q3 = f49
-log_Q6 = f50
-log_Q5 = f51
-log_Q2 = f52
-log_Q1 = f53
-log_P1 = f53
-
-log_rp_q7 = f54
-log_rp_q65 = f55
-log_Qlo = f56
-
-log_rp_q3 = f57
-log_rp_q21 = f58
-log_Qhi = f59
-
-log_wsq = f60
-log_w4 = f61
-log_Q = f62
-
-log_inv_ln10 = f63
-log_log10_hi = f64
-log_log10_lo = f65
-log_rp_q10 = f66
-log_NORM_f8 = f67
-log_r2P_r = f68
-
-// ===================================
-
-log_GR_exp_17_ones = r33
-log_GR_exp_16_ones = r34
-log_GR_exp_f8 = r35
-log_GR_signexp_f8 = r36
-log_GR_true_exp_f8 = r37
-log_GR_significand_f8 = r38
-log_GR_half_exp = r39
-log_GR_index = r39
-log_AD_1 = r40
-log_GR_signexp_w = r41
-log_GR_fff9 = r42
-log_AD_2 = r43
-log_GR_exp_w = r44
-
-GR_SAVE_B0 = r45
-GR_SAVE_GP = r46
-GR_SAVE_PFS = r47
-
-GR_Parameter_X = r48
-GR_Parameter_Y = r49
-GR_Parameter_RESULT = r50
-log_GR_tag = r51
-
-
-// Data tables
+GR_TAG = r8
+GR_ad_1 = r8
+GR_ad_2 = r9
+GR_Exp = r10
+GR_N = r11
+
+GR_x = r14
+GR_dx = r15
+GR_NearOne = r15
+GR_xorg = r16
+GR_mask = r16
+GR_05 = r17
+GR_A3 = r18
+GR_Sig = r19
+GR_Ind = r19
+GR_Nm1 = r20
+GR_bias = r21
+GR_ad_3 = r22
+GR_rexp = r23
+
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+
+FR_NormX = f7
+FR_RcpX = f9
+FR_tmp = f9
+FR_r = f10
+FR_r2 = f11
+FR_r4 = f12
+FR_N = f13
+FR_Ln2hi = f14
+FR_Ln2lo = f15
+
+FR_A7 = f32
+FR_A6 = f33
+FR_A5 = f34
+FR_A4 = f35
+FR_A3 = f36
+FR_A2 = f37
+
+FR_Thi = f38
+FR_NxLn2hipThi = f38
+FR_NxLn2pT = f38
+FR_Tlo = f39
+FR_NxLn2lopTlo = f39
+
+FR_InvLn10 = f40
+FR_A32 = f41
+FR_A321 = f42
+
+
+FR_Y = f1
+FR_X = f10
+FR_RESULT = f8
+
+
+// Data
//==============================================================
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
+RODATA
.align 16
-log_table_1:
-ASM_TYPE_DIRECTIVE(log_table_1,@object)
-data8 0xBFC5555DA7212371 // P5
-data8 0x3FC999A19EEF5826 // P4
-data8 0x3FBC756AC654273B // Q8
-data8 0xBFC001A42489AB4D // Q7
-data8 0x3FC99999999A169B // Q4
-data8 0xBFD00000000019AC // Q3
-ASM_SIZE_DIRECTIVE(log_table_1)
-log_table_2:
-ASM_TYPE_DIRECTIVE(log_table_2,@object)
-data8 0xBFCFFFFFFFFEF009 // P3
-data8 0x3FD555555554ECB2 // P2
-data8 0x3FC2492479AA0DF8 // Q6
-data8 0xBFC5555544986F52 // Q5
-data8 0x3FD5555555555555 // Q2
-data8 0xBFE0000000000000 // Q1, P1 = -0.5
-
-
-data8 0xde5bd8a937287195, 0x00003ffd // double-extended 1/ln(10)
-data8 0xb17217f7d1cf79ac, 0x00003ffe // log2
-// b17217f7d1cf79ab c9e3b39803f2f6a
-
-
-data8 0x80200aaeac44ef38 , 0x00003ff6 // log(1/frcpa(1+ 0/2^-8))
-
-data8 0xc09090a2c35aa070 , 0x00003ff7 // log(1/frcpa(1+ 1/2^-8))
-data8 0xa0c94fcb41977c75 , 0x00003ff8 // log(1/frcpa(1+ 2/2^-8))
-data8 0xe18b9c263af83301 , 0x00003ff8 // log(1/frcpa(1+ 3/2^-8))
-data8 0x8d35c8d6399c30ea , 0x00003ff9 // log(1/frcpa(1+ 4/2^-8))
-data8 0xadd4d2ecd601cbb8 , 0x00003ff9 // log(1/frcpa(1+ 5/2^-8))
-
-data8 0xce95403a192f9f01 , 0x00003ff9 // log(1/frcpa(1+ 6/2^-8))
-data8 0xeb59392cbcc01096 , 0x00003ff9 // log(1/frcpa(1+ 7/2^-8))
-data8 0x862c7d0cefd54c5d , 0x00003ffa // log(1/frcpa(1+ 8/2^-8))
-data8 0x94aa63c65e70d499 , 0x00003ffa // log(1/frcpa(1+ 9/2^-8))
-data8 0xa54a696d4b62b382 , 0x00003ffa // log(1/frcpa(1+ 10/2^-8))
-
-data8 0xb3e4a796a5dac208 , 0x00003ffa // log(1/frcpa(1+ 11/2^-8))
-data8 0xc28c45b1878340a9 , 0x00003ffa // log(1/frcpa(1+ 12/2^-8))
-data8 0xd35c55f39d7a6235 , 0x00003ffa // log(1/frcpa(1+ 13/2^-8))
-data8 0xe220f037b954f1f5 , 0x00003ffa // log(1/frcpa(1+ 14/2^-8))
-data8 0xf0f3389b036834f3 , 0x00003ffa // log(1/frcpa(1+ 15/2^-8))
-
-data8 0xffd3488d5c980465 , 0x00003ffa // log(1/frcpa(1+ 16/2^-8))
-data8 0x87609ce2ed300490 , 0x00003ffb // log(1/frcpa(1+ 17/2^-8))
-data8 0x8ede9321e8c85927 , 0x00003ffb // log(1/frcpa(1+ 18/2^-8))
-data8 0x96639427f2f8e2f4 , 0x00003ffb // log(1/frcpa(1+ 19/2^-8))
-data8 0x9defad3e8f73217b , 0x00003ffb // log(1/frcpa(1+ 20/2^-8))
-
-data8 0xa582ebd50097029c , 0x00003ffb // log(1/frcpa(1+ 21/2^-8))
-data8 0xac06dbe75ab80fee , 0x00003ffb // log(1/frcpa(1+ 22/2^-8))
-data8 0xb3a78449b2d3ccca , 0x00003ffb // log(1/frcpa(1+ 23/2^-8))
-data8 0xbb4f79635ab46bb2 , 0x00003ffb // log(1/frcpa(1+ 24/2^-8))
-data8 0xc2fec93a83523f3f , 0x00003ffb // log(1/frcpa(1+ 25/2^-8))
-
-data8 0xc99af2eaca4c4571 , 0x00003ffb // log(1/frcpa(1+ 26/2^-8))
-data8 0xd1581106472fa653 , 0x00003ffb // log(1/frcpa(1+ 27/2^-8))
-data8 0xd8002560d4355f2e , 0x00003ffb // log(1/frcpa(1+ 28/2^-8))
-data8 0xdfcb43b4fe508632 , 0x00003ffb // log(1/frcpa(1+ 29/2^-8))
-data8 0xe67f6dff709d4119 , 0x00003ffb // log(1/frcpa(1+ 30/2^-8))
-
-data8 0xed393b1c22351280 , 0x00003ffb // log(1/frcpa(1+ 31/2^-8))
-data8 0xf5192bff087bcc35 , 0x00003ffb // log(1/frcpa(1+ 32/2^-8))
-data8 0xfbdf4ff6dfef2fa3 , 0x00003ffb // log(1/frcpa(1+ 33/2^-8))
-data8 0x81559a97f92f9cc7 , 0x00003ffc // log(1/frcpa(1+ 34/2^-8))
-data8 0x84be72bce90266e8 , 0x00003ffc // log(1/frcpa(1+ 35/2^-8))
-
-data8 0x88bc74113f23def2 , 0x00003ffc // log(1/frcpa(1+ 36/2^-8))
-data8 0x8c2ba3edf6799d11 , 0x00003ffc // log(1/frcpa(1+ 37/2^-8))
-data8 0x8f9dc92f92ea08b1 , 0x00003ffc // log(1/frcpa(1+ 38/2^-8))
-data8 0x9312e8f36efab5a7 , 0x00003ffc // log(1/frcpa(1+ 39/2^-8))
-data8 0x968b08643409ceb6 , 0x00003ffc // log(1/frcpa(1+ 40/2^-8))
-
-data8 0x9a062cba08a1708c , 0x00003ffc // log(1/frcpa(1+ 41/2^-8))
-data8 0x9d845b3abf95485c , 0x00003ffc // log(1/frcpa(1+ 42/2^-8))
-data8 0xa06fd841bc001bb4 , 0x00003ffc // log(1/frcpa(1+ 43/2^-8))
-data8 0xa3f3a74652fbe0db , 0x00003ffc // log(1/frcpa(1+ 44/2^-8))
-data8 0xa77a8fb2336f20f5 , 0x00003ffc // log(1/frcpa(1+ 45/2^-8))
-
-data8 0xab0497015d28b0a0 , 0x00003ffc // log(1/frcpa(1+ 46/2^-8))
-data8 0xae91c2be6ba6a615 , 0x00003ffc // log(1/frcpa(1+ 47/2^-8))
-data8 0xb189d1b99aebb20b , 0x00003ffc // log(1/frcpa(1+ 48/2^-8))
-data8 0xb51cced5de9c1b2c , 0x00003ffc // log(1/frcpa(1+ 49/2^-8))
-data8 0xb819bee9e720d42f , 0x00003ffc // log(1/frcpa(1+ 50/2^-8))
-
-data8 0xbbb2a0947b093a5d , 0x00003ffc // log(1/frcpa(1+ 51/2^-8))
-data8 0xbf4ec1505811684a , 0x00003ffc // log(1/frcpa(1+ 52/2^-8))
-data8 0xc2535bacfa8975ff , 0x00003ffc // log(1/frcpa(1+ 53/2^-8))
-data8 0xc55a3eafad187eb8 , 0x00003ffc // log(1/frcpa(1+ 54/2^-8))
-data8 0xc8ff2484b2c0da74 , 0x00003ffc // log(1/frcpa(1+ 55/2^-8))
-
-data8 0xcc0b1a008d53ab76 , 0x00003ffc // log(1/frcpa(1+ 56/2^-8))
-data8 0xcfb6203844b3209b , 0x00003ffc // log(1/frcpa(1+ 57/2^-8))
-data8 0xd2c73949a47a19f5 , 0x00003ffc // log(1/frcpa(1+ 58/2^-8))
-data8 0xd5daae18b49d6695 , 0x00003ffc // log(1/frcpa(1+ 59/2^-8))
-data8 0xd8f08248cf7e8019 , 0x00003ffc // log(1/frcpa(1+ 60/2^-8))
-
-data8 0xdca7749f1b3e540e , 0x00003ffc // log(1/frcpa(1+ 61/2^-8))
-data8 0xdfc28e033aaaf7c7 , 0x00003ffc // log(1/frcpa(1+ 62/2^-8))
-data8 0xe2e012a5f91d2f55 , 0x00003ffc // log(1/frcpa(1+ 63/2^-8))
-data8 0xe600064ed9e292a8 , 0x00003ffc // log(1/frcpa(1+ 64/2^-8))
-data8 0xe9226cce42b39f60 , 0x00003ffc // log(1/frcpa(1+ 65/2^-8))
-
-data8 0xec4749fd97a28360 , 0x00003ffc // log(1/frcpa(1+ 66/2^-8))
-data8 0xef6ea1bf57780495 , 0x00003ffc // log(1/frcpa(1+ 67/2^-8))
-data8 0xf29877ff38809091 , 0x00003ffc // log(1/frcpa(1+ 68/2^-8))
-data8 0xf5c4d0b245cb89be , 0x00003ffc // log(1/frcpa(1+ 69/2^-8))
-data8 0xf8f3afd6fcdef3aa , 0x00003ffc // log(1/frcpa(1+ 70/2^-8))
-
-data8 0xfc2519756be1abc7 , 0x00003ffc // log(1/frcpa(1+ 71/2^-8))
-data8 0xff59119f503e6832 , 0x00003ffc // log(1/frcpa(1+ 72/2^-8))
-data8 0x8147ce381ae0e146 , 0x00003ffd // log(1/frcpa(1+ 73/2^-8))
-data8 0x82e45f06cb1ad0f2 , 0x00003ffd // log(1/frcpa(1+ 74/2^-8))
-data8 0x842f5c7c573cbaa2 , 0x00003ffd // log(1/frcpa(1+ 75/2^-8))
-
-data8 0x85ce471968c8893a , 0x00003ffd // log(1/frcpa(1+ 76/2^-8))
-data8 0x876e8305bc04066d , 0x00003ffd // log(1/frcpa(1+ 77/2^-8))
-data8 0x891012678031fbb3 , 0x00003ffd // log(1/frcpa(1+ 78/2^-8))
-data8 0x8a5f1493d766a05f , 0x00003ffd // log(1/frcpa(1+ 79/2^-8))
-data8 0x8c030c778c56fa00 , 0x00003ffd // log(1/frcpa(1+ 80/2^-8))
-
-data8 0x8da85df17e31d9ae , 0x00003ffd // log(1/frcpa(1+ 81/2^-8))
-data8 0x8efa663e7921687e , 0x00003ffd // log(1/frcpa(1+ 82/2^-8))
-data8 0x90a22b6875c6a1f8 , 0x00003ffd // log(1/frcpa(1+ 83/2^-8))
-data8 0x91f62cc8f5d24837 , 0x00003ffd // log(1/frcpa(1+ 84/2^-8))
-data8 0x93a06cfc3857d980 , 0x00003ffd // log(1/frcpa(1+ 85/2^-8))
-
-data8 0x94f66d5e6fd01ced , 0x00003ffd // log(1/frcpa(1+ 86/2^-8))
-data8 0x96a330156e6772f2 , 0x00003ffd // log(1/frcpa(1+ 87/2^-8))
-data8 0x97fb3582754ea25b , 0x00003ffd // log(1/frcpa(1+ 88/2^-8))
-data8 0x99aa8259aad1bbf2 , 0x00003ffd // log(1/frcpa(1+ 89/2^-8))
-data8 0x9b0492f6227ae4a8 , 0x00003ffd // log(1/frcpa(1+ 90/2^-8))
-
-data8 0x9c5f8e199bf3a7a5 , 0x00003ffd // log(1/frcpa(1+ 91/2^-8))
-data8 0x9e1293b9998c1daa , 0x00003ffd // log(1/frcpa(1+ 92/2^-8))
-data8 0x9f6fa31e0b41f308 , 0x00003ffd // log(1/frcpa(1+ 93/2^-8))
-data8 0xa0cda11eaf46390e , 0x00003ffd // log(1/frcpa(1+ 94/2^-8))
-data8 0xa22c8f029cfa45aa , 0x00003ffd // log(1/frcpa(1+ 95/2^-8))
-
-data8 0xa3e48badb7856b34 , 0x00003ffd // log(1/frcpa(1+ 96/2^-8))
-data8 0xa5459a0aa95849f9 , 0x00003ffd // log(1/frcpa(1+ 97/2^-8))
-data8 0xa6a79c84480cfebd , 0x00003ffd // log(1/frcpa(1+ 98/2^-8))
-data8 0xa80a946d0fcb3eb2 , 0x00003ffd // log(1/frcpa(1+ 99/2^-8))
-data8 0xa96e831a3ea7b314 , 0x00003ffd // log(1/frcpa(1+100/2^-8))
-
-data8 0xaad369e3dc544e3b , 0x00003ffd // log(1/frcpa(1+101/2^-8))
-data8 0xac92e9588952c815 , 0x00003ffd // log(1/frcpa(1+102/2^-8))
-data8 0xadfa035aa1ed8fdc , 0x00003ffd // log(1/frcpa(1+103/2^-8))
-data8 0xaf6219eae1ad6e34 , 0x00003ffd // log(1/frcpa(1+104/2^-8))
-data8 0xb0cb2e6d8160f753 , 0x00003ffd // log(1/frcpa(1+105/2^-8))
-
-data8 0xb2354249ad950f72 , 0x00003ffd // log(1/frcpa(1+106/2^-8))
-data8 0xb3a056e98ef4a3b4 , 0x00003ffd // log(1/frcpa(1+107/2^-8))
-data8 0xb50c6dba52c6292a , 0x00003ffd // log(1/frcpa(1+108/2^-8))
-data8 0xb679882c33876165 , 0x00003ffd // log(1/frcpa(1+109/2^-8))
-data8 0xb78c07429785cedc , 0x00003ffd // log(1/frcpa(1+110/2^-8))
-
-data8 0xb8faeb8dc4a77d24 , 0x00003ffd // log(1/frcpa(1+111/2^-8))
-data8 0xba6ad77eb36ae0d6 , 0x00003ffd // log(1/frcpa(1+112/2^-8))
-data8 0xbbdbcc915e9bee50 , 0x00003ffd // log(1/frcpa(1+113/2^-8))
-data8 0xbd4dcc44f8cf12ef , 0x00003ffd // log(1/frcpa(1+114/2^-8))
-data8 0xbec0d81bf5b531fa , 0x00003ffd // log(1/frcpa(1+115/2^-8))
-
-data8 0xc034f19c139186f4 , 0x00003ffd // log(1/frcpa(1+116/2^-8))
-data8 0xc14cb69f7c5e55ab , 0x00003ffd // log(1/frcpa(1+117/2^-8))
-data8 0xc2c2abbb6e5fd56f , 0x00003ffd // log(1/frcpa(1+118/2^-8))
-data8 0xc439b2c193e6771e , 0x00003ffd // log(1/frcpa(1+119/2^-8))
-data8 0xc553acb9d5c67733 , 0x00003ffd // log(1/frcpa(1+120/2^-8))
-
-data8 0xc6cc96e441272441 , 0x00003ffd // log(1/frcpa(1+121/2^-8))
-data8 0xc8469753eca88c30 , 0x00003ffd // log(1/frcpa(1+122/2^-8))
-data8 0xc962cf3ce072b05c , 0x00003ffd // log(1/frcpa(1+123/2^-8))
-data8 0xcadeba8771f694aa , 0x00003ffd // log(1/frcpa(1+124/2^-8))
-data8 0xcc5bc08d1f72da94 , 0x00003ffd // log(1/frcpa(1+125/2^-8))
-
-data8 0xcd7a3f99ea035c29 , 0x00003ffd // log(1/frcpa(1+126/2^-8))
-data8 0xcef93860c8a53c35 , 0x00003ffd // log(1/frcpa(1+127/2^-8))
-data8 0xd0192f68a7ed23df , 0x00003ffd // log(1/frcpa(1+128/2^-8))
-data8 0xd19a201127d3c645 , 0x00003ffd // log(1/frcpa(1+129/2^-8))
-data8 0xd2bb92f4061c172c , 0x00003ffd // log(1/frcpa(1+130/2^-8))
-
-data8 0xd43e80b2ee8cc8fc , 0x00003ffd // log(1/frcpa(1+131/2^-8))
-data8 0xd56173601fc4ade4 , 0x00003ffd // log(1/frcpa(1+132/2^-8))
-data8 0xd6e6637efb54086f , 0x00003ffd // log(1/frcpa(1+133/2^-8))
-data8 0xd80ad9f58f3c8193 , 0x00003ffd // log(1/frcpa(1+134/2^-8))
-data8 0xd991d1d31aca41f8 , 0x00003ffd // log(1/frcpa(1+135/2^-8))
-
-data8 0xdab7d02231484a93 , 0x00003ffd // log(1/frcpa(1+136/2^-8))
-data8 0xdc40d532cde49a54 , 0x00003ffd // log(1/frcpa(1+137/2^-8))
-data8 0xdd685f79ed8b265e , 0x00003ffd // log(1/frcpa(1+138/2^-8))
-data8 0xde9094bbc0e17b1d , 0x00003ffd // log(1/frcpa(1+139/2^-8))
-data8 0xe01c91b78440c425 , 0x00003ffd // log(1/frcpa(1+140/2^-8))
-
-data8 0xe14658f26997e729 , 0x00003ffd // log(1/frcpa(1+141/2^-8))
-data8 0xe270cdc2391e0d23 , 0x00003ffd // log(1/frcpa(1+142/2^-8))
-data8 0xe3ffce3a2aa64922 , 0x00003ffd // log(1/frcpa(1+143/2^-8))
-data8 0xe52bdb274ed82887 , 0x00003ffd // log(1/frcpa(1+144/2^-8))
-data8 0xe6589852e75d7df6 , 0x00003ffd // log(1/frcpa(1+145/2^-8))
-
-data8 0xe786068c79937a7d , 0x00003ffd // log(1/frcpa(1+146/2^-8))
-data8 0xe91903adad100911 , 0x00003ffd // log(1/frcpa(1+147/2^-8))
-data8 0xea481236f7d35bb0 , 0x00003ffd // log(1/frcpa(1+148/2^-8))
-data8 0xeb77d48c692e6b14 , 0x00003ffd // log(1/frcpa(1+149/2^-8))
-data8 0xeca84b83d7297b87 , 0x00003ffd // log(1/frcpa(1+150/2^-8))
-
-data8 0xedd977f4962aa158 , 0x00003ffd // log(1/frcpa(1+151/2^-8))
-data8 0xef7179a22f257754 , 0x00003ffd // log(1/frcpa(1+152/2^-8))
-data8 0xf0a450d139366ca7 , 0x00003ffd // log(1/frcpa(1+153/2^-8))
-data8 0xf1d7e0524ff9ffdb , 0x00003ffd // log(1/frcpa(1+154/2^-8))
-data8 0xf30c29036a8b6cae , 0x00003ffd // log(1/frcpa(1+155/2^-8))
-
-data8 0xf4412bc411ea8d92 , 0x00003ffd // log(1/frcpa(1+156/2^-8))
-data8 0xf576e97564c8619d , 0x00003ffd // log(1/frcpa(1+157/2^-8))
-data8 0xf6ad62fa1b5f172f , 0x00003ffd // log(1/frcpa(1+158/2^-8))
-data8 0xf7e499368b55c542 , 0x00003ffd // log(1/frcpa(1+159/2^-8))
-data8 0xf91c8d10abaffe22 , 0x00003ffd // log(1/frcpa(1+160/2^-8))
-
-data8 0xfa553f7018c966f3 , 0x00003ffd // log(1/frcpa(1+161/2^-8))
-data8 0xfb8eb13e185d802c , 0x00003ffd // log(1/frcpa(1+162/2^-8))
-data8 0xfcc8e3659d9bcbed , 0x00003ffd // log(1/frcpa(1+163/2^-8))
-data8 0xfe03d6d34d487fd2 , 0x00003ffd // log(1/frcpa(1+164/2^-8))
-data8 0xff3f8c7581e9f0ae , 0x00003ffd // log(1/frcpa(1+165/2^-8))
-
-data8 0x803e029e280173ae , 0x00003ffe // log(1/frcpa(1+166/2^-8))
-data8 0x80dca10cc52d0757 , 0x00003ffe // log(1/frcpa(1+167/2^-8))
-data8 0x817ba200632755a1 , 0x00003ffe // log(1/frcpa(1+168/2^-8))
-data8 0x821b05f3b01d6774 , 0x00003ffe // log(1/frcpa(1+169/2^-8))
-data8 0x82bacd623ff19d06 , 0x00003ffe // log(1/frcpa(1+170/2^-8))
-
-data8 0x835af8c88e7a8f47 , 0x00003ffe // log(1/frcpa(1+171/2^-8))
-data8 0x83c5f8299e2b4091 , 0x00003ffe // log(1/frcpa(1+172/2^-8))
-data8 0x8466cb43f3d87300 , 0x00003ffe // log(1/frcpa(1+173/2^-8))
-data8 0x850803a67c80ca4b , 0x00003ffe // log(1/frcpa(1+174/2^-8))
-data8 0x85a9a1d11a23b461 , 0x00003ffe // log(1/frcpa(1+175/2^-8))
-
-data8 0x864ba644a18e6e05 , 0x00003ffe // log(1/frcpa(1+176/2^-8))
-data8 0x86ee1182dcc432f7 , 0x00003ffe // log(1/frcpa(1+177/2^-8))
-data8 0x875a925d7e48c316 , 0x00003ffe // log(1/frcpa(1+178/2^-8))
-data8 0x87fdaa109d23aef7 , 0x00003ffe // log(1/frcpa(1+179/2^-8))
-data8 0x88a129ed4becfaf2 , 0x00003ffe // log(1/frcpa(1+180/2^-8))
-
-data8 0x89451278ecd7f9cf , 0x00003ffe // log(1/frcpa(1+181/2^-8))
-data8 0x89b29295f8432617 , 0x00003ffe // log(1/frcpa(1+182/2^-8))
-data8 0x8a572ac5a5496882 , 0x00003ffe // log(1/frcpa(1+183/2^-8))
-data8 0x8afc2d0ce3b2dadf , 0x00003ffe // log(1/frcpa(1+184/2^-8))
-data8 0x8b6a69c608cfd3af , 0x00003ffe // log(1/frcpa(1+185/2^-8))
-
-data8 0x8c101e106e899a83 , 0x00003ffe // log(1/frcpa(1+186/2^-8))
-data8 0x8cb63de258f9d626 , 0x00003ffe // log(1/frcpa(1+187/2^-8))
-data8 0x8d2539c5bd19e2b1 , 0x00003ffe // log(1/frcpa(1+188/2^-8))
-data8 0x8dcc0e064b29e6f1 , 0x00003ffe // log(1/frcpa(1+189/2^-8))
-data8 0x8e734f45d88357ae , 0x00003ffe // log(1/frcpa(1+190/2^-8))
-
-data8 0x8ee30cef034a20db , 0x00003ffe // log(1/frcpa(1+191/2^-8))
-data8 0x8f8b0515686d1d06 , 0x00003ffe // log(1/frcpa(1+192/2^-8))
-data8 0x90336bba039bf32f , 0x00003ffe // log(1/frcpa(1+193/2^-8))
-data8 0x90a3edd23d1c9d58 , 0x00003ffe // log(1/frcpa(1+194/2^-8))
-data8 0x914d0de2f5d61b32 , 0x00003ffe // log(1/frcpa(1+195/2^-8))
-
-data8 0x91be0c20d28173b5 , 0x00003ffe // log(1/frcpa(1+196/2^-8))
-data8 0x9267e737c06cd34a , 0x00003ffe // log(1/frcpa(1+197/2^-8))
-data8 0x92d962ae6abb1237 , 0x00003ffe // log(1/frcpa(1+198/2^-8))
-data8 0x9383fa6afbe2074c , 0x00003ffe // log(1/frcpa(1+199/2^-8))
-data8 0x942f0421651c1c4e , 0x00003ffe // log(1/frcpa(1+200/2^-8))
-
-data8 0x94a14a3845bb985e , 0x00003ffe // log(1/frcpa(1+201/2^-8))
-data8 0x954d133857f861e7 , 0x00003ffe // log(1/frcpa(1+202/2^-8))
-data8 0x95bfd96468e604c4 , 0x00003ffe // log(1/frcpa(1+203/2^-8))
-data8 0x9632d31cafafa858 , 0x00003ffe // log(1/frcpa(1+204/2^-8))
-data8 0x96dfaabd86fa1647 , 0x00003ffe // log(1/frcpa(1+205/2^-8))
-
-data8 0x9753261fcbb2a594 , 0x00003ffe // log(1/frcpa(1+206/2^-8))
-data8 0x9800c11b426b996d , 0x00003ffe // log(1/frcpa(1+207/2^-8))
-data8 0x9874bf4d45ae663c , 0x00003ffe // log(1/frcpa(1+208/2^-8))
-data8 0x99231f5ee9a74f79 , 0x00003ffe // log(1/frcpa(1+209/2^-8))
-data8 0x9997a18a56bcad28 , 0x00003ffe // log(1/frcpa(1+210/2^-8))
-
-data8 0x9a46c873a3267e79 , 0x00003ffe // log(1/frcpa(1+211/2^-8))
-data8 0x9abbcfc621eb6cb6 , 0x00003ffe // log(1/frcpa(1+212/2^-8))
-data8 0x9b310cb0d354c990 , 0x00003ffe // log(1/frcpa(1+213/2^-8))
-data8 0x9be14cf9e1b3515c , 0x00003ffe // log(1/frcpa(1+214/2^-8))
-data8 0x9c5710b8cbb73a43 , 0x00003ffe // log(1/frcpa(1+215/2^-8))
-
-data8 0x9ccd0abd301f399c , 0x00003ffe // log(1/frcpa(1+216/2^-8))
-data8 0x9d7e67f3bdce8888 , 0x00003ffe // log(1/frcpa(1+217/2^-8))
-data8 0x9df4ea81a99daa01 , 0x00003ffe // log(1/frcpa(1+218/2^-8))
-data8 0x9e6ba405a54514ba , 0x00003ffe // log(1/frcpa(1+219/2^-8))
-data8 0x9f1e21c8c7bb62b3 , 0x00003ffe // log(1/frcpa(1+220/2^-8))
-
-data8 0x9f956593f6b6355c , 0x00003ffe // log(1/frcpa(1+221/2^-8))
-data8 0xa00ce1092e5498c3 , 0x00003ffe // log(1/frcpa(1+222/2^-8))
-data8 0xa0c08309c4b912c1 , 0x00003ffe // log(1/frcpa(1+223/2^-8))
-data8 0xa1388a8c6faa2afa , 0x00003ffe // log(1/frcpa(1+224/2^-8))
-data8 0xa1b0ca7095b5f985 , 0x00003ffe // log(1/frcpa(1+225/2^-8))
-
-data8 0xa22942eb47534a00 , 0x00003ffe // log(1/frcpa(1+226/2^-8))
-data8 0xa2de62326449d0a3 , 0x00003ffe // log(1/frcpa(1+227/2^-8))
-data8 0xa357690f88bfe345 , 0x00003ffe // log(1/frcpa(1+228/2^-8))
-data8 0xa3d0a93f45169a4b , 0x00003ffe // log(1/frcpa(1+229/2^-8))
-data8 0xa44a22f7ffe65f30 , 0x00003ffe // log(1/frcpa(1+230/2^-8))
-
-data8 0xa500c5e5b4c1aa36 , 0x00003ffe // log(1/frcpa(1+231/2^-8))
-data8 0xa57ad064eb2ebbc2 , 0x00003ffe // log(1/frcpa(1+232/2^-8))
-data8 0xa5f5152dedf4384e , 0x00003ffe // log(1/frcpa(1+233/2^-8))
-data8 0xa66f9478856233ec , 0x00003ffe // log(1/frcpa(1+234/2^-8))
-data8 0xa6ea4e7cca02c32e , 0x00003ffe // log(1/frcpa(1+235/2^-8))
-
-data8 0xa765437325341ccf , 0x00003ffe // log(1/frcpa(1+236/2^-8))
-data8 0xa81e21e6c75b4020 , 0x00003ffe // log(1/frcpa(1+237/2^-8))
-data8 0xa899ab333fe2b9ca , 0x00003ffe // log(1/frcpa(1+238/2^-8))
-data8 0xa9157039c51ebe71 , 0x00003ffe // log(1/frcpa(1+239/2^-8))
-data8 0xa991713433c2b999 , 0x00003ffe // log(1/frcpa(1+240/2^-8))
-
-data8 0xaa0dae5cbcc048b3 , 0x00003ffe // log(1/frcpa(1+241/2^-8))
-data8 0xaa8a27ede5eb13ad , 0x00003ffe // log(1/frcpa(1+242/2^-8))
-data8 0xab06de228a9e3499 , 0x00003ffe // log(1/frcpa(1+243/2^-8))
-data8 0xab83d135dc633301 , 0x00003ffe // log(1/frcpa(1+244/2^-8))
-data8 0xac3fb076adc7fe7a , 0x00003ffe // log(1/frcpa(1+245/2^-8))
-
-data8 0xacbd3cbbe47988f1 , 0x00003ffe // log(1/frcpa(1+246/2^-8))
-data8 0xad3b06b1a5dc57c3 , 0x00003ffe // log(1/frcpa(1+247/2^-8))
-data8 0xadb90e94af887717 , 0x00003ffe // log(1/frcpa(1+248/2^-8))
-data8 0xae3754a218f7c816 , 0x00003ffe // log(1/frcpa(1+249/2^-8))
-data8 0xaeb5d9175437afa2 , 0x00003ffe // log(1/frcpa(1+250/2^-8))
-
-data8 0xaf349c322e9c7cee , 0x00003ffe // log(1/frcpa(1+251/2^-8))
-data8 0xafb39e30d1768d1c , 0x00003ffe // log(1/frcpa(1+252/2^-8))
-data8 0xb032df51c2c93116 , 0x00003ffe // log(1/frcpa(1+253/2^-8))
-data8 0xb0b25fd3e6035ad9 , 0x00003ffe // log(1/frcpa(1+254/2^-8))
-data8 0xb1321ff67cba178c , 0x00003ffe // log(1/frcpa(1+255/2^-8))
-ASM_SIZE_DIRECTIVE(log_table_2)
-
-
-.align 32
-.global log#
-.global log10#
+LOCAL_OBJECT_START(log_data)
+// coefficients of polynomial approximation
+data8 0x3FC2494104381A8E // A7
+data8 0xBFC5556D556BBB69 // A6
+//
+// two parts of ln(2)
+data8 0x3FE62E42FEF00000,0x3DD473DE6AF278ED
+//
+data8 0x8000000000000000,0x3FFF // 1.0
+//
+data8 0x3FC999999988B5E9 // A5
+data8 0xBFCFFFFFFFF6FFF5 // A4
+//
+// hi parts of ln(1/frcpa(1+i/256)), i=0...255
+data8 0x3F60040155D5889D // 0
+data8 0x3F78121214586B54 // 1
+data8 0x3F841929F96832EF // 2
+data8 0x3F8C317384C75F06 // 3
+data8 0x3F91A6B91AC73386 // 4
+data8 0x3F95BA9A5D9AC039 // 5
+data8 0x3F99D2A8074325F3 // 6
+data8 0x3F9D6B2725979802 // 7
+data8 0x3FA0C58FA19DFAA9 // 8
+data8 0x3FA2954C78CBCE1A // 9
+data8 0x3FA4A94D2DA96C56 // 10
+data8 0x3FA67C94F2D4BB58 // 11
+data8 0x3FA85188B630F068 // 12
+data8 0x3FAA6B8ABE73AF4C // 13
+data8 0x3FAC441E06F72A9E // 14
+data8 0x3FAE1E6713606D06 // 15
+data8 0x3FAFFA6911AB9300 // 16
+data8 0x3FB0EC139C5DA600 // 17
+data8 0x3FB1DBD2643D190B // 18
+data8 0x3FB2CC7284FE5F1C // 19
+data8 0x3FB3BDF5A7D1EE64 // 20
+data8 0x3FB4B05D7AA012E0 // 21
+data8 0x3FB580DB7CEB5701 // 22
+data8 0x3FB674F089365A79 // 23
+data8 0x3FB769EF2C6B568D // 24
+data8 0x3FB85FD927506A47 // 25
+data8 0x3FB9335E5D594988 // 26
+data8 0x3FBA2B0220C8E5F4 // 27
+data8 0x3FBB0004AC1A86AB // 28
+data8 0x3FBBF968769FCA10 // 29
+data8 0x3FBCCFEDBFEE13A8 // 30
+data8 0x3FBDA727638446A2 // 31
+data8 0x3FBEA3257FE10F79 // 32
+data8 0x3FBF7BE9FEDBFDE5 // 33
+data8 0x3FC02AB352FF25F3 // 34
+data8 0x3FC097CE579D204C // 35
+data8 0x3FC1178E8227E47B // 36
+data8 0x3FC185747DBECF33 // 37
+data8 0x3FC1F3B925F25D41 // 38
+data8 0x3FC2625D1E6DDF56 // 39
+data8 0x3FC2D1610C868139 // 40
+data8 0x3FC340C59741142E // 41
+data8 0x3FC3B08B6757F2A9 // 42
+data8 0x3FC40DFB08378003 // 43
+data8 0x3FC47E74E8CA5F7C // 44
+data8 0x3FC4EF51F6466DE4 // 45
+data8 0x3FC56092E02BA516 // 46
+data8 0x3FC5D23857CD74D4 // 47
+data8 0x3FC6313A37335D76 // 48
+data8 0x3FC6A399DABBD383 // 49
+data8 0x3FC70337DD3CE41A // 50
+data8 0x3FC77654128F6127 // 51
+data8 0x3FC7E9D82A0B022D // 52
+data8 0x3FC84A6B759F512E // 53
+data8 0x3FC8AB47D5F5A30F // 54
+data8 0x3FC91FE49096581B // 55
+data8 0x3FC981634011AA75 // 56
+data8 0x3FC9F6C407089664 // 57
+data8 0x3FCA58E729348F43 // 58
+data8 0x3FCABB55C31693AC // 59
+data8 0x3FCB1E104919EFD0 // 60
+data8 0x3FCB94EE93E367CA // 61
+data8 0x3FCBF851C067555E // 62
+data8 0x3FCC5C0254BF23A5 // 63
+data8 0x3FCCC000C9DB3C52 // 64
+data8 0x3FCD244D99C85673 // 65
+data8 0x3FCD88E93FB2F450 // 66
+data8 0x3FCDEDD437EAEF00 // 67
+data8 0x3FCE530EFFE71012 // 68
+data8 0x3FCEB89A1648B971 // 69
+data8 0x3FCF1E75FADF9BDE // 70
+data8 0x3FCF84A32EAD7C35 // 71
+data8 0x3FCFEB2233EA07CD // 72
+data8 0x3FD028F9C7035C1C // 73
+data8 0x3FD05C8BE0D9635A // 74
+data8 0x3FD085EB8F8AE797 // 75
+data8 0x3FD0B9C8E32D1911 // 76
+data8 0x3FD0EDD060B78080 // 77
+data8 0x3FD122024CF0063F // 78
+data8 0x3FD14BE2927AECD4 // 79
+data8 0x3FD180618EF18ADF // 80
+data8 0x3FD1B50BBE2FC63B // 81
+data8 0x3FD1DF4CC7CF242D // 82
+data8 0x3FD214456D0EB8D4 // 83
+data8 0x3FD23EC5991EBA49 // 84
+data8 0x3FD2740D9F870AFB // 85
+data8 0x3FD29ECDABCDFA03 // 86
+data8 0x3FD2D46602ADCCEE // 87
+data8 0x3FD2FF66B04EA9D4 // 88
+data8 0x3FD335504B355A37 // 89
+data8 0x3FD360925EC44F5C // 90
+data8 0x3FD38BF1C3337E74 // 91
+data8 0x3FD3C25277333183 // 92
+data8 0x3FD3EDF463C1683E // 93
+data8 0x3FD419B423D5E8C7 // 94
+data8 0x3FD44591E0539F48 // 95
+data8 0x3FD47C9175B6F0AD // 96
+data8 0x3FD4A8B341552B09 // 97
+data8 0x3FD4D4F39089019F // 98
+data8 0x3FD501528DA1F967 // 99
+data8 0x3FD52DD06347D4F6 // 100
+data8 0x3FD55A6D3C7B8A89 // 101
+data8 0x3FD5925D2B112A59 // 102
+data8 0x3FD5BF406B543DB1 // 103
+data8 0x3FD5EC433D5C35AD // 104
+data8 0x3FD61965CDB02C1E // 105
+data8 0x3FD646A84935B2A1 // 106
+data8 0x3FD6740ADD31DE94 // 107
+data8 0x3FD6A18DB74A58C5 // 108
+data8 0x3FD6CF31058670EC // 109
+data8 0x3FD6F180E852F0B9 // 110
+data8 0x3FD71F5D71B894EF // 111
+data8 0x3FD74D5AEFD66D5C // 112
+data8 0x3FD77B79922BD37D // 113
+data8 0x3FD7A9B9889F19E2 // 114
+data8 0x3FD7D81B037EB6A6 // 115
+data8 0x3FD8069E33827230 // 116
+data8 0x3FD82996D3EF8BCA // 117
+data8 0x3FD85855776DCBFA // 118
+data8 0x3FD8873658327CCE // 119
+data8 0x3FD8AA75973AB8CE // 120
+data8 0x3FD8D992DC8824E4 // 121
+data8 0x3FD908D2EA7D9511 // 122
+data8 0x3FD92C59E79C0E56 // 123
+data8 0x3FD95BD750EE3ED2 // 124
+data8 0x3FD98B7811A3EE5B // 125
+data8 0x3FD9AF47F33D406B // 126
+data8 0x3FD9DF270C1914A7 // 127
+data8 0x3FDA0325ED14FDA4 // 128
+data8 0x3FDA33440224FA78 // 129
+data8 0x3FDA57725E80C382 // 130
+data8 0x3FDA87D0165DD199 // 131
+data8 0x3FDAAC2E6C03F895 // 132
+data8 0x3FDADCCC6FDF6A81 // 133
+data8 0x3FDB015B3EB1E790 // 134
+data8 0x3FDB323A3A635948 // 135
+data8 0x3FDB56FA04462909 // 136
+data8 0x3FDB881AA659BC93 // 137
+data8 0x3FDBAD0BEF3DB164 // 138
+data8 0x3FDBD21297781C2F // 139
+data8 0x3FDC039236F08818 // 140
+data8 0x3FDC28CB1E4D32FC // 141
+data8 0x3FDC4E19B84723C1 // 142
+data8 0x3FDC7FF9C74554C9 // 143
+data8 0x3FDCA57B64E9DB05 // 144
+data8 0x3FDCCB130A5CEBAF // 145
+data8 0x3FDCF0C0D18F326F // 146
+data8 0x3FDD232075B5A201 // 147
+data8 0x3FDD490246DEFA6B // 148
+data8 0x3FDD6EFA918D25CD // 149
+data8 0x3FDD9509707AE52F // 150
+data8 0x3FDDBB2EFE92C554 // 151
+data8 0x3FDDEE2F3445E4AE // 152
+data8 0x3FDE148A1A2726CD // 153
+data8 0x3FDE3AFC0A49FF3F // 154
+data8 0x3FDE6185206D516D // 155
+data8 0x3FDE882578823D51 // 156
+data8 0x3FDEAEDD2EAC990C // 157
+data8 0x3FDED5AC5F436BE2 // 158
+data8 0x3FDEFC9326D16AB8 // 159
+data8 0x3FDF2391A21575FF // 160
+data8 0x3FDF4AA7EE03192C // 161
+data8 0x3FDF71D627C30BB0 // 162
+data8 0x3FDF991C6CB3B379 // 163
+data8 0x3FDFC07ADA69A90F // 164
+data8 0x3FDFE7F18EB03D3E // 165
+data8 0x3FE007C053C5002E // 166
+data8 0x3FE01B942198A5A0 // 167
+data8 0x3FE02F74400C64EA // 168
+data8 0x3FE04360BE7603AC // 169
+data8 0x3FE05759AC47FE33 // 170
+data8 0x3FE06B5F1911CF51 // 171
+data8 0x3FE078BF0533C568 // 172
+data8 0x3FE08CD9687E7B0E // 173
+data8 0x3FE0A10074CF9019 // 174
+data8 0x3FE0B5343A234476 // 175
+data8 0x3FE0C974C89431CD // 176
+data8 0x3FE0DDC2305B9886 // 177
+data8 0x3FE0EB524BAFC918 // 178
+data8 0x3FE0FFB54213A475 // 179
+data8 0x3FE114253DA97D9F // 180
+data8 0x3FE128A24F1D9AFF // 181
+data8 0x3FE1365252BF0864 // 182
+data8 0x3FE14AE558B4A92D // 183
+data8 0x3FE15F85A19C765B // 184
+data8 0x3FE16D4D38C119FA // 185
+data8 0x3FE18203C20DD133 // 186
+data8 0x3FE196C7BC4B1F3A // 187
+data8 0x3FE1A4A738B7A33C // 188
+data8 0x3FE1B981C0C9653C // 189
+data8 0x3FE1CE69E8BB106A // 190
+data8 0x3FE1DC619DE06944 // 191
+data8 0x3FE1F160A2AD0DA3 // 192
+data8 0x3FE2066D7740737E // 193
+data8 0x3FE2147DBA47A393 // 194
+data8 0x3FE229A1BC5EBAC3 // 195
+data8 0x3FE237C1841A502E // 196
+data8 0x3FE24CFCE6F80D9A // 197
+data8 0x3FE25B2C55CD5762 // 198
+data8 0x3FE2707F4D5F7C40 // 199
+data8 0x3FE285E0842CA383 // 200
+data8 0x3FE294294708B773 // 201
+data8 0x3FE2A9A2670AFF0C // 202
+data8 0x3FE2B7FB2C8D1CC0 // 203
+data8 0x3FE2C65A6395F5F5 // 204
+data8 0x3FE2DBF557B0DF42 // 205
+data8 0x3FE2EA64C3F97654 // 206
+data8 0x3FE3001823684D73 // 207
+data8 0x3FE30E97E9A8B5CC // 208
+data8 0x3FE32463EBDD34E9 // 209
+data8 0x3FE332F4314AD795 // 210
+data8 0x3FE348D90E7464CF // 211
+data8 0x3FE35779F8C43D6D // 212
+data8 0x3FE36621961A6A99 // 213
+data8 0x3FE37C299F3C366A // 214
+data8 0x3FE38AE2171976E7 // 215
+data8 0x3FE399A157A603E7 // 216
+data8 0x3FE3AFCCFE77B9D1 // 217
+data8 0x3FE3BE9D503533B5 // 218
+data8 0x3FE3CD7480B4A8A2 // 219
+data8 0x3FE3E3C43918F76C // 220
+data8 0x3FE3F2ACB27ED6C6 // 221
+data8 0x3FE4019C2125CA93 // 222
+data8 0x3FE4181061389722 // 223
+data8 0x3FE42711518DF545 // 224
+data8 0x3FE436194E12B6BF // 225
+data8 0x3FE445285D68EA69 // 226
+data8 0x3FE45BCC464C893A // 227
+data8 0x3FE46AED21F117FC // 228
+data8 0x3FE47A1527E8A2D3 // 229
+data8 0x3FE489445EFFFCCB // 230
+data8 0x3FE4A018BCB69835 // 231
+data8 0x3FE4AF5A0C9D65D7 // 232
+data8 0x3FE4BEA2A5BDBE87 // 233
+data8 0x3FE4CDF28F10AC46 // 234
+data8 0x3FE4DD49CF994058 // 235
+data8 0x3FE4ECA86E64A683 // 236
+data8 0x3FE503C43CD8EB68 // 237
+data8 0x3FE513356667FC57 // 238
+data8 0x3FE522AE0738A3D7 // 239
+data8 0x3FE5322E26867857 // 240
+data8 0x3FE541B5CB979809 // 241
+data8 0x3FE55144FDBCBD62 // 242
+data8 0x3FE560DBC45153C6 // 243
+data8 0x3FE5707A26BB8C66 // 244
+data8 0x3FE587F60ED5B8FF // 245
+data8 0x3FE597A7977C8F31 // 246
+data8 0x3FE5A760D634BB8A // 247
+data8 0x3FE5B721D295F10E // 248
+data8 0x3FE5C6EA94431EF9 // 249
+data8 0x3FE5D6BB22EA86F5 // 250
+data8 0x3FE5E6938645D38F // 251
+data8 0x3FE5F673C61A2ED1 // 252
+data8 0x3FE6065BEA385926 // 253
+data8 0x3FE6164BFA7CC06B // 254
+data8 0x3FE62643FECF9742 // 255
+//
+// lo parts of ln(1/frcpa(1+i/256)), i=0...255
+data4 0x20E70672 // 0
+data4 0x1F60A5D0 // 1
+data4 0x218EABA0 // 2
+data4 0x21403104 // 3
+data4 0x20E9B54E // 4
+data4 0x21EE1382 // 5
+data4 0x226014E3 // 6
+data4 0x2095E5C9 // 7
+data4 0x228BA9D4 // 8
+data4 0x22932B86 // 9
+data4 0x22608A57 // 10
+data4 0x220209F3 // 11
+data4 0x212882CC // 12
+data4 0x220D46E2 // 13
+data4 0x21FA4C28 // 14
+data4 0x229E5BD9 // 15
+data4 0x228C9838 // 16
+data4 0x2311F954 // 17
+data4 0x221365DF // 18
+data4 0x22BD0CB3 // 19
+data4 0x223D4BB7 // 20
+data4 0x22A71BBE // 21
+data4 0x237DB2FA // 22
+data4 0x23194C9D // 23
+data4 0x22EC639E // 24
+data4 0x2367E669 // 25
+data4 0x232E1D5F // 26
+data4 0x234A639B // 27
+data4 0x2365C0E0 // 28
+data4 0x234646C1 // 29
+data4 0x220CBF9C // 30
+data4 0x22A00FD4 // 31
+data4 0x2306A3F2 // 32
+data4 0x23745A9B // 33
+data4 0x2398D756 // 34
+data4 0x23DD0B6A // 35
+data4 0x23DE338B // 36
+data4 0x23A222DF // 37
+data4 0x223164F8 // 38
+data4 0x23B4E87B // 39
+data4 0x23D6CCB8 // 40
+data4 0x220C2099 // 41
+data4 0x21B86B67 // 42
+data4 0x236D14F1 // 43
+data4 0x225A923F // 44
+data4 0x22748723 // 45
+data4 0x22200D13 // 46
+data4 0x23C296EA // 47
+data4 0x2302AC38 // 48
+data4 0x234B1996 // 49
+data4 0x2385E298 // 50
+data4 0x23175BE5 // 51
+data4 0x2193F482 // 52
+data4 0x23BFEA90 // 53
+data4 0x23D70A0C // 54
+data4 0x231CF30A // 55
+data4 0x235D9E90 // 56
+data4 0x221AD0CB // 57
+data4 0x22FAA08B // 58
+data4 0x23D29A87 // 59
+data4 0x20C4B2FE // 60
+data4 0x2381B8B7 // 61
+data4 0x23F8D9FC // 62
+data4 0x23EAAE7B // 63
+data4 0x2329E8AA // 64
+data4 0x23EC0322 // 65
+data4 0x2357FDCB // 66
+data4 0x2392A9AD // 67
+data4 0x22113B02 // 68
+data4 0x22DEE901 // 69
+data4 0x236A6D14 // 70
+data4 0x2371D33E // 71
+data4 0x2146F005 // 72
+data4 0x23230B06 // 73
+data4 0x22F1C77D // 74
+data4 0x23A89FA3 // 75
+data4 0x231D1241 // 76
+data4 0x244DA96C // 77
+data4 0x23ECBB7D // 78
+data4 0x223E42B4 // 79
+data4 0x23801BC9 // 80
+data4 0x23573263 // 81
+data4 0x227C1158 // 82
+data4 0x237BD749 // 83
+data4 0x21DDBAE9 // 84
+data4 0x23401735 // 85
+data4 0x241D9DEE // 86
+data4 0x23BC88CB // 87
+data4 0x2396D5F1 // 88
+data4 0x23FC89CF // 89
+data4 0x2414F9A2 // 90
+data4 0x2474A0F5 // 91
+data4 0x24354B60 // 92
+data4 0x23C1EB40 // 93
+data4 0x2306DD92 // 94
+data4 0x24353B6B // 95
+data4 0x23CD1701 // 96
+data4 0x237C7A1C // 97
+data4 0x245793AA // 98
+data4 0x24563695 // 99
+data4 0x23C51467 // 100
+data4 0x24476B68 // 101
+data4 0x212585A9 // 102
+data4 0x247B8293 // 103
+data4 0x2446848A // 104
+data4 0x246A53F8 // 105
+data4 0x246E496D // 106
+data4 0x23ED1D36 // 107
+data4 0x2314C258 // 108
+data4 0x233244A7 // 109
+data4 0x245B7AF0 // 110
+data4 0x24247130 // 111
+data4 0x22D67B38 // 112
+data4 0x2449F620 // 113
+data4 0x23BBC8B8 // 114
+data4 0x237D3BA0 // 115
+data4 0x245E8F13 // 116
+data4 0x2435573F // 117
+data4 0x242DE666 // 118
+data4 0x2463BC10 // 119
+data4 0x2466587D // 120
+data4 0x2408144B // 121
+data4 0x2405F0E5 // 122
+data4 0x22381CFF // 123
+data4 0x24154F9B // 124
+data4 0x23A4E96E // 125
+data4 0x24052967 // 126
+data4 0x2406963F // 127
+data4 0x23F7D3CB // 128
+data4 0x2448AFF4 // 129
+data4 0x24657A21 // 130
+data4 0x22FBC230 // 131
+data4 0x243C8DEA // 132
+data4 0x225DC4B7 // 133
+data4 0x23496EBF // 134
+data4 0x237C2B2B // 135
+data4 0x23A4A5B1 // 136
+data4 0x2394E9D1 // 137
+data4 0x244BC950 // 138
+data4 0x23C7448F // 139
+data4 0x2404A1AD // 140
+data4 0x246511D5 // 141
+data4 0x24246526 // 142
+data4 0x23111F57 // 143
+data4 0x22868951 // 144
+data4 0x243EB77F // 145
+data4 0x239F3DFF // 146
+data4 0x23089666 // 147
+data4 0x23EBFA6A // 148
+data4 0x23C51312 // 149
+data4 0x23E1DD5E // 150
+data4 0x232C0944 // 151
+data4 0x246A741F // 152
+data4 0x2414DF8D // 153
+data4 0x247B5546 // 154
+data4 0x2415C980 // 155
+data4 0x24324ABD // 156
+data4 0x234EB5E5 // 157
+data4 0x2465E43E // 158
+data4 0x242840D1 // 159
+data4 0x24444057 // 160
+data4 0x245E56F0 // 161
+data4 0x21AE30F8 // 162
+data4 0x23FB3283 // 163
+data4 0x247A4D07 // 164
+data4 0x22AE314D // 165
+data4 0x246B7727 // 166
+data4 0x24EAD526 // 167
+data4 0x24B41DC9 // 168
+data4 0x24EE8062 // 169
+data4 0x24A0C7C4 // 170
+data4 0x24E8DA67 // 171
+data4 0x231120F7 // 172
+data4 0x24401FFB // 173
+data4 0x2412DD09 // 174
+data4 0x248C131A // 175
+data4 0x24C0A7CE // 176
+data4 0x243DD4C8 // 177
+data4 0x24457FEB // 178
+data4 0x24DEEFBB // 179
+data4 0x243C70AE // 180
+data4 0x23E7A6FA // 181
+data4 0x24C2D311 // 182
+data4 0x23026255 // 183
+data4 0x2437C9B9 // 184
+data4 0x246BA847 // 185
+data4 0x2420B448 // 186
+data4 0x24C4CF5A // 187
+data4 0x242C4981 // 188
+data4 0x24DE1525 // 189
+data4 0x24F5CC33 // 190
+data4 0x235A85DA // 191
+data4 0x24A0B64F // 192
+data4 0x244BA0A4 // 193
+data4 0x24AAF30A // 194
+data4 0x244C86F9 // 195
+data4 0x246D5B82 // 196
+data4 0x24529347 // 197
+data4 0x240DD008 // 198
+data4 0x24E98790 // 199
+data4 0x2489B0CE // 200
+data4 0x22BC29AC // 201
+data4 0x23F37C7A // 202
+data4 0x24987FE8 // 203
+data4 0x22AFE20B // 204
+data4 0x24C8D7C2 // 205
+data4 0x24B28B7D // 206
+data4 0x23B6B271 // 207
+data4 0x24C77CB6 // 208
+data4 0x24EF1DCA // 209
+data4 0x24A4F0AC // 210
+data4 0x24CF113E // 211
+data4 0x2496BBAB // 212
+data4 0x23C7CC8A // 213
+data4 0x23AE3961 // 214
+data4 0x2410A895 // 215
+data4 0x23CE3114 // 216
+data4 0x2308247D // 217
+data4 0x240045E9 // 218
+data4 0x24974F60 // 219
+data4 0x242CB39F // 220
+data4 0x24AB8D69 // 221
+data4 0x23436788 // 222
+data4 0x24305E9E // 223
+data4 0x243E71A9 // 224
+data4 0x23C2A6B3 // 225
+data4 0x23FFE6CF // 226
+data4 0x2322D801 // 227
+data4 0x24515F21 // 228
+data4 0x2412A0D6 // 229
+data4 0x24E60D44 // 230
+data4 0x240D9251 // 231
+data4 0x247076E2 // 232
+data4 0x229B101B // 233
+data4 0x247B12DE // 234
+data4 0x244B9127 // 235
+data4 0x2499EC42 // 236
+data4 0x21FC3963 // 237
+data4 0x23E53266 // 238
+data4 0x24CE102D // 239
+data4 0x23CC45D2 // 240
+data4 0x2333171D // 241
+data4 0x246B3533 // 242
+data4 0x24931129 // 243
+data4 0x24405FFA // 244
+data4 0x24CF464D // 245
+data4 0x237095CD // 246
+data4 0x24F86CBD // 247
+data4 0x24E2D84B // 248
+data4 0x21ACBB44 // 249
+data4 0x24F43A8C // 250
+data4 0x249DB931 // 251
+data4 0x24A385EF // 252
+data4 0x238B1279 // 253
+data4 0x2436213E // 254
+data4 0x24F18A3B // 255
+LOCAL_OBJECT_END(log_data)
+
+
+LOCAL_OBJECT_START(log10_data)
+// coefficients of polynomial approximation
+data8 0x3FC2494104381A8E // A7
+data8 0xBFC5556D556BBB69 // A6
+//
+// two parts of ln(2)/ln(10)
+data8 0x3FD3441350900000, 0x3DCEF3FDE623E256
+//
+data8 0xDE5BD8A937287195,0x3FFD // 1/ln(10)
+//
+data8 0x3FC999999988B5E9 // A5
+data8 0xBFCFFFFFFFF6FFF5 // A4
+//
+// Hi parts of ln(1/frcpa(1+i/256))/ln(10), i=0...255
+data8 0x3F4BD27045BFD024 // 0
+data8 0x3F64E84E793A474A // 1
+data8 0x3F7175085AB85FF0 // 2
+data8 0x3F787CFF9D9147A5 // 3
+data8 0x3F7EA9D372B89FC8 // 4
+data8 0x3F82DF9D95DA961C // 5
+data8 0x3F866DF172D6372B // 6
+data8 0x3F898D79EF5EEDEF // 7
+data8 0x3F8D22ADF3F9579C // 8
+data8 0x3F9024231D30C398 // 9
+data8 0x3F91F23A98897D49 // 10
+data8 0x3F93881A7B818F9E // 11
+data8 0x3F951F6E1E759E35 // 12
+data8 0x3F96F2BCE7ADC5B4 // 13
+data8 0x3F988D362CDF359E // 14
+data8 0x3F9A292BAF010981 // 15
+data8 0x3F9BC6A03117EB97 // 16
+data8 0x3F9D65967DE3AB08 // 17
+data8 0x3F9F061167FC31E7 // 18
+data8 0x3FA05409E4F7819B // 19
+data8 0x3FA125D0432EA20D // 20
+data8 0x3FA1F85D440D299B // 21
+data8 0x3FA2AD755749617C // 22
+data8 0x3FA381772A00E603 // 23
+data8 0x3FA45643E165A70A // 24
+data8 0x3FA52BDD034475B8 // 25
+data8 0x3FA5E3966B7E9295 // 26
+data8 0x3FA6BAAF47C5B244 // 27
+data8 0x3FA773B3E8C4F3C7 // 28
+data8 0x3FA84C51EBEE8D15 // 29
+data8 0x3FA906A6786FC1CA // 30
+data8 0x3FA9C197ABF00DD6 // 31
+data8 0x3FAA9C78712191F7 // 32
+data8 0x3FAB58C09C8D637C // 33
+data8 0x3FAC15A8BCDD7B7E // 34
+data8 0x3FACD331E2C2967B // 35
+data8 0x3FADB11ED766ABF4 // 36
+data8 0x3FAE70089346A9E6 // 37
+data8 0x3FAF2F96C6754AED // 38
+data8 0x3FAFEFCA8D451FD5 // 39
+data8 0x3FB0585283764177 // 40
+data8 0x3FB0B913AAC7D3A6 // 41
+data8 0x3FB11A294F2569F5 // 42
+data8 0x3FB16B51A2696890 // 43
+data8 0x3FB1CD03ADACC8BD // 44
+data8 0x3FB22F0BDD7745F5 // 45
+data8 0x3FB2916ACA38D1E7 // 46
+data8 0x3FB2F4210DF7663C // 47
+data8 0x3FB346A6C3C49065 // 48
+data8 0x3FB3A9FEBC605409 // 49
+data8 0x3FB3FD0C10A3AA54 // 50
+data8 0x3FB46107D3540A81 // 51
+data8 0x3FB4C55DD16967FE // 52
+data8 0x3FB51940330C000A // 53
+data8 0x3FB56D620EE7115E // 54
+data8 0x3FB5D2ABCF26178D // 55
+data8 0x3FB6275AA5DEBF81 // 56
+data8 0x3FB68D4EAF26D7EE // 57
+data8 0x3FB6E28C5C54A28D // 58
+data8 0x3FB7380B9665B7C7 // 59
+data8 0x3FB78DCCC278E85B // 60
+data8 0x3FB7F50C2CF25579 // 61
+data8 0x3FB84B5FD5EAEFD7 // 62
+data8 0x3FB8A1F6BAB2B226 // 63
+data8 0x3FB8F8D144557BDF // 64
+data8 0x3FB94FEFDCD61D92 // 65
+data8 0x3FB9A752EF316149 // 66
+data8 0x3FB9FEFAE7611EDF // 67
+data8 0x3FBA56E8325F5C86 // 68
+data8 0x3FBAAF1B3E297BB3 // 69
+data8 0x3FBB079479C372AC // 70
+data8 0x3FBB6054553B12F7 // 71
+data8 0x3FBBB95B41AB5CE5 // 72
+data8 0x3FBC12A9B13FE079 // 73
+data8 0x3FBC6C4017382BEA // 74
+data8 0x3FBCB41FBA42686C // 75
+data8 0x3FBD0E38CE73393E // 76
+data8 0x3FBD689B2193F132 // 77
+data8 0x3FBDC3472B1D285F // 78
+data8 0x3FBE0C06300D528B // 79
+data8 0x3FBE6738190E394B // 80
+data8 0x3FBEC2B50D208D9A // 81
+data8 0x3FBF0C1C2B936827 // 82
+data8 0x3FBF68216C9CC726 // 83
+data8 0x3FBFB1F6381856F3 // 84
+data8 0x3FC00742AF4CE5F8 // 85
+data8 0x3FC02C64906512D2 // 86
+data8 0x3FC05AF1E63E03B4 // 87
+data8 0x3FC0804BEA723AA8 // 88
+data8 0x3FC0AF1FD6711526 // 89
+data8 0x3FC0D4B2A88059FF // 90
+data8 0x3FC0FA5EF136A06C // 91
+data8 0x3FC1299A4FB3E305 // 92
+data8 0x3FC14F806253C3EC // 93
+data8 0x3FC175805D1587C1 // 94
+data8 0x3FC19B9A637CA294 // 95
+data8 0x3FC1CB5FC26EDE16 // 96
+data8 0x3FC1F1B4E65F2590 // 97
+data8 0x3FC218248B5DC3E5 // 98
+data8 0x3FC23EAED62ADC76 // 99
+data8 0x3FC26553EBD337BC // 100
+data8 0x3FC28C13F1B118FF // 101
+data8 0x3FC2BCAA14381385 // 102
+data8 0x3FC2E3A740B7800E // 103
+data8 0x3FC30ABFD8F333B6 // 104
+data8 0x3FC331F403985096 // 105
+data8 0x3FC35943E7A6068F // 106
+data8 0x3FC380AFAC6E7C07 // 107
+data8 0x3FC3A8377997B9E5 // 108
+data8 0x3FC3CFDB771C9ADB // 109
+data8 0x3FC3EDA90D39A5DE // 110
+data8 0x3FC4157EC09505CC // 111
+data8 0x3FC43D7113FB04C0 // 112
+data8 0x3FC4658030AD1CCE // 113
+data8 0x3FC48DAC404638F5 // 114
+data8 0x3FC4B5F56CBBB869 // 115
+data8 0x3FC4DE5BE05E7582 // 116
+data8 0x3FC4FCBC0776FD85 // 117
+data8 0x3FC525561E9256EE // 118
+data8 0x3FC54E0DF3198865 // 119
+data8 0x3FC56CAB7112BDE2 // 120
+data8 0x3FC59597BA735B15 // 121
+data8 0x3FC5BEA23A506FD9 // 122
+data8 0x3FC5DD7E08DE382E // 123
+data8 0x3FC606BDD3F92355 // 124
+data8 0x3FC6301C518A501E // 125
+data8 0x3FC64F3770618915 // 126
+data8 0x3FC678CC14C1E2D7 // 127
+data8 0x3FC6981005ED2947 // 128
+data8 0x3FC6C1DB5F9BB335 // 129
+data8 0x3FC6E1488ECD2880 // 130
+data8 0x3FC70B4B2E7E41B8 // 131
+data8 0x3FC72AE209146BF8 // 132
+data8 0x3FC7551C81BD8DCF // 133
+data8 0x3FC774DD76CC43BD // 134
+data8 0x3FC79F505DB00E88 // 135
+data8 0x3FC7BF3BDE099F30 // 136
+data8 0x3FC7E9E7CAC437F8 // 137
+data8 0x3FC809FE4902D00D // 138
+data8 0x3FC82A2757995CBD // 139
+data8 0x3FC85525C625E098 // 140
+data8 0x3FC8757A79831887 // 141
+data8 0x3FC895E2058D8E02 // 142
+data8 0x3FC8C13437695531 // 143
+data8 0x3FC8E1C812EF32BE // 144
+data8 0x3FC9026F112197E8 // 145
+data8 0x3FC923294888880A // 146
+data8 0x3FC94EEA4B8334F2 // 147
+data8 0x3FC96FD1B639FC09 // 148
+data8 0x3FC990CCA66229AB // 149
+data8 0x3FC9B1DB33334842 // 150
+data8 0x3FC9D2FD740E6606 // 151
+data8 0x3FC9FF49EEDCB553 // 152
+data8 0x3FCA209A84FBCFF7 // 153
+data8 0x3FCA41FF1E43F02B // 154
+data8 0x3FCA6377D2CE9377 // 155
+data8 0x3FCA8504BAE0D9F5 // 156
+data8 0x3FCAA6A5EEEBEFE2 // 157
+data8 0x3FCAC85B878D7878 // 158
+data8 0x3FCAEA259D8FFA0B // 159
+data8 0x3FCB0C0449EB4B6A // 160
+data8 0x3FCB2DF7A5C50299 // 161
+data8 0x3FCB4FFFCA70E4D1 // 162
+data8 0x3FCB721CD17157E2 // 163
+data8 0x3FCB944ED477D4EC // 164
+data8 0x3FCBB695ED655C7C // 165
+data8 0x3FCBD8F2364AEC0F // 166
+data8 0x3FCBFB63C969F4FF // 167
+data8 0x3FCC1DEAC134D4E9 // 168
+data8 0x3FCC4087384F4F80 // 169
+data8 0x3FCC6339498F09E1 // 170
+data8 0x3FCC86010FFC076B // 171
+data8 0x3FCC9D3D065C5B41 // 172
+data8 0x3FCCC029375BA079 // 173
+data8 0x3FCCE32B66978BA4 // 174
+data8 0x3FCD0643AFD51404 // 175
+data8 0x3FCD29722F0DEA45 // 176
+data8 0x3FCD4CB70070FE43 // 177
+data8 0x3FCD6446AB3F8C95 // 178
+data8 0x3FCD87B0EF71DB44 // 179
+data8 0x3FCDAB31D1FE99A6 // 180
+data8 0x3FCDCEC96FDC888E // 181
+data8 0x3FCDE69088763579 // 182
+data8 0x3FCE0A4E4A25C1FF // 183
+data8 0x3FCE2E2315755E32 // 184
+data8 0x3FCE461322D1648A // 185
+data8 0x3FCE6A0E95C7787B // 186
+data8 0x3FCE8E216243DD60 // 187
+data8 0x3FCEA63AF26E007C // 188
+data8 0x3FCECA74ED15E0B7 // 189
+data8 0x3FCEEEC692CCD259 // 190
+data8 0x3FCF070A36B8D9C0 // 191
+data8 0x3FCF2B8393E34A2D // 192
+data8 0x3FCF5014EF538A5A // 193
+data8 0x3FCF68833AF1B17F // 194
+data8 0x3FCF8D3CD9F3F04E // 195
+data8 0x3FCFA5C61ADD93E9 // 196
+data8 0x3FCFCAA8567EBA79 // 197
+data8 0x3FCFE34CC8743DD8 // 198
+data8 0x3FD0042BFD74F519 // 199
+data8 0x3FD016BDF6A18017 // 200
+data8 0x3FD023262F907322 // 201
+data8 0x3FD035CCED8D32A1 // 202
+data8 0x3FD042430E869FFB // 203
+data8 0x3FD04EBEC842B2DF // 204
+data8 0x3FD06182E84FD4AB // 205
+data8 0x3FD06E0CB609D383 // 206
+data8 0x3FD080E60BEC8F12 // 207
+data8 0x3FD08D7E0D894735 // 208
+data8 0x3FD0A06CC96A2055 // 209
+data8 0x3FD0AD131F3B3C55 // 210
+data8 0x3FD0C01771E775FB // 211
+data8 0x3FD0CCCC3CAD6F4B // 212
+data8 0x3FD0D986D91A34A8 // 213
+data8 0x3FD0ECA9B8861A2D // 214
+data8 0x3FD0F972F87FF3D5 // 215
+data8 0x3FD106421CF0E5F7 // 216
+data8 0x3FD11983EBE28A9C // 217
+data8 0x3FD12661E35B7859 // 218
+data8 0x3FD13345D2779D3B // 219
+data8 0x3FD146A6F597283A // 220
+data8 0x3FD15399E81EA83D // 221
+data8 0x3FD16092E5D3A9A6 // 222
+data8 0x3FD17413C3B7AB5D // 223
+data8 0x3FD1811BF629D6FA // 224
+data8 0x3FD18E2A47B46685 // 225
+data8 0x3FD19B3EBE1A4418 // 226
+data8 0x3FD1AEE9017CB450 // 227
+data8 0x3FD1BC0CED7134E1 // 228
+data8 0x3FD1C93712ABC7FF // 229
+data8 0x3FD1D66777147D3E // 230
+data8 0x3FD1EA3BD1286E1C // 231
+data8 0x3FD1F77BED932C4C // 232
+data8 0x3FD204C25E1B031F // 233
+data8 0x3FD2120F28CE69B1 // 234
+data8 0x3FD21F6253C48D00 // 235
+data8 0x3FD22CBBE51D60A9 // 236
+data8 0x3FD240CE4C975444 // 237
+data8 0x3FD24E37F8ECDAE7 // 238
+data8 0x3FD25BA8215AF7FC // 239
+data8 0x3FD2691ECC29F042 // 240
+data8 0x3FD2769BFFAB2DFF // 241
+data8 0x3FD2841FC23952C9 // 242
+data8 0x3FD291AA1A384978 // 243
+data8 0x3FD29F3B0E15584A // 244
+data8 0x3FD2B3A0EE479DF7 // 245
+data8 0x3FD2C142842C09E5 // 246
+data8 0x3FD2CEEACCB7BD6C // 247
+data8 0x3FD2DC99CE82FF20 // 248
+data8 0x3FD2EA4F902FD7D9 // 249
+data8 0x3FD2F80C186A25FC // 250
+data8 0x3FD305CF6DE7B0F6 // 251
+data8 0x3FD3139997683CE7 // 252
+data8 0x3FD3216A9BB59E7C // 253
+data8 0x3FD32F4281A3CEFE // 254
+data8 0x3FD33D2150110091 // 255
+//
+// Lo parts of ln(1/frcpa(1+i/256))/ln(10), i=0...255
+data4 0x1FB0EB5A // 0
+data4 0x206E5EE3 // 1
+data4 0x208F3609 // 2
+data4 0x2070EB03 // 3
+data4 0x1F314BAE // 4
+data4 0x217A889D // 5
+data4 0x21E63650 // 6
+data4 0x21C2F4A3 // 7
+data4 0x2192A10C // 8
+data4 0x1F84B73E // 9
+data4 0x2243FBCA // 10
+data4 0x21BD9C51 // 11
+data4 0x213C542B // 12
+data4 0x21047386 // 13
+data4 0x21217D8F // 14
+data4 0x226791B7 // 15
+data4 0x204CCE66 // 16
+data4 0x2234CE9F // 17
+data4 0x220675E2 // 18
+data4 0x22B8E5BA // 19
+data4 0x22C12D14 // 20
+data4 0x211D41F0 // 21
+data4 0x228507F3 // 22
+data4 0x22F7274B // 23
+data4 0x22A7FDD1 // 24
+data4 0x2244A06E // 25
+data4 0x215DCE69 // 26
+data4 0x22F5C961 // 27
+data4 0x22EBEF29 // 28
+data4 0x222A2CB6 // 29
+data4 0x22B9FE00 // 30
+data4 0x22E79EB7 // 31
+data4 0x222F9607 // 32
+data4 0x2189D87F // 33
+data4 0x2236DB45 // 34
+data4 0x22ED77FB // 35
+data4 0x21CB70F0 // 36
+data4 0x21B8ACE8 // 37
+data4 0x22EC58C1 // 38
+data4 0x22CFCC1C // 39
+data4 0x2343E77A // 40
+data4 0x237FBC7F // 41
+data4 0x230D472E // 42
+data4 0x234686FB // 43
+data4 0x23770425 // 44
+data4 0x223977EC // 45
+data4 0x2345800A // 46
+data4 0x237BC351 // 47
+data4 0x23191502 // 48
+data4 0x232BAC12 // 49
+data4 0x22692421 // 50
+data4 0x234D409D // 51
+data4 0x22EC3214 // 52
+data4 0x2376C916 // 53
+data4 0x22B00DD1 // 54
+data4 0x2309D910 // 55
+data4 0x22F925FD // 56
+data4 0x22A63A7B // 57
+data4 0x2106264A // 58
+data4 0x234227F9 // 59
+data4 0x1ECB1978 // 60
+data4 0x23460A62 // 61
+data4 0x232ED4B1 // 62
+data4 0x226DDC38 // 63
+data4 0x1F101A73 // 64
+data4 0x21B1F82B // 65
+data4 0x22752F19 // 66
+data4 0x2320BC15 // 67
+data4 0x236EEC5E // 68
+data4 0x23404D3E // 69
+data4 0x2304C517 // 70
+data4 0x22F7441A // 71
+data4 0x230D3D7A // 72
+data4 0x2264A9DF // 73
+data4 0x22410CC8 // 74
+data4 0x2342CCCB // 75
+data4 0x23560BD4 // 76
+data4 0x237BBFFE // 77
+data4 0x2373A206 // 78
+data4 0x22C871B9 // 79
+data4 0x2354B70C // 80
+data4 0x232EDB33 // 81
+data4 0x235DB680 // 82
+data4 0x230EF422 // 83
+data4 0x235316CA // 84
+data4 0x22EEEE8B // 85
+data4 0x2375C88C // 86
+data4 0x235ABD21 // 87
+data4 0x23A0D232 // 88
+data4 0x23F5FFB5 // 89
+data4 0x23D3CEC8 // 90
+data4 0x22A92204 // 91
+data4 0x238C64DF // 92
+data4 0x23B82896 // 93
+data4 0x22D633B8 // 94
+data4 0x23861E93 // 95
+data4 0x23CB594B // 96
+data4 0x2330387E // 97
+data4 0x21CD4702 // 98
+data4 0x2284C505 // 99
+data4 0x23D6995C // 100
+data4 0x23F6C807 // 101
+data4 0x239CEF5C // 102
+data4 0x239442B0 // 103
+data4 0x22B35EE5 // 104
+data4 0x2391E9A4 // 105
+data4 0x23A390F5 // 106
+data4 0x2349AC9C // 107
+data4 0x23FA5535 // 108
+data4 0x21E3A46A // 109
+data4 0x23B44ABA // 110
+data4 0x23CEA8E0 // 111
+data4 0x23F647DC // 112
+data4 0x2390D1A8 // 113
+data4 0x23D0CFA2 // 114
+data4 0x236E0872 // 115
+data4 0x23B88B91 // 116
+data4 0x2283C359 // 117
+data4 0x232F647F // 118
+data4 0x23122CD7 // 119
+data4 0x232CF564 // 120
+data4 0x232630FD // 121
+data4 0x23BEE1C8 // 122
+data4 0x23B2BD30 // 123
+data4 0x2301F1C0 // 124
+data4 0x23CE4D67 // 125
+data4 0x23A353C9 // 126
+data4 0x238086E8 // 127
+data4 0x22D0D29E // 128
+data4 0x23A3B3C8 // 129
+data4 0x23F69F4B // 130
+data4 0x23EA3C21 // 131
+data4 0x23951C88 // 132
+data4 0x2372AFFC // 133
+data4 0x23A6D1A8 // 134
+data4 0x22BBBAF4 // 135
+data4 0x227FA3DD // 136
+data4 0x23804D9B // 137
+data4 0x232D771F // 138
+data4 0x239CB57B // 139
+data4 0x2303CF34 // 140
+data4 0x22218C2A // 141
+data4 0x23991BEE // 142
+data4 0x23EB3596 // 143
+data4 0x230487FA // 144
+data4 0x2135DF4C // 145
+data4 0x2380FD2D // 146
+data4 0x23EB75E9 // 147
+data4 0x211C62C8 // 148
+data4 0x23F518F1 // 149
+data4 0x23FEF882 // 150
+data4 0x239097C7 // 151
+data4 0x223E2BDA // 152
+data4 0x23988F89 // 153
+data4 0x22E4A4AD // 154
+data4 0x23F03D9C // 155
+data4 0x23F5018F // 156
+data4 0x23E1E250 // 157
+data4 0x23FD3D90 // 158
+data4 0x22DEE2FF // 159
+data4 0x238342AB // 160
+data4 0x22E6736F // 161
+data4 0x233AFC28 // 162
+data4 0x2395F661 // 163
+data4 0x23D8B991 // 164
+data4 0x23CD58D5 // 165
+data4 0x21941FD6 // 166
+data4 0x23352915 // 167
+data4 0x235D09EE // 168
+data4 0x22DC7EF9 // 169
+data4 0x238BC9F3 // 170
+data4 0x2397DF8F // 171
+data4 0x2380A7BB // 172
+data4 0x23EFF48C // 173
+data4 0x21E67408 // 174
+data4 0x236420F7 // 175
+data4 0x22C8DFB5 // 176
+data4 0x239B5D35 // 177
+data4 0x23BDC09D // 178
+data4 0x239E822C // 179
+data4 0x23984F0A // 180
+data4 0x23EF2119 // 181
+data4 0x23F738B8 // 182
+data4 0x23B66187 // 183
+data4 0x23B06AD7 // 184
+data4 0x2369140F // 185
+data4 0x218DACE6 // 186
+data4 0x21DF23F1 // 187
+data4 0x235D8B34 // 188
+data4 0x23460333 // 189
+data4 0x23F11D62 // 190
+data4 0x23C37147 // 191
+data4 0x22B2AE2A // 192
+data4 0x23949211 // 193
+data4 0x23B69799 // 194
+data4 0x23DBEC75 // 195
+data4 0x229A6FB3 // 196
+data4 0x23FC6C60 // 197
+data4 0x22D01FFC // 198
+data4 0x235985F0 // 199
+data4 0x23F7ECA5 // 200
+data4 0x23F924D3 // 201
+data4 0x2381B92F // 202
+data4 0x243A0FBE // 203
+data4 0x24712D72 // 204
+data4 0x24594E2F // 205
+data4 0x220CD12A // 206
+data4 0x23D87FB0 // 207
+data4 0x2338288A // 208
+data4 0x242BB2CC // 209
+data4 0x220F6265 // 210
+data4 0x23BB7FE3 // 211
+data4 0x2301C0A2 // 212
+data4 0x246709AB // 213
+data4 0x23A619E2 // 214
+data4 0x24030E3B // 215
+data4 0x233C36CC // 216
+data4 0x241AAB77 // 217
+data4 0x243D41A3 // 218
+data4 0x23834A60 // 219
+data4 0x236AC7BF // 220
+data4 0x23B6D597 // 221
+data4 0x210E9474 // 222
+data4 0x242156E6 // 223
+data4 0x243A1D68 // 224
+data4 0x2472187C // 225
+data4 0x23834E86 // 226
+data4 0x23CA0807 // 227
+data4 0x24745887 // 228
+data4 0x23E2B0E1 // 229
+data4 0x2421EB67 // 230
+data4 0x23DCC64E // 231
+data4 0x22DF71D1 // 232
+data4 0x238D5ECA // 233
+data4 0x23CDE86F // 234
+data4 0x24131F45 // 235
+data4 0x240FE4E2 // 236
+data4 0x2317731A // 237
+data4 0x24015C76 // 238
+data4 0x2301A4E8 // 239
+data4 0x23E52A6D // 240
+data4 0x247D8A0D // 241
+data4 0x23DFEEBA // 242
+data4 0x22139FEC // 243
+data4 0x2454A112 // 244
+data4 0x23C21E28 // 245
+data4 0x2460D813 // 246
+data4 0x24258924 // 247
+data4 0x2425680F // 248
+data4 0x24194D1E // 249
+data4 0x24242C2F // 250
+data4 0x243DDE5E // 251
+data4 0x23DEB388 // 252
+data4 0x23E0E6EB // 253
+data4 0x24393E74 // 254
+data4 0x241B1863 // 255
+LOCAL_OBJECT_END(log10_data)
+
+
+
+// Code
+//==============================================================
-// log10 has p7 true, p8 false
-// log has p8 true, p7 false
+// log has p13 true, p14 false
+// log10 has p14 true, p13 false
.section .text
-.proc log10#
-.align 32
-
-log10:
-#ifdef _LIBC
-.global __ieee754_log10
-.type __ieee754_log10,@function
-__ieee754_log10:
-#endif
+GLOBAL_IEEE754_ENTRY(log10)
{ .mfi
- alloc r32=ar.pfs,1,15,4,0
- frcpa.s1 log_C,p9 = f1,f8
- cmp.eq.unc p7,p8 = r0, r0
-}
-{ .mfb
- addl log_AD_1 = @ltoff(log_table_1), gp
- fnorm.s1 log_NORM_f8 = f8
- br.sptk L(LOG_LOG10_X)
+ getf.exp GR_Exp = f8 // if x is unorm then must recompute
+ frcpa.s1 FR_RcpX,p0 = f1,f8
+ mov GR_05 = 0xFFFE // biased exponent of A2=0.5
}
-;;
-
-.endp log10
-ASM_SIZE_DIRECTIVE(log10)
-ASM_SIZE_DIRECTIVE(__ieee754_log10)
-
-
-.section .text
-.proc log#
-.align 32
-log:
-#ifdef _LIBC
-.global __ieee754_log
-.type __ieee754_log,@function
-__ieee754_log:
-#endif
+{ .mlx
+ addl GR_ad_1 = @ltoff(log10_data),gp
+ movl GR_A3 = 0x3fd5555555555557 // double precision memory
+ // representation of A3
+};;
{ .mfi
- alloc r32=ar.pfs,1,15,4,0
- frcpa.s1 log_C,p9 = f1,f8
- cmp.eq.unc p8,p7 = r0, r0
+ getf.sig GR_Sig = f8 // get significand to calculate index
+ fclass.m p8,p0 = f8,9 // is x positive unorm?
+ mov GR_xorg = 0x3fefe // double precision memory msb of 255/256
}
-{ .mfi
- addl log_AD_1 = @ltoff(log_table_1), gp
- fnorm.s1 log_NORM_f8 = f8
- nop.i 999
-}
-;;
-
-L(LOG_LOG10_X):
+{ .mib
+ ld8 GR_ad_1 = [GR_ad_1]
+ cmp.eq p14,p13 = r0,r0 // set p14 to 1 for log10
+ br.cond.sptk log_log10_common
+};;
+GLOBAL_IEEE754_END(log10)
+GLOBAL_IEEE754_ENTRY(log)
{ .mfi
- ld8 log_AD_1 = [log_AD_1]
- fclass.m.unc p15,p0 = f8, 0x0b // Test for x=unorm
- mov log_GR_fff9 = 0xfff9
-}
-{ .mfi
- mov log_GR_half_exp = 0x0fffe
- fms.s1 log_w = f8,f1,f1
- mov log_GR_exp_17_ones = 0x1ffff
-}
-;;
-
-{ .mmi
- getf.exp log_GR_signexp_f8 = f8 // If x unorm then must recompute
- setf.exp log_half = log_GR_half_exp // Form 0.5 = -Q1
- nop.i 999
-}
-;;
-
-{ .mmb
- adds log_AD_2 = 0x30, log_AD_1
- mov log_GR_exp_16_ones = 0xffff
-(p15) br.cond.spnt L(LOG_DENORM)
-}
-;;
-
-L(LOG_COMMON):
-{.mfi
- ldfpd log_P5,log_P4 = [log_AD_1],16
- fclass.m.unc p6,p0 = f8, 0xc3 // Test for x=nan
- and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
+ getf.exp GR_Exp = f8 // if x is unorm then must recompute
+ frcpa.s1 FR_RcpX,p0 = f1,f8
+ mov GR_05 = 0xfffe
}
-{.mfi
- ldfpd log_P3,log_P2 = [log_AD_2],16
- nop.f 999
- nop.i 999
-}
-;;
+{ .mlx
+ addl GR_ad_1 = @ltoff(log_data),gp
+ movl GR_A3 = 0x3fd5555555555557 // double precision memory
+ // representation of A3
+};;
{ .mfi
- ldfpd log_Q8,log_Q7 = [log_AD_1],16
- fclass.m.unc p11,p0 = f8, 0x21 // Test for x=+inf
- sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
+ getf.sig GR_Sig = f8 // get significand to calculate index
+ fclass.m p8,p0 = f8,9 // is x positive unorm?
+ mov GR_xorg = 0x3fefe // double precision memory msb of 255/256
}
{ .mfi
- ldfpd log_Q6,log_Q5 = [log_AD_2],16
- nop.f 999
- nop.i 999
-}
-;;
-
+ ld8 GR_ad_1 = [GR_ad_1]
+ nop.f 0
+ cmp.eq p13,p14 = r0,r0 // set p13 to 1 for log
+};;
+log_log10_common:
{ .mfi
- ldfpd log_Q4,log_Q3 = [log_AD_1],16
- fma.s1 log_wsq = log_w, log_w, f0
- nop.i 999
-}
-{ .mfb
- ldfpd log_Q2,log_Q1 = [log_AD_2],16
-(p6) fma.d.s0 f8 = f8,f1,f0 // quietize nan result if x=nan
-(p6) br.ret.spnt b0 // Exit for x=nan
+ getf.d GR_x = f8 // double precision memory representation of x
+ fclass.m p9,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
+ dep.z GR_dx = 3, 44, 2 // Create 0x0000300000000000
+ // Difference between double precision
+ // memory representations of 257/256 and
+ // 255/256
}
-;;
-
-
{ .mfi
- setf.sig log_int_Nfloat = log_GR_true_exp_f8
- fcmp.eq.s1 p10,p0 = log_NORM_f8, f1 // Test for x=+1.0
- nop.i 999
-}
-{ .mfb
- nop.m 999
- fms.s1 log_r = log_C,f8,f1
-(p11) br.ret.spnt b0 // Exit for x=+inf
-}
-;;
-
-
-{ .mmf
- getf.sig log_GR_significand_f8 = log_NORM_f8
- ldfe log_inv_ln10 = [log_AD_2],16
- fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
-}
-;;
-
-
-{ .mfb
- nop.m 999
-(p10) fmerge.s f8 = f0, f0
-(p10) br.ret.spnt b0 // Exit for x=1.0
-;;
-}
-
+ setf.exp FR_A2 = GR_05 // create A2
+ fnorm.s1 FR_NormX = f8
+ mov GR_bias = 0xffff
+};;
+
{ .mfi
- getf.exp log_GR_signexp_w = log_w
- fclass.m.unc p12,p0 = f8, 0x3a // Test for x neg norm, unorm, inf
- shl log_GR_index = log_GR_significand_f8,1
+ setf.d FR_A3 = GR_A3 // create A3
+ fcmp.eq.s1 p12,p0 = f1,f8 // is x equal to 1.0?
+ dep.z GR_xorg = GR_xorg, 44, 19 // 0x3fefe00000000000
+ // double precision memory
+ // representation of 255/256
}
-;;
+{ .mib
+ add GR_ad_2 = 0x30,GR_ad_1 // address of A5,A4
+ add GR_ad_3 = 0x840,GR_ad_1 // address of ln(1/frcpa) lo parts
+(p8) br.cond.spnt log_positive_unorms
+};;
+log_core:
{ .mfi
- ldfe log_log2 = [log_AD_2],16
- fnma.s1 log_rp_q10 = log_half, log_wsq, log_w
- shr.u log_GR_index = log_GR_index,56
+ ldfpd FR_A7,FR_A6 = [GR_ad_1],16
+ fclass.m p10,p0 = f8,0x3A // is x < 0?
+ sub GR_Nm1 = GR_Exp,GR_05 // biased exponent - 0xFFFE, i.e. unbiased_exponent_of_x + 1
}
-{ .mfb
- nop.m 999
- fma.s1 log_w3 = log_wsq, log_w, f0
-(p6) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x=0
-;;
-}
-
-
{ .mfi
- and log_GR_exp_w = log_GR_exp_17_ones, log_GR_signexp_w
- fma.s1 log_w4 = log_wsq, log_wsq, f0
- nop.i 999
-}
-{ .mfb
- shladd log_AD_2 = log_GR_index,4,log_AD_2
- fma.s1 log_rsq = log_r, log_r, f0
-(p12) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x<0
-;;
-}
+ ldfpd FR_A5,FR_A4 = [GR_ad_2],16
+(p9) fma.d.s0 f8 = f8,f1,f0 // set V-flag
+ sub GR_N = GR_Exp,GR_bias // unbiased_exponent_of_x
+};;
{ .mfi
- ldfe log_T = [log_AD_2]
- fma.s1 log_rp_p4 = log_P5, log_r, log_P4
- nop.i 999
+ setf.sig FR_N = GR_N // copy unbiased exponent of x to significand
+ fms.s1 FR_r = FR_RcpX,f8,f1 // range reduction for |x-1|>=1/256
+ extr.u GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
}
-{ .mfi
- nop.m 999
- fma.s1 log_rp_p32 = log_P3, log_r, log_P2
- nop.i 999
-;;
-}
-
+{ .mib
+ sub GR_x = GR_x, GR_xorg // get diff between x and 255/256
+ cmp.gtu p6, p7 = 2, GR_Nm1 // p6 true if 0.5 <= x < 2
+(p9) br.ret.spnt b0 // exit for NaN, NaT and +Inf
+};;
{ .mfi
- nop.m 999
- fma.s1 log_rp_q7 = log_Q8, log_w, log_Q7
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 log_rp_q65 = log_Q6, log_w, log_Q5
- nop.i 999
-;;
+ ldfpd FR_Ln2hi,FR_Ln2lo = [GR_ad_1],16
+ fclass.m p11,p0 = f8,0x07 // is x = 0?
+ shladd GR_ad_3 = GR_Ind,2,GR_ad_3 // address of Tlo
}
+{ .mib
+ shladd GR_ad_2 = GR_Ind,3,GR_ad_2 // address of Thi
+(p6) cmp.leu p6, p7 = GR_x, GR_dx // 255/256 <= x <= 257/256
+(p10) br.cond.spnt log_negatives // jump if x is negative
+};;
-// p13 <== large w log
-// p14 <== small w log
+// p6 is true if |x-1| < 1/256
+// p7 is true if |x-1| >= 1/256
{ .mfi
-(p8) cmp.ge.unc p13,p14 = log_GR_exp_w, log_GR_fff9
- fma.s1 log_rp_q3 = log_Q4, log_w, log_Q3
- nop.i 999
-;;
-}
+ ldfd FR_Thi = [GR_ad_2]
+(p6) fms.s1 FR_r = f8,f1,f1 // range reduction for |x-1|<1/256
+ nop.i 0
+};;
-// p10 <== large w log10
-// p11 <== small w log10
-{ .mfi
-(p7) cmp.ge.unc p10,p11 = log_GR_exp_w, log_GR_fff9
- fcvt.xf log_Nfloat = log_int_Nfloat
- nop.i 999
+{ .mmi
+(p7) ldfs FR_Tlo = [GR_ad_3]
+ nop.m 0
+ nop.i 0
}
+{ .mfb
+ nop.m 0
+(p12) fma.d.s0 f8 = f0,f0,f0
+(p12) br.ret.spnt b0 // exit for +1.0
+};;
+.pred.rel "mutex",p6,p7
{ .mfi
- nop.m 999
- fma.s1 log_rp_q21 = log_Q2, log_w3, log_rp_q10
- nop.i 999 ;;
+(p6) mov GR_NearOne = 1
+ fms.s1 FR_A32 = FR_A3,FR_r,FR_A2 // A3*r-A2
+(p7) mov GR_NearOne = 0
}
+{ .mfb
+ ldfe FR_InvLn10 = [GR_ad_1],16
+ fma.s1 FR_r2 = FR_r,FR_r,f0 // r^2
+(p11) br.cond.spnt log_zeroes // jump if x is zero
+};;
{ .mfi
- nop.m 999
- fma.s1 log_rcube = log_rsq, log_r, f0
- nop.i 999
+ nop.m 0
+ fma.s1 FR_A6 = FR_A7,FR_r,FR_A6 // A7*r+A6
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s1 log_rp_p10 = log_rsq, log_P1, log_r
- nop.i 999
-;;
-}
+(p7) cmp.eq.unc p9,p0 = r0,r0 // set p9 if |x-1| >= 1/256
+ fma.s1 FR_A4 = FR_A5,FR_r,FR_A4 // A5*r+A4
+(p14) cmp.eq.unc p8,p0 = 1,GR_NearOne // set p8 to 1 if it's log10
+ // and argument near 1.0
+};;
{ .mfi
- nop.m 999
- fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag on +denormal input
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32
- nop.i 999
-;;
-}
-
+(p6) getf.exp GR_rexp = FR_r // Get signexp of x-1
+(p7) fcvt.xf FR_N = FR_N
+(p8) cmp.eq p9,p6 = r0,r0 // Also set p9 and clear p6 if log10
+ // and arg near 1
+};;
{ .mfi
- nop.m 999
- fma.s1 log_w6 = log_w3, log_w3, f0
- nop.i 999
+ nop.m 0
+ fma.s1 FR_r4 = FR_r2,FR_r2,f0 // r^4
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s1 log_Qlo = log_rp_q7, log_wsq, log_rp_q65
- nop.i 999
-}
-;;
+ nop.m 0
+(p8) fma.s1 FR_NxLn2pT = f0,f0,f0 // Clear NxLn2pT if log10 near 1
+ nop.i 0
+};;
{ .mfi
- nop.m 999
- fma.s1 log_Qhi = log_rp_q3, log_w4, log_rp_q21
- nop.i 999 ;;
+ nop.m 0
+ // (A3*r-A2)*r^2+r
+ fma.s1 FR_A321 = FR_A32,FR_r2,FR_r
+ mov GR_mask = 0x1ffff
}
-
-
{ .mfi
- nop.m 999
- fma.s1 log_T_plus_Nlog2 = log_Nfloat,log_log2, log_T
- nop.i 999 ;;
-}
+ nop.m 0
+ // (A7*r+A6)*r^2+(A5*r+A4)
+ fma.s1 FR_A4 = FR_A6,FR_r2,FR_A4
+ nop.i 0
+};;
{ .mfi
- nop.m 999
- fma.s1 log_r2P_r = log_rp_p2, log_rcube, log_rp_p10
- nop.i 999 ;;
+(p6) and GR_rexp = GR_rexp, GR_mask
+ // N*Ln2hi+Thi
+(p7) fma.s1 FR_NxLn2hipThi = FR_N,FR_Ln2hi,FR_Thi
+ nop.i 0
}
+{ .mfi
+ nop.m 0
+ // N*Ln2lo+Tlo
+(p7) fma.s1 FR_NxLn2lopTlo = FR_N,FR_Ln2lo,FR_Tlo
+ nop.i 0
+};;
-
-// small w, log <== p14
{ .mfi
- nop.m 999
-(p14) fma.d f8 = log_Qlo, log_w6, log_Qhi
- nop.i 999
+(p6) sub GR_rexp = GR_rexp, GR_bias // unbiased exponent of x-1
+(p9) fma.s1 f8 = FR_A4,FR_r4,FR_A321 // P(r) if |x-1| >= 1/256 or
+ // log10 and |x-1| < 1/256
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s1 log_Q = log_Qlo, log_w6, log_Qhi
- nop.i 999 ;;
-}
-
+ nop.m 0
+ // (N*Ln2hi+Thi) + (N*Ln2lo+Tlo)
+(p7) fma.s1 FR_NxLn2pT = FR_NxLn2hipThi,f1,FR_NxLn2lopTlo
+ nop.i 0
+};;
{ .mfi
- nop.m 999
-(p10) fma.s1 log_log10_hi = log_T_plus_Nlog2, log_inv_ln10,f0
- nop.i 999 ;;
-}
+(p6) cmp.gt.unc p10, p6 = -40, GR_rexp // Test |x-1| < 2^-40
+ nop.f 0
+ nop.i 0
+};;
-// large w, log <== p13
-.pred.rel "mutex",p13,p10
{ .mfi
- nop.m 999
-(p13) fadd.d f8 = log_T_plus_Nlog2, log_r2P_r
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p10) fma.s1 log_log10_lo = log_inv_ln10, log_r2P_r,f0
- nop.i 999 ;;
-}
-
+ nop.m 0
+(p10) fma.d.s0 f8 = FR_A32,FR_r2,FR_r // log(x) if |x-1| < 2^-40
+ nop.i 0
+};;
-// small w, log10 <== p11
+.pred.rel "mutex",p6,p9
{ .mfi
- nop.m 999
-(p11) fma.d f8 = log_inv_ln10,log_Q,f0
- nop.i 999 ;;
-}
-
-// large w, log10 <== p10
-{ .mfb
- nop.m 999
-(p10) fma.d f8 = log_log10_hi, f1, log_log10_lo
- br.ret.sptk b0
-;;
+ nop.m 0
+(p6) fma.d.s0 f8 = FR_A4,FR_r4,FR_A321 // log(x) if 2^-40 <= |x-1| < 1/256
+ nop.i 0
}
-
-L(LOG_DENORM):
{ .mfb
- getf.exp log_GR_signexp_f8 = log_NORM_f8
- nop.f 999
- br.cond.sptk L(LOG_COMMON)
-}
-;;
-
-L(LOG_ZERO_NEG):
-
-// qnan snan inf norm unorm 0 -+
-// 0 0 0 0 0 1 11 0x7
-// 0 0 1 1 1 0 10 0x3a
-
-// Save x (f8) in f10
-{ .mfi
- nop.m 999
- fmerge.s f10 = f8,f8
- nop.i 999 ;;
-}
-
-// p8 p9 means ln(+-0) = -inf
-// p7 p10 means log(+-0) = -inf
-
-// p13 means ln(-)
-// p14 means log(-)
-
+ nop.m 0
+(p9) fma.d.s0 f8 = f8,FR_InvLn10,FR_NxLn2pT // result if |x-1| >= 1/256
+ // or log10 and |x-1| < 1/256
+ br.ret.sptk b0
+};;
-{ .mfi
- nop.m 999
- fmerge.ns f6 = f1,f1 // Form -1.0
- nop.i 999 ;;
-}
+.align 32
+log_positive_unorms:
+{ .mmf
+ getf.exp GR_Exp = FR_NormX // recompute biased exponent
+ getf.d GR_x = FR_NormX // recompute double precision x
+ fcmp.eq.s1 p12,p0 = f1,FR_NormX // is x equal to 1.0?
+};;
-// p9 means ln(+-0) = -inf
-// p10 means log(+-0) = -inf
-// Log(+-0) = -inf
+{ .mfb
+ getf.sig GR_Sig = FR_NormX // recompute significand
+ fcmp.eq.s0 p15, p0 = f8, f0 // set denormal flag
+ br.cond.sptk log_core
+};;
+.align 32
+log_zeroes:
{ .mfi
- nop.m 999
-(p8) fclass.m.unc p9,p0 = f10, 0x07
- nop.i 999
+ nop.m 0
+ fmerge.s FR_X = f8,f8 // keep input argument for subsequent
+ // call of __libm_error_support#
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p10,p0 = f10, 0x07
- nop.i 999 ;;
-}
-
-
-// p13 ln(-)
-// p14 log(-)
+ nop.m 0
+ fms.s1 FR_tmp = f0,f0,f1 // -1.0
+ nop.i 0
+};;
-// Log(-inf, -normal, -unnormal) = QNAN indefinite
-{ .mfi
- nop.m 999
-(p8) fclass.m.unc p13,p0 = f10, 0x3a
- nop.i 999
-}
+.pred.rel "mutex",p13,p14
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p14,p0 = f10, 0x3a
- nop.i 999 ;;
+(p13) mov GR_TAG = 2 // set libm error in case of log
+ frcpa.s0 f8,p0 = FR_tmp,f0 // log(+/-0) should be equal to -INF.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of FR_tmp/f0.
+ // As far as FR_tmp is -1 it'll be -INF
+ nop.i 0
}
+{ .mib
+(p14) mov GR_TAG = 8 // set libm error in case of log10
+ nop.i 0
+ br.cond.sptk log_libm_err
+};;
-
-.pred.rel "mutex",p9,p10
-{ .mfi
-(p9) mov log_GR_tag = 2
-(p9) frcpa f8,p11 = f6,f0
- nop.i 999
-}
+.align 32
+log_negatives:
{ .mfi
-(p10) mov log_GR_tag = 8
-(p10) frcpa f8,p12 = f6,f0
- nop.i 999 ;;
-}
+ nop.m 0
+ fmerge.s FR_X = f8,f8
+ nop.i 0
+};;
.pred.rel "mutex",p13,p14
{ .mfi
-(p13) mov log_GR_tag = 3
-(p13) frcpa f8,p11 = f0,f0
- nop.i 999
-}
-{ .mfb
-(p14) mov log_GR_tag = 9
-(p14) frcpa f8,p12 = f0,f0
- br.cond.sptk __libm_error_region ;;
-}
-.endp log
-ASM_SIZE_DIRECTIVE(log)
-ASM_SIZE_DIRECTIVE(__ieee754_log)
-
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
+(p13) mov GR_TAG = 3 // set libm error in case of log
+ frcpa.s0 f8,p0 = f0,f0 // log(negatives) should be equal to NaN.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of f0/f0 i.e. NaN.
+(p14) mov GR_TAG = 9 // set libm error in case of log10
+};;
+.align 32
+log_libm_err:
+{ .mmi
+ alloc r32 = ar.pfs,1,4,4,0
+ mov GR_Parameter_TAG = GR_TAG
+ nop.i 0
+};;
+GLOBAL_IEEE754_END(log)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y = -32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp = -64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP = gp // Save gp
};;
-
-// (2)
{ .mmi
- stfd [GR_Parameter_Y] = f1,16 // STORE Parameter 2 on stack
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0 = b0 // Save b0
};;
.body
-// (3)
{ .mib
- stfd [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-// (4)
{ .mmi
- ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
diff --git a/sysdeps/ia64/fpu/e_logf.S b/sysdeps/ia64/fpu/e_logf.S
index 829d0abed0..0ca6d3f2c8 100644
--- a/sysdeps/ia64/fpu/e_logf.S
+++ b/sysdeps/ia64/fpu/e_logf.S
@@ -1,10 +1,10 @@
.file "logf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,861 +20,1072 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 3/01/00 Initial version
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 03/01/00 Initial version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 1/10/01 Improved speed, fixed flags for neg denormals
-//
+// 01/10/01 Improved speed, fixed flags for neg denormals
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 05/23/02 Modified algorithm. Now only one polynomial is used
+// for |x-1| >= 1/256 and for |x-1| < 1/256
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// float logf(float)
// float log10f(float)
//
+//
// Overview of operation
//==============================================================
// Background
+// ----------
//
-// Consider x = 2^N 1.f1 f2 f3 f4...f63
-// Log(x) = log(frcpa(x) x/frcpa(x))
-// = log(1/frcpa(x)) + log(frcpa(x) x)
-// = -log(frcpa(x)) + log(frcpa(x) x)
+// This algorithm is based on fact that
+// log(a b) = log(a) + log(b).
//
-// frcpa(x) = 2^-N frcpa((1.f1 f2 ... f63)
+// In our case we have x = 2^N f, where 1 <= f < 2.
+// So
+// log(x) = log(2^N f) = log(2^N) + log(f) = N*log(2) + log(f)
//
-// -log(frcpa(x)) = -log(C)
-// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
+// To calculate log(f) we do following
+// log(f) = log(f * frcpa(f) / frcpa(f)) =
+// = log(f * frcpa(f)) + log(1/frcpa(f))
//
-// -log(frcpa(x)) = -log(C)
-// = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
+// According to the definition of IA-64's frcpa instruction, its result
+// is a floating-point value that approximates 1/f using a lookup on the
+// top 8 bits of the input number's significand with relative
+// error < 2^(-8.886). So we have the following
//
-// -log(frcpa(x)) = -log(C)
-// = +Nlog2 + log(frcpa(1.f1 f2 ... f63))
+// |(1/f - frcpa(f)) / (1/f)| = |1 - f*frcpa(f)| < 1/256
//
-// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
-
-// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
-// Log(x) = +Nlog2 - log(/frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
-// Log(x) = +Nlog2 + T + log(frcpa(x) x)
+// and
+//
+// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
+// = log(1 + r) + T
+//
+// The first value can be computed by polynomial P(r) approximating
+// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
+// value defined by top 8 bit of f.
+//
+// Finally we have that log(x) ~ (N*log(2) + T) + P(r)
+//
+// Note that if input argument is close to 1.0 (in our case it means
+// that |1 - x| < 1/256) we can use just polynomial approximation
+// because x = 2^0 * f = f = 1 + r and
+// log(x) = log(1 + r) ~ P(r)
+//
+//
+// To compute log10(x) we just use identity:
//
-// Log(x) = +Nlog2 + T + log(C x)
+// log10(x) = log(x)/log(10)
//
-// Cx = 1 + r
+// so we have that
//
-// Log(x) = +Nlog2 + T + log(1+r)
-// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
+// log10(x) = (N*log(2) + T + log(1+r)) / log(10) =
+// = N*(log(2)/log(10)) + (T/log(10)) + log(1 + r)/log(10)
//
-// 1.f1 f2 ... f8 has 256 entries.
-// They are 1 + k/2^8, k = 0 ... 255
-// These 256 values are the table entries.
//
// Implementation
-//===============
-// CASE 1: |x-1| >= 2^-8
-// C = frcpa(x)
-// r = C * x - 1
+// --------------
+// It can be seen that formulas for log and log10 differ from one another
+// only by coefficients and tabular values. Namely, both log and log10 are
+// calculated as (N*L1 + T) + L2*Series(r) where in case of log
+// L1 = log(2)
+// T = log(1/frcpa(x))
+// L2 = 1.0
+// and in case of log10
+// L1 = log(2)/log(10)
+// T = log(1/frcpa(x))/log(10)
+// L2 = 1.0/log(10)
//
-// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4
+// So common code can be used, with two different entry points that set
+// pointers to the base address of the corresponding data sets containing
+// the values of L2,T and prepare the integer representation of L1 needed
+// for the following setf instruction.
//
-// x = f * 2*n where f is 1.f_1f_2f_3....f_63
-// Nfloat = float(n) where n is the true unbiased exponent
-// pre-index = f_1f_2....f_8
-// index = pre_index * 16
-// get the dxt table entry at index + offset = T
+// Note that both log and log10 use common approximation polynomial
+// it means we need only one set of coefficients of approximation.
//
-// result = (T + Nfloat * log(2)) + rseries
+// 1. Computation of log(x) for |x-1| >= 1/256
+// InvX = frcpa(x)
+// r = InvX*x - 1
+// P(r) = r*((1 - A2*r) + r^2*(A3 - A4*r)) = r*P2(r),
+// A4,A3,A2 are created with setf instruction.
+// We use Taylor series and so A4 = 1/4, A3 = 1/3,
+// A2 = 1/2 rounded to double.
//
-// The T table is calculated as follows
-// Form x_k = 1 + k/2^8 where k goes from 0... 255
-// y_k = frcpa(x_k)
-// log(1/y_k) in quad and round to double
-
-// CASE 2: |x-1| < 2^-6
-// w = x - 1
+// N = float(n) where n is true unbiased exponent of x
//
-// Form wseries = w + Q1*w^2 + Q2*w^3 + Q3*w^4
+// T is the tabular value of log(1/frcpa(x)) calculated in quad precision
+// and rounded to double. To load T we take bits 55 to 62 of the register
+// format significand of x as the index and calculate the address
+// ad_T = table_base_addr + 8 * index
//
-// result = wseries
-
-// Special values
+// L2 (1.0 or 1.0/log(10) depending on function) is calculated in quad
+// precision and rounded to double; it's loaded from memory
+//
+// L1 (log(2) or log10(2) depending on function) is calculated in quad
+// precision and rounded to double; it's created with setf.
+//
+// And final result = P2(r)*(r*L2) + (T + N*L1)
+//
+//
+// 2. Computation of log(x) for |x-1| < 1/256
+// r = x - 1
+// P(r) = r*((1 - A2*r) + r^2*(A3 - A4*r)) = r*P2(r),
+// A4,A3,A2 are the same as in case |x-1| >= 1/256
+//
+// And final result = P2(r)*(r*L2)
+//
+// 3. How we determine whether the input argument satisfies |x-1| < 1/256.
+//
+// To do it we analyze the biased exponent and significand of the input argument.
+//
+// a) First we test whether the biased exponent equals 0xFFFE or 0xFFFF (i.e.
+// we test whether 0.5 <= x < 2). This comparison can be performed using
+// the unsigned version of the cmp instruction in such a way:
+// biased_exponent_of_x - 0xFFFE < 2
+//
+//
+// b) Second (in case the result of a) is true) we need to compare x
+// with 1-1/256 and 1+1/256 or in register format representation with
+// 0xFFFEFF00000000000000 and 0xFFFF8080000000000000 correspondingly.
+// Since the biased exponent of x here can be equal only to 0xFFFE or
+// 0xFFFF we need to test only the last bit of it. Also the significand always
+// has the implicit bit set to 1, which can be excluded from the comparison.
+// Thus it's quite enough to generate a 64-bit integer ix whose bits are
+// ix[63] = biased_exponent_of_x[0] and ix[62-0] = significand_of_x[62-0]
+// and compare it with 0x7F00000000000000 and 0x8080000000000000 (those
+// obtained like ix from the register representations of 255/256 and
+// 257/256). This comparison can be made like in a), using the unsigned
+// version of cmp i.e. ix - 0x7F00000000000000 < 0x0180000000000000.
+// 0x0180000000000000 is the difference between 0x8080000000000000 and
+// 0x7F00000000000000.
+//
+// Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
+// filtered and processed on special branches.
+//
+//
+// Special values
//==============================================================
-
-
-// log(+0) = -inf
-// log(-0) = -inf
-
-// log(+qnan) = +qnan
-// log(-qnan) = -qnan
-// log(+snan) = +qnan
-// log(-snan) = -qnan
-
-// log(-n) = QNAN Indefinite
-// log(-inf) = QNAN Indefinite
-
-// log(+inf) = +inf
-
+//
+// logf(+0) = -inf
+// logf(-0) = -inf
+//
+// logf(+qnan) = +qnan
+// logf(-qnan) = -qnan
+// logf(+snan) = +qnan
+// logf(-snan) = -qnan
+//
+// logf(-n) = QNAN Indefinite
+// logf(-inf) = QNAN Indefinite
+//
+// logf(+inf) = +inf
+//
// Registers used
//==============================================================
-// Floating Point registers used:
+// Floating Point registers used:
// f8, input
-// f9 -> f15, f32 -> f47
-
-// General registers used:
-// r32 -> r51
-
+// f12 -> f14, f33 -> f39
+//
+// General registers used:
+// r8 -> r11
+// r14 -> r19
+//
// Predicate registers used:
-// p6 -> p15
+// p6 -> p12
-// p8 log base e
-// p6 log base e special
-// p9 used in the frcpa
-// p13 log base e large W
-// p14 log base e small w
-
-// p7 log base 10
-// p10 log base 10 large W
-// p11 log base 10 small w
-// p12 log base 10 special
-
-#include "libm_support.h"
// Assembly macros
//==============================================================
-log_int_Nfloat = f9
-log_Nfloat = f10
-
-log_P3 = f11
-log_P2 = f12
-log_P1 = f13
-log_inv_ln10 = f14
-log_log2 = f15
-
-log_w = f32
-log_T = f33
-log_rp_p32 = f34
-log_rp_p2 = f35
-log_rp_p10 = f36
-log_rsq = f37
-log_T_plus_Nlog2 = f38
-log_r = f39
-log_C = f40
-log_rp_q32 = f41
-log_rp_q2 = f42
-log_rp_q10 = f43
-log_wsq = f44
-log_Q = f45
-log_inv_ln10 = f46
-log_NORM_f8 = f47
-
-// ===================================
-
-log_GR_exp_17_ones = r33
-log_GR_exp_16_ones = r34
-log_GR_exp_f8 = r35
-log_GR_signexp_f8 = r36
-log_GR_true_exp_f8 = r37
-log_GR_significand_f8 = r38
-log_GR_index = r39
-log_AD_1 = r40
-log_GR_signexp_w = r41
-log_GR_fff7 = r42
-log_AD_2 = r43
-log_GR_exp_w = r44
-
-GR_SAVE_B0 = r45
-GR_SAVE_GP = r46
-GR_SAVE_PFS = r47
-
-GR_Parameter_X = r48
-GR_Parameter_Y = r49
-GR_Parameter_RESULT = r50
-log_GR_tag = r51
+GR_TAG = r8
+GR_ad_T = r8
+GR_N = r9
+GR_Exp = r10
+GR_Sig = r11
+
+GR_025 = r14
+GR_05 = r15
+GR_A3 = r16
+GR_Ind = r17
+GR_dx = r15
+GR_Ln2 = r19
+GR_de = r20
+GR_x = r21
+GR_xorg = r22
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_A2 = f12
+FR_A3 = f13
+FR_A4 = f14
+
+FR_RcpX = f33
+FR_r = f34
+FR_r2 = f35
+FR_tmp = f35
+FR_Ln2 = f36
+FR_T = f37
+FR_N = f38
+FR_NxLn2pT = f38
+FR_NormX = f39
+FR_InvLn10 = f40
+
+
+FR_Y = f1
+FR_X = f10
+FR_RESULT = f8
// Data tables
//==============================================================
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
+RODATA
.align 16
-
-log_table_1:
-ASM_TYPE_DIRECTIVE(log_table_1,@object)
-data8 0xbfd0001008f39d59 // p3
-data8 0x3fd5556073e0c45a // p2
-ASM_SIZE_DIRECTIVE(log_table_1)
-
-log_table_2:
-ASM_TYPE_DIRECTIVE(log_table_2,@object)
-data8 0xbfdffffffffaea15 // p1
-data8 0x3fdbcb7b1526e50e // 1/ln10
-data8 0x3fe62e42fefa39ef // Log(2)
-data8 0x0 // pad
-
-data8 0x3F60040155D5889E //log(1/frcpa(1+ 0/256)
-data8 0x3F78121214586B54 //log(1/frcpa(1+ 1/256)
-data8 0x3F841929F96832F0 //log(1/frcpa(1+ 2/256)
-data8 0x3F8C317384C75F06 //log(1/frcpa(1+ 3/256)
-data8 0x3F91A6B91AC73386 //log(1/frcpa(1+ 4/256)
-data8 0x3F95BA9A5D9AC039 //log(1/frcpa(1+ 5/256)
-data8 0x3F99D2A8074325F4 //log(1/frcpa(1+ 6/256)
-data8 0x3F9D6B2725979802 //log(1/frcpa(1+ 7/256)
-data8 0x3FA0C58FA19DFAAA //log(1/frcpa(1+ 8/256)
-data8 0x3FA2954C78CBCE1B //log(1/frcpa(1+ 9/256)
-data8 0x3FA4A94D2DA96C56 //log(1/frcpa(1+ 10/256)
-data8 0x3FA67C94F2D4BB58 //log(1/frcpa(1+ 11/256)
-data8 0x3FA85188B630F068 //log(1/frcpa(1+ 12/256)
-data8 0x3FAA6B8ABE73AF4C //log(1/frcpa(1+ 13/256)
-data8 0x3FAC441E06F72A9E //log(1/frcpa(1+ 14/256)
-data8 0x3FAE1E6713606D07 //log(1/frcpa(1+ 15/256)
-data8 0x3FAFFA6911AB9301 //log(1/frcpa(1+ 16/256)
-data8 0x3FB0EC139C5DA601 //log(1/frcpa(1+ 17/256)
-data8 0x3FB1DBD2643D190B //log(1/frcpa(1+ 18/256)
-data8 0x3FB2CC7284FE5F1C //log(1/frcpa(1+ 19/256)
-data8 0x3FB3BDF5A7D1EE64 //log(1/frcpa(1+ 20/256)
-data8 0x3FB4B05D7AA012E0 //log(1/frcpa(1+ 21/256)
-data8 0x3FB580DB7CEB5702 //log(1/frcpa(1+ 22/256)
-data8 0x3FB674F089365A7A //log(1/frcpa(1+ 23/256)
-data8 0x3FB769EF2C6B568D //log(1/frcpa(1+ 24/256)
-data8 0x3FB85FD927506A48 //log(1/frcpa(1+ 25/256)
-data8 0x3FB9335E5D594989 //log(1/frcpa(1+ 26/256)
-data8 0x3FBA2B0220C8E5F5 //log(1/frcpa(1+ 27/256)
-data8 0x3FBB0004AC1A86AC //log(1/frcpa(1+ 28/256)
-data8 0x3FBBF968769FCA11 //log(1/frcpa(1+ 29/256)
-data8 0x3FBCCFEDBFEE13A8 //log(1/frcpa(1+ 30/256)
-data8 0x3FBDA727638446A2 //log(1/frcpa(1+ 31/256)
-data8 0x3FBEA3257FE10F7A //log(1/frcpa(1+ 32/256)
-data8 0x3FBF7BE9FEDBFDE6 //log(1/frcpa(1+ 33/256)
-data8 0x3FC02AB352FF25F4 //log(1/frcpa(1+ 34/256)
-data8 0x3FC097CE579D204D //log(1/frcpa(1+ 35/256)
-data8 0x3FC1178E8227E47C //log(1/frcpa(1+ 36/256)
-data8 0x3FC185747DBECF34 //log(1/frcpa(1+ 37/256)
-data8 0x3FC1F3B925F25D41 //log(1/frcpa(1+ 38/256)
-data8 0x3FC2625D1E6DDF57 //log(1/frcpa(1+ 39/256)
-data8 0x3FC2D1610C86813A //log(1/frcpa(1+ 40/256)
-data8 0x3FC340C59741142E //log(1/frcpa(1+ 41/256)
-data8 0x3FC3B08B6757F2A9 //log(1/frcpa(1+ 42/256)
-data8 0x3FC40DFB08378003 //log(1/frcpa(1+ 43/256)
-data8 0x3FC47E74E8CA5F7C //log(1/frcpa(1+ 44/256)
-data8 0x3FC4EF51F6466DE4 //log(1/frcpa(1+ 45/256)
-data8 0x3FC56092E02BA516 //log(1/frcpa(1+ 46/256)
-data8 0x3FC5D23857CD74D5 //log(1/frcpa(1+ 47/256)
-data8 0x3FC6313A37335D76 //log(1/frcpa(1+ 48/256)
-data8 0x3FC6A399DABBD383 //log(1/frcpa(1+ 49/256)
-data8 0x3FC70337DD3CE41B //log(1/frcpa(1+ 50/256)
-data8 0x3FC77654128F6127 //log(1/frcpa(1+ 51/256)
-data8 0x3FC7E9D82A0B022D //log(1/frcpa(1+ 52/256)
-data8 0x3FC84A6B759F512F //log(1/frcpa(1+ 53/256)
-data8 0x3FC8AB47D5F5A310 //log(1/frcpa(1+ 54/256)
-data8 0x3FC91FE49096581B //log(1/frcpa(1+ 55/256)
-data8 0x3FC981634011AA75 //log(1/frcpa(1+ 56/256)
-data8 0x3FC9F6C407089664 //log(1/frcpa(1+ 57/256)
-data8 0x3FCA58E729348F43 //log(1/frcpa(1+ 58/256)
-data8 0x3FCABB55C31693AD //log(1/frcpa(1+ 59/256)
-data8 0x3FCB1E104919EFD0 //log(1/frcpa(1+ 60/256)
-data8 0x3FCB94EE93E367CB //log(1/frcpa(1+ 61/256)
-data8 0x3FCBF851C067555F //log(1/frcpa(1+ 62/256)
-data8 0x3FCC5C0254BF23A6 //log(1/frcpa(1+ 63/256)
-data8 0x3FCCC000C9DB3C52 //log(1/frcpa(1+ 64/256)
-data8 0x3FCD244D99C85674 //log(1/frcpa(1+ 65/256)
-data8 0x3FCD88E93FB2F450 //log(1/frcpa(1+ 66/256)
-data8 0x3FCDEDD437EAEF01 //log(1/frcpa(1+ 67/256)
-data8 0x3FCE530EFFE71012 //log(1/frcpa(1+ 68/256)
-data8 0x3FCEB89A1648B971 //log(1/frcpa(1+ 69/256)
-data8 0x3FCF1E75FADF9BDE //log(1/frcpa(1+ 70/256)
-data8 0x3FCF84A32EAD7C35 //log(1/frcpa(1+ 71/256)
-data8 0x3FCFEB2233EA07CD //log(1/frcpa(1+ 72/256)
-data8 0x3FD028F9C7035C1C //log(1/frcpa(1+ 73/256)
-data8 0x3FD05C8BE0D9635A //log(1/frcpa(1+ 74/256)
-data8 0x3FD085EB8F8AE797 //log(1/frcpa(1+ 75/256)
-data8 0x3FD0B9C8E32D1911 //log(1/frcpa(1+ 76/256)
-data8 0x3FD0EDD060B78081 //log(1/frcpa(1+ 77/256)
-data8 0x3FD122024CF0063F //log(1/frcpa(1+ 78/256)
-data8 0x3FD14BE2927AECD4 //log(1/frcpa(1+ 79/256)
-data8 0x3FD180618EF18ADF //log(1/frcpa(1+ 80/256)
-data8 0x3FD1B50BBE2FC63B //log(1/frcpa(1+ 81/256)
-data8 0x3FD1DF4CC7CF242D //log(1/frcpa(1+ 82/256)
-data8 0x3FD214456D0EB8D4 //log(1/frcpa(1+ 83/256)
-data8 0x3FD23EC5991EBA49 //log(1/frcpa(1+ 84/256)
-data8 0x3FD2740D9F870AFB //log(1/frcpa(1+ 85/256)
-data8 0x3FD29ECDABCDFA04 //log(1/frcpa(1+ 86/256)
-data8 0x3FD2D46602ADCCEE //log(1/frcpa(1+ 87/256)
-data8 0x3FD2FF66B04EA9D4 //log(1/frcpa(1+ 88/256)
-data8 0x3FD335504B355A37 //log(1/frcpa(1+ 89/256)
-data8 0x3FD360925EC44F5D //log(1/frcpa(1+ 90/256)
-data8 0x3FD38BF1C3337E75 //log(1/frcpa(1+ 91/256)
-data8 0x3FD3C25277333184 //log(1/frcpa(1+ 92/256)
-data8 0x3FD3EDF463C1683E //log(1/frcpa(1+ 93/256)
-data8 0x3FD419B423D5E8C7 //log(1/frcpa(1+ 94/256)
-data8 0x3FD44591E0539F49 //log(1/frcpa(1+ 95/256)
-data8 0x3FD47C9175B6F0AD //log(1/frcpa(1+ 96/256)
-data8 0x3FD4A8B341552B09 //log(1/frcpa(1+ 97/256)
-data8 0x3FD4D4F3908901A0 //log(1/frcpa(1+ 98/256)
-data8 0x3FD501528DA1F968 //log(1/frcpa(1+ 99/256)
-data8 0x3FD52DD06347D4F6 //log(1/frcpa(1+ 100/256)
-data8 0x3FD55A6D3C7B8A8A //log(1/frcpa(1+ 101/256)
-data8 0x3FD5925D2B112A59 //log(1/frcpa(1+ 102/256)
-data8 0x3FD5BF406B543DB2 //log(1/frcpa(1+ 103/256)
-data8 0x3FD5EC433D5C35AE //log(1/frcpa(1+ 104/256)
-data8 0x3FD61965CDB02C1F //log(1/frcpa(1+ 105/256)
-data8 0x3FD646A84935B2A2 //log(1/frcpa(1+ 106/256)
-data8 0x3FD6740ADD31DE94 //log(1/frcpa(1+ 107/256)
-data8 0x3FD6A18DB74A58C5 //log(1/frcpa(1+ 108/256)
-data8 0x3FD6CF31058670EC //log(1/frcpa(1+ 109/256)
-data8 0x3FD6F180E852F0BA //log(1/frcpa(1+ 110/256)
-data8 0x3FD71F5D71B894F0 //log(1/frcpa(1+ 111/256)
-data8 0x3FD74D5AEFD66D5C //log(1/frcpa(1+ 112/256)
-data8 0x3FD77B79922BD37E //log(1/frcpa(1+ 113/256)
-data8 0x3FD7A9B9889F19E2 //log(1/frcpa(1+ 114/256)
-data8 0x3FD7D81B037EB6A6 //log(1/frcpa(1+ 115/256)
-data8 0x3FD8069E33827231 //log(1/frcpa(1+ 116/256)
-data8 0x3FD82996D3EF8BCB //log(1/frcpa(1+ 117/256)
-data8 0x3FD85855776DCBFB //log(1/frcpa(1+ 118/256)
-data8 0x3FD8873658327CCF //log(1/frcpa(1+ 119/256)
-data8 0x3FD8AA75973AB8CF //log(1/frcpa(1+ 120/256)
-data8 0x3FD8D992DC8824E5 //log(1/frcpa(1+ 121/256)
-data8 0x3FD908D2EA7D9512 //log(1/frcpa(1+ 122/256)
-data8 0x3FD92C59E79C0E56 //log(1/frcpa(1+ 123/256)
-data8 0x3FD95BD750EE3ED3 //log(1/frcpa(1+ 124/256)
-data8 0x3FD98B7811A3EE5B //log(1/frcpa(1+ 125/256)
-data8 0x3FD9AF47F33D406C //log(1/frcpa(1+ 126/256)
-data8 0x3FD9DF270C1914A8 //log(1/frcpa(1+ 127/256)
-data8 0x3FDA0325ED14FDA4 //log(1/frcpa(1+ 128/256)
-data8 0x3FDA33440224FA79 //log(1/frcpa(1+ 129/256)
-data8 0x3FDA57725E80C383 //log(1/frcpa(1+ 130/256)
-data8 0x3FDA87D0165DD199 //log(1/frcpa(1+ 131/256)
-data8 0x3FDAAC2E6C03F896 //log(1/frcpa(1+ 132/256)
-data8 0x3FDADCCC6FDF6A81 //log(1/frcpa(1+ 133/256)
-data8 0x3FDB015B3EB1E790 //log(1/frcpa(1+ 134/256)
-data8 0x3FDB323A3A635948 //log(1/frcpa(1+ 135/256)
-data8 0x3FDB56FA04462909 //log(1/frcpa(1+ 136/256)
-data8 0x3FDB881AA659BC93 //log(1/frcpa(1+ 137/256)
-data8 0x3FDBAD0BEF3DB165 //log(1/frcpa(1+ 138/256)
-data8 0x3FDBD21297781C2F //log(1/frcpa(1+ 139/256)
-data8 0x3FDC039236F08819 //log(1/frcpa(1+ 140/256)
-data8 0x3FDC28CB1E4D32FD //log(1/frcpa(1+ 141/256)
-data8 0x3FDC4E19B84723C2 //log(1/frcpa(1+ 142/256)
-data8 0x3FDC7FF9C74554C9 //log(1/frcpa(1+ 143/256)
-data8 0x3FDCA57B64E9DB05 //log(1/frcpa(1+ 144/256)
-data8 0x3FDCCB130A5CEBB0 //log(1/frcpa(1+ 145/256)
-data8 0x3FDCF0C0D18F326F //log(1/frcpa(1+ 146/256)
-data8 0x3FDD232075B5A201 //log(1/frcpa(1+ 147/256)
-data8 0x3FDD490246DEFA6B //log(1/frcpa(1+ 148/256)
-data8 0x3FDD6EFA918D25CD //log(1/frcpa(1+ 149/256)
-data8 0x3FDD9509707AE52F //log(1/frcpa(1+ 150/256)
-data8 0x3FDDBB2EFE92C554 //log(1/frcpa(1+ 151/256)
-data8 0x3FDDEE2F3445E4AF //log(1/frcpa(1+ 152/256)
-data8 0x3FDE148A1A2726CE //log(1/frcpa(1+ 153/256)
-data8 0x3FDE3AFC0A49FF40 //log(1/frcpa(1+ 154/256)
-data8 0x3FDE6185206D516E //log(1/frcpa(1+ 155/256)
-data8 0x3FDE882578823D52 //log(1/frcpa(1+ 156/256)
-data8 0x3FDEAEDD2EAC990C //log(1/frcpa(1+ 157/256)
-data8 0x3FDED5AC5F436BE3 //log(1/frcpa(1+ 158/256)
-data8 0x3FDEFC9326D16AB9 //log(1/frcpa(1+ 159/256)
-data8 0x3FDF2391A2157600 //log(1/frcpa(1+ 160/256)
-data8 0x3FDF4AA7EE03192D //log(1/frcpa(1+ 161/256)
-data8 0x3FDF71D627C30BB0 //log(1/frcpa(1+ 162/256)
-data8 0x3FDF991C6CB3B379 //log(1/frcpa(1+ 163/256)
-data8 0x3FDFC07ADA69A910 //log(1/frcpa(1+ 164/256)
-data8 0x3FDFE7F18EB03D3E //log(1/frcpa(1+ 165/256)
-data8 0x3FE007C053C5002E //log(1/frcpa(1+ 166/256)
-data8 0x3FE01B942198A5A1 //log(1/frcpa(1+ 167/256)
-data8 0x3FE02F74400C64EB //log(1/frcpa(1+ 168/256)
-data8 0x3FE04360BE7603AD //log(1/frcpa(1+ 169/256)
-data8 0x3FE05759AC47FE34 //log(1/frcpa(1+ 170/256)
-data8 0x3FE06B5F1911CF52 //log(1/frcpa(1+ 171/256)
-data8 0x3FE078BF0533C568 //log(1/frcpa(1+ 172/256)
-data8 0x3FE08CD9687E7B0E //log(1/frcpa(1+ 173/256)
-data8 0x3FE0A10074CF9019 //log(1/frcpa(1+ 174/256)
-data8 0x3FE0B5343A234477 //log(1/frcpa(1+ 175/256)
-data8 0x3FE0C974C89431CE //log(1/frcpa(1+ 176/256)
-data8 0x3FE0DDC2305B9886 //log(1/frcpa(1+ 177/256)
-data8 0x3FE0EB524BAFC918 //log(1/frcpa(1+ 178/256)
-data8 0x3FE0FFB54213A476 //log(1/frcpa(1+ 179/256)
-data8 0x3FE114253DA97D9F //log(1/frcpa(1+ 180/256)
-data8 0x3FE128A24F1D9AFF //log(1/frcpa(1+ 181/256)
-data8 0x3FE1365252BF0865 //log(1/frcpa(1+ 182/256)
-data8 0x3FE14AE558B4A92D //log(1/frcpa(1+ 183/256)
-data8 0x3FE15F85A19C765B //log(1/frcpa(1+ 184/256)
-data8 0x3FE16D4D38C119FA //log(1/frcpa(1+ 185/256)
-data8 0x3FE18203C20DD133 //log(1/frcpa(1+ 186/256)
-data8 0x3FE196C7BC4B1F3B //log(1/frcpa(1+ 187/256)
-data8 0x3FE1A4A738B7A33C //log(1/frcpa(1+ 188/256)
-data8 0x3FE1B981C0C9653D //log(1/frcpa(1+ 189/256)
-data8 0x3FE1CE69E8BB106B //log(1/frcpa(1+ 190/256)
-data8 0x3FE1DC619DE06944 //log(1/frcpa(1+ 191/256)
-data8 0x3FE1F160A2AD0DA4 //log(1/frcpa(1+ 192/256)
-data8 0x3FE2066D7740737E //log(1/frcpa(1+ 193/256)
-data8 0x3FE2147DBA47A394 //log(1/frcpa(1+ 194/256)
-data8 0x3FE229A1BC5EBAC3 //log(1/frcpa(1+ 195/256)
-data8 0x3FE237C1841A502E //log(1/frcpa(1+ 196/256)
-data8 0x3FE24CFCE6F80D9A //log(1/frcpa(1+ 197/256)
-data8 0x3FE25B2C55CD5762 //log(1/frcpa(1+ 198/256)
-data8 0x3FE2707F4D5F7C41 //log(1/frcpa(1+ 199/256)
-data8 0x3FE285E0842CA384 //log(1/frcpa(1+ 200/256)
-data8 0x3FE294294708B773 //log(1/frcpa(1+ 201/256)
-data8 0x3FE2A9A2670AFF0C //log(1/frcpa(1+ 202/256)
-data8 0x3FE2B7FB2C8D1CC1 //log(1/frcpa(1+ 203/256)
-data8 0x3FE2C65A6395F5F5 //log(1/frcpa(1+ 204/256)
-data8 0x3FE2DBF557B0DF43 //log(1/frcpa(1+ 205/256)
-data8 0x3FE2EA64C3F97655 //log(1/frcpa(1+ 206/256)
-data8 0x3FE3001823684D73 //log(1/frcpa(1+ 207/256)
-data8 0x3FE30E97E9A8B5CD //log(1/frcpa(1+ 208/256)
-data8 0x3FE32463EBDD34EA //log(1/frcpa(1+ 209/256)
-data8 0x3FE332F4314AD796 //log(1/frcpa(1+ 210/256)
-data8 0x3FE348D90E7464D0 //log(1/frcpa(1+ 211/256)
-data8 0x3FE35779F8C43D6E //log(1/frcpa(1+ 212/256)
-data8 0x3FE36621961A6A99 //log(1/frcpa(1+ 213/256)
-data8 0x3FE37C299F3C366A //log(1/frcpa(1+ 214/256)
-data8 0x3FE38AE2171976E7 //log(1/frcpa(1+ 215/256)
-data8 0x3FE399A157A603E7 //log(1/frcpa(1+ 216/256)
-data8 0x3FE3AFCCFE77B9D1 //log(1/frcpa(1+ 217/256)
-data8 0x3FE3BE9D503533B5 //log(1/frcpa(1+ 218/256)
-data8 0x3FE3CD7480B4A8A3 //log(1/frcpa(1+ 219/256)
-data8 0x3FE3E3C43918F76C //log(1/frcpa(1+ 220/256)
-data8 0x3FE3F2ACB27ED6C7 //log(1/frcpa(1+ 221/256)
-data8 0x3FE4019C2125CA93 //log(1/frcpa(1+ 222/256)
-data8 0x3FE4181061389722 //log(1/frcpa(1+ 223/256)
-data8 0x3FE42711518DF545 //log(1/frcpa(1+ 224/256)
-data8 0x3FE436194E12B6BF //log(1/frcpa(1+ 225/256)
-data8 0x3FE445285D68EA69 //log(1/frcpa(1+ 226/256)
-data8 0x3FE45BCC464C893A //log(1/frcpa(1+ 227/256)
-data8 0x3FE46AED21F117FC //log(1/frcpa(1+ 228/256)
-data8 0x3FE47A1527E8A2D3 //log(1/frcpa(1+ 229/256)
-data8 0x3FE489445EFFFCCC //log(1/frcpa(1+ 230/256)
-data8 0x3FE4A018BCB69835 //log(1/frcpa(1+ 231/256)
-data8 0x3FE4AF5A0C9D65D7 //log(1/frcpa(1+ 232/256)
-data8 0x3FE4BEA2A5BDBE87 //log(1/frcpa(1+ 233/256)
-data8 0x3FE4CDF28F10AC46 //log(1/frcpa(1+ 234/256)
-data8 0x3FE4DD49CF994058 //log(1/frcpa(1+ 235/256)
-data8 0x3FE4ECA86E64A684 //log(1/frcpa(1+ 236/256)
-data8 0x3FE503C43CD8EB68 //log(1/frcpa(1+ 237/256)
-data8 0x3FE513356667FC57 //log(1/frcpa(1+ 238/256)
-data8 0x3FE522AE0738A3D8 //log(1/frcpa(1+ 239/256)
-data8 0x3FE5322E26867857 //log(1/frcpa(1+ 240/256)
-data8 0x3FE541B5CB979809 //log(1/frcpa(1+ 241/256)
-data8 0x3FE55144FDBCBD62 //log(1/frcpa(1+ 242/256)
-data8 0x3FE560DBC45153C7 //log(1/frcpa(1+ 243/256)
-data8 0x3FE5707A26BB8C66 //log(1/frcpa(1+ 244/256)
-data8 0x3FE587F60ED5B900 //log(1/frcpa(1+ 245/256)
-data8 0x3FE597A7977C8F31 //log(1/frcpa(1+ 246/256)
-data8 0x3FE5A760D634BB8B //log(1/frcpa(1+ 247/256)
-data8 0x3FE5B721D295F10F //log(1/frcpa(1+ 248/256)
-data8 0x3FE5C6EA94431EF9 //log(1/frcpa(1+ 249/256)
-data8 0x3FE5D6BB22EA86F6 //log(1/frcpa(1+ 250/256)
-data8 0x3FE5E6938645D390 //log(1/frcpa(1+ 251/256)
-data8 0x3FE5F673C61A2ED2 //log(1/frcpa(1+ 252/256)
-data8 0x3FE6065BEA385926 //log(1/frcpa(1+ 253/256)
-data8 0x3FE6164BFA7CC06B //log(1/frcpa(1+ 254/256)
-data8 0x3FE62643FECF9743 //log(1/frcpa(1+ 255/256)
-ASM_SIZE_DIRECTIVE(log_table_2)
-
-
-.align 32
-.global logf#
-.global log10f#
-
-// log10 has p7 true, p8 false
-// log has p8 true, p7 false
-
+LOCAL_OBJECT_START(logf_data)
+data8 0x3FF0000000000000 // 1.0
+//
+// ln(1/frcpa(1+i/256)), i=0...255
+data8 0x3F60040155D5889E // 0
+data8 0x3F78121214586B54 // 1
+data8 0x3F841929F96832F0 // 2
+data8 0x3F8C317384C75F06 // 3
+data8 0x3F91A6B91AC73386 // 4
+data8 0x3F95BA9A5D9AC039 // 5
+data8 0x3F99D2A8074325F4 // 6
+data8 0x3F9D6B2725979802 // 7
+data8 0x3FA0C58FA19DFAAA // 8
+data8 0x3FA2954C78CBCE1B // 9
+data8 0x3FA4A94D2DA96C56 // 10
+data8 0x3FA67C94F2D4BB58 // 11
+data8 0x3FA85188B630F068 // 12
+data8 0x3FAA6B8ABE73AF4C // 13
+data8 0x3FAC441E06F72A9E // 14
+data8 0x3FAE1E6713606D07 // 15
+data8 0x3FAFFA6911AB9301 // 16
+data8 0x3FB0EC139C5DA601 // 17
+data8 0x3FB1DBD2643D190B // 18
+data8 0x3FB2CC7284FE5F1C // 19
+data8 0x3FB3BDF5A7D1EE64 // 20
+data8 0x3FB4B05D7AA012E0 // 21
+data8 0x3FB580DB7CEB5702 // 22
+data8 0x3FB674F089365A7A // 23
+data8 0x3FB769EF2C6B568D // 24
+data8 0x3FB85FD927506A48 // 25
+data8 0x3FB9335E5D594989 // 26
+data8 0x3FBA2B0220C8E5F5 // 27
+data8 0x3FBB0004AC1A86AC // 28
+data8 0x3FBBF968769FCA11 // 29
+data8 0x3FBCCFEDBFEE13A8 // 30
+data8 0x3FBDA727638446A2 // 31
+data8 0x3FBEA3257FE10F7A // 32
+data8 0x3FBF7BE9FEDBFDE6 // 33
+data8 0x3FC02AB352FF25F4 // 34
+data8 0x3FC097CE579D204D // 35
+data8 0x3FC1178E8227E47C // 36
+data8 0x3FC185747DBECF34 // 37
+data8 0x3FC1F3B925F25D41 // 38
+data8 0x3FC2625D1E6DDF57 // 39
+data8 0x3FC2D1610C86813A // 40
+data8 0x3FC340C59741142E // 41
+data8 0x3FC3B08B6757F2A9 // 42
+data8 0x3FC40DFB08378003 // 43
+data8 0x3FC47E74E8CA5F7C // 44
+data8 0x3FC4EF51F6466DE4 // 45
+data8 0x3FC56092E02BA516 // 46
+data8 0x3FC5D23857CD74D5 // 47
+data8 0x3FC6313A37335D76 // 48
+data8 0x3FC6A399DABBD383 // 49
+data8 0x3FC70337DD3CE41B // 50
+data8 0x3FC77654128F6127 // 51
+data8 0x3FC7E9D82A0B022D // 52
+data8 0x3FC84A6B759F512F // 53
+data8 0x3FC8AB47D5F5A310 // 54
+data8 0x3FC91FE49096581B // 55
+data8 0x3FC981634011AA75 // 56
+data8 0x3FC9F6C407089664 // 57
+data8 0x3FCA58E729348F43 // 58
+data8 0x3FCABB55C31693AD // 59
+data8 0x3FCB1E104919EFD0 // 60
+data8 0x3FCB94EE93E367CB // 61
+data8 0x3FCBF851C067555F // 62
+data8 0x3FCC5C0254BF23A6 // 63
+data8 0x3FCCC000C9DB3C52 // 64
+data8 0x3FCD244D99C85674 // 65
+data8 0x3FCD88E93FB2F450 // 66
+data8 0x3FCDEDD437EAEF01 // 67
+data8 0x3FCE530EFFE71012 // 68
+data8 0x3FCEB89A1648B971 // 69
+data8 0x3FCF1E75FADF9BDE // 70
+data8 0x3FCF84A32EAD7C35 // 71
+data8 0x3FCFEB2233EA07CD // 72
+data8 0x3FD028F9C7035C1C // 73
+data8 0x3FD05C8BE0D9635A // 74
+data8 0x3FD085EB8F8AE797 // 75
+data8 0x3FD0B9C8E32D1911 // 76
+data8 0x3FD0EDD060B78081 // 77
+data8 0x3FD122024CF0063F // 78
+data8 0x3FD14BE2927AECD4 // 79
+data8 0x3FD180618EF18ADF // 80
+data8 0x3FD1B50BBE2FC63B // 81
+data8 0x3FD1DF4CC7CF242D // 82
+data8 0x3FD214456D0EB8D4 // 83
+data8 0x3FD23EC5991EBA49 // 84
+data8 0x3FD2740D9F870AFB // 85
+data8 0x3FD29ECDABCDFA04 // 86
+data8 0x3FD2D46602ADCCEE // 87
+data8 0x3FD2FF66B04EA9D4 // 88
+data8 0x3FD335504B355A37 // 89
+data8 0x3FD360925EC44F5D // 90
+data8 0x3FD38BF1C3337E75 // 91
+data8 0x3FD3C25277333184 // 92
+data8 0x3FD3EDF463C1683E // 93
+data8 0x3FD419B423D5E8C7 // 94
+data8 0x3FD44591E0539F49 // 95
+data8 0x3FD47C9175B6F0AD // 96
+data8 0x3FD4A8B341552B09 // 97
+data8 0x3FD4D4F3908901A0 // 98
+data8 0x3FD501528DA1F968 // 99
+data8 0x3FD52DD06347D4F6 // 100
+data8 0x3FD55A6D3C7B8A8A // 101
+data8 0x3FD5925D2B112A59 // 102
+data8 0x3FD5BF406B543DB2 // 103
+data8 0x3FD5EC433D5C35AE // 104
+data8 0x3FD61965CDB02C1F // 105
+data8 0x3FD646A84935B2A2 // 106
+data8 0x3FD6740ADD31DE94 // 107
+data8 0x3FD6A18DB74A58C5 // 108
+data8 0x3FD6CF31058670EC // 109
+data8 0x3FD6F180E852F0BA // 110
+data8 0x3FD71F5D71B894F0 // 111
+data8 0x3FD74D5AEFD66D5C // 112
+data8 0x3FD77B79922BD37E // 113
+data8 0x3FD7A9B9889F19E2 // 114
+data8 0x3FD7D81B037EB6A6 // 115
+data8 0x3FD8069E33827231 // 116
+data8 0x3FD82996D3EF8BCB // 117
+data8 0x3FD85855776DCBFB // 118
+data8 0x3FD8873658327CCF // 119
+data8 0x3FD8AA75973AB8CF // 120
+data8 0x3FD8D992DC8824E5 // 121
+data8 0x3FD908D2EA7D9512 // 122
+data8 0x3FD92C59E79C0E56 // 123
+data8 0x3FD95BD750EE3ED3 // 124
+data8 0x3FD98B7811A3EE5B // 125
+data8 0x3FD9AF47F33D406C // 126
+data8 0x3FD9DF270C1914A8 // 127
+data8 0x3FDA0325ED14FDA4 // 128
+data8 0x3FDA33440224FA79 // 129
+data8 0x3FDA57725E80C383 // 130
+data8 0x3FDA87D0165DD199 // 131
+data8 0x3FDAAC2E6C03F896 // 132
+data8 0x3FDADCCC6FDF6A81 // 133
+data8 0x3FDB015B3EB1E790 // 134
+data8 0x3FDB323A3A635948 // 135
+data8 0x3FDB56FA04462909 // 136
+data8 0x3FDB881AA659BC93 // 137
+data8 0x3FDBAD0BEF3DB165 // 138
+data8 0x3FDBD21297781C2F // 139
+data8 0x3FDC039236F08819 // 140
+data8 0x3FDC28CB1E4D32FD // 141
+data8 0x3FDC4E19B84723C2 // 142
+data8 0x3FDC7FF9C74554C9 // 143
+data8 0x3FDCA57B64E9DB05 // 144
+data8 0x3FDCCB130A5CEBB0 // 145
+data8 0x3FDCF0C0D18F326F // 146
+data8 0x3FDD232075B5A201 // 147
+data8 0x3FDD490246DEFA6B // 148
+data8 0x3FDD6EFA918D25CD // 149
+data8 0x3FDD9509707AE52F // 150
+data8 0x3FDDBB2EFE92C554 // 151
+data8 0x3FDDEE2F3445E4AF // 152
+data8 0x3FDE148A1A2726CE // 153
+data8 0x3FDE3AFC0A49FF40 // 154
+data8 0x3FDE6185206D516E // 155
+data8 0x3FDE882578823D52 // 156
+data8 0x3FDEAEDD2EAC990C // 157
+data8 0x3FDED5AC5F436BE3 // 158
+data8 0x3FDEFC9326D16AB9 // 159
+data8 0x3FDF2391A2157600 // 160
+data8 0x3FDF4AA7EE03192D // 161
+data8 0x3FDF71D627C30BB0 // 162
+data8 0x3FDF991C6CB3B379 // 163
+data8 0x3FDFC07ADA69A910 // 164
+data8 0x3FDFE7F18EB03D3E // 165
+data8 0x3FE007C053C5002E // 166
+data8 0x3FE01B942198A5A1 // 167
+data8 0x3FE02F74400C64EB // 168
+data8 0x3FE04360BE7603AD // 169
+data8 0x3FE05759AC47FE34 // 170
+data8 0x3FE06B5F1911CF52 // 171
+data8 0x3FE078BF0533C568 // 172
+data8 0x3FE08CD9687E7B0E // 173
+data8 0x3FE0A10074CF9019 // 174
+data8 0x3FE0B5343A234477 // 175
+data8 0x3FE0C974C89431CE // 176
+data8 0x3FE0DDC2305B9886 // 177
+data8 0x3FE0EB524BAFC918 // 178
+data8 0x3FE0FFB54213A476 // 179
+data8 0x3FE114253DA97D9F // 180
+data8 0x3FE128A24F1D9AFF // 181
+data8 0x3FE1365252BF0865 // 182
+data8 0x3FE14AE558B4A92D // 183
+data8 0x3FE15F85A19C765B // 184
+data8 0x3FE16D4D38C119FA // 185
+data8 0x3FE18203C20DD133 // 186
+data8 0x3FE196C7BC4B1F3B // 187
+data8 0x3FE1A4A738B7A33C // 188
+data8 0x3FE1B981C0C9653D // 189
+data8 0x3FE1CE69E8BB106B // 190
+data8 0x3FE1DC619DE06944 // 191
+data8 0x3FE1F160A2AD0DA4 // 192
+data8 0x3FE2066D7740737E // 193
+data8 0x3FE2147DBA47A394 // 194
+data8 0x3FE229A1BC5EBAC3 // 195
+data8 0x3FE237C1841A502E // 196
+data8 0x3FE24CFCE6F80D9A // 197
+data8 0x3FE25B2C55CD5762 // 198
+data8 0x3FE2707F4D5F7C41 // 199
+data8 0x3FE285E0842CA384 // 200
+data8 0x3FE294294708B773 // 201
+data8 0x3FE2A9A2670AFF0C // 202
+data8 0x3FE2B7FB2C8D1CC1 // 203
+data8 0x3FE2C65A6395F5F5 // 204
+data8 0x3FE2DBF557B0DF43 // 205
+data8 0x3FE2EA64C3F97655 // 206
+data8 0x3FE3001823684D73 // 207
+data8 0x3FE30E97E9A8B5CD // 208
+data8 0x3FE32463EBDD34EA // 209
+data8 0x3FE332F4314AD796 // 210
+data8 0x3FE348D90E7464D0 // 211
+data8 0x3FE35779F8C43D6E // 212
+data8 0x3FE36621961A6A99 // 213
+data8 0x3FE37C299F3C366A // 214
+data8 0x3FE38AE2171976E7 // 215
+data8 0x3FE399A157A603E7 // 216
+data8 0x3FE3AFCCFE77B9D1 // 217
+data8 0x3FE3BE9D503533B5 // 218
+data8 0x3FE3CD7480B4A8A3 // 219
+data8 0x3FE3E3C43918F76C // 220
+data8 0x3FE3F2ACB27ED6C7 // 221
+data8 0x3FE4019C2125CA93 // 222
+data8 0x3FE4181061389722 // 223
+data8 0x3FE42711518DF545 // 224
+data8 0x3FE436194E12B6BF // 225
+data8 0x3FE445285D68EA69 // 226
+data8 0x3FE45BCC464C893A // 227
+data8 0x3FE46AED21F117FC // 228
+data8 0x3FE47A1527E8A2D3 // 229
+data8 0x3FE489445EFFFCCC // 230
+data8 0x3FE4A018BCB69835 // 231
+data8 0x3FE4AF5A0C9D65D7 // 232
+data8 0x3FE4BEA2A5BDBE87 // 233
+data8 0x3FE4CDF28F10AC46 // 234
+data8 0x3FE4DD49CF994058 // 235
+data8 0x3FE4ECA86E64A684 // 236
+data8 0x3FE503C43CD8EB68 // 237
+data8 0x3FE513356667FC57 // 238
+data8 0x3FE522AE0738A3D8 // 239
+data8 0x3FE5322E26867857 // 240
+data8 0x3FE541B5CB979809 // 241
+data8 0x3FE55144FDBCBD62 // 242
+data8 0x3FE560DBC45153C7 // 243
+data8 0x3FE5707A26BB8C66 // 244
+data8 0x3FE587F60ED5B900 // 245
+data8 0x3FE597A7977C8F31 // 246
+data8 0x3FE5A760D634BB8B // 247
+data8 0x3FE5B721D295F10F // 248
+data8 0x3FE5C6EA94431EF9 // 249
+data8 0x3FE5D6BB22EA86F6 // 250
+data8 0x3FE5E6938645D390 // 251
+data8 0x3FE5F673C61A2ED2 // 252
+data8 0x3FE6065BEA385926 // 253
+data8 0x3FE6164BFA7CC06B // 254
+data8 0x3FE62643FECF9743 // 255
+LOCAL_OBJECT_END(logf_data)
+
+LOCAL_OBJECT_START(log10f_data)
+data8 0x3FDBCB7B1526E50E // 1/ln(10)
+//
+// ln(1/frcpa(1+i/256))/ln(10), i=0...255
+data8 0x3F4BD27045BFD025 // 0
+data8 0x3F64E84E793A474A // 1
+data8 0x3F7175085AB85FF0 // 2
+data8 0x3F787CFF9D9147A5 // 3
+data8 0x3F7EA9D372B89FC8 // 4
+data8 0x3F82DF9D95DA961C // 5
+data8 0x3F866DF172D6372C // 6
+data8 0x3F898D79EF5EEDF0 // 7
+data8 0x3F8D22ADF3F9579D // 8
+data8 0x3F9024231D30C398 // 9
+data8 0x3F91F23A98897D4A // 10
+data8 0x3F93881A7B818F9E // 11
+data8 0x3F951F6E1E759E35 // 12
+data8 0x3F96F2BCE7ADC5B4 // 13
+data8 0x3F988D362CDF359E // 14
+data8 0x3F9A292BAF010982 // 15
+data8 0x3F9BC6A03117EB97 // 16
+data8 0x3F9D65967DE3AB09 // 17
+data8 0x3F9F061167FC31E8 // 18
+data8 0x3FA05409E4F7819C // 19
+data8 0x3FA125D0432EA20E // 20
+data8 0x3FA1F85D440D299B // 21
+data8 0x3FA2AD755749617D // 22
+data8 0x3FA381772A00E604 // 23
+data8 0x3FA45643E165A70B // 24
+data8 0x3FA52BDD034475B8 // 25
+data8 0x3FA5E3966B7E9295 // 26
+data8 0x3FA6BAAF47C5B245 // 27
+data8 0x3FA773B3E8C4F3C8 // 28
+data8 0x3FA84C51EBEE8D15 // 29
+data8 0x3FA906A6786FC1CB // 30
+data8 0x3FA9C197ABF00DD7 // 31
+data8 0x3FAA9C78712191F7 // 32
+data8 0x3FAB58C09C8D637C // 33
+data8 0x3FAC15A8BCDD7B7E // 34
+data8 0x3FACD331E2C2967C // 35
+data8 0x3FADB11ED766ABF4 // 36
+data8 0x3FAE70089346A9E6 // 37
+data8 0x3FAF2F96C6754AEE // 38
+data8 0x3FAFEFCA8D451FD6 // 39
+data8 0x3FB0585283764178 // 40
+data8 0x3FB0B913AAC7D3A7 // 41
+data8 0x3FB11A294F2569F6 // 42
+data8 0x3FB16B51A2696891 // 43
+data8 0x3FB1CD03ADACC8BE // 44
+data8 0x3FB22F0BDD7745F5 // 45
+data8 0x3FB2916ACA38D1E8 // 46
+data8 0x3FB2F4210DF7663D // 47
+data8 0x3FB346A6C3C49066 // 48
+data8 0x3FB3A9FEBC60540A // 49
+data8 0x3FB3FD0C10A3AA54 // 50
+data8 0x3FB46107D3540A82 // 51
+data8 0x3FB4C55DD16967FE // 52
+data8 0x3FB51940330C000B // 53
+data8 0x3FB56D620EE7115E // 54
+data8 0x3FB5D2ABCF26178E // 55
+data8 0x3FB6275AA5DEBF81 // 56
+data8 0x3FB68D4EAF26D7EE // 57
+data8 0x3FB6E28C5C54A28D // 58
+data8 0x3FB7380B9665B7C8 // 59
+data8 0x3FB78DCCC278E85B // 60
+data8 0x3FB7F50C2CF2557A // 61
+data8 0x3FB84B5FD5EAEFD8 // 62
+data8 0x3FB8A1F6BAB2B226 // 63
+data8 0x3FB8F8D144557BDF // 64
+data8 0x3FB94FEFDCD61D92 // 65
+data8 0x3FB9A752EF316149 // 66
+data8 0x3FB9FEFAE7611EE0 // 67
+data8 0x3FBA56E8325F5C87 // 68
+data8 0x3FBAAF1B3E297BB4 // 69
+data8 0x3FBB079479C372AD // 70
+data8 0x3FBB6054553B12F7 // 71
+data8 0x3FBBB95B41AB5CE6 // 72
+data8 0x3FBC12A9B13FE079 // 73
+data8 0x3FBC6C4017382BEA // 74
+data8 0x3FBCB41FBA42686D // 75
+data8 0x3FBD0E38CE73393F // 76
+data8 0x3FBD689B2193F133 // 77
+data8 0x3FBDC3472B1D2860 // 78
+data8 0x3FBE0C06300D528B // 79
+data8 0x3FBE6738190E394C // 80
+data8 0x3FBEC2B50D208D9B // 81
+data8 0x3FBF0C1C2B936828 // 82
+data8 0x3FBF68216C9CC727 // 83
+data8 0x3FBFB1F6381856F4 // 84
+data8 0x3FC00742AF4CE5F8 // 85
+data8 0x3FC02C64906512D2 // 86
+data8 0x3FC05AF1E63E03B4 // 87
+data8 0x3FC0804BEA723AA9 // 88
+data8 0x3FC0AF1FD6711527 // 89
+data8 0x3FC0D4B2A8805A00 // 90
+data8 0x3FC0FA5EF136A06C // 91
+data8 0x3FC1299A4FB3E306 // 92
+data8 0x3FC14F806253C3ED // 93
+data8 0x3FC175805D1587C1 // 94
+data8 0x3FC19B9A637CA295 // 95
+data8 0x3FC1CB5FC26EDE17 // 96
+data8 0x3FC1F1B4E65F2590 // 97
+data8 0x3FC218248B5DC3E5 // 98
+data8 0x3FC23EAED62ADC76 // 99
+data8 0x3FC26553EBD337BD // 100
+data8 0x3FC28C13F1B11900 // 101
+data8 0x3FC2BCAA14381386 // 102
+data8 0x3FC2E3A740B7800F // 103
+data8 0x3FC30ABFD8F333B6 // 104
+data8 0x3FC331F403985097 // 105
+data8 0x3FC35943E7A60690 // 106
+data8 0x3FC380AFAC6E7C07 // 107
+data8 0x3FC3A8377997B9E6 // 108
+data8 0x3FC3CFDB771C9ADB // 109
+data8 0x3FC3EDA90D39A5DF // 110
+data8 0x3FC4157EC09505CD // 111
+data8 0x3FC43D7113FB04C1 // 112
+data8 0x3FC4658030AD1CCF // 113
+data8 0x3FC48DAC404638F6 // 114
+data8 0x3FC4B5F56CBBB869 // 115
+data8 0x3FC4DE5BE05E7583 // 116
+data8 0x3FC4FCBC0776FD85 // 117
+data8 0x3FC525561E9256EE // 118
+data8 0x3FC54E0DF3198865 // 119
+data8 0x3FC56CAB7112BDE2 // 120
+data8 0x3FC59597BA735B15 // 121
+data8 0x3FC5BEA23A506FDA // 122
+data8 0x3FC5DD7E08DE382F // 123
+data8 0x3FC606BDD3F92355 // 124
+data8 0x3FC6301C518A501F // 125
+data8 0x3FC64F3770618916 // 126
+data8 0x3FC678CC14C1E2D8 // 127
+data8 0x3FC6981005ED2947 // 128
+data8 0x3FC6C1DB5F9BB336 // 129
+data8 0x3FC6E1488ECD2881 // 130
+data8 0x3FC70B4B2E7E41B9 // 131
+data8 0x3FC72AE209146BF9 // 132
+data8 0x3FC7551C81BD8DCF // 133
+data8 0x3FC774DD76CC43BE // 134
+data8 0x3FC79F505DB00E88 // 135
+data8 0x3FC7BF3BDE099F30 // 136
+data8 0x3FC7E9E7CAC437F9 // 137
+data8 0x3FC809FE4902D00D // 138
+data8 0x3FC82A2757995CBE // 139
+data8 0x3FC85525C625E098 // 140
+data8 0x3FC8757A79831887 // 141
+data8 0x3FC895E2058D8E03 // 142
+data8 0x3FC8C13437695532 // 143
+data8 0x3FC8E1C812EF32BE // 144
+data8 0x3FC9026F112197E8 // 145
+data8 0x3FC923294888880B // 146
+data8 0x3FC94EEA4B8334F3 // 147
+data8 0x3FC96FD1B639FC09 // 148
+data8 0x3FC990CCA66229AC // 149
+data8 0x3FC9B1DB33334843 // 150
+data8 0x3FC9D2FD740E6607 // 151
+data8 0x3FC9FF49EEDCB553 // 152
+data8 0x3FCA209A84FBCFF8 // 153
+data8 0x3FCA41FF1E43F02B // 154
+data8 0x3FCA6377D2CE9378 // 155
+data8 0x3FCA8504BAE0D9F6 // 156
+data8 0x3FCAA6A5EEEBEFE3 // 157
+data8 0x3FCAC85B878D7879 // 158
+data8 0x3FCAEA259D8FFA0B // 159
+data8 0x3FCB0C0449EB4B6B // 160
+data8 0x3FCB2DF7A5C50299 // 161
+data8 0x3FCB4FFFCA70E4D1 // 162
+data8 0x3FCB721CD17157E3 // 163
+data8 0x3FCB944ED477D4ED // 164
+data8 0x3FCBB695ED655C7D // 165
+data8 0x3FCBD8F2364AEC0F // 166
+data8 0x3FCBFB63C969F4FF // 167
+data8 0x3FCC1DEAC134D4E9 // 168
+data8 0x3FCC4087384F4F80 // 169
+data8 0x3FCC6339498F09E2 // 170
+data8 0x3FCC86010FFC076C // 171
+data8 0x3FCC9D3D065C5B42 // 172
+data8 0x3FCCC029375BA07A // 173
+data8 0x3FCCE32B66978BA4 // 174
+data8 0x3FCD0643AFD51404 // 175
+data8 0x3FCD29722F0DEA45 // 176
+data8 0x3FCD4CB70070FE44 // 177
+data8 0x3FCD6446AB3F8C96 // 178
+data8 0x3FCD87B0EF71DB45 // 179
+data8 0x3FCDAB31D1FE99A7 // 180
+data8 0x3FCDCEC96FDC888F // 181
+data8 0x3FCDE6908876357A // 182
+data8 0x3FCE0A4E4A25C200 // 183
+data8 0x3FCE2E2315755E33 // 184
+data8 0x3FCE461322D1648A // 185
+data8 0x3FCE6A0E95C7787B // 186
+data8 0x3FCE8E216243DD60 // 187
+data8 0x3FCEA63AF26E007C // 188
+data8 0x3FCECA74ED15E0B7 // 189
+data8 0x3FCEEEC692CCD25A // 190
+data8 0x3FCF070A36B8D9C1 // 191
+data8 0x3FCF2B8393E34A2D // 192
+data8 0x3FCF5014EF538A5B // 193
+data8 0x3FCF68833AF1B180 // 194
+data8 0x3FCF8D3CD9F3F04F // 195
+data8 0x3FCFA5C61ADD93E9 // 196
+data8 0x3FCFCAA8567EBA7A // 197
+data8 0x3FCFE34CC8743DD8 // 198
+data8 0x3FD0042BFD74F519 // 199
+data8 0x3FD016BDF6A18017 // 200
+data8 0x3FD023262F907322 // 201
+data8 0x3FD035CCED8D32A1 // 202
+data8 0x3FD042430E869FFC // 203
+data8 0x3FD04EBEC842B2E0 // 204
+data8 0x3FD06182E84FD4AC // 205
+data8 0x3FD06E0CB609D383 // 206
+data8 0x3FD080E60BEC8F12 // 207
+data8 0x3FD08D7E0D894735 // 208
+data8 0x3FD0A06CC96A2056 // 209
+data8 0x3FD0AD131F3B3C55 // 210
+data8 0x3FD0C01771E775FB // 211
+data8 0x3FD0CCCC3CAD6F4B // 212
+data8 0x3FD0D986D91A34A9 // 213
+data8 0x3FD0ECA9B8861A2D // 214
+data8 0x3FD0F972F87FF3D6 // 215
+data8 0x3FD106421CF0E5F7 // 216
+data8 0x3FD11983EBE28A9D // 217
+data8 0x3FD12661E35B785A // 218
+data8 0x3FD13345D2779D3B // 219
+data8 0x3FD146A6F597283A // 220
+data8 0x3FD15399E81EA83D // 221
+data8 0x3FD16092E5D3A9A6 // 222
+data8 0x3FD17413C3B7AB5E // 223
+data8 0x3FD1811BF629D6FB // 224
+data8 0x3FD18E2A47B46686 // 225
+data8 0x3FD19B3EBE1A4418 // 226
+data8 0x3FD1AEE9017CB450 // 227
+data8 0x3FD1BC0CED7134E2 // 228
+data8 0x3FD1C93712ABC7FF // 229
+data8 0x3FD1D66777147D3F // 230
+data8 0x3FD1EA3BD1286E1C // 231
+data8 0x3FD1F77BED932C4C // 232
+data8 0x3FD204C25E1B031F // 233
+data8 0x3FD2120F28CE69B1 // 234
+data8 0x3FD21F6253C48D01 // 235
+data8 0x3FD22CBBE51D60AA // 236
+data8 0x3FD240CE4C975444 // 237
+data8 0x3FD24E37F8ECDAE8 // 238
+data8 0x3FD25BA8215AF7FC // 239
+data8 0x3FD2691ECC29F042 // 240
+data8 0x3FD2769BFFAB2E00 // 241
+data8 0x3FD2841FC23952C9 // 242
+data8 0x3FD291AA1A384978 // 243
+data8 0x3FD29F3B0E15584B // 244
+data8 0x3FD2B3A0EE479DF7 // 245
+data8 0x3FD2C142842C09E6 // 246
+data8 0x3FD2CEEACCB7BD6D // 247
+data8 0x3FD2DC99CE82FF21 // 248
+data8 0x3FD2EA4F902FD7DA // 249
+data8 0x3FD2F80C186A25FD // 250
+data8 0x3FD305CF6DE7B0F7 // 251
+data8 0x3FD3139997683CE7 // 252
+data8 0x3FD3216A9BB59E7C // 253
+data8 0x3FD32F4281A3CEFF // 254
+data8 0x3FD33D2150110092 // 255
+LOCAL_OBJECT_END(log10f_data)
+
+
+// Code
+//==============================================================
.section .text
-.proc log10f#
-.align 32
-log10f:
-#ifdef _LIBC
-.global __ieee754_log10f
-.type __ieee754_log10f,@function
-__ieee754_log10f:
-#endif
-{ .mfi
- alloc r32=ar.pfs,1,15,4,0
- frcpa.s1 log_C,p9 = f1,f8
- cmp.eq.unc p7,p8 = r0, r0
-}
-{ .mfb
- addl log_AD_1 = @ltoff(log_table_1), gp
- fnorm.s1 log_NORM_f8 = f8
- br.sptk L(LOG_LOG10_X)
-}
-;;
-
-.endp log10f
-ASM_SIZE_DIRECTIVE(log10f)
-ASM_SIZE_DIRECTIVE(__ieee754_log10f)
-
-
-
-.section .text
-.proc logf#
-.align 32
-logf:
-#ifdef _LIBC
-.global __ieee754_logf
-.type __ieee754_logf,@function
-__ieee754_logf:
-#endif
+// logf has p13 true, p14 false
+// log10f has p14 true, p13 false
+GLOBAL_IEEE754_ENTRY(log10f)
{ .mfi
- alloc r32=ar.pfs,1,15,4,0
- frcpa.s1 log_C,p9 = f1,f8
- cmp.eq.unc p8,p7 = r0, r0
+ getf.exp GR_Exp = f8 // if x is unorm then must recompute
+ frcpa.s1 FR_RcpX,p0 = f1,f8
+ mov GR_05 = 0xFFFE // biased exponent of A2=0.5
}
+{ .mlx
+ addl GR_ad_T = @ltoff(log10f_data),gp
+ movl GR_A3 = 0x3FD5555555555555 // double precision memory
+ // representation of A3
+};;
{ .mfi
- addl log_AD_1 = @ltoff(log_table_1), gp
- fnorm.s1 log_NORM_f8 = f8
- nop.i 999
-}
-;;
-
-L(LOG_LOG10_X):
-
-{ .mfi
- getf.exp log_GR_signexp_f8 = f8 // If x unorm then must recompute
- fclass.m.unc p15,p0 = f8, 0x0b // Test for x=unorm
- mov log_GR_fff7 = 0xfff7
+ getf.sig GR_Sig = f8 // if x is unorm then must recompute
+ fclass.m p8,p0 = f8,9 // is x positive unorm?
+ sub GR_025 = GR_05,r0,1 // biased exponent of A4=0.25
}
+{ .mlx
+ ld8 GR_ad_T = [GR_ad_T]
+ movl GR_Ln2 = 0x3FD34413509F79FF // double precision memory
+ // representation of
+ // log(2)/ln(10)
+};;
{ .mfi
- ld8 log_AD_1 = [log_AD_1]
- fms.s1 log_w = f8,f1,f1
- mov log_GR_exp_17_ones = 0x1ffff
+ setf.d FR_A3 = GR_A3 // create A3
+ fcmp.eq.s1 p14,p13 = f0,f0 // set p14 to 1 for log10f
+ dep.z GR_xorg = GR_05,55,8 // 0x7F00000000000000 integer number
+ // bits of that are
+ // GR_xorg[63] = last bit of biased
+ // exponent of 255/256
+ // GR_xorg[62-0] = bits from 62 to 0
+ // of significand of 255/256
}
-;;
-
-{ .mmi
- getf.sig log_GR_significand_f8 = f8 // If x unorm then must recompute
- mov log_GR_exp_16_ones = 0xffff
- nop.i 999
-}
-;;
-
-{ .mmb
- adds log_AD_2 = 0x10, log_AD_1
- and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
-(p15) br.cond.spnt L(LOG_DENORM)
-}
-;;
-
-L(LOG_COMMON):
-{.mfi
- ldfpd log_P3,log_P2 = [log_AD_1],16
- fclass.m.unc p6,p0 = f8, 0xc3 // Test for x=nan
- shl log_GR_index = log_GR_significand_f8,1
-}
-{.mfi
- sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
- nop.f 999
- nop.i 999
-}
-;;
-
+{ .mib
+ setf.exp FR_A2 = GR_05 // create A2
+ sub GR_de = GR_Exp,GR_05 // biased_exponent_of_x - 0xFFFE
+ // needed for comparison with 0.5 and 2.0
+ br.cond.sptk logf_log10f_common
+};;
+GLOBAL_IEEE754_END(log10f)
+GLOBAL_IEEE754_ENTRY(logf)
{ .mfi
- ldfpd log_P1,log_inv_ln10 = [log_AD_2],16
- fclass.m.unc p11,p0 = f8, 0x21 // Test for x=+inf
- shr.u log_GR_index = log_GR_index,56
+ getf.exp GR_Exp = f8 // if x is unorm then must recompute
+ frcpa.s1 FR_RcpX,p0 = f1,f8
+ mov GR_05 = 0xFFFE // biased exponent of A2=0.5
}
+{ .mlx
+ addl GR_ad_T = @ltoff(logf_data),gp
+ movl GR_A3 = 0x3FD5555555555555 // double precision memory
+ // representation of A3
+};;
{ .mfi
- setf.sig log_int_Nfloat = log_GR_true_exp_f8
- nop.f 999
- nop.i 999
+ getf.sig GR_Sig = f8 // if x is unorm then must recompute
+ fclass.m p8,p0 = f8,9 // is x positive unorm?
+ dep.z GR_xorg = GR_05,55,8 // 0x7F00000000000000 integer number
+ // bits of that are
+ // GR_xorg[63] = last bit of biased
+ // exponent of 255/256
+ // GR_xorg[62-0] = bits from 62 to 0
+ // of significand of 255/256
}
-;;
-
-
{ .mfi
- ldfd log_log2 = [log_AD_2],16
- fma.s1 log_wsq = log_w, log_w, f0
- nop.i 999
-}
-{ .mfb
- nop.m 999
-(p6) fma.s.s0 f8 = f8,f1,f0 // quietize nan result if x=nan
-(p6) br.ret.spnt b0 // Exit for x=nan
-}
-;;
-
-
+ ld8 GR_ad_T = [GR_ad_T]
+ nop.f 0
+ sub GR_025 = GR_05,r0,1 // biased exponent of A4=0.25
+};;
{ .mfi
- shladd log_AD_2 = log_GR_index,3,log_AD_2
- fcmp.eq.s1 p10,p0 = log_NORM_f8, f1 // Test for x=+1.0
- nop.i 999
+ setf.d FR_A3 = GR_A3 // create A3
+ fcmp.eq.s1 p13,p14 = f0,f0 // p13 - true for logf
+ sub GR_de = GR_Exp,GR_05 // biased_exponent_of_x - 0xFFFE
+ // needed for comparison with 0.5 and 2.0
}
-{ .mfb
- nop.m 999
- fms.s1 log_r = log_C,f8,f1
-(p11) br.ret.spnt b0 // Exit for x=+inf
-}
-;;
-
-
-{ .mmf
- nop.m 999
- nop.m 999
- fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
-}
-;;
-
-
-{ .mfb
- ldfd log_T = [log_AD_2]
-(p10) fmerge.s f8 = f0, f0
-(p10) br.ret.spnt b0 // Exit for x=1.0
-;;
-}
-
+{ .mlx
+ setf.exp FR_A2 = GR_05 // create A2
+ movl GR_Ln2 = 0x3FE62E42FEFA39EF // double precision memory
+ // representation of log(2)
+};;
+logf_log10f_common:
{ .mfi
- getf.exp log_GR_signexp_w = log_w
- fclass.m.unc p12,p0 = f8, 0x3a // Test for x neg norm, unorm, inf
- nop.i 999
-}
-;;
-
-{ .mmb
- nop.m 999
- nop.m 999
-(p6) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x=0
-;;
+ setf.exp FR_A4 = GR_025 // create A4=0.25
+ fclass.m p9,p0 = f8,0x3A // is x < 0 (including negative unnormals)?
+ dep GR_x = GR_Exp,GR_Sig,63,1 // produce integer that bits are
+ // GR_x[63] = GR_Exp[0]
+ // GR_x[62-0] = GR_Sig[62-0]
}
-
-
+{ .mib
+ sub GR_N = GR_Exp,GR_05,1 // unbiased exponent of x
+ cmp.gtu p6,p7 = 2,GR_de // is 0.5 <= x < 2.0?
+(p8) br.cond.spnt logf_positive_unorm
+};;
+logf_core:
{ .mfi
- and log_GR_exp_w = log_GR_exp_17_ones, log_GR_signexp_w
- nop.f 999
- nop.i 999
+ setf.sig FR_N = GR_N // copy unbiased exponent of x to the
+ // significand field of FR_N
+ fclass.m p10,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
+ dep.z GR_dx = GR_05,54,3 // 0x0180000000000000 - difference
+ // between our integer representations
+ // of 257/256 and 255/256
}
-{ .mfb
- nop.m 999
- fma.s1 log_rsq = log_r, log_r, f0
-(p12) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x<0
-;;
-}
-
{ .mfi
- nop.m 999
- fma.s1 log_rp_p32 = log_P3, log_r, log_P2
- nop.i 999
-}
+ nop.m 0
+ nop.f 0
+ sub GR_x = GR_x,GR_xorg // difference between representations
+ // of x and 255/256
+};;
{ .mfi
- nop.m 999
- fma.s1 log_rp_q32 = log_P3, log_w, log_P2
- nop.i 999
-;;
+ ldfd FR_InvLn10 = [GR_ad_T],8
+ fcmp.eq.s1 p11,p0 = f8,f1 // is x equal to 1.0?
+ extr.u GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
}
-
+{ .mib
+ setf.d FR_Ln2 = GR_Ln2 // create log(2) or log10(2)
+(p6) cmp.gtu p6,p7 = GR_dx,GR_x // set p6 if 255/256 <= x < 257/256
+(p9) br.cond.spnt logf_negatives // jump if input argument is negative number
+};;
+// p6 is true if |x-1| < 1/256
+// p7 is true if |x-1| >= 1/256
+.pred.rel "mutex",p6,p7
{ .mfi
- nop.m 999
- fcvt.xf log_Nfloat = log_int_Nfloat
- nop.i 999 ;;
+ shladd GR_ad_T = GR_Ind,3,GR_ad_T // calculate address of T
+(p7) fms.s1 FR_r = FR_RcpX,f8,f1 // range reduction for |x-1|>=1/256
+ extr.u GR_Exp = GR_Exp,0,17 // exponent without sign
}
-
+{ .mfb
+ nop.m 0
+(p6) fms.s1 FR_r = f8,f1,f1 // range reduction for |x-1|<1/256
+(p10) br.cond.spnt logf_nan_nat_pinf // exit for NaN, NaT or +Inf
+};;
+{ .mfb
+ ldfd FR_T = [GR_ad_T] // load T
+(p11) fma.s.s0 f8 = f0,f0,f0
+(p11) br.ret.spnt b0 // exit for x = 1.0
+};;
+{ .mib
+ nop.m 0
+ cmp.eq p12,p0 = r0,GR_Exp // is x +/-0? (here it's quite enough
+ // only to compare exponent with 0
+ // because all unnormals already
+ // have been filtered)
+(p12) br.cond.spnt logf_zeroes // Branch if input argument is +/-0
+};;
{ .mfi
- nop.m 999
- fma.s1 log_rp_p10 = log_P1, log_r, f1
- nop.i 999
+ nop.m 0
+ fnma.s1 FR_A2 = FR_A2,FR_r,f1 // A2*r+1
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s1 log_rp_q10 = log_P1, log_w, f1
- nop.i 999
-;;
-}
-
-// p13 <== large w log
-// p14 <== small w log
+ nop.m 0
+ fma.s1 FR_r2 = FR_r,FR_r,f0 // r^2
+ nop.i 0
+};;
{ .mfi
-(p8) cmp.ge.unc p13,p14 = log_GR_exp_w, log_GR_fff7
- fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag on +denormal input
- nop.i 999
-;;
+ nop.m 0
+ fcvt.xf FR_N = FR_N // convert integer N in significand of FR_N
+ // to floating-point representation
+ nop.i 0
}
-
-// p10 <== large w log10
-// p11 <== small w log10
{ .mfi
-(p7) cmp.ge.unc p10,p11 = log_GR_exp_w, log_GR_fff7
- nop.f 999
- nop.i 999 ;;
-}
-
+ nop.m 0
+ fnma.s1 FR_A3 = FR_A4,FR_r,FR_A3 // A4*r+A3
+ nop.i 0
+};;
{ .mfi
- nop.m 999
- fma.s1 log_T_plus_Nlog2 = log_Nfloat,log_log2, log_T
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 FR_r = FR_r,FR_InvLn10,f0 // For log10f we have r/log(10)
+ nop.i 0
}
-
-
{ .mfi
- nop.m 999
- fma.s1 log_rp_p2 = log_rp_p32, log_rsq, log_rp_p10
- nop.i 999
-}
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
{ .mfi
- nop.m 999
- fma.s1 log_rp_q2 = log_rp_q32, log_wsq, log_rp_q10
- nop.i 999
-;;
+ nop.m 0
+ fma.s1 FR_A2 = FR_A3,FR_r2,FR_A2 // (A4*r+A3)*r^2+(A2*r+1)
+ nop.i 0
}
-
-
-// small w, log <== p14
{ .mfi
- nop.m 999
-(p14) fma.s f8 = log_rp_q2, log_w, f0
- nop.i 999
-}
+ nop.m 0
+ fma.s1 FR_NxLn2pT = FR_N,FR_Ln2,FR_T // N*Ln2+T
+ nop.i 0
+};;
+.pred.rel "mutex",p6,p7
{ .mfi
- nop.m 999
-(p11) fma.s1 log_Q = log_rp_q2, log_w, f0
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s.s0 f8 = FR_A2,FR_r,FR_NxLn2pT // result for |x-1|>=1/256
+ nop.i 0
}
+{ .mfb
+ nop.m 0
+(p6) fma.s.s0 f8 = FR_A2,FR_r,f0 // result for |x-1|<1/256
+ br.ret.sptk b0
+};;
-
-// large w, log <== p13
-.pred.rel "mutex",p13,p10
+.align 32
+logf_positive_unorm:
{ .mfi
- nop.m 999
-(p13) fma.s f8 = log_rp_p2, log_r, log_T_plus_Nlog2
- nop.i 999
-}
+ nop.m 0
+(p8) fma.s0 f8 = f8,f1,f0 // Normalize & set D-flag
+ nop.i 0
+};;
{ .mfi
- nop.m 999
-(p10) fma.s1 log_Q = log_rp_p2, log_r, log_T_plus_Nlog2
- nop.i 999 ;;
-}
-
-
-// log10
-{ .mfb
- nop.m 999
-(p7) fma.s f8 = log_inv_ln10,log_Q,f0
- br.ret.sptk b0
-;;
-}
-
-
-L(LOG_DENORM):
-{ .mmi
- getf.exp log_GR_signexp_f8 = log_NORM_f8
- nop.m 999
- nop.i 999
-}
-;;
-{ .mmb
- getf.sig log_GR_significand_f8 = log_NORM_f8
- and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
- br.cond.sptk L(LOG_COMMON)
-}
-;;
-
-L(LOG_ZERO_NEG):
-
-// qnan snan inf norm unorm 0 -+
-// 0 0 0 0 0 1 11 0x7
-// 0 0 1 1 1 0 10 0x3a
-
-// Save x (f8) in f10
+ getf.exp GR_Exp = f8 // recompute biased exponent
+ nop.f 0
+ cmp.ne p6,p7 = r0,r0 // p6 <- 0, p7 <- 1 because
+ // in case of unorm we are outside the
+ // interval [255/256; 257/256]
+};;
{ .mfi
- nop.m 999
- fmerge.s f10 = f8,f8
- nop.i 999 ;;
-}
-
-// p8 p9 means ln(+-0) = -inf
-// p7 p10 means log(+-0) = -inf
-
-// p13 means ln(-)
-// p14 means log(-)
-
+ getf.sig GR_Sig = f8 // recompute significand
+ nop.f 0
+ nop.i 0
+};;
+{ .mib
+ sub GR_N = GR_Exp,GR_05,1 // unbiased exponent N
+ nop.i 0
+ br.cond.sptk logf_core // return into main path
+};;
+.align 32
+logf_nan_nat_pinf:
{ .mfi
- nop.m 999
- fmerge.ns f6 = f1,f1 // Form -1.0
- nop.i 999 ;;
+ nop.m 0
+ fma.s.s0 f8 = f8,f1,f0 // set V-flag
+ nop.i 0
}
+{ .mfb
+ nop.m 0
+ nop.f 0
+ br.ret.sptk b0 // exit for NaN, NaT or +Inf
+};;
-// p9 means ln(+-0) = -inf
-// p10 means log(+-0) = -inf
-// Log(+-0) = -inf
-
-{ .mfi
- nop.m 999
-(p8) fclass.m.unc p9,p0 = f10, 0x07
- nop.i 999
-}
+.align 32
+logf_zeroes:
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p10,p0 = f10, 0x07
- nop.i 999 ;;
+ nop.m 0
+ fmerge.s FR_X = f8,f8 // keep input argument for subsequent
+ // call of __libm_error_support#
+ nop.i 0
}
-
-
-// p13 ln(-)
-// p14 log(-)
-
-// Log(-inf, -normal, -unnormal) = QNAN indefinite
{ .mfi
- nop.m 999
-(p8) fclass.m.unc p13,p0 = f10, 0x3a
- nop.i 999
-}
+(p13) mov GR_TAG = 4 // set libm error in case of logf
+ fms.s1 FR_tmp = f0,f0,f1 // -1.0
+ nop.i 0
+};;
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p14,p0 = f10, 0x3a
- nop.i 999 ;;
+ nop.m 0
+ frcpa.s0 f8,p0 = FR_tmp,f0 // log(+/-0) should be equal to -INF.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of FR_tmp/f0.
+ // Since FR_tmp is -1, the result is -INF
+ nop.i 0
}
+{ .mib
+(p14) mov GR_TAG = 10 // set libm error in case of log10f
+ nop.i 0
+ br.cond.sptk logf_libm_err
+};;
-
-.pred.rel "mutex",p9,p10
-{ .mfi
-(p9) mov log_GR_tag = 4
-(p9) frcpa f8,p11 = f6,f0
- nop.i 999
-}
+.align 32
+logf_negatives:
{ .mfi
-(p10) mov log_GR_tag = 10
-(p10) frcpa f8,p12 = f6,f0
- nop.i 999 ;;
-}
-
-.pred.rel "mutex",p13,p14
+(p13) mov GR_TAG = 5 // set libm error in case of logf
+ fmerge.s FR_X = f8,f8 // keep input argument for subsequent
+ // call of __libm_error_support#
+ nop.i 0
+};;
{ .mfi
-(p13) mov log_GR_tag = 5
-(p13) frcpa f8,p11 = f0,f0
- nop.i 999
-}
-{ .mfb
-(p14) mov log_GR_tag = 11
-(p14) frcpa f8,p12 = f0,f0
- br.cond.sptk __libm_error_region ;;
-}
-.endp logf
-ASM_SIZE_DIRECTIVE(logf)
-ASM_SIZE_DIRECTIVE(__ieee754_logf)
+(p14) mov GR_TAG = 11 // set libm error in case of log10f
+ frcpa.s0 f8,p0 = f0,f0 // log(negatives) should be equal to NaN.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of f0/f0 i.e. NaN.
+ nop.i 0
+};;
+.align 32
+logf_libm_err:
+{ .mmi
+ alloc r32 = ar.pfs,1,4,4,0
+ mov GR_Parameter_TAG = GR_TAG
+ nop.i 0
+};;
+GLOBAL_IEEE754_END(logf)
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
@@ -890,70 +1101,56 @@ ASM_SIZE_DIRECTIVE(__ieee754_logf)
// save ar.pfs save b0 restore gp
// save gp restore ar.pfs
-
-
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-
-// (2)
{ .mmi
- stfs [GR_Parameter_Y] = f1,16 // STORE Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
-
.body
-// (3)
{ .mib
- stfs [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
-
{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
};;
-
-// (4)
{ .mmi
- ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
diff --git a/sysdeps/ia64/fpu/e_logl.c b/sysdeps/ia64/fpu/e_logl.c
deleted file mode 100644
index 41254ae60a..0000000000
--- a/sysdeps/ia64/fpu/e_logl.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_pow.S b/sysdeps/ia64/fpu/e_pow.S
index 56f7f078ba..11fae53d72 100644
--- a/sysdeps/ia64/fpu/e_pow.S
+++ b/sysdeps/ia64/fpu/e_pow.S
@@ -1,10 +1,10 @@
.file "pow.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,30 +35,41 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 2/03/00 Added p12 to definite over/under path. With odd power we did not
+// 02/02/00 Initial version
+// 02/03/00 Added p12 to definite over/under path. With odd power we did not
// maintain the sign of x in this path.
-// 4/04/00 Unwind support added
-// 4/19/00 pow(+-1,inf) now returns NaN
-// pow(+-val, +-inf) returns 0 or inf, but now does not call error support
+// 04/04/00 Unwind support added
+// 04/19/00 pow(+-1,inf) now returns NaN
+// pow(+-val, +-inf) returns 0 or inf, but now does not call error
+// support
// Added s1 to fcvt.fx because invalid flag was incorrectly set.
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 9/07/00 Improved performance by eliminating bank conflicts and other stalls,
+// 09/07/00 Improved performance by eliminating bank conflicts and other stalls,
// and tweaking the critical path
-// 9/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
-// 9/28/00 Updated NaN**0 path
-// 1/20/01 Fixed denormal flag settings.
-// 2/12/01 Improved speed.
+// 09/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
+// 09/28/00 Updated NaN**0 path
+// 01/20/01 Fixed denormal flag settings.
+// 02/13/01 Improved speed.
+// 03/19/01 Reordered exp polynomial to improve speed and eliminate monotonicity
+// problem in round up, down, and to zero modes. Also corrected
+// overflow result when x negative, y odd in round up, down, zero.
+// 06/14/01 Added brace missing from bundle
+// 12/10/01 Corrected case where x negative, 2^52 <= |y| < 2^53, y odd integer.
+// 12/20/01 Fixed monotonicity problem in round to nearest.
+// 02/08/02 Fixed overflow/underflow cases that were not calling error support.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/29/02 Improved Itanium 2 performance
+// 09/21/02 Added branch for |y*log(x)|<2^-11 to fix monotonicity problems.
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
-// double pow(double)
-// float powf(float)
+// double pow(double x, double y)
//
// Overview of operation
//==============================================================
@@ -67,51 +78,51 @@
// 1. Log(x)
// 2. y Log(x)
// 3. exp(y log(x))
-//
+//
// This means we work with the absolute value of x and merge in the sign later.
// Log(x) = G + delta + r -rsq/2 + p
// G,delta depend on the exponent of x and table entries. The table entries are
// indexed by the exponent of x, called K.
-//
+//
// The G and delta come out of the reduction; r is the reduced x.
-//
+//
// B = frcpa(x)
// xB-1 is small means that B is the approximate inverse of x.
-//
+//
// Log(x) = Log( (1/B)(Bx) )
// = Log(1/B) + Log(Bx)
// = Log(1/B) + Log( 1 + (Bx-1))
-//
+//
// x = 2^K 1.x_1x_2.....x_52
-// B= frcpa(x) = 2^-k Cm
+// B= frcpa(x) = 2^-k Cm
// Log(1/B) = Log(1/(2^-K Cm))
// Log(1/B) = Log((2^K/ Cm))
// Log(1/B) = K Log(2) + Log(1/Cm)
-//
+//
// Log(x) = K Log(2) + Log(1/Cm) + Log( 1 + (Bx-1))
-//
+//
// If you take the significand of x, set the exponent to true 0, then Cm is
// the frcpa. We tabulate the Log(1/Cm) values. There are 256 of them.
// The frcpa table is indexed by 8 bits, the x_1 thru x_8.
// m = x_1x_2...x_8 is an 8-bit index.
-//
+//
// Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
-//
+//
// We tabluate as two doubles, T and t, where T +t is the value itself.
-//
+//
// Log(x) = (K Log(2)_hi + T) + (Log(2)_hi + t) + Log( 1 + (Bx-1))
// Log(x) = G + delta + Log( 1 + (Bx-1))
-//
+//
// The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
-//
+//
// Log( 1 + (Bx-1)) = r - rsq/2 + p
-//
+//
// Then,
-//
+//
// yLog(x) = yG + y delta + y(r-rsq/2) + yp
// yLog(x) = Z1 + e3 + Z2 + Z3 + (e2 + e3)
-//
-//
+//
+//
// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
//
//
@@ -133,7 +144,7 @@
// exp(r) = exp(Z - N log2/128)
//
// r = s + d = (Z - N (log2/128)_hi) -N (log2/128)_lo
-// = Z - N (log2/128)
+// = Z - N (log2/128)
//
// Z = s+d +N (log2/128)
//
@@ -149,22 +160,22 @@
// n log2/128 = n_7n_6n_5 log2/8 + n_4n_3n_2n_1 log2/128
// n log2/128 = I2 log2/8 + I1 log2/128
//
-// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
+// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
//
// exp(Z) = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
// exp(Z) = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128
// exp(Z) = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128
//
// I1, I2 are table indices. Use a series for exp(s).
-// Then get exp(Z)
+// Then get exp(Z)
//
// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
-// exp(yLog(x)) = exp(Z) exp(Z3) f3
-// exp(yLog(x)) = exp(Z)f3 exp(Z3)
-// exp(yLog(x)) = A exp(Z3)
+// exp(yLog(x)) = exp(Z) exp(Z3) f3
+// exp(yLog(x)) = exp(Z)f3 exp(Z3)
+// exp(yLog(x)) = A exp(Z3)
//
// We actually calculate exp(Z3) -1.
-// Then,
+// Then,
// exp(yLog(x)) = A + A( exp(Z3) -1)
//
@@ -175,142 +186,146 @@
// ==============
// The operation (K*log2_hi) must be exact. K is the true exponent of x.
// If we allow gradual underflow (denormals), K can be represented in 12 bits
-// (as a two's complement number). We assume 13 bits as an engineering precaution.
-//
+// (as a two's complement number). We assume 13 bits as an engineering
+// precaution.
+//
// +------------+----------------+-+
// | 13 bits | 50 bits | |
// +------------+----------------+-+
// 0 1 66
// 2 34
-//
+//
// So we want the lsb(log2_hi) to be 2^-50
// We get log2 as a quad-extended (15-bit exponent, 128-bit significand)
-//
+//
// 0 fffe b17217f7d1cf79ab c9e3b39803f2f6af (4...)
-//
+//
// Consider numbering the bits left to right, starting at 0 thru 127.
// Bit 0 is the 2^-1 bit; bit 49 is the 2^-50 bit.
-//
+//
// ...79ab
// 0111 1001 1010 1011
// 44
// 89
-//
-// So if we shift off the rightmost 14 bits, then (shift back only
+//
+// So if we shift off the rightmost 14 bits, then (shift back only
// the top half) we get
-//
+//
// 0 fffe b17217f7d1cf4000 e6af278ece600fcb dabc000000000000
-//
+//
// Put the right 64-bit signficand in an FR register, convert to double;
// it is exact. Put the next 128 bits into a quad register and round to double.
// The true exponent of the low part is -51.
-//
+//
// hi is 0 fffe b17217f7d1cf4000
// lo is 0 ffcc e6af278ece601000
-//
+//
// Convert to double memory format and get
-//
+//
// hi is 0x3fe62e42fefa39e8
-// lo is 0x3cccd5e4f1d9cc02
-//
+// lo is 0x3cccd5e4f1d9cc02
+//
// log2_hi + log2_lo is an accurate value for log2.
-//
-//
+//
+//
// The T and t values
// ==================
// A similar method is used to generate the T and t values.
-//
+//
// K * log2_hi + T must be exact.
-//
+//
// Smallest T,t
// ----------
-// The smallest T,t is
+// The smallest T,t is
// T t
-// data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 log(1/frcpa(1+0/256))= +1.95503e-003
-//
+// 0x3f60040155d58800, 0x3c93bce0ce3ddd81 log(1/frcpa(1+0/256))= +1.95503e-003
+//
// The exponent is 0x3f6 (biased) or -9 (true).
// For the smallest T value, what we want is to clip the significand such that
-// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is the specific
-// for the first entry. In general, it is 0xffff - (biased 15-bit exponent).
+// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is the
+// specific for the first entry. In general, it is 0xffff - (biased 15-bit
+// exponent).
-// Independently, what we have calculated is the table value as a quad precision number.
+// Independently, what we have calculated is the table value as a quad
+// precision number.
// Table entry 1 is
// 0 fff6 80200aaeac44ef38 338f77605fdf8000
-//
+//
// We store this quad precision number in a data structure that is
-// sign: 1
+// sign: 1
// exponent: 15
// signficand_hi: 64 (includes explicit bit)
// signficand_lo: 49
// Because the explicit bit is included, the significand is 113 bits.
-//
+//
// Consider significand_hi for table entry 1.
-//
-//
+//
+//
// +-+--- ... -------+--------------------+
// | |
// +-+--- ... -------+--------------------+
// 0 1 4444444455555555556666
// 2345678901234567890123
-//
+//
// Labeled as above, bit 0 is 2^0, bit 1 is 2^-1, etc.
// Bit 42 is 2^-42. If we shift to the right by 9, the bit in
// bit 42 goes in 51.
-//
+//
// So what we want to do is shift bits 43 thru 63 into significand_lo.
-// This is shifting bit 42 into bit 63, taking care to retain the shifted-off bits.
-// Then shifting (just with signficaand_hi) back into bit 42.
-//
-// The shift_value is 63-42 = 21. In general, this is
+// This is shifting bit 42 into bit 63, taking care to retain shifted-off bits.
+// Then shifting (just with significand_hi) back into bit 42.
+//
+// The shift_value is 63-42 = 21. In general, this is
// 63 - (51 -(0xffff - 0xfff6))
// For this example, it is
// 63 - (51 - 9) = 63 - 42 = 21
-//
-// This means we are shifting 21 bits into significand_lo. We must maintain more
-// that a 128-bit signficand not to lose bits. So before the shift we put the 128-bit
-// significand into a 256-bit signficand and then shift.
+//
+// This means we are shifting 21 bits into significand_lo. We must maintain more
+// than a 128-bit significand not to lose bits. So before the shift we put the
+// 128-bit significand into a 256-bit significand and then shift.
// The 256-bit significand has four parts: hh, hl, lh, and ll.
-//
+//
// Start off with
// hh hl lh ll
// <64> <49><15_0> <64_0> <64_0>
-//
+//
// After shift by 21 (then return for significand_hi),
// <43><21_0> <21><43> <6><58_0> <64_0>
-//
+//
// Take the hh part and convert to a double. There is no rounding here.
-// The conversion is exact. The true exponent of the high part is the same as the
-// true exponent of the input quad.
-//
-// We have some 64 plus significand bits for the low part. In this example, we have
-// 70 bits. We want to round this to a double. Put them in a quad and then do a quad fnorm.
-// For this example the true exponent of the low part is
+// The conversion is exact. The true exponent of the high part is the same as
+// the true exponent of the input quad.
+//
+// We have some 64 plus significand bits for the low part. In this example, we
+// have 70 bits. We want to round this to a double. Put them in a quad and then
+// do a quad fnorm.
+// For this example the true exponent of the low part is
// true_exponent_of_high - 43 = true_exponent_of_high - (64-21)
-// In general, this is
-// true_exponent_of_high - (64 - shift_value)
-//
-//
+// In general, this is
+// true_exponent_of_high - (64 - shift_value)
+//
+//
// Largest T,t
// ----------
// The largest T,t is
-// data8 0x3fe62643fecf9742, 0x3c9e3147684bd37d log(1/frcpa(1+255/256))= +6.92171e-001
-//
+// 0x3fe62643fecf9742, 0x3c9e3147684bd37d log(1/frcpa(1+255/256))=+6.92171e-001
+//
// Table entry 256 is
// 0 fffe b1321ff67cba178c 51da12f4df5a0000
-//
-// The shift value is
+//
+// The shift value is
// 63 - (51 -(0xffff - 0xfffe)) = 13
-//
-// The true exponent of the low part is
+//
+// The true exponent of the low part is
// true_exponent_of_high - (64 - shift_value)
// -1 - (64-13) = -52
// Biased as a double, this is 0x3cb
-//
-//
-//
+//
+//
+//
// So then lsb(T) must be >= 2^-51
// msb(Klog2_hi) <= 2^12
-//
+//
// +--------+---------+
// | 51 bits | <== largest T
// +--------+---------+
@@ -320,7 +335,6 @@
// +------------+----------------+-+
-
// Special Cases
//==============================================================
@@ -385,63 +399,67 @@
// X any Y =0 +1
-#include "libm_support.h"
-
// Assembly macros
//==============================================================
// integer registers used
-pow_AD_Tt = r33
-pow_GR_FFF7 = r34
-pow_GR_exp_Y = r34 // duplicate
-pow_GR_17ones = r35
-
-pow_AD_P = r36
-pow_AD_Q = r37
-pow_AD_tbl1 = r38
-pow_AD_tbl2 = r39
-pow_GR_exp_X = r40
-pow_GR_true_exp_X = r40 // duplicate
-
-pow_GR_offset = r41
-pow_GR_exp_Xm1 = r42
-pow_GR_sig_X = r43
-pow_GR_signexp_X = r44
-
-pow_GR_signexp_Xm1 = r46
-pow_GR_int_W1 = r47
-pow_GR_int_W2 = r48
-pow_GR_int_N = r49
-pow_GR_index1 = r50
-
-pow_GR_index2 = r51
-pow_AD_T1 = r52
-pow_AD_T2 = r53
-pow_GR_gt_ln = r53 // duplicate
-pow_int_GR_M = r54
-pow_GR_10033 = r55
-
-pow_GR_16ones = r56
-pow_GR_sig_int_Y = r57
-pow_GR_sign_Y_Gpr = r58
-pow_GR_17ones_m1 = r59
-pow_GR_one = r60
-pow_GR_sign_Y = r60
-
-pow_GR_signexp_Y_Gpr = r61
-pow_GR_exp_Y_Gpr = r62
-pow_GR_true_exp_Y_Gpr = r63
-pow_GR_signexp_Y = r64
-
-GR_SAVE_B0 = r65
-GR_SAVE_GP = r66
-GR_SAVE_PFS = r67
-
-GR_Parameter_X = r68
-GR_Parameter_Y = r69
-GR_Parameter_RESULT = r70
-pow_GR_tag = r71
+pow_GR_signexp_X = r14
+pow_GR_17ones = r15
+pow_AD_P = r16
+pow_GR_exp_2tom8 = r17
+pow_GR_sig_X = r18
+pow_GR_10033 = r19
+pow_GR_16ones = r20
+
+pow_AD_Tt = r21
+pow_GR_exp_X = r22
+pow_AD_Q = r23
+pow_GR_true_exp_X = r24
+pow_GR_y_zero = r25
+
+pow_GR_exp_Y = r26
+pow_AD_tbl1 = r27
+pow_AD_tbl2 = r28
+pow_GR_offset = r29
+pow_GR_exp_Xm1 = r30
+pow_GR_xneg_yodd = r31
+
+pow_GR_signexp_Xm1 = r35
+pow_GR_int_W1 = r36
+pow_GR_int_W2 = r37
+pow_GR_int_N = r38
+pow_GR_index1 = r39
+pow_GR_index2 = r40
+
+pow_AD_T1 = r41
+pow_AD_T2 = r42
+pow_int_GR_M = r43
+pow_GR_sig_int_Y = r44
+pow_GR_sign_Y_Gpr = r45
+
+pow_GR_17ones_m1 = r46
+pow_GR_one = r47
+pow_GR_sign_Y = r48
+pow_GR_signexp_Y_Gpr = r49
+pow_GR_exp_Y_Gpr = r50
+
+pow_GR_true_exp_Y_Gpr = r51
+pow_GR_signexp_Y = r52
+pow_GR_x_one = r53
+pow_GR_exp_2toM63 = r54
+pow_GR_big_pos = r55
+
+pow_GR_big_neg = r56
+
+GR_SAVE_B0 = r50
+GR_SAVE_GP = r51
+GR_SAVE_PFS = r52
+
+GR_Parameter_X = r53
+GR_Parameter_Y = r54
+GR_Parameter_RESULT = r55
+pow_GR_tag = r56
// floating point registers used
@@ -464,7 +482,8 @@ POW_log2_lo = f43
POW_r = f44
POW_Q0_half = f45
-POW_Q1 = f46
+POW_Q1 = f46
+POW_tmp = f47
POW_log2_hi = f48
POW_Q4 = f49
POW_P1 = f50
@@ -476,6 +495,7 @@ POW_Yrcub = f54
POW_log2_by_128_lo = f55
POW_v6 = f56
+POW_xsq = f57
POW_v4 = f58
POW_v2 = f59
POW_T = f60
@@ -484,6 +504,7 @@ POW_Tt = f61
POW_RSHF = f62
POW_v21ps = f63
POW_s4 = f64
+POW_twoV = f65
POW_U = f66
POW_G = f67
@@ -533,44 +554,45 @@ POW_1ps = f103
POW_A = f104
POW_es = f105
+POW_Xp1 = f106
POW_int_K = f107
POW_K = f108
POW_f123 = f109
POW_Gpr = f110
-POW_Y_Gpr = f111
+POW_Y_Gpr = f111
POW_int_Y = f112
+POW_abs_q = f114
+POW_2toM63 = f115
POW_float_int_Y = f116
POW_ftz_urm_f8 = f117
POW_wre_urm_f8 = f118
-POW_abs_A = f119
-POW_gt_pln = f120
+POW_big_neg = f119
+POW_big_pos = f120
-POW_xsq = f121
-
-POW_twoV = f122
-POW_Xp1 = f123
+POW_GY_Z2 = f121
+POW_pYrcub_e3 = f122
+POW_d = f123
+POW_d2 = f124
+POW_poly_d_hi = f121
+POW_poly_d_lo = f122
+POW_poly_d = f121
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-pow_table_P:
-ASM_TYPE_DIRECTIVE(pow_table_P,@object)
+LOCAL_OBJECT_START(pow_table_P)
data8 0x8000F7B249FF332D, 0x0000BFFC // P_5
data8 0xAAAAAAA9E7902C7F, 0x0000BFFC // P_3
data8 0x80000000000018E5, 0x0000BFFD // P_1
data8 0xb8aa3b295c17f0bc, 0x00004006 // inv_ln2_by_128
-
-
+//
+//
data8 0x3FA5555555554A9E // Q_2
data8 0x3F8111124F4DD9F9 // Q_3
data8 0x3FE0000000000000 // Q_0
@@ -580,20 +602,18 @@ data8 0x43e8000000000000 // Right shift constant for exp
data8 0xc9e3b39803f2f6af, 0x00003fb7 // ln2_by_128_lo
data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
-ASM_SIZE_DIRECTIVE(pow_table_P)
+LOCAL_OBJECT_END(pow_table_P)
-pow_table_Q:
-ASM_TYPE_DIRECTIVE(pow_table_Q,@object)
+LOCAL_OBJECT_START(pow_table_Q)
data8 0x9249FE7F0DC423CF, 0x00003FFC // P_4
data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC // P_2
data8 0xAAAAAAAAAAAAB505, 0x00003FFD // P_0
data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo = +6.93147e-001
data8 0xb17217f7d1cf79ab, 0x00003ff7 // ln2_by_128_hi
-ASM_SIZE_DIRECTIVE(pow_table_Q)
+LOCAL_OBJECT_END(pow_table_Q)
-pow_Tt:
-ASM_TYPE_DIRECTIVE(pow_Tt,@object)
+LOCAL_OBJECT_START(pow_Tt)
data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))= +1.95503e-003
data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))= +5.87661e-003
data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))= +9.81362e-003
@@ -850,13 +870,12 @@ data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))= +6.863
data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))= +6.88276e-001
data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))= +6.90222e-001
data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))= +6.92171e-001
-ASM_SIZE_DIRECTIVE(pow_Tt)
+LOCAL_OBJECT_END(pow_Tt)
// Table 1 is 2^(index_1/128) where
// index_1 goes from 0 to 15
-pow_tbl1:
-ASM_TYPE_DIRECTIVE(pow_tbl1,@object)
+LOCAL_OBJECT_START(pow_tbl1)
data8 0x8000000000000000 , 0x00003FFF
data8 0x80B1ED4FD999AB6C , 0x00003FFF
data8 0x8164D1F3BC030773 , 0x00003FFF
@@ -873,13 +892,12 @@ data8 0x88980E8092DA8527 , 0x00003FFF
data8 0x8955EE03618E5FDD , 0x00003FFF
data8 0x8A14D575496EFD9A , 0x00003FFF
data8 0x8AD4C6452C728924 , 0x00003FFF
-ASM_SIZE_DIRECTIVE(pow_tbl1)
+LOCAL_OBJECT_END(pow_tbl1)
// Table 2 is 2^(index_1/8) where
// index_2 goes from 0 to 7
-pow_tbl2:
-ASM_TYPE_DIRECTIVE(pow_tbl2,@object)
+LOCAL_OBJECT_START(pow_tbl2)
data8 0x8000000000000000 , 0x00003FFF
data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
data8 0x9837F0518DB8A96F , 0x00003FFF
@@ -888,402 +906,319 @@ data8 0xB504F333F9DE6484 , 0x00003FFF
data8 0xC5672A115506DADD , 0x00003FFF
data8 0xD744FCCAD69D6AF4 , 0x00003FFF
data8 0xEAC0C6E7DD24392F , 0x00003FFF
-ASM_SIZE_DIRECTIVE(pow_tbl2)
-
-.global pow
+LOCAL_OBJECT_END(pow_tbl2)
.section .text
-.proc pow
-.align 32
-
-pow:
+GLOBAL_LIBM_ENTRY(pow)
+// Get exponent of x. Will be used to calculate K.
{ .mfi
- alloc r32=ar.pfs,1,35,4,0
- fms.s1 POW_Xm1 = f8,f1,f1 // Will be used for r1 if x>0
- mov pow_GR_17ones = 0x1FFFF
+ getf.exp pow_GR_signexp_X = f8
+ fms.s1 POW_Xm1 = f8,f1,f1 // Will be used for r1 if x>0
+ mov pow_GR_17ones = 0x1FFFF
}
{ .mfi
-(p0) addl pow_AD_P = @ltoff(pow_table_P), gp
- fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
+ addl pow_AD_P = @ltoff(pow_table_P), gp
+ fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
nop.i 999
;;
}
-
-// Get exponent of x. Will be used to calculate K.
+// Get significand of x. Will be used to get index to fetch T, Tt.
{ .mfi
- getf.exp pow_GR_signexp_X = f8
- frcpa.s1 POW_B, p6 = f1,f8
+ getf.sig pow_GR_sig_X = f8
+ frcpa.s1 POW_B, p6 = f1,f8
nop.i 999
}
{ .mfi
ld8 pow_AD_P = [pow_AD_P]
- fma.s1 POW_NORM_X = f8,f1,f0
- mov pow_GR_FFF7 = 0xFFF7
+ fma.s1 POW_NORM_X = f8,f1,f0
+ mov pow_GR_exp_2tom8 = 0xFFF7
}
;;
-
-
-// Get significand of x. Will be used to get index to fetch T, Tt.
// p13 = TRUE ==> X is unorm
// DOUBLE 0x10033 exponent limit at which y is an integer
-// SINGLE 0x10016
{ .mfi
- getf.sig pow_GR_sig_X = f8
- fclass.m p13,p0 = f8, 0x0b // Test for x unorm
- addl pow_GR_10033 = 0x10033, r0
+ nop.m 999
+ fclass.m p13,p0 = f8, 0x0b // Test for x unorm
+ addl pow_GR_10033 = 0x10033, r0
}
{ .mfi
mov pow_GR_16ones = 0xFFFF
- fma.s1 POW_NORM_Y = f9,f1,f0
+ fma.s1 POW_NORM_Y = f9,f1,f0
nop.i 999
}
;;
-
// p14 = TRUE ==> X is ZERO
{ .mfi
adds pow_AD_Tt = pow_Tt - pow_table_P, pow_AD_P
- fclass.m p14,p15 = f8, 0x07
- and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+ fclass.m p14,p0 = f8, 0x07
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
}
{ .mfi
- adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P
+ adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P
nop.f 999
nop.i 999
}
;;
{ .mfi
- ldfe POW_P5 = [pow_AD_P], 16
- fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
- shl pow_GR_offset = pow_GR_sig_X, 1
+ ldfe POW_P5 = [pow_AD_P], 16
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
+ nop.i 999
}
{ .mib
- ldfe POW_P4 = [pow_AD_Q], 16
- sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
-(p13) br.cond.spnt L(POW_X_DENORM)
+ ldfe POW_P4 = [pow_AD_Q], 16
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+(p13) br.cond.spnt POW_X_DENORM
}
;;
-
// Continue normal and denormal paths here
-L(POW_COMMON):
+POW_COMMON:
// p11 = TRUE ==> Y is a NAN
{ .mfi
- ldfe POW_P3 = [pow_AD_P], 16
- fclass.m.unc p11,p0 = f9, 0xc3
- shr.u pow_GR_offset = pow_GR_offset,56
+ ldfe POW_P3 = [pow_AD_P], 16
+ fclass.m p11,p0 = f9, 0xc3
+ nop.i 999
}
{ .mfi
- ldfe POW_P2 = [pow_AD_Q], 16
+ ldfe POW_P2 = [pow_AD_Q], 16
nop.f 999
- nop.i 999
+ mov pow_GR_y_zero = 0
}
;;
-
-
-// Compute xsq to decide later if |x|=1
-// p11 = TRUE ==> Y is a NaN
+// Note POW_Xm1 and POW_r1 are used interchangeably
{ .mfi
- setf.sig POW_int_K = pow_GR_true_exp_X
-(p15) fms.s1 POW_r = POW_B, POW_NORM_X,f1
- shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
+ alloc r32=ar.pfs,2,19,4,0
+ fms.s1 POW_r = POW_B, POW_NORM_X,f1
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
+ setf.sig POW_int_K = pow_GR_true_exp_X
+(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
nop.i 999
}
;;
-
-
-// p12 = TRUE ==> X is ZERO and Y is ZERO
+// p12 = TRUE if Y is ZERO
+// Compute xsq to decide later if |x|=1
{ .mfi
- ldfe POW_P1 = [pow_AD_P], 16
-(p14) fclass.m.unc p12,p0 = f9, 0x07
- nop.i 999
+ ldfe POW_P1 = [pow_AD_P], 16
+ fclass.m p12,p0 = f9, 0x07
+ shl pow_GR_offset = pow_GR_sig_X, 1
}
{ .mfb
- ldfe POW_P0 = [pow_AD_Q], 16
+ ldfe POW_P0 = [pow_AD_Q], 16
fma.s1 POW_xsq = POW_NORM_X, POW_NORM_X, f0
-(p11) br.cond.spnt L(POW_Y_NAN)
+(p11) br.cond.spnt POW_Y_NAN // Branch if y=nan
}
;;
-
-.pred.rel "mutex",p8,p9
// Get exponent of |x|-1 to use in comparison to 2^-8
-{ .mmf
-(p8) getf.exp pow_GR_signexp_Xm1 = POW_Xp1
-(p9) getf.exp pow_GR_signexp_Xm1 = POW_Xm1
- fcvt.fx.s1 POW_int_Y = POW_NORM_Y
+{ .mfi
+ getf.exp pow_GR_signexp_Xm1 = POW_Xm1
+ fcvt.fx.s1 POW_int_Y = POW_NORM_Y
+ shr.u pow_GR_offset = pow_GR_offset,56
}
;;
-
// p11 = TRUE ==> X is a NAN
{ .mfi
ldfpd POW_log2_hi, POW_log2_lo = [pow_AD_Q], 16
- fclass.m.unc p11,p0 = f8, 0xc3
- nop.i 999
-}
-{ .mib
- ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16
- nop.i 999
-(p12) br.cond.spnt L(POW_X_0_Y_0)
+ fclass.m p11,p0 = f8, 0xc3
+ shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
}
-;;
-
-
-// p14 = TRUE ==> X is zero
-// p15 = TRUE ==> X is zero AND Y is negative
-// p10 = TRUE ==> X is zero AND Y is >= zero
{ .mfi
ldfe POW_inv_log2_by_128 = [pow_AD_P], 16
-(p14) fcmp.lt.unc.s1 p15, p10 = f9,f0
- nop.i 999
+ fma.s1 POW_delta = f0,f0,f0 // delta=0 in case |x| near 1
+(p12) mov pow_GR_y_zero = 1
}
-{ .mfi
- nop.m 999
- nop.f 999
- and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
-}
-;;
-
-
-// Determine if we will use the |x| near 1 path (p6) or normal path (p7)
-// p12 = TRUE ==> X is a NAN and Y is a zero
-// p13 = TRUE ==> X is a NAN and Y is anything else
-{ .mfi
- getf.exp pow_GR_signexp_Y = POW_NORM_Y
-(p11) fclass.m.unc p12,p13 = f9, 0x07
- cmp.lt.unc p6,p7 = pow_GR_exp_Xm1, pow_GR_FFF7
-}
-{ .mfi
- ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16
- fma.s1 POW_rsq = POW_r, POW_r,f0
- nop.i 999
;;
-}
-// If on the x near 1 path, assign r1 to r and r1*r1 to rsq
{ .mfi
- ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16
-(p6) fma.s1 POW_r = POW_r1, f1, f0
- nop.i 999
+ ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16
+ fma.s1 POW_G = f0,f0,f0 // G=0 in case |x| near 1
+ and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
}
-{ .mfi
- nop.m 999
-(p6) fma.s1 POW_rsq = POW_r1, POW_r1, f0
- nop.i 999
;;
-}
-
+// Determine if we will use the |x| near 1 path (p6) or normal path (p7)
{ .mfi
- ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16
-(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4
- and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+ getf.exp pow_GR_signexp_Y = POW_NORM_Y
+ nop.f 999
+ cmp.lt p6,p7 = pow_GR_exp_Xm1, pow_GR_exp_2tom8
}
{ .mfb
- nop.m 999
-(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4
-(p12) br.cond.spnt L(POW_X_NAN_Y_0)
+ ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16
+ fma.s1 POW_rsq = POW_r, POW_r,f0
+(p11) br.cond.spnt POW_X_NAN // Branch if x=nan and y not nan
}
;;
-
+// If on the x near 1 path, assign r1 to r and r1*r1 to rsq
{ .mfi
- nop.m 999
-(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2
- andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
+ ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16
+(p6) fma.s1 POW_r = POW_r1, f1, f0
+ nop.i 999
}
{ .mfb
nop.m 999
-(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2
-(p12) br.cond.spnt L(POW_X_NAN_Y_0)
+(p6) fma.s1 POW_rsq = POW_r1, POW_r1, f0
+(p14) br.cond.spnt POW_X_0 // Branch if x zero and y not nan
}
;;
{ .mfi
- nop.m 999
- fcvt.xf POW_K = POW_int_K
+ ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16
+(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4
nop.i 999
}
-{ .mfb
- nop.m 999
-(p13) fma.d f8 = f8,f1,f0
-(p13) br.ret.spnt b0 // Exit if x nan, y anything but zero
+{ .mfi
+ mov pow_GR_exp_2toM63 = 0xffc0 // Exponent of 2^-63
+(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4
+ nop.i 999
}
;;
-
-// p10 = TRUE ==> X is zero AND Y is positive
-// p8 = TRUE ==> X is zero AND Y is outside integer range (treat as even int)
-// return +0
-// p9 = TRUE ==> X is zero AND Y is within integer range (may not be integer)
+
{ .mfi
-(p10) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033
-(p6) fmerge.s POW_delta = f0,f0
+ setf.exp POW_2toM63 = pow_GR_exp_2toM63 // Form 2^-63 for test of q
+(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2
nop.i 999
}
{ .mfi
nop.m 999
-(p6) fma.s1 POW_G = f0,f0,f0
+(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2
nop.i 999
}
;;
{ .mfi
- getf.sig pow_GR_sig_int_Y = POW_int_Y
- fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0
- nop.i 999
-}
-{ .mfi
nop.m 999
- fma.s1 POW_U = POW_NORM_Y,POW_r,f0
+ fcvt.xf POW_K = POW_int_K
nop.i 999
}
;;
{ .mfi
- ldfe POW_log2_by_128_lo = [pow_AD_P], 16
-(p6) fma.s1 POW_v2 = POW_P1, POW_r1, POW_P0
- nop.i 999
+ getf.sig pow_GR_sig_int_Y = POW_int_Y
+ fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
}
-{ .mfi
- ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
-(p7) fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
- nop.i 999
+{ .mfb
+ andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
+ fma.s1 POW_U = POW_NORM_Y,POW_r,f0
+(p12) br.cond.spnt POW_Y_0 // Branch if y=zero, x not zero or nan
}
;;
-
+// p11 = TRUE ==> X is NEGATIVE but not inf
{ .mfi
- nop.m 999
- fcvt.xf POW_float_int_Y = POW_int_Y
+ ldfe POW_log2_by_128_lo = [pow_AD_P], 16
+ fclass.m p11,p0 = f8, 0x1a
nop.i 999
}
{ .mfi
- nop.m 999
- fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4
- adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
+ ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
+ fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
+ nop.i 999
}
;;
{ .mfi
nop.m 999
-(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt
+ fcvt.xf POW_float_int_Y = POW_int_Y
nop.i 999
}
{ .mfi
nop.m 999
-(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
- adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
+ fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4
+ adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
}
;;
-
{ .mfi
nop.m 999
- fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U
+(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U
- nop.i 999
+(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
+ adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
}
;;
-// p11 = TRUE ==> X is NEGATIVE
-// p8 = TRUE ==> X is zero AND Y is outside intger range (treat as even int)
-// return +0
{ .mfi
nop.m 999
- fclass.m.unc p11,p0 = f8, 0x1a
+ fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U
nop.i 999
}
-{ .mfb
+{ .mfi
nop.m 999
-(p8) fma.d f8 = f0,f0,f0
-(p8) br.ret.spnt b0
+ fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U
+ nop.i 999
}
;;
-{ .mfi
+{ .mfi
nop.m 999
- fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
+ fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
nop.i 999
}
-{ .mfi
+{ .mfi
nop.m 999
- fma.s1 POW_p = POW_rsq, POW_v3, POW_v2
+ fma.s1 POW_p = POW_rsq, POW_v3, POW_v2
nop.i 999
}
;;
-
-// p11 = TRUE ==> X is NEGATIVE
-// p12 = TRUE ==> X is NEGATIVE AND Y already int
+// p11 = TRUE ==> X is NEGATIVE but not inf
+// p12 = TRUE ==> X is NEGATIVE AND Y already even int
// p13 = TRUE ==> X is NEGATIVE AND Y possible int
{ .mfi
nop.m 999
- fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0
-(p11) cmp.ge.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
+ fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0
+(p11) cmp.gt.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
}
{ .mfi
nop.m 999
- fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
+ fma.s1 POW_Gpr = POW_G, f1, POW_r
nop.i 999
}
;;
-// p9 = TRUE ==> X is zero AND Y is within integer range (may not be integer)
-// p6 = TRUE ==> X is zero AND Y is an integer (may be even or odd)
-// p7 = TRUE ==> X is zero AND Y is NOT an integer, return +0
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
{ .mfi
nop.m 999
-(p9) fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y
+ fma.s1 POW_W2 = POW_Z2, POW_inv_log2_by_128, POW_RSHF
nop.i 999
}
-{ .mfi
+{ .mfi
nop.m 999
- fma.s1 POW_Gpr = POW_G, f1, POW_r
+ fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2
nop.i 999
}
;;
-// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
{ .mfi
nop.m 999
- fma.s1 POW_W2 = POW_Z2, POW_inv_log2_by_128, POW_RSHF
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2
+ fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
nop.i 999
}
;;
-
-// If x=0 and y>0, test y and flag denormal
-// p6 = TRUE ==> X is zero AND Y is an integer (may be even or odd)
-// p8 = TRUE ==> X is zero AND Y is an odd integer
-// p9 = TRUE ==> X is zero AND Y is an even integer
{ .mfi
nop.m 999
-(p10) fcmp.eq.s0 p15,p0 = f9,f0
-(p6) tbit.nz.unc p8,p9 = pow_GR_sig_int_Y,0
+ fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
+ nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
+ fma.s1 POW_GY_Z2 = POW_G, POW_NORM_Y, POW_Z2
nop.i 999
}
;;
@@ -1291,7 +1226,7 @@ L(POW_COMMON):
// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
{ .mfi
nop.m 999
- fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1
+ fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1
nop.i 999
}
{ .mfi
@@ -1301,81 +1236,60 @@ L(POW_COMMON):
}
;;
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+// p10 = TRUE ==> X is NEG and Y is an int
+// p12 = TRUE ==> X is NEG and Y is not an int
{ .mfi
nop.m 999
-(p7) fma.d f8 = f0,f0,f0 // Result +0 if x zero and y not integer
- nop.i 999
+(p13) fcmp.eq.unc.s1 p10,p12 = POW_float_int_Y, POW_NORM_Y
+ mov pow_GR_xneg_yodd = 0
}
-{ .mfb
+{ .mfi
nop.m 999
- fma.s1 POW_Y_Gpr = POW_NORM_Y, POW_Gpr, f0
-(p8) br.ret.spnt b0 // Exit if x zero and y odd integer
+ fma.s1 POW_Y_Gpr = POW_NORM_Y, POW_Gpr, f0
+ nop.i 999
}
;;
// By subtracting RSHF we get rounded integer POW_N2float
-// p15 = TRUE ==> X_0_Y_NEG
{ .mfi
nop.m 999
fms.s1 POW_N2float = POW_W2, f1, POW_RSHF
nop.i 999
}
-{ .mfb
+{ .mfi
nop.m 999
- fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2
-(p15) br.cond.spnt L(POW_X_0_Y_NEG)
+ fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2
+ nop.i 999
}
;;
-
-
{ .mfi
nop.m 999
- fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
+ fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
nop.i 999
}
-{ .mfb
+{ .mfi
nop.m 999
- fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2
-(p7) br.ret.spnt b0 // Exit if x zero and y not an integer
+ fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2
+ nop.i 999
}
;;
-
-
// Extract rounded integer from rightmost significand of POW_W2
// By subtracting RSHF we get rounded integer POW_N1float
{ .mfi
- getf.sig pow_GR_int_W2 = POW_W2
+ getf.sig pow_GR_int_W2 = POW_W2
fms.s1 POW_N1float = POW_W1, f1, POW_RSHF
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half
+ fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half
nop.i 999
}
;;
-
-
-
-// p13 = TRUE ==> X is NEGATIVE AND Y possible int
-// p10 = TRUE ==> X is NEG and Y is an int
-// p12 = TRUE ==> X is NEG and Y is not an int
-{ .mfi
- nop.m 999
-(p13) fcmp.eq.unc.s1 p10,p12 = POW_float_int_Y, POW_NORM_Y
- nop.i 999
-}
-{ .mfb
- nop.m 999
-(p9) fma.d f8 = f0,f0,f0 // Result +0 if x zero and y even integer
-(p9) br.ret.spnt b0 // Exit if x zero and y even integer
-}
-;;
-
-
{ .mfi
nop.m 999
fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2
@@ -1383,7 +1297,7 @@ L(POW_COMMON):
}
{ .mfi
nop.m 999
- fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV
+ fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV
nop.i 999
}
;;
@@ -1391,278 +1305,283 @@ L(POW_COMMON):
// Extract rounded integer from rightmost significand of POW_W1
// Test if x inf
{ .mfi
- getf.sig pow_GR_int_W1 = POW_W1
- fclass.m.unc p15,p0 = POW_NORM_X, 0x23
+ getf.sig pow_GR_int_W1 = POW_W1
+ fclass.m p15,p0 = POW_NORM_X, 0x23
nop.i 999
}
{ .mfb
nop.m 999
fnma.s1 POW_f2 = POW_N2float, POW_log2_by_128_lo, f1
-(p12) br.cond.spnt L(POW_X_NEG_Y_NONINT) // Branch if x neg, y not integer
+(p12) br.cond.spnt POW_X_NEG_Y_NONINT // Branch if x neg, y not integer
}
;;
+// p11 = TRUE ==> X is +1.0
// p12 = TRUE ==> X is NEGATIVE AND Y is an odd integer
{ .mfi
- getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
- fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4
-(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
+ getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
+ fcmp.eq.s1 p11,p0 = POW_NORM_X, f1
+(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4
+ nop.i 999
}
;;
-
{ .mfi
- add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2
+ nop.m 999
fnma.s1 POW_f1 = POW_N1float, POW_log2_by_128_lo, f1
nop.i 999
}
{ .mfb
nop.m 999
fnma.s1 POW_s1 = POW_N1float, POW_log2_by_128_hi, POW_Z1
-(p15) br.cond.spnt L(POW_X_INF)
+(p15) br.cond.spnt POW_X_INF
}
;;
-
// Test x and y and flag denormal
{ .mfi
- and pow_GR_index1 = 0x0f, pow_GR_int_N
+ nop.m 999
fcmp.eq.s0 p15,p0 = f8,f9
- shr r2 = pow_GR_int_N, 7
+ nop.i 999
}
{ .mfi
- and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
- nop.f 999
- and pow_GR_index2 = 0x70, pow_GR_int_N
+ nop.m 999
+ fma.s1 POW_pYrcub_e3 = POW_p, POW_Yrcub, POW_e3
+ nop.i 999
}
;;
-
-
{ .mfi
- shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
+ nop.m 999
fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0
- sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
+ nop.i 999
}
{ .mfi
- addl pow_int_GR_M = 0xFFFF, r2
- fma.s1 POW_e12 = POW_e1,f1,POW_e2
- add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
+ nop.m 999
+ fma.s1 POW_e12 = POW_e1,f1,POW_e2
+ nop.i 999
}
;;
-
-{ .mmi
- ldfe POW_T1 = [pow_AD_T1],16
- setf.exp POW_2M = pow_int_GR_M
- andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+{ .mfi
+ add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2
+(p11) fma.d.s0 f8 = f1,f1,f0 // If x=1, result is +1
+ nop.i 999
+}
+{ .mib
+(p12) mov pow_GR_xneg_yodd = 1
+ nop.i 999
+(p11) br.ret.spnt b0 // Early exit if x=1.0, result is +1
}
;;
-
-{ .mfb
- ldfe POW_T2 = [pow_AD_T2],16
- fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2
+{ .mfi
+ and pow_GR_index1 = 0x0f, pow_GR_int_N
+ fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2
+ shr pow_int_GR_M = pow_GR_int_N, 7 // M = N/128
+}
+{ .mib
+ and pow_GR_index2 = 0x70, pow_GR_int_N
+ cmp.eq p6, p0 = pow_GR_xneg_yodd, r0
(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x
}
;;
-
-// double: p8 TRUE ==> |Y(G + r)| >= 10
-// single: p8 TRUE ==> |Y(G + r)| >= 7
-
-// double
-// -2^10 -2^9 2^9 2^10
-// -----+-----+----+ ... +-----+-----+-----
-// p8 | p9 | p8
-// | | p10 | |
-// single
-// -2^7 -2^6 2^6 2^7
-// -----+-----+----+ ... +-----+-----+-----
-// p8 | p9 | p8
-// | | p10 | |
-
-
{ .mfi
-(p0) cmp.le.unc p8,p9 = 10, pow_GR_true_exp_Y_Gpr
- fma.s1 POW_s = POW_s1, f1, POW_s2
- nop.i 999
+ shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
+ fma.s1 POW_s = POW_s1, f1, POW_s2
+ add pow_int_GR_M = pow_GR_16ones, pow_int_GR_M
}
{ .mfi
- nop.m 999
- fma.s1 POW_f12 = POW_f1, POW_f2,f0
- nop.i 999
+ add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
+ fma.s1 POW_f12 = POW_f1, POW_f2,f0
+ and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
}
;;
-
-{ .mfi
- nop.f 999
-(p9) cmp.le.unc p0,p10 = 9, pow_GR_true_exp_Y_Gpr
+{ .mmi
+ ldfe POW_T1 = [pow_AD_T1]
+ ldfe POW_T2 = [pow_AD_T2]
+ sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
}
;;
-
-
+{ .mfi
+ setf.exp POW_2M = pow_int_GR_M
+ fma.s1 POW_e123 = POW_e12, f1, POW_e3
+ nop.i 999
+}
{ .mfb
- nop.m 999
- fma.s1 POW_e123 = POW_e12, f1, POW_e3
-(p8) br.cond.spnt L(POW_OVER_UNDER_X_NOT_INF)
+(p6) cmp.gt p6, p0 = -11, pow_GR_true_exp_Y_Gpr
+ fma.s1 POW_d = POW_GY_Z2, f1, POW_pYrcub_e3
+(p6) br.cond.spnt POW_NEAR_ONE // branch if |y*log(x)| < 2^(-11)
}
;;
-
-{ .mmf
- fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
+{ .mfi
+ nop.m 999
+ fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
+ nop.i 999
}
;;
+// p8 TRUE ==> |Y(G + r)| >= 2^10 (unbiased exponent of Y(G + r) is >= 10)
+// double
+// -2^10 -2^9 2^9 2^10
+// -----+-----+----+ ... +-----+-----+-----
+// p8 | p9 | p8
+// | | p10 | |
+
+// Form signexp of constants to indicate overflow
{ .mfi
- nop.m 999
- fma.s1 POW_ssq = POW_s, POW_s, f0
- nop.i 999
+ mov pow_GR_big_pos = 0x103ff
+ fma.s1 POW_ssq = POW_s, POW_s, f0
+ cmp.le p8,p9 = 10, pow_GR_true_exp_Y_Gpr
}
{ .mfi
- nop.m 999
- fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2
- nop.i 999
+ mov pow_GR_big_neg = 0x303ff
+ fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2
+ andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
}
;;
+// Form big positive and negative constants to test for possible overflow
{ .mfi
- nop.m 999
- fma.s1 POW_v2 = POW_s, POW_Q1, POW_Q0_half
- nop.i 999
+ setf.exp POW_big_pos = pow_GR_big_pos
+ fma.s1 POW_v2 = POW_s, POW_Q1, POW_Q0_half
+(p9) cmp.le.unc p0,p10 = 9, pow_GR_true_exp_Y_Gpr
}
-{ .mfi
- nop.m 999
- fma.s1 POW_1ps = f1,f1,POW_s
- nop.i 999
+{ .mfb
+ setf.exp POW_big_neg = pow_GR_big_neg
+ fma.s1 POW_1ps = f1,f1,POW_s
+(p8) br.cond.spnt POW_OVER_UNDER_X_NOT_INF
}
;;
+// f123 = f12*(e123+1) = f12*e123+f12
{ .mfi
nop.m 999
- fma.s1 POW_f3 = POW_e123,f1,f1
+ fma.s1 POW_f123 = POW_e123,POW_f12,POW_f12
nop.i 999
}
;;
{ .mfi
nop.m 999
- fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
+ fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
nop.i 999
}
-;;
-
{ .mfi
nop.m 999
- fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4
- nop.i 999
+ fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4
+ cmp.ne p12,p13 = pow_GR_xneg_yodd, r0
}
;;
{ .mfi
nop.m 999
- fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps
+ fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_s4 = POW_ssq, POW_ssq, f0
+ fma.s1 POW_s4 = POW_ssq, POW_ssq, f0
nop.i 999
}
;;
{ .mfi
nop.m 999
- fma.s1 POW_f123 = POW_f12, POW_f3, f0
+(p12) fnma.s1 POW_A = POW_2M, POW_f123, f0
nop.i 999
}
+{ .mfi
+ nop.m 999
+(p13) fma.s1 POW_A = POW_2M, POW_f123, f0
+ cmp.eq p14,p11 = r0,r0 // Initialize p14 on, p11 off
+}
;;
{ .mfi
nop.m 999
- fma.s1 POW_A = POW_2M, POW_T1T2, f0
+ fmerge.s POW_abs_q = f0, POW_q // Form |q| so can test its size
nop.i 999
}
;;
-
-
{ .mfi
- nop.m 999
-(p12) fmerge.s POW_f123 = f8,POW_f123 // if x neg, y odd int
+(p10) cmp.eq p0,p14 = r0,r0 // Turn off p14 if no overflow
+ fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps
nop.i 999
}
{ .mfi
nop.m 999
-// fma.s1 POW_es = POW_ssq, POW_v3, POW_v2
+ fma.s1 POW_A = POW_A, POW_T1T2, f0
nop.i 999
}
;;
{ .mfi
+// Test for |q| < 2^-63. If so then reverse last two steps of the result
+// to avoid monotonicity problems for results near 1.0 in round up/down/zero.
+// p11 will be set if need to reverse the order, p14 if not.
nop.m 999
- fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps
+(p10) fcmp.lt.s0 p11,p14 = POW_abs_q, POW_2toM63 // Test |q| <2^-63
nop.i 999
}
;;
-
+.pred.rel "mutex",p11,p14
{ .mfi
nop.m 999
- fma.s1 POW_A = POW_A, POW_f123, f0
+(p14) fma.s1 POW_A = POW_A, POW_es, f0
nop.i 999
}
{ .mfi
nop.m 999
-// fma.s1 POW_es = POW_es, POW_ssq, POW_1ps
+(p11) fma.s1 POW_A = POW_A, POW_q, POW_A
nop.i 999
}
;;
-
+// Dummy op to set inexact if |q| < 2^-63
{ .mfi
nop.m 999
- fma.s1 POW_A = POW_A, POW_es,f0
+(p11) fma.d.s0 POW_tmp = POW_A, POW_q, POW_A
nop.i 999
}
;;
-
-
+{ .mfi
+ nop.m 999
+(p14) fma.d.s0 f8 = POW_A, POW_q, POW_A
+ nop.i 999
+}
{ .mfb
nop.m 999
-(p10) fma.d f8 = POW_A, POW_q, POW_A
-(p10) br.ret.sptk b0
+(p11) fma.d.s0 f8 = POW_A, POW_es, f0
+(p10) br.ret.sptk b0 // Exit main branch if no over/underflow
}
;;
-
-
-
-
// POSSIBLE_OVER_UNDER
-// p6 = TRUE ==> Y negative
+// p6 = TRUE ==> Y_Gpr negative
+// Result is already computed. We just need to know if over/underflow occurred.
-{ .mfi
- nop.m 999
- fmerge.s POW_abs_A = f0, POW_A
- cmp.eq.unc p0,p6 = pow_GR_sign_Y, r0
-}
-;;
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(POW_POSSIBLE_UNDER)
+{ .mfb
+ cmp.eq p0,p6 = pow_GR_sign_Y_Gpr, r0
+ nop.f 999
+(p6) br.cond.spnt POW_POSSIBLE_UNDER
}
;;
// POSSIBLE_OVER
-// We got an answer.
+// We got an answer.
// overflow is a possibility, not a certainty
@@ -1692,21 +1611,20 @@ L(POW_COMMON):
// RN RN
// RZ
-
// Put in s2 (td set, wre set)
{ .mfi
- mov pow_GR_gt_ln = 0x103ff
+ nop.m 999
fsetc.s2 0x7F,0x42
- nop.i 999
+ nop.i 999
}
;;
-
{ .mfi
- setf.exp POW_gt_pln = pow_GR_gt_ln
- fma.d.s2 POW_wre_urm_f8 = POW_abs_A, POW_q, POW_abs_A
- nop.i 999 ;;
+ nop.m 999
+ fma.d.s2 POW_wre_urm_f8 = POW_A, POW_q, POW_A
+ nop.i 999
}
+;;
// Return s2 to default
{ .mfi
@@ -1716,31 +1634,67 @@ L(POW_COMMON):
}
;;
-
// p7 = TRUE ==> yes, we have an overflow
{ .mfi
nop.m 999
- fcmp.ge.unc.s1 p7, p0 = POW_wre_urm_f8, POW_gt_pln
+ fcmp.ge.s1 p7, p8 = POW_wre_urm_f8, POW_big_pos
nop.i 999
}
;;
+{ .mfi
+ nop.m 999
+(p8) fcmp.le.s1 p7, p0 = POW_wre_urm_f8, POW_big_neg
+ nop.i 999
+}
+;;
+{ .mbb
+(p7) mov pow_GR_tag = 24
+(p7) br.cond.spnt __libm_error_region // Branch if overflow
+ br.ret.sptk b0 // Exit if did not overflow
+}
+;;
-{ .mfb
-(p7) mov pow_GR_tag = 24
- fma.d f8 = POW_A, POW_q, POW_A
-(p7) br.cond.spnt __libm_error_region
+// Here if |y*log(x)| < 2^(-11)
+// pow(x,y) ~ exp(d) ~ 1 + d + 0.5*d^2 + Q1*d^3 + Q2*d^4, where d = y*log(x)
+.align 32
+POW_NEAR_ONE:
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_d2 = POW_d, POW_d, f0
+ nop.i 999
}
-{ .mfb
- nop.m 999
- nop.f 999
-(p0) br.ret.sptk b0
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_poly_d_hi = POW_d, POW_Q0_half, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_poly_d_lo = POW_d, POW_Q2, POW_Q1
+ nop.i 999
}
;;
+{ .mfi
+ nop.m 999
+ fma.s1 POW_poly_d = POW_d2, POW_poly_d_lo, POW_poly_d_hi
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fma.d.s0 f8 = POW_d, POW_poly_d, f1
+ br.ret.sptk b0 // exit function for arguments |y*log(x)| < 2^(-11)
+}
+;;
-L(POW_POSSIBLE_UNDER):
+POW_POSSIBLE_UNDER:
// We got an answer. input was < -2^9 but > -2^10 (double)
// We got an answer. input was < -2^6 but > -2^7 (float)
// underflow is a possibility, not a certainty
@@ -1763,124 +1717,250 @@ L(POW_POSSIBLE_UNDER):
// 0.1...11 2^-3ffe (biased, 1)
// largest dn smallest normal
-
// Put in s2 (td set, ftz set)
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x41
- nop.i 999
+ nop.i 999
}
;;
-
-
{ .mfi
nop.m 999
- fma.d.s2 POW_ftz_urm_f8 = POW_A, POW_q, POW_A
+ fma.d.s2 POW_ftz_urm_f8 = POW_A, POW_q, POW_A
nop.i 999
}
;;
-
// Return s2 to default
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x40
- nop.i 999
+ nop.i 999
}
;;
-
// p7 = TRUE ==> yes, we have an underflow
{ .mfi
nop.m 999
- fcmp.eq.unc.s1 p7, p0 = POW_ftz_urm_f8, f0
- nop.i 999
+ fcmp.eq.s1 p7, p0 = POW_ftz_urm_f8, f0
+ nop.i 999
}
;;
+{ .mbb
+(p7) mov pow_GR_tag = 25
+(p7) br.cond.spnt __libm_error_region // Branch if underflow
+ br.ret.sptk b0 // Exit if did not underflow
+}
+;;
+
+POW_X_DENORM:
+// Here if x unorm. Use NORM_X for the getf instructions, and then branch
+// back to the normal path
+{ .mfi
+ getf.exp pow_GR_signexp_X = POW_NORM_X
+ nop.f 999
+ nop.i 999
+}
+;;
+{ .mmi
+ getf.sig pow_GR_sig_X = POW_NORM_X
+;;
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+ nop.i 999
+}
+;;
+
+{ .mib
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+ nop.i 999
+ br.cond.sptk POW_COMMON
+}
+;;
+POW_X_0:
+// Here if x=0 and y not nan
+//
+// We have the following cases:
+// p6 x=0 and y>0 and is an integer (may be even or odd)
+// p7 x=0 and y>0 and is NOT an integer, return +0
+// p8 x=0 and y>0 and so big as to always be an even integer, return +0
+// p9 x=0 and y>0 and may not be integer
+// p10 x=0 and y>0 and is an odd integer, return x
+// p11 x=0 and y>0 and is an even integer, return +0
+// p12 used in dummy fcmp to set denormal flag if y=unorm
+// p13 x=0 and y>0
+// p14 x=0 and y=0, branch to code for calling error handling
+// p15 x=0 and y<0, branch to code for calling error handling
+//
+{ .mfi
+ getf.sig pow_GR_sig_int_Y = POW_int_Y // Get signif of int_Y
+ fcmp.lt.s1 p15,p13 = f9, f0 // Test for y<0
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ cmp.ne p14,p0 = pow_GR_y_zero,r0 // Test for y=0
+ fcvt.xf POW_float_int_Y = POW_int_Y
+(p14) br.cond.spnt POW_X_0_Y_0 // Branch if x=0 and y=0
+}
+;;
+// If x=0 and y>0, test y and flag denormal
{ .mfb
-(p7) mov pow_GR_tag = 25
- fma.d f8 = POW_A, POW_q, POW_A
-(p7) br.cond.spnt __libm_error_region
+(p13) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033 // Test y +big = even int
+(p13) fcmp.eq.s0 p12,p0 = f9,f0 // If x=0, y>0 dummy op to flag denormal
+(p15) br.cond.spnt POW_X_0_Y_NEG // Branch if x=0 and y<0
}
;;
+// Here if x=0 and y>0
+{ .mfi
+ nop.m 999
+(p9) fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y // Test y=int
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.d.s0 f8 = f0,f0,f0 // If x=0, y>0 and large even int, return +0
+ nop.i 999
+}
+;;
+{ .mfi
+ nop.m 999
+(p7) fma.d.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y>0 and not integer
+(p6) tbit.nz.unc p10,p11 = pow_GR_sig_int_Y,0 // If y>0 int, test y even/odd
+}
+;;
+
+// Note if x=0, y>0 and odd integer, just return x
{ .mfb
nop.m 999
- nop.f 999
- br.ret.sptk b0
+(p11) fma.d.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y even integer
+ br.ret.sptk b0 // Exit if x=0 and y>0
}
;;
+POW_X_0_Y_0:
+// When X is +-0 and Y is +-0, IEEE returns 1.0
+// We call error support with this value
-L(POW_X_DENORM):
-// Here if x unorm. Use the NORM_X for getf instructions, and the back
-// to normal path
-{ .mfi
- getf.exp pow_GR_signexp_X = POW_NORM_X
- nop.f 999
- nop.i 999
+{ .mfb
+ mov pow_GR_tag = 26
+ fma.d.s0 f8 = f1,f1,f0
+ br.cond.sptk __libm_error_region
}
;;
+POW_X_0_Y_NEG:
+// When X is +-0 and Y is negative, IEEE returns
+// X Y answer
+// +0 -odd int +inf
+// -0 -odd int -inf
+
+// +0 !-odd int +inf
+// -0 !-odd int +inf
+
+// p6 == Y is a floating point number outside the integer range.
+// Hence it is an integer and is even.
+// return +inf
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// return (sign_of_x)inf
+// p12 even
+// return +inf
+// p10 == Y is not an integer
+// return +inf
+//
+
{ .mfi
- getf.sig pow_GR_sig_X = POW_NORM_X
- nop.f 999
- nop.i 999
+ nop.m 999
+ nop.f 999
+ cmp.gt p6,p7 = pow_GR_exp_Y, pow_GR_10033
}
;;
{ .mfi
- and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
- nop.f 999
+ mov pow_GR_tag = 27
+(p7) fcmp.eq.unc.s1 p9,p10 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
}
;;
-{ .mib
- sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
- shl pow_GR_offset = pow_GR_sig_X, 1
- br.cond.sptk L(POW_COMMON)
+{ .mfb
+ nop.m 999
+(p6) frcpa.s0 f8,p13 = f1, f0
+(p6) br.cond.sptk __libm_error_region // x=0, y<0, y large neg int
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p10) frcpa.s0 f8,p13 = f1, f0
+(p10) br.cond.sptk __libm_error_region // x=0, y<0, y not int
}
;;
+// x=0, y<0, y an int
+{ .mib
+ nop.m 999
+(p9) tbit.nz.unc p11,p12 = pow_GR_sig_int_Y,0
+ nop.b 999
+}
+;;
-L(POW_X_0_Y_0):
-// When X is +-0 and Y is +-0, IEEE returns 1.0
-// We call error support with this value
+{ .mfi
+ nop.m 999
+(p12) frcpa.s0 f8,p13 = f1,f0
+ nop.i 999
+}
+;;
{ .mfb
- mov pow_GR_tag = 26
- fma.d f8 = f1,f1,f0
- br.cond.sptk __libm_error_region
+ nop.m 999
+(p11) frcpa.s0 f8,p13 = f1,f8
+ br.cond.sptk __libm_error_region
}
;;
+POW_Y_0:
+// Here for y zero, x anything but zero and nan
+// Set flag if x denormal
+// Result is +1.0
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag if x denormal
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.d.s0 f8 = f1,f1,f0
+ br.ret.sptk b0
+}
+;;
-L(POW_X_INF):
-// When X is +-inf and Y is +-, IEEE returns
+POW_X_INF:
+// Here when X is +-inf
-// overflow
-// X +inf Y +inf +inf
-// X -inf Y +inf +inf
+// X +inf Y +inf +inf
+// X -inf Y +inf +inf
-// X +inf Y >0 +inf
+// X +inf Y >0 +inf
// X -inf Y >0, !odd integer +inf <== (-inf)^0.5 = +inf !!
-// X -inf Y >0, odd integer -inf
+// X -inf Y >0, odd integer -inf
-// underflow
-// X +inf Y -inf +0
-// X -inf Y -inf +0
+// X +inf Y -inf +0
+// X -inf Y -inf +0
-// X +inf Y <0 +0
-// X -inf Y <0, !odd integer +0
-// X -inf Y <0, odd integer -0
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
// X + inf Y=+0 +1
// X + inf Y=-0 +1
@@ -1892,32 +1972,30 @@ L(POW_X_INF):
// p6 == Y is a floating point number outside the integer.
// Hence it is an integer and is even.
-// p13 == (Y negative)
+// p13 == (Y negative)
// return +inf
// p14 == (Y positive)
// return +0
-
-
// p7 == Y is a floating point number within the integer range.
// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
// p11 odd
-// p13 == (Y negative)
+// p13 == (Y negative)
// return (sign_of_x)inf
-// p14 == (Y positive)
+// p14 == (Y positive)
// return (sign_of_x)0
-// pxx even
-// p13 == (Y negative)
-// return +inf
+// pxx even
+// p13 == (Y negative)
+// return +inf
// p14 == (Y positive)
-// return +0
+// return +0
// pxx == Y is not an integer
-// p13 == (Y negative)
+// p13 == (Y negative)
// return +inf
// p14 == (Y positive)
// return +0
-//
+//
// If x=inf, test y and flag denormal
{ .mfi
@@ -1929,207 +2007,131 @@ L(POW_X_INF):
{ .mfi
nop.m 999
- fcmp.lt p13,p14 = POW_NORM_Y,f0
- cmp.gt.unc p6,p7 = pow_GR_exp_Y, pow_GR_10033
+ fcmp.lt.s0 p13,p14 = POW_NORM_Y,f0
+ cmp.gt p6,p7 = pow_GR_exp_Y, pow_GR_10033
}
{ .mfi
nop.m 999
- fclass.m p12,p0 = f9, 0x23
+ fclass.m p12,p0 = f9, 0x23 //@inf
nop.i 999
}
;;
-
{ .mfi
nop.m 999
- fclass.m p15,p0 = f9, 0x07 //@zero
+ fclass.m p15,p0 = f9, 0x07 //@zero
nop.i 999
}
;;
{ .mfb
nop.m 999
-(p15) fmerge.s f8 = f1,f1
-(p15) br.ret.spnt b0
+(p15) fmerge.s f8 = f1,f1 // Return +1.0 if x=inf, y=0
+(p15) br.ret.spnt b0 // Exit if x=inf, y=0
}
;;
-
{ .mfi
-(p13) mov pow_GR_tag = 25
-(p14) frcpa.s1 f8,p10 = f1,f0
+ nop.m 999
+(p14) frcpa.s1 f8,p10 = f1,f0 // If x=inf, y>0, assume result +inf
nop.i 999
}
{ .mfb
-(p14) mov pow_GR_tag = 24
-(p13) fma.s1 f8 = f0,f0,f0
-(p12) br.ret.spnt b0
-}
-;;
-
-
-
-{ .mfb
nop.m 999
-(p7) fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y, POW_NORM_Y
- nop.b 999
+(p13) fma.d.s0 f8 = f0,f0,f0 // If x=inf, y<0, assume result +0.0
+(p12) br.ret.spnt b0 // Exit if x=inf, y=inf
}
;;
+// Here if x=inf, and 0 < |y| < inf. Need to correct results if y odd integer.
{ .mfi
nop.m 999
- nop.f 999
-(p9) tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0
-}
-;;
-
-{ .mfb
- nop.m 999
-(p11) fmerge.s f8 = POW_NORM_X,f8
- br.ret.sptk b0
+(p7) fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y, POW_NORM_Y // Is y integer?
+ nop.i 999
}
;;
-
-
-L(POW_X_0_Y_NEG):
-// When X is +-0 and Y is negative, IEEE returns
-// X Y answer
-// +0 -odd int +inf
-// -0 -odd int -inf
-
-// +0 !-odd int +inf
-// -0 !-odd int +inf
-
-
-// p6 == Y is a floating point number outside the integer.
-// Hence it is an integer and is even.
-// return +inf
-
-// p7 == Y is a floating point number within the integer range.
-// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
-// p11 odd
-// return (sign_of_x)inf
-// p12 even
-// return +inf
-// p10 == Y is not an integer
-// return +inf
-//
-//
-
{ .mfi
nop.m 999
nop.f 999
- cmp.gt.unc p6,p7 = pow_GR_exp_Y, pow_GR_10033
-}
-;;
-
-
-{ .mfi
- mov pow_GR_tag = 27
-(p7) fcmp.eq.unc.s1 p9,p10 = POW_float_int_Y, POW_NORM_Y
- nop.i 999
-}
-;;
-
-
-{ .mfb
- nop.m 999
-(p6) frcpa.s0 f8,p13 = f1, f0
-(p6) br.cond.sptk __libm_error_region
+(p9) tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0 // Test for y odd integer
}
;;
{ .mfb
nop.m 999
-(p10) frcpa.s0 f8,p13 = f1, f0
-(p10) br.cond.sptk __libm_error_region
+(p11) fmerge.s f8 = POW_NORM_X,f8 // If y odd integer use sign of x
+ br.ret.sptk b0 // Exit for x=inf, 0 < |y| < inf
}
;;
+POW_X_NEG_Y_NONINT:
+// When X is negative and Y is a non-integer, IEEE
+// returns a qnan indefinite.
+// We call error support with this value
-{ .mib
- nop.m 999
-(p9) tbit.nz.unc p11,p12 = pow_GR_sig_int_Y,0
- nop.b 999
+{ .mfb
+ mov pow_GR_tag = 28
+ frcpa.s0 f8,p6 = f0,f0
+ br.cond.sptk __libm_error_region
}
;;
-
-
+POW_X_NAN:
+// Here if x=nan, y not nan
{ .mfi
- nop.m 999
-(p12) frcpa.s0 f8,p13 = f1,f0
- nop.i 999
+ nop.m 999
+ fclass.m p9,p13 = f9, 0x07 // Test y=zero
+ nop.i 999
}
;;
{ .mfb
- nop.m 999
-(p11) frcpa f8,p13 = f1,f8
- br.cond.sptk __libm_error_region
+ nop.m 999
+(p13) fma.d.s0 f8 = f8,f1,f0
+(p13) br.ret.sptk b0 // Exit if x nan, y anything but zero or nan
}
;;
-
-
-
-L(POW_X_NEG_Y_NONINT):
-// When X is negative and Y is a non-integer, IEEE
-// returns a qnan indefinite.
-// We call error support with this value
-
-{ .mfb
- mov pow_GR_tag = 28
- frcpa f8,p6 = f0,f0
- br.cond.sptk __libm_error_region
-}
-;;
-
-
-
-
-L(POW_X_NAN_Y_0):
+POW_X_NAN_Y_0:
// When X is a NAN and Y is zero, IEEE returns 1.
// We call error support with this value.
-
{ .mfi
- nop.m 0
- fma.d.s0 f10 = f8,f1,f0
- nop.i 0
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Dummy op to set invalid on snan
+ nop.i 999
}
{ .mfb
- mov pow_GR_tag = 29
- fma.d.s0 f8 = f0,f0,f1
+ mov pow_GR_tag = 29
+ fma.d.s0 f8 = f0,f0,f1
br.cond.sptk __libm_error_region
}
;;
-L(POW_OVER_UNDER_X_NOT_INF):
+POW_OVER_UNDER_X_NOT_INF:
// p8 is TRUE for overflow
// p9 is TRUE for underflow
// if y is infinity, we should not over/underflow
-
{ .mfi
nop.m 999
- fcmp.eq.unc.s1 p14, p13 = POW_xsq,f1
- cmp.eq.unc p8,p9 = pow_GR_sign_Y_Gpr, r0
+ fcmp.eq.s1 p14, p13 = POW_xsq,f1 // Test |x|=1
+ cmp.eq p8,p9 = pow_GR_sign_Y_Gpr, r0
}
;;
{ .mfi
nop.m 999
-(p14) fclass.m.unc p15, p0 = f9, 0x23
+(p14) fclass.m.unc p15, p0 = f9, 0x23 // If |x|=1, test y=inf
nop.i 999
}
{ .mfi
nop.m 999
-(p13) fclass.m.unc p11,p0 = f9, 0x23
+(p13) fclass.m.unc p11,p0 = f9, 0x23 // If |x| not 1, test y=inf
nop.i 999
}
;;
@@ -2137,31 +2139,33 @@ L(POW_OVER_UNDER_X_NOT_INF):
// p15 = TRUE if |x|=1, y=inf, return +1
{ .mfb
nop.m 999
-(p15) fma.d f8 = f1,f1,f0
-(p15) br.ret.spnt b0
+(p15) fma.d.s0 f8 = f1,f1,f0 // If |x|=1, y=inf, result +1
+(p15) br.ret.spnt b0 // Exit if |x|=1, y=inf
}
;;
.pred.rel "mutex",p8,p9
{ .mfb
-(p8) setf.exp f8 = pow_GR_17ones
-(p9) fmerge.s f8 = f0,f0
-(p11) br.ret.sptk b0
+(p8) setf.exp f8 = pow_GR_17ones // If exp(+big), result inf
+(p9) fmerge.s f8 = f0,f0 // If exp(-big), result 0
+(p11) br.ret.sptk b0 // Exit if |x| not 1, y=inf
}
+;;
{ .mfb
nop.m 999
nop.f 999
- br.cond.sptk L(POW_OVER_UNDER_ERROR)
+ br.cond.sptk POW_OVER_UNDER_ERROR // Branch if y not inf
}
;;
-L(POW_Y_NAN):
-// Is x = +1 then result is +1, else result is quiet Y
+POW_Y_NAN:
+// Here if y=nan, x anything
+// If x = +1 then result is +1, else result is quiet Y
{ .mfi
nop.m 999
- fcmp.eq.s1 p10,p9 = POW_NORM_X, f1
+ fcmp.eq.s1 p10,p9 = POW_NORM_X, f1
nop.i 999
}
;;
@@ -2175,148 +2179,117 @@ L(POW_Y_NAN):
{ .mfi
nop.m 999
-(p10) fma.d f8 = f1,f1,f0
+(p10) fma.d.s0 f8 = f1,f1,f0
nop.i 999
}
{ .mfb
nop.m 999
-(p9) fma.d f8 = f9,f8,f0
- br.ret.sptk b0
+(p9) fma.d.s0 f8 = f9,f8,f0
+ br.ret.sptk b0 // Exit y=nan
}
;;
-L(POW_OVER_UNDER_ERROR):
+POW_OVER_UNDER_ERROR:
+// Here if we have overflow or underflow.
+// Enter with p12 true if x negative and y odd int to force -0 or -inf
{ .mfi
- nop.m 999
- fmerge.s f10 = POW_NORM_X,POW_NORM_X
- nop.i 999
-}
-{ .mfi
- sub pow_GR_17ones_m1 = pow_GR_17ones, r0, 1
- nop.f 999
- mov pow_GR_one = 0x1
+ sub pow_GR_17ones_m1 = pow_GR_17ones, r0, 1
+ nop.f 999
+ mov pow_GR_one = 0x1
}
;;
-// overflow
+// overflow, force inf with O flag
{ .mmb
-(p8) mov pow_GR_tag = 24
-(p8) setf.exp f11 = pow_GR_17ones_m1
+(p8) mov pow_GR_tag = 24
+(p8) setf.exp POW_tmp = pow_GR_17ones_m1
nop.b 999
}
;;
-
-// underflow
+// underflow, force zero with I, U flags
{ .mmi
-(p9) mov pow_GR_tag = 25
-(p9) setf.exp f11 = pow_GR_one
+(p9) mov pow_GR_tag = 25
+(p9) setf.exp POW_tmp = pow_GR_one
nop.i 999
}
;;
-
-// p12 x is negative and y is an odd integer
-
-
{ .mfi
nop.m 999
- fma.d f8 = f11, f11, f0
+ fma.d.s0 f8 = POW_tmp, POW_tmp, f0
nop.i 999
}
;;
+// p12 x is negative and y is an odd integer, change sign of result
{ .mfi
nop.m 999
-(p12) fmerge.ns f8 = f8, f8
+(p12) fnma.d.s0 f8 = POW_tmp, POW_tmp, f0
nop.i 999
}
;;
+GLOBAL_LIBM_END(pow)
-.endp pow
-ASM_SIZE_DIRECTIVE(pow)
-
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
+LOCAL_LIBM_ENTRY(__libm_error_region)
-.proc __libm_error_region
-__libm_error_region:
-
-// Answer is inf for overflow and 0 for underflow.
.prologue
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-// (2)
{ .mmi
stfd [GR_Parameter_Y] = POW_NORM_Y,16 // STORE Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
-// (3)
{ .mib
- stfd [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
+ stfd [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-// (4)
{ .mmi
- ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
diff --git a/sysdeps/ia64/fpu/e_powf.S b/sysdeps/ia64/fpu/e_powf.S
index d464058262..275843f1e2 100644
--- a/sysdeps/ia64/fpu/e_powf.S
+++ b/sysdeps/ia64/fpu/e_powf.S
@@ -1,10 +1,10 @@
.file "powf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,30 +35,39 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 2/03/00 Added p12 to definite over/under path. With odd power we did not
+// 02/02/00 Initial version
+// 02/03/00 Added p12 to definite over/under path. With odd power we did not
// maintain the sign of x in this path.
-// 4/04/00 Unwind support added
-// 4/19/00 pow(+-1,inf) now returns NaN
-// pow(+-val, +-inf) returns 0 or inf, but now does not call error support
+// 04/04/00 Unwind support added
+// 04/19/00 pow(+-1,inf) now returns NaN
+// pow(+-val, +-inf) returns 0 or inf, but now does not call error
+// support
// Added s1 to fcvt.fx because invalid flag was incorrectly set.
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 9/07/00 Improved performance by eliminating bank conflicts and other stalls,
+// 09/07/00 Improved performance by eliminating bank conflicts and other stalls,
// and tweaking the critical path
-// 9/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
-// 9/28/00 Updated NaN**0 path
-// 1/20/01 Fixed denormal flag settings.
-// 2/12/01 Improved speed.
+// 09/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
+// 09/28/00 Updated NaN**0 path
+// 01/20/01 Fixed denormal flag settings.
+// 02/13/01 Improved speed.
+// 03/19/01 Reordered exp polynomial to improve speed and eliminate monotonicity
+// problem in round up, down, and to zero modes. Also corrected
+// overflow result when x negative, y odd in round up, down, zero.
+// 06/14/01 Added brace missing from bundle
+// 12/10/01 Corrected case where x negative, 2^23 <= |y| < 2^24, y odd integer.
+// 02/08/02 Fixed overflow/underflow cases that were not calling error support.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/29/02 Improved Itanium 2 performance
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
-// double pow(double)
-// float powf(float)
+// float powf(float x, float y)
//
// Overview of operation
//==============================================================
@@ -67,51 +76,51 @@
// 1. Log(x)
// 2. y Log(x)
// 3. exp(y log(x))
-//
+//
// This means we work with the absolute value of x and merge in the sign later.
// Log(x) = G + delta + r -rsq/2 + p
// G,delta depend on the exponent of x and table entries. The table entries are
// indexed by the exponent of x, called K.
-//
+//
// The G and delta come out of the reduction; r is the reduced x.
-//
+//
// B = frcpa(x)
// xB-1 is small means that B is the approximate inverse of x.
-//
+//
// Log(x) = Log( (1/B)(Bx) )
// = Log(1/B) + Log(Bx)
// = Log(1/B) + Log( 1 + (Bx-1))
-//
+//
// x = 2^K 1.x_1x_2.....x_52
-// B= frcpa(x) = 2^-k Cm
+// B= frcpa(x) = 2^-k Cm
// Log(1/B) = Log(1/(2^-K Cm))
// Log(1/B) = Log((2^K/ Cm))
// Log(1/B) = K Log(2) + Log(1/Cm)
-//
+//
// Log(x) = K Log(2) + Log(1/Cm) + Log( 1 + (Bx-1))
-//
+//
// If you take the significand of x, set the exponent to true 0, then Cm is
// the frcpa. We tabulate the Log(1/Cm) values. There are 256 of them.
// The frcpa table is indexed by 8 bits, the x_1 thru x_8.
// m = x_1x_2...x_8 is an 8-bit index.
-//
+//
// Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
-//
+//
// We tabluate as two doubles, T and t, where T +t is the value itself.
-//
+//
// Log(x) = (K Log(2)_hi + T) + (Log(2)_hi + t) + Log( 1 + (Bx-1))
// Log(x) = G + delta + Log( 1 + (Bx-1))
-//
+//
// The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
-//
+//
// Log( 1 + (Bx-1)) = r - rsq/2 + p
-//
+//
// Then,
-//
+//
// yLog(x) = yG + y delta + y(r-rsq/2) + yp
// yLog(x) = Z1 + e3 + Z2 + Z3 + (e2 + e3)
-//
-//
+//
+//
// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
//
//
@@ -133,7 +142,7 @@
// exp(r) = exp(Z - N log2/128)
//
// r = s + d = (Z - N (log2/128)_hi) -N (log2/128)_lo
-// = Z - N (log2/128)
+// = Z - N (log2/128)
//
// Z = s+d +N (log2/128)
//
@@ -149,22 +158,22 @@
// n log2/128 = n_7n_6n_5 log2/8 + n_4n_3n_2n_1 log2/128
// n log2/128 = I2 log2/8 + I1 log2/128
//
-// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
+// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
//
// exp(Z) = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
// exp(Z) = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128
// exp(Z) = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128
//
// I1, I2 are table indices. Use a series for exp(s).
-// Then get exp(Z)
+// Then get exp(Z)
//
// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
-// exp(yLog(x)) = exp(Z) exp(Z3) f3
-// exp(yLog(x)) = exp(Z)f3 exp(Z3)
-// exp(yLog(x)) = A exp(Z3)
+// exp(yLog(x)) = exp(Z) exp(Z3) f3
+// exp(yLog(x)) = exp(Z)f3 exp(Z3)
+// exp(yLog(x)) = A exp(Z3)
//
// We actually calculate exp(Z3) -1.
-// Then,
+// Then,
// exp(yLog(x)) = A + A( exp(Z3) -1)
//
@@ -175,142 +184,146 @@
// ==============
// The operation (K*log2_hi) must be exact. K is the true exponent of x.
// If we allow gradual underflow (denormals), K can be represented in 12 bits
-// (as a two's complement number). We assume 13 bits as an engineering precaution.
-//
+// (as a two's complement number). We assume 13 bits as an engineering
+// precaution.
+//
// +------------+----------------+-+
// | 13 bits | 50 bits | |
// +------------+----------------+-+
// 0 1 66
// 2 34
-//
+//
// So we want the lsb(log2_hi) to be 2^-50
// We get log2 as a quad-extended (15-bit exponent, 128-bit significand)
-//
+//
// 0 fffe b17217f7d1cf79ab c9e3b39803f2f6af (4...)
-//
+//
// Consider numbering the bits left to right, starting at 0 thru 127.
// Bit 0 is the 2^-1 bit; bit 49 is the 2^-50 bit.
-//
+//
// ...79ab
// 0111 1001 1010 1011
// 44
// 89
-//
-// So if we shift off the rightmost 14 bits, then (shift back only
+//
+// So if we shift off the rightmost 14 bits, then (shift back only
// the top half) we get
-//
+//
// 0 fffe b17217f7d1cf4000 e6af278ece600fcb dabc000000000000
-//
+//
// Put the right 64-bit signficand in an FR register, convert to double;
// it is exact. Put the next 128 bits into a quad register and round to double.
// The true exponent of the low part is -51.
-//
+//
// hi is 0 fffe b17217f7d1cf4000
// lo is 0 ffcc e6af278ece601000
-//
+//
// Convert to double memory format and get
-//
+//
// hi is 0x3fe62e42fefa39e8
-// lo is 0x3cccd5e4f1d9cc02
-//
+// lo is 0x3cccd5e4f1d9cc02
+//
// log2_hi + log2_lo is an accurate value for log2.
-//
-//
+//
+//
// The T and t values
// ==================
// A similar method is used to generate the T and t values.
-//
+//
// K * log2_hi + T must be exact.
-//
+//
// Smallest T,t
// ----------
-// The smallest T,t is
+// The smallest T,t is
// T t
-// data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 log(1/frcpa(1+0/256))= +1.95503e-003
-//
+// 0x3f60040155d58800, 0x3c93bce0ce3ddd81 log(1/frcpa(1+0/256))= +1.95503e-003
+//
// The exponent is 0x3f6 (biased) or -9 (true).
// For the smallest T value, what we want is to clip the significand such that
-// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is the specific
-// for the first entry. In general, it is 0xffff - (biased 15-bit exponent).
+// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is
+// specific to the first entry. In general, it is 0xffff - (biased 15-bit
+// exponent).
-// Independently, what we have calculated is the table value as a quad precision number.
+// Independently, what we have calculated is the table value as a quad
+// precision number.
// Table entry 1 is
// 0 fff6 80200aaeac44ef38 338f77605fdf8000
-//
+//
// We store this quad precision number in a data structure that is
-// sign: 1
+// sign: 1
// exponent: 15
// signficand_hi: 64 (includes explicit bit)
// signficand_lo: 49
// Because the explicit bit is included, the significand is 113 bits.
-//
+//
// Consider significand_hi for table entry 1.
-//
-//
+//
+//
// +-+--- ... -------+--------------------+
// | |
// +-+--- ... -------+--------------------+
// 0 1 4444444455555555556666
// 2345678901234567890123
-//
+//
// Labeled as above, bit 0 is 2^0, bit 1 is 2^-1, etc.
// Bit 42 is 2^-42. If we shift to the right by 9, the bit in
// bit 42 goes in 51.
-//
+//
// So what we want to do is shift bits 43 thru 63 into significand_lo.
-// This is shifting bit 42 into bit 63, taking care to retain the shifted-off bits.
-// Then shifting (just with signficaand_hi) back into bit 42.
-//
-// The shift_value is 63-42 = 21. In general, this is
+// This is shifting bit 42 into bit 63, taking care to retain shifted-off bits.
+// Then shifting (just with significand_hi) back into bit 42.
+//
+// The shift_value is 63-42 = 21. In general, this is
// 63 - (51 -(0xffff - 0xfff6))
// For this example, it is
// 63 - (51 - 9) = 63 - 42 = 21
-//
-// This means we are shifting 21 bits into significand_lo. We must maintain more
-// that a 128-bit signficand not to lose bits. So before the shift we put the 128-bit
-// significand into a 256-bit signficand and then shift.
+//
+// This means we are shifting 21 bits into significand_lo. We must maintain more
+// than a 128-bit significand not to lose bits. So before the shift we put the
+// 128-bit significand into a 256-bit significand and then shift.
// The 256-bit significand has four parts: hh, hl, lh, and ll.
-//
+//
// Start off with
// hh hl lh ll
// <64> <49><15_0> <64_0> <64_0>
-//
+//
// After shift by 21 (then return for significand_hi),
// <43><21_0> <21><43> <6><58_0> <64_0>
-//
+//
// Take the hh part and convert to a double. There is no rounding here.
-// The conversion is exact. The true exponent of the high part is the same as the
-// true exponent of the input quad.
-//
-// We have some 64 plus significand bits for the low part. In this example, we have
-// 70 bits. We want to round this to a double. Put them in a quad and then do a quad fnorm.
-// For this example the true exponent of the low part is
+// The conversion is exact. The true exponent of the high part is the same as
+// the true exponent of the input quad.
+//
+// We have some 64 plus significand bits for the low part. In this example, we
+// have 70 bits. We want to round this to a double. Put them in a quad and then
+// do a quad fnorm.
+// For this example the true exponent of the low part is
// true_exponent_of_high - 43 = true_exponent_of_high - (64-21)
-// In general, this is
-// true_exponent_of_high - (64 - shift_value)
-//
-//
+// In general, this is
+// true_exponent_of_high - (64 - shift_value)
+//
+//
// Largest T,t
// ----------
// The largest T,t is
-// data8 0x3fe62643fecf9742, 0x3c9e3147684bd37d log(1/frcpa(1+255/256))= +6.92171e-001
-//
+// 0x3fe62643fecf9742, 0x3c9e3147684bd37d log(1/frcpa(1+255/256))=+6.92171e-001
+//
// Table entry 256 is
// 0 fffe b1321ff67cba178c 51da12f4df5a0000
-//
-// The shift value is
+//
+// The shift value is
// 63 - (51 -(0xffff - 0xfffe)) = 13
-//
-// The true exponent of the low part is
+//
+// The true exponent of the low part is
// true_exponent_of_high - (64 - shift_value)
// -1 - (64-13) = -52
// Biased as a double, this is 0x3cb
-//
-//
-//
+//
+//
+//
// So then lsb(T) must be >= 2^-51
// msb(Klog2_hi) <= 2^12
-//
+//
// +--------+---------+
// | 51 bits | <== largest T
// +--------+---------+
@@ -320,7 +333,6 @@
// +------------+----------------+-+
-
// Special Cases
//==============================================================
@@ -385,63 +397,66 @@
// X any Y =0 +1
-#include "libm_support.h"
-
// Assembly macros
//==============================================================
// integer registers used
-pow_AD_Tt = r33
-pow_GR_FFF7 = r34
-pow_GR_exp_Y = r34 // duplicate
-pow_GR_17ones = r35
-
-pow_AD_P = r36
-pow_AD_Q = r37
-pow_AD_tbl1 = r38
-pow_AD_tbl2 = r39
-pow_GR_exp_X = r40
-pow_GR_true_exp_X = r40 // duplicate
-
-pow_GR_offset = r41
-pow_GR_exp_Xm1 = r42
-pow_GR_sig_X = r43
-pow_GR_signexp_X = r44
-
-pow_GR_signexp_Xm1 = r46
-pow_GR_int_W1 = r47
-pow_GR_int_W2 = r48
-pow_GR_int_N = r49
-pow_GR_index1 = r50
-
-pow_GR_index2 = r51
-pow_AD_T1 = r52
-pow_AD_T2 = r53
-pow_GR_gt_ln = r53 // duplicate
-pow_int_GR_M = r54
-pow_GR_10033 = r55
-
-pow_GR_16ones = r56
-pow_GR_sig_int_Y = r57
-pow_GR_sign_Y_Gpr = r58
-pow_GR_17ones_m1 = r59
-pow_GR_one = r60
-pow_GR_sign_Y = r60
-
-pow_GR_signexp_Y_Gpr = r61
-pow_GR_exp_Y_Gpr = r62
-pow_GR_true_exp_Y_Gpr = r63
-pow_GR_signexp_Y = r64
-
-GR_SAVE_B0 = r65
-GR_SAVE_GP = r66
-GR_SAVE_PFS = r67
-
-GR_Parameter_X = r68
-GR_Parameter_Y = r69
-GR_Parameter_RESULT = r70
-pow_GR_tag = r71
+pow_GR_signexp_X = r14
+pow_GR_17ones = r15
+pow_AD_P = r16
+pow_GR_exp_2tom8 = r17
+pow_GR_sig_X = r18
+pow_GR_10033 = r19
+pow_GR_16ones = r20
+
+pow_AD_Tt = r21
+pow_GR_exp_X = r22
+pow_AD_Q = r23
+pow_GR_true_exp_X = r24
+pow_GR_y_zero = r25
+
+pow_GR_exp_Y = r26
+pow_AD_tbl1 = r27
+pow_AD_tbl2 = r28
+pow_GR_offset = r29
+pow_GR_exp_Xm1 = r30
+pow_GR_xneg_yodd = r31
+
+pow_GR_signexp_Xm1 = r35
+pow_GR_int_W1 = r36
+pow_GR_int_W2 = r37
+pow_GR_int_N = r38
+pow_GR_index1 = r39
+pow_GR_index2 = r40
+
+pow_AD_T1 = r41
+pow_AD_T2 = r42
+pow_int_GR_M = r43
+pow_GR_sig_int_Y = r44
+pow_GR_sign_Y_Gpr = r45
+
+pow_GR_17ones_m1 = r46
+pow_GR_one = r47
+pow_GR_sign_Y = r48
+pow_GR_signexp_Y_Gpr = r49
+pow_GR_exp_Y_Gpr = r50
+
+pow_GR_true_exp_Y_Gpr = r51
+pow_GR_signexp_Y = r52
+pow_GR_x_one = r53
+pow_GR_big_pos = r55
+
+pow_GR_big_neg = r56
+
+GR_SAVE_B0 = r50
+GR_SAVE_GP = r51
+GR_SAVE_PFS = r52
+
+GR_Parameter_X = r53
+GR_Parameter_Y = r54
+GR_Parameter_RESULT = r55
+pow_GR_tag = r56
// floating point registers used
@@ -464,7 +479,8 @@ POW_log2_lo = f43
POW_r = f44
POW_Q0_half = f45
-POW_Q1 = f46
+POW_Q1 = f46
+POW_tmp = f47
POW_log2_hi = f48
POW_Q4 = f49
POW_P1 = f50
@@ -476,6 +492,7 @@ POW_Yrcub = f54
POW_log2_by_128_lo = f55
POW_v6 = f56
+POW_xsq = f57
POW_v4 = f58
POW_v2 = f59
POW_T = f60
@@ -484,6 +501,7 @@ POW_Tt = f61
POW_RSHF = f62
POW_v21ps = f63
POW_s4 = f64
+POW_twoV = f65
POW_U = f66
POW_G = f67
@@ -533,44 +551,36 @@ POW_1ps = f103
POW_A = f104
POW_es = f105
+POW_Xp1 = f106
POW_int_K = f107
POW_K = f108
POW_f123 = f109
POW_Gpr = f110
-POW_Y_Gpr = f111
+POW_Y_Gpr = f111
POW_int_Y = f112
+POW_2Mqp1 = f113
POW_float_int_Y = f116
POW_ftz_urm_f8 = f117
POW_wre_urm_f8 = f118
-POW_abs_A = f119
-POW_gt_pln = f120
-
-POW_xsq = f121
-
-POW_twoV = f122
-POW_Xp1 = f123
+POW_big_neg = f119
+POW_big_pos = f120
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-pow_table_P:
-ASM_TYPE_DIRECTIVE(pow_table_P,@object)
+LOCAL_OBJECT_START(pow_table_P)
data8 0x8000F7B249FF332D, 0x0000BFFC // P_5
data8 0xAAAAAAA9E7902C7F, 0x0000BFFC // P_3
data8 0x80000000000018E5, 0x0000BFFD // P_1
data8 0xb8aa3b295c17f0bc, 0x00004006 // inv_ln2_by_128
-
-
+//
+//
data8 0x3FA5555555554A9E // Q_2
data8 0x3F8111124F4DD9F9 // Q_3
data8 0x3FE0000000000000 // Q_0
@@ -580,20 +590,18 @@ data8 0x43e8000000000000 // Right shift constant for exp
data8 0xc9e3b39803f2f6af, 0x00003fb7 // ln2_by_128_lo
data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
-ASM_SIZE_DIRECTIVE(pow_table_P)
+LOCAL_OBJECT_END(pow_table_P)
-pow_table_Q:
-ASM_TYPE_DIRECTIVE(pow_table_Q,@object)
+LOCAL_OBJECT_START(pow_table_Q)
data8 0x9249FE7F0DC423CF, 0x00003FFC // P_4
data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC // P_2
data8 0xAAAAAAAAAAAAB505, 0x00003FFD // P_0
data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo = +6.93147e-001
data8 0xb17217f7d1cf79ab, 0x00003ff7 // ln2_by_128_hi
-ASM_SIZE_DIRECTIVE(pow_table_Q)
+LOCAL_OBJECT_END(pow_table_Q)
-pow_Tt:
-ASM_TYPE_DIRECTIVE(pow_Tt,@object)
+LOCAL_OBJECT_START(pow_Tt)
data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))= +1.95503e-003
data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))= +5.87661e-003
data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))= +9.81362e-003
@@ -850,13 +858,12 @@ data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))= +6.863
data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))= +6.88276e-001
data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))= +6.90222e-001
data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))= +6.92171e-001
-ASM_SIZE_DIRECTIVE(pow_Tt)
+LOCAL_OBJECT_END(pow_Tt)
// Table 1 is 2^(index_1/128) where
// index_1 goes from 0 to 15
-pow_tbl1:
-ASM_TYPE_DIRECTIVE(pow_tbl1,@object)
+LOCAL_OBJECT_START(pow_tbl1)
data8 0x8000000000000000 , 0x00003FFF
data8 0x80B1ED4FD999AB6C , 0x00003FFF
data8 0x8164D1F3BC030773 , 0x00003FFF
@@ -873,13 +880,12 @@ data8 0x88980E8092DA8527 , 0x00003FFF
data8 0x8955EE03618E5FDD , 0x00003FFF
data8 0x8A14D575496EFD9A , 0x00003FFF
data8 0x8AD4C6452C728924 , 0x00003FFF
-ASM_SIZE_DIRECTIVE(pow_tbl1)
+LOCAL_OBJECT_END(pow_tbl1)
// Table 2 is 2^(index_1/8) where
// index_2 goes from 0 to 7
-pow_tbl2:
-ASM_TYPE_DIRECTIVE(pow_tbl2,@object)
+LOCAL_OBJECT_START(pow_tbl2)
data8 0x8000000000000000 , 0x00003FFF
data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
data8 0x9837F0518DB8A96F , 0x00003FFF
@@ -888,372 +894,287 @@ data8 0xB504F333F9DE6484 , 0x00003FFF
data8 0xC5672A115506DADD , 0x00003FFF
data8 0xD744FCCAD69D6AF4 , 0x00003FFF
data8 0xEAC0C6E7DD24392F , 0x00003FFF
-ASM_SIZE_DIRECTIVE(pow_tbl2)
-
-.global powf
+LOCAL_OBJECT_END(pow_tbl2)
.section .text
-.proc powf
-.align 32
-
-powf:
+GLOBAL_LIBM_ENTRY(powf)
+// Get exponent of x. Will be used to calculate K.
{ .mfi
- alloc r32=ar.pfs,1,35,4,0
- fms.s1 POW_Xm1 = f8,f1,f1 // Will be used for r1 if x>0
- mov pow_GR_17ones = 0x1FFFF
+ getf.exp pow_GR_signexp_X = f8
+ fms.s1 POW_Xm1 = f8,f1,f1 // Will be used for r1 if x>0
+ mov pow_GR_17ones = 0x1FFFF
}
{ .mfi
-(p0) addl pow_AD_P = @ltoff(pow_table_P), gp
- fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
+ addl pow_AD_P = @ltoff(pow_table_P), gp
+ fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
nop.i 999
;;
}
-
-// Get exponent of x. Will be used to calculate K.
+// Get significand of x. Will be used to get index to fetch T, Tt.
{ .mfi
- getf.exp pow_GR_signexp_X = f8
- frcpa.s1 POW_B, p6 = f1,f8
+ getf.sig pow_GR_sig_X = f8
+ frcpa.s1 POW_B, p6 = f1,f8
nop.i 999
}
{ .mfi
ld8 pow_AD_P = [pow_AD_P]
- fma.s1 POW_NORM_X = f8,f1,f0
- mov pow_GR_FFF7 = 0xFFF7
+ fma.s1 POW_NORM_X = f8,f1,f0
+ mov pow_GR_exp_2tom8 = 0xFFF7
}
;;
-
-
-// Get significand of x. Will be used to get index to fetch T, Tt.
// p13 = TRUE ==> X is unorm
// DOUBLE 0x10033 exponent limit at which y is an integer
-// SINGLE 0x10016
{ .mfi
- getf.sig pow_GR_sig_X = f8
- fclass.m p13,p0 = f8, 0x0b // Test for x unorm
- addl pow_GR_10033 = 0x10033, r0
+ nop.m 999
+ fclass.m p13,p0 = f8, 0x0b // Test for x unorm
+ addl pow_GR_10033 = 0x10033, r0
}
{ .mfi
mov pow_GR_16ones = 0xFFFF
- fma.s1 POW_NORM_Y = f9,f1,f0
+ fma.s1 POW_NORM_Y = f9,f1,f0
nop.i 999
}
;;
-
// p14 = TRUE ==> X is ZERO
{ .mfi
adds pow_AD_Tt = pow_Tt - pow_table_P, pow_AD_P
- fclass.m p14,p15 = f8, 0x07
- and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+ fclass.m p14,p0 = f8, 0x07
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
}
{ .mfi
- adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P
+ adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P
nop.f 999
nop.i 999
}
;;
{ .mfi
- ldfe POW_P5 = [pow_AD_P], 16
- fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
- shl pow_GR_offset = pow_GR_sig_X, 1
+ ldfe POW_P5 = [pow_AD_P], 16
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
+ nop.i 999
}
{ .mib
- ldfe POW_P4 = [pow_AD_Q], 16
- sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
-(p13) br.cond.spnt L(POW_X_DENORM)
+ ldfe POW_P4 = [pow_AD_Q], 16
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+(p13) br.cond.spnt POW_X_DENORM
}
;;
-
// Continue normal and denormal paths here
-L(POW_COMMON):
+POW_COMMON:
// p11 = TRUE ==> Y is a NAN
{ .mfi
- ldfe POW_P3 = [pow_AD_P], 16
- fclass.m.unc p11,p0 = f9, 0xc3
- shr.u pow_GR_offset = pow_GR_offset,56
+ ldfe POW_P3 = [pow_AD_P], 16
+ fclass.m p11,p0 = f9, 0xc3
+ nop.i 999
}
{ .mfi
- ldfe POW_P2 = [pow_AD_Q], 16
+ ldfe POW_P2 = [pow_AD_Q], 16
nop.f 999
- nop.i 999
+ mov pow_GR_y_zero = 0
}
;;
-
-
-// Compute xsq to decide later if |x|=1
-// p11 = TRUE ==> Y is a NaN
+// Note POW_Xm1 and POW_r1 are used interchangeably
{ .mfi
- setf.sig POW_int_K = pow_GR_true_exp_X
-(p15) fms.s1 POW_r = POW_B, POW_NORM_X,f1
- shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
+ alloc r32=ar.pfs,2,19,4,0
+ fms.s1 POW_r = POW_B, POW_NORM_X,f1
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
+ setf.sig POW_int_K = pow_GR_true_exp_X
+(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
nop.i 999
}
;;
-
-
-// p12 = TRUE ==> X is ZERO and Y is ZERO
+// p12 = TRUE if Y is ZERO
+// Compute xsq to decide later if |x|=1
{ .mfi
- ldfe POW_P1 = [pow_AD_P], 16
-(p14) fclass.m.unc p12,p0 = f9, 0x07
- nop.i 999
+ ldfe POW_P1 = [pow_AD_P], 16
+ fclass.m p12,p0 = f9, 0x07
+ shl pow_GR_offset = pow_GR_sig_X, 1
}
{ .mfb
- ldfe POW_P0 = [pow_AD_Q], 16
+ ldfe POW_P0 = [pow_AD_Q], 16
fma.s1 POW_xsq = POW_NORM_X, POW_NORM_X, f0
-(p11) br.cond.spnt L(POW_Y_NAN)
+(p11) br.cond.spnt POW_Y_NAN // Branch if y=nan
}
;;
-
-.pred.rel "mutex",p8,p9
// Get exponent of |x|-1 to use in comparison to 2^-8
-{ .mmf
-(p8) getf.exp pow_GR_signexp_Xm1 = POW_Xp1
-(p9) getf.exp pow_GR_signexp_Xm1 = POW_Xm1
- fcvt.fx.s1 POW_int_Y = POW_NORM_Y
+{ .mfi
+ getf.exp pow_GR_signexp_Xm1 = POW_Xm1
+ fcvt.fx.s1 POW_int_Y = POW_NORM_Y
+ shr.u pow_GR_offset = pow_GR_offset,56
}
;;
-
// p11 = TRUE ==> X is a NAN
{ .mfi
ldfpd POW_log2_hi, POW_log2_lo = [pow_AD_Q], 16
- fclass.m.unc p11,p0 = f8, 0xc3
- nop.i 999
+ fclass.m p11,p0 = f8, 0xc3
+ shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
}
-{ .mib
- ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16
- nop.i 999
-(p12) br.cond.spnt L(POW_X_0_Y_0)
+{ .mfi
+ ldfe POW_inv_log2_by_128 = [pow_AD_P], 16
+ fma.s1 POW_delta = f0,f0,f0 // delta=0 in case |x| near 1
+(p12) mov pow_GR_y_zero = 1
}
;;
-
-// p14 = TRUE ==> X is zero
-// p15 = TRUE ==> X is zero AND Y is negative
-// p10 = TRUE ==> X is zero AND Y is >= zero
{ .mfi
- ldfe POW_inv_log2_by_128 = [pow_AD_P], 16
-(p14) fcmp.lt.unc.s1 p15, p10 = f9,f0
- nop.i 999
+ ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16
+ fma.s1 POW_G = f0,f0,f0 // G=0 in case |x| near 1
+ and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
}
-{ .mfi
- nop.m 999
- nop.f 999
- and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
-}
;;
-
// Determine if we will use the |x| near 1 path (p6) or normal path (p7)
-// p12 = TRUE ==> X is a NAN and Y is a zero
-// p13 = TRUE ==> X is a NAN and Y is anything else
{ .mfi
- getf.exp pow_GR_signexp_Y = POW_NORM_Y
-(p11) fclass.m.unc p12,p13 = f9, 0x07
- cmp.lt.unc p6,p7 = pow_GR_exp_Xm1, pow_GR_FFF7
+ getf.exp pow_GR_signexp_Y = POW_NORM_Y
+ nop.f 999
+ cmp.lt p6,p7 = pow_GR_exp_Xm1, pow_GR_exp_2tom8
}
-{ .mfi
- ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16
- fma.s1 POW_rsq = POW_r, POW_r,f0
- nop.i 999
-;;
+{ .mfb
+ ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16
+ fma.s1 POW_rsq = POW_r, POW_r,f0
+(p11) br.cond.spnt POW_X_NAN // Branch if x=nan and y not nan
}
+;;
// If on the x near 1 path, assign r1 to r and r1*r1 to rsq
{ .mfi
- ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16
-(p6) fma.s1 POW_r = POW_r1, f1, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p6) fma.s1 POW_rsq = POW_r1, POW_r1, f0
+ ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16
+(p6) fma.s1 POW_r = POW_r1, f1, f0
nop.i 999
-;;
-}
-
-
-{ .mfi
- ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16
-(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4
- and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
}
{ .mfb
nop.m 999
-(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4
-(p12) br.cond.spnt L(POW_X_NAN_Y_0)
+(p6) fma.s1 POW_rsq = POW_r1, POW_r1, f0
+(p14) br.cond.spnt POW_X_0 // Branch if x zero and y not nan
}
;;
-
{ .mfi
- nop.m 999
-(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2
- andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
+ ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16
+(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4
+ nop.i 999
}
-{ .mfb
+{ .mfi
nop.m 999
-(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2
-(p12) br.cond.spnt L(POW_X_NAN_Y_0)
+(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4
+ nop.i 999
}
;;
{ .mfi
nop.m 999
- fcvt.xf POW_K = POW_int_K
+(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2
nop.i 999
}
-{ .mfb
- nop.m 999
-(p13) fma.s f8 = f8,f1,f0
-(p13) br.ret.spnt b0 // Exit if x nan, y anything but zero
-}
-;;
-
-// p10 = TRUE ==> X is zero AND Y is positive
-// p8 = TRUE ==> X is zero AND Y is outside integer range (treat as even int)
-// return +0
-// p9 = TRUE ==> X is zero AND Y is within integer range (may not be integer)
{ .mfi
-(p10) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033
-(p6) fmerge.s POW_delta = f0,f0
+ nop.m 999
+(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2
nop.i 999
}
+;;
+
{ .mfi
nop.m 999
-(p6) fma.s1 POW_G = f0,f0,f0
+ fcvt.xf POW_K = POW_int_K
nop.i 999
}
;;
{ .mfi
- getf.sig pow_GR_sig_int_Y = POW_int_Y
- fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0
- nop.i 999
+ getf.sig pow_GR_sig_int_Y = POW_int_Y
+ fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
}
-{ .mfi
- nop.m 999
- fma.s1 POW_U = POW_NORM_Y,POW_r,f0
- nop.i 999
+{ .mfb
+ andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
+ fma.s1 POW_U = POW_NORM_Y,POW_r,f0
+(p12) br.cond.spnt POW_Y_0 // Branch if y=zero, x not zero or nan
}
;;
+// p11 = TRUE ==> X is NEGATIVE but not inf
{ .mfi
- ldfe POW_log2_by_128_lo = [pow_AD_P], 16
-(p6) fma.s1 POW_v2 = POW_P1, POW_r1, POW_P0
+ ldfe POW_log2_by_128_lo = [pow_AD_P], 16
+ fclass.m p11,p0 = f8, 0x1a
nop.i 999
}
{ .mfi
- ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
-(p7) fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
+ ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
+ fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
nop.i 999
}
;;
-
{ .mfi
nop.m 999
- fcvt.xf POW_float_int_Y = POW_int_Y
+ fcvt.xf POW_float_int_Y = POW_int_Y
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4
- adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
+ fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4
+ adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
}
;;
{ .mfi
nop.m 999
-(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt
+(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt
nop.i 999
}
{ .mfi
nop.m 999
-(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
- adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
+(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
+ adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
}
;;
-
{ .mfi
nop.m 999
- fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U
+ fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U
+ fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U
nop.i 999
}
;;
-// p11 = TRUE ==> X is NEGATIVE
-// p8 = TRUE ==> X is zero AND Y is outside intger range (treat as even int)
-// return +0
{ .mfi
nop.m 999
- fclass.m.unc p11,p0 = f8, 0x1a
- nop.i 999
-}
-{ .mfb
- nop.m 999
-(p8) fma.s f8 = f0,f0,f0
-(p8) br.ret.spnt b0
-}
-;;
-
-{ .mfi
- nop.m 999
- fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
+ fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
nop.i 999
}
-{ .mfi
+{ .mfi
nop.m 999
- fma.s1 POW_p = POW_rsq, POW_v3, POW_v2
+ fma.s1 POW_p = POW_rsq, POW_v3, POW_v2
nop.i 999
}
;;
-
-// p11 = TRUE ==> X is NEGATIVE
-// p12 = TRUE ==> X is NEGATIVE AND Y already int
+// p11 = TRUE ==> X is NEGATIVE but not inf
+// p12 = TRUE ==> X is NEGATIVE AND Y already even int
// p13 = TRUE ==> X is NEGATIVE AND Y possible int
{ .mfi
nop.m 999
- fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0
-(p11) cmp.ge.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
+ fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0
+(p11) cmp.gt.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
}
{ .mfi
nop.m 999
- fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
- nop.i 999
-}
-;;
-
-// p9 = TRUE ==> X is zero AND Y is within integer range (may not be integer)
-// p6 = TRUE ==> X is zero AND Y is an integer (may be even or odd)
-// p7 = TRUE ==> X is zero AND Y is NOT an integer, return +0
-{ .mfi
- nop.m 999
-(p9) fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 POW_Gpr = POW_G, f1, POW_r
+ fma.s1 POW_Gpr = POW_G, f1, POW_r
nop.i 999
}
;;
@@ -1266,24 +1187,14 @@ L(POW_COMMON):
}
{ .mfi
nop.m 999
- fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2
+ fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2
nop.i 999
}
;;
-
-// If x=0 and y>0, test y and flag denormal
-// p6 = TRUE ==> X is zero AND Y is an integer (may be even or odd)
-// p8 = TRUE ==> X is zero AND Y is an odd integer
-// p9 = TRUE ==> X is zero AND Y is an even integer
-{ .mfi
- nop.m 999
-(p10) fcmp.eq.s0 p15,p0 = f9,f0
-(p6) tbit.nz.unc p8,p9 = pow_GR_sig_int_Y,0
-}
{ .mfi
nop.m 999
- fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
+ fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
nop.i 999
}
;;
@@ -1291,7 +1202,7 @@ L(POW_COMMON):
// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
{ .mfi
nop.m 999
- fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1
+ fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1
nop.i 999
}
{ .mfi
@@ -1301,81 +1212,60 @@ L(POW_COMMON):
}
;;
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+// p10 = TRUE ==> X is NEG and Y is an int
+// p12 = TRUE ==> X is NEG and Y is not an int
{ .mfi
nop.m 999
-(p7) fma.s f8 = f0,f0,f0 // Result +0 if x zero and y not integer
- nop.i 999
+(p13) fcmp.eq.unc.s1 p10,p12 = POW_float_int_Y, POW_NORM_Y
+ mov pow_GR_xneg_yodd = 0
}
-{ .mfb
+{ .mfi
nop.m 999
- fma.s1 POW_Y_Gpr = POW_NORM_Y, POW_Gpr, f0
-(p8) br.ret.spnt b0 // Exit if x zero and y odd integer
+ fma.s1 POW_Y_Gpr = POW_NORM_Y, POW_Gpr, f0
+ nop.i 999
}
;;
// By subtracting RSHF we get rounded integer POW_N2float
-// p15 = TRUE ==> X_0_Y_NEG
{ .mfi
nop.m 999
fms.s1 POW_N2float = POW_W2, f1, POW_RSHF
nop.i 999
}
-{ .mfb
+{ .mfi
nop.m 999
- fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2
-(p15) br.cond.spnt L(POW_X_0_Y_NEG)
+ fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2
+ nop.i 999
}
;;
-
-
{ .mfi
nop.m 999
- fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
+ fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
nop.i 999
}
-{ .mfb
+{ .mfi
nop.m 999
- fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2
-(p7) br.ret.spnt b0 // Exit if x zero and y not an integer
+ fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2
+ nop.i 999
}
;;
-
-
// Extract rounded integer from rightmost significand of POW_W2
// By subtracting RSHF we get rounded integer POW_N1float
{ .mfi
- getf.sig pow_GR_int_W2 = POW_W2
+ getf.sig pow_GR_int_W2 = POW_W2
fms.s1 POW_N1float = POW_W1, f1, POW_RSHF
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half
- nop.i 999
-}
-;;
-
-
-
-
-// p13 = TRUE ==> X is NEGATIVE AND Y possible int
-// p10 = TRUE ==> X is NEG and Y is an int
-// p12 = TRUE ==> X is NEG and Y is not an int
-{ .mfi
- nop.m 999
-(p13) fcmp.eq.unc.s1 p10,p12 = POW_float_int_Y, POW_NORM_Y
+ fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half
nop.i 999
}
-{ .mfb
- nop.m 999
-(p9) fma.s f8 = f0,f0,f0 // Result +0 if x zero and y even integer
-(p9) br.ret.spnt b0 // Exit if x zero and y even integer
-}
;;
-
{ .mfi
nop.m 999
fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2
@@ -1383,7 +1273,7 @@ L(POW_COMMON):
}
{ .mfi
nop.m 999
- fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV
+ fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV
nop.i 999
}
;;
@@ -1391,278 +1281,250 @@ L(POW_COMMON):
// Extract rounded integer from rightmost significand of POW_W1
// Test if x inf
{ .mfi
- getf.sig pow_GR_int_W1 = POW_W1
- fclass.m.unc p15,p0 = POW_NORM_X, 0x23
+ getf.sig pow_GR_int_W1 = POW_W1
+ fclass.m p15,p0 = POW_NORM_X, 0x23
nop.i 999
}
{ .mfb
nop.m 999
fnma.s1 POW_f2 = POW_N2float, POW_log2_by_128_lo, f1
-(p12) br.cond.spnt L(POW_X_NEG_Y_NONINT) // Branch if x neg, y not integer
+(p12) br.cond.spnt POW_X_NEG_Y_NONINT // Branch if x neg, y not integer
}
;;
+// p11 = TRUE ==> X is +1.0
// p12 = TRUE ==> X is NEGATIVE AND Y is an odd integer
{ .mfi
- getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
- fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4
-(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
+ getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
+ fcmp.eq.s1 p11,p0 = POW_NORM_X, f1
+(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4
+ nop.i 999
}
;;
-
{ .mfi
- add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2
+ nop.m 999
fnma.s1 POW_f1 = POW_N1float, POW_log2_by_128_lo, f1
nop.i 999
}
{ .mfb
nop.m 999
fnma.s1 POW_s1 = POW_N1float, POW_log2_by_128_hi, POW_Z1
-(p15) br.cond.spnt L(POW_X_INF)
+(p15) br.cond.spnt POW_X_INF
}
;;
-
// Test x and y and flag denormal
{ .mfi
- and pow_GR_index1 = 0x0f, pow_GR_int_N
+ nop.m 999
fcmp.eq.s0 p15,p0 = f8,f9
- shr r2 = pow_GR_int_N, 7
+ nop.i 999
}
{ .mfi
- and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
- nop.f 999
- and pow_GR_index2 = 0x70, pow_GR_int_N
+ nop.m 999
+ fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
+ nop.i 999
}
;;
-
-
{ .mfi
- shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
+ nop.m 999
fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0
- sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
+ nop.i 999
}
{ .mfi
- addl pow_int_GR_M = 0xFFFF, r2
- fma.s1 POW_e12 = POW_e1,f1,POW_e2
- add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
+ nop.m 999
+ fma.s1 POW_e12 = POW_e1,f1,POW_e2
+ nop.i 999
}
;;
-
-{ .mmi
- ldfe POW_T1 = [pow_AD_T1],16
- setf.exp POW_2M = pow_int_GR_M
- andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+{ .mfi
+ add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2
+(p11) fma.s.s0 f8 = f1,f1,f0 // If x=1, result is +1
+ nop.i 999
+}
+{ .mib
+(p12) mov pow_GR_xneg_yodd = 1
+ nop.i 999
+(p11) br.ret.spnt b0 // Early exit if x=1.0, result is +1
}
;;
-
-{ .mfb
- ldfe POW_T2 = [pow_AD_T2],16
- fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2
+{ .mfi
+ and pow_GR_index1 = 0x0f, pow_GR_int_N
+ fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2
+ shr pow_int_GR_M = pow_GR_int_N, 7 // M = N/128
+}
+{ .mib
+ and pow_GR_index2 = 0x70, pow_GR_int_N
+ nop.i 999
(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x
}
;;
-
-// double: p8 TRUE ==> |Y(G + r)| >= 10
-// single: p8 TRUE ==> |Y(G + r)| >= 7
-
-// double
-// -2^10 -2^9 2^9 2^10
-// -----+-----+----+ ... +-----+-----+-----
-// p8 | p9 | p8
-// | | p10 | |
-// single
-// -2^7 -2^6 2^6 2^7
-// -----+-----+----+ ... +-----+-----+-----
-// p8 | p9 | p8
-// | | p10 | |
-
-
{ .mfi
-(p0) cmp.le.unc p8,p9 = 7, pow_GR_true_exp_Y_Gpr
- fma.s1 POW_s = POW_s1, f1, POW_s2
- nop.i 999
+ shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
+ fma.s1 POW_s = POW_s1, f1, POW_s2
+ add pow_int_GR_M = pow_GR_16ones, pow_int_GR_M
}
{ .mfi
- nop.m 999
- fma.s1 POW_f12 = POW_f1, POW_f2,f0
+ add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
+ fma.s1 POW_f12 = POW_f1, POW_f2,f0
nop.i 999
}
;;
-
-{ .mfi
+{ .mmf
+ ldfe POW_T1 = [pow_AD_T1]
+ ldfe POW_T2 = [pow_AD_T2]
nop.f 999
-(p9) cmp.le.unc p0,p10 = 6, pow_GR_true_exp_Y_Gpr
}
;;
-
-
-{ .mfb
- nop.m 999
- fma.s1 POW_e123 = POW_e12, f1, POW_e3
-(p8) br.cond.spnt L(POW_OVER_UNDER_X_NOT_INF)
+{ .mfi
+ setf.exp POW_2M = pow_int_GR_M
+ fma.s1 POW_e123 = POW_e12, f1, POW_e3
+ and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
}
;;
-
-{ .mmf
- fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
+{ .mfi
+ nop.m 999
+ fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
+ sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
}
;;
+// p8 TRUE ==> |Y(G + r)| >= 2^7
+// single
+// -2^7 -2^6 2^6 2^7
+// -----+-----+----+ ... +-----+-----+-----
+// p8 | p9 | p8
+// | | p10 | |
+
+// Form signexp of constants to indicate overflow
{ .mfi
- nop.m 999
- fma.s1 POW_ssq = POW_s, POW_s, f0
- nop.i 999
+ mov pow_GR_big_pos = 0x1007f
+ fma.s1 POW_ssq = POW_s, POW_s, f0
+ cmp.le p8,p9 = 7, pow_GR_true_exp_Y_Gpr
}
{ .mfi
- nop.m 999
- fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2
- nop.i 999
+ mov pow_GR_big_neg = 0x3007f
+ fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2
+ andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
}
;;
+// Form big positive and negative constants to test for possible overflow
{ .mfi
- nop.m 999
- fma.s1 POW_v2 = POW_s, POW_Q1, POW_Q0_half
- nop.i 999
+ setf.exp POW_big_pos = pow_GR_big_pos
+ fma.s1 POW_v2 = POW_s, POW_Q1, POW_Q0_half
+(p9) cmp.le.unc p0,p10 = 6, pow_GR_true_exp_Y_Gpr
}
-{ .mfi
- nop.m 999
- fma.s1 POW_1ps = f1,f1,POW_s
- nop.i 999
+{ .mfb
+ setf.exp POW_big_neg = pow_GR_big_neg
+ fma.s1 POW_1ps = f1,f1,POW_s
+(p8) br.cond.spnt POW_OVER_UNDER_X_NOT_INF
}
;;
+// f123 = f12*(e123+1) = f12*e123+f12
{ .mfi
nop.m 999
- fma.s1 POW_f3 = POW_e123,f1,f1
+ fma.s1 POW_f123 = POW_e123,POW_f12,POW_f12
nop.i 999
}
;;
{ .mfi
nop.m 999
- fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
+ fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
nop.i 999
}
-;;
-
{ .mfi
nop.m 999
- fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4
- nop.i 999
+ fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4
+ cmp.ne p12,p13 = pow_GR_xneg_yodd, r0
}
;;
{ .mfi
nop.m 999
- fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 POW_s4 = POW_ssq, POW_ssq, f0
+ fma.s1 POW_2Mqp1 = POW_2M, POW_q, POW_2M
nop.i 999
}
;;
{ .mfi
nop.m 999
- fma.s1 POW_f123 = POW_f12, POW_f3, f0
+ fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps
nop.i 999
}
-;;
-
{ .mfi
nop.m 999
- fma.s1 POW_A = POW_2M, POW_T1T2, f0
+ fma.s1 POW_s4 = POW_ssq, POW_ssq, f0
nop.i 999
}
;;
-
-
{ .mfi
nop.m 999
-(p12) fmerge.s POW_f123 = f8,POW_f123 // if x neg, y odd int
+(p12) fnma.s1 POW_A = POW_T1T2, POW_f123, f0
nop.i 999
}
{ .mfi
nop.m 999
-// fma.s1 POW_es = POW_ssq, POW_v3, POW_v2
+(p13) fma.s1 POW_A = POW_T1T2, POW_f123, f0
nop.i 999
}
;;
{ .mfi
nop.m 999
- fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps
+ fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps
nop.i 999
}
-;;
-
-
{ .mfi
nop.m 999
- fma.s1 POW_A = POW_A, POW_f123, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
-// fma.s1 POW_es = POW_es, POW_ssq, POW_1ps
+ fma.s1 POW_A = POW_A, POW_2Mqp1, f0
nop.i 999
}
;;
-
+// Dummy op to set inexact
{ .mfi
nop.m 999
- fma.s1 POW_A = POW_A, POW_es,f0
+ fma.s0 POW_tmp = POW_2M, POW_q, POW_2M
nop.i 999
}
;;
-
-
{ .mfb
nop.m 999
-(p10) fma.s f8 = POW_A, POW_q, POW_A
-(p10) br.ret.sptk b0
+ fma.s.s0 f8 = POW_A, POW_es, f0
+(p10) br.ret.sptk b0 // Exit main branch if no over/underflow
}
;;
-
-
-
-
// POSSIBLE_OVER_UNDER
-// p6 = TRUE ==> Y negative
+// p6 = TRUE ==> Y_Gpr negative
+// Result is already computed. We just need to know if over/underflow occurred.
-{ .mfi
- nop.m 999
- fmerge.s POW_abs_A = f0, POW_A
- cmp.eq.unc p0,p6 = pow_GR_sign_Y, r0
-}
-;;
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(POW_POSSIBLE_UNDER)
+{ .mfb
+ cmp.eq p0,p6 = pow_GR_sign_Y_Gpr, r0
+ nop.f 999
+(p6) br.cond.spnt POW_POSSIBLE_UNDER
}
;;
// POSSIBLE_OVER
-// We got an answer.
+// We got an answer.
// overflow is a possibility, not a certainty
@@ -1692,21 +1554,20 @@ L(POW_COMMON):
// RN RN
// RZ
-
// Put in s2 (td set, wre set)
{ .mfi
- mov pow_GR_gt_ln = 0x1007f
+ nop.m 999
fsetc.s2 0x7F,0x42
- nop.i 999
+ nop.i 999
}
;;
-
{ .mfi
- setf.exp POW_gt_pln = pow_GR_gt_ln
- fma.s.s2 POW_wre_urm_f8 = POW_abs_A, POW_q, POW_abs_A
- nop.i 999 ;;
+ nop.m 999
+ fma.s.s2 POW_wre_urm_f8 = POW_A, POW_es, f0
+ nop.i 999
}
+;;
// Return s2 to default
{ .mfi
@@ -1716,31 +1577,30 @@ L(POW_COMMON):
}
;;
-
// p7 = TRUE ==> yes, we have an overflow
{ .mfi
nop.m 999
- fcmp.ge.unc.s1 p7, p0 = POW_wre_urm_f8, POW_gt_pln
+ fcmp.ge.s1 p7, p8 = POW_wre_urm_f8, POW_big_pos
nop.i 999
}
;;
-
-
-{ .mfb
-(p7) mov pow_GR_tag = 30
- fma.s f8 = POW_A, POW_q, POW_A
-(p7) br.cond.spnt __libm_error_region
+{ .mfi
+ nop.m 999
+(p8) fcmp.le.s1 p7, p0 = POW_wre_urm_f8, POW_big_neg
+ nop.i 999
}
-{ .mfb
- nop.m 999
- nop.f 999
-(p0) br.ret.sptk b0
+;;
+
+{ .mbb
+(p7) mov pow_GR_tag = 30
+(p7) br.cond.spnt __libm_error_region // Branch if overflow
+ br.ret.sptk b0 // Exit if did not overflow
}
;;
-L(POW_POSSIBLE_UNDER):
+POW_POSSIBLE_UNDER:
// We got an answer. input was < -2^9 but > -2^10 (double)
// We got an answer. input was < -2^6 but > -2^7 (float)
// underflow is a possibility, not a certainty
@@ -1763,124 +1623,250 @@ L(POW_POSSIBLE_UNDER):
// 0.1...11 2^-3ffe (biased, 1)
// largest dn smallest normal
-
// Put in s2 (td set, ftz set)
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x41
- nop.i 999
+ nop.i 999
}
;;
-
-
{ .mfi
nop.m 999
- fma.s.s2 POW_ftz_urm_f8 = POW_A, POW_q, POW_A
+ fma.s.s2 POW_ftz_urm_f8 = POW_A, POW_es, f0
nop.i 999
}
;;
-
// Return s2 to default
{ .mfi
nop.m 999
fsetc.s2 0x7F,0x40
- nop.i 999
+ nop.i 999
}
;;
-
// p7 = TRUE ==> yes, we have an underflow
{ .mfi
nop.m 999
- fcmp.eq.unc.s1 p7, p0 = POW_ftz_urm_f8, f0
- nop.i 999
+ fcmp.eq.s1 p7, p0 = POW_ftz_urm_f8, f0
+ nop.i 999
}
;;
+{ .mbb
+(p7) mov pow_GR_tag = 31
+(p7) br.cond.spnt __libm_error_region // Branch if underflow
+ br.ret.sptk b0 // Exit if did not underflow
+}
+;;
+POW_X_DENORM:
+// Here if x unorm. Use the NORM_X for getf instructions, and then back
+// to normal path
+{ .mfi
+ getf.exp pow_GR_signexp_X = POW_NORM_X
+ nop.f 999
+ nop.i 999
+}
+;;
+{ .mmi
+ getf.sig pow_GR_sig_X = POW_NORM_X
+;;
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+ nop.i 999
+}
+;;
+
+{ .mib
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+ nop.i 999
+ br.cond.sptk POW_COMMON
+}
+;;
+
+POW_X_0:
+// Here if x=0 and y not nan
+//
+// We have the following cases:
+// p6 x=0 and y>0 and is an integer (may be even or odd)
+// p7 x=0 and y>0 and is NOT an integer, return +0
+// p8 x=0 and y>0 and so big as to always be an even integer, return +0
+// p9 x=0 and y>0 and may not be integer
+// p10 x=0 and y>0 and is an odd integer, return x
+// p11 x=0 and y>0 and is an even integer, return +0
+// p12 used in dummy fcmp to set denormal flag if y=unorm
+// p13 x=0 and y>0
+// p14 x=0 and y=0, branch to code for calling error handling
+// p15 x=0 and y<0, branch to code for calling error handling
+//
+{ .mfi
+ getf.sig pow_GR_sig_int_Y = POW_int_Y // Get signif of int_Y
+ fcmp.lt.s1 p15,p13 = f9, f0 // Test for y<0
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ cmp.ne p14,p0 = pow_GR_y_zero,r0 // Test for y=0
+ fcvt.xf POW_float_int_Y = POW_int_Y
+(p14) br.cond.spnt POW_X_0_Y_0 // Branch if x=0 and y=0
+}
+;;
+// If x=0 and y>0, test y and flag denormal
{ .mfb
-(p7) mov pow_GR_tag = 31
- fma.s f8 = POW_A, POW_q, POW_A
-(p7) br.cond.spnt __libm_error_region
+(p13) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033 // Test y +big = even int
+(p13) fcmp.eq.s0 p12,p0 = f9,f0 // If x=0, y>0 dummy op to flag denormal
+(p15) br.cond.spnt POW_X_0_Y_NEG // Branch if x=0 and y<0
}
;;
+// Here if x=0 and y>0
+{ .mfi
+ nop.m 999
+(p9) fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y // Test y=int
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s.s0 f8 = f0,f0,f0 // If x=0, y>0 and large even int, return +0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p7) fma.s.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y>0 and not integer
+(p6) tbit.nz.unc p10,p11 = pow_GR_sig_int_Y,0 // If y>0 int, test y even/odd
+}
+;;
+// Note if x=0, y>0 and odd integer, just return x
{ .mfb
nop.m 999
- nop.f 999
- br.ret.sptk b0
+(p11) fma.s.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y even integer
+ br.ret.sptk b0 // Exit if x=0 and y>0
}
;;
+POW_X_0_Y_0:
+// When X is +-0 and Y is +-0, IEEE returns 1.0
+// We call error support with this value
-L(POW_X_DENORM):
-// Here if x unorm. Use the NORM_X for getf instructions, and the back
-// to normal path
-{ .mfi
- getf.exp pow_GR_signexp_X = POW_NORM_X
- nop.f 999
- nop.i 999
+{ .mfb
+ mov pow_GR_tag = 32
+ fma.s.s0 f8 = f1,f1,f0
+ br.cond.sptk __libm_error_region
}
;;
+POW_X_0_Y_NEG:
+// When X is +-0 and Y is negative, IEEE returns
+// X Y answer
+// +0 -odd int +inf
+// -0 -odd int -inf
+
+// +0 !-odd int +inf
+// -0 !-odd int +inf
+
+// p6 == Y is a floating point number outside the integer range.
+// Hence it is an integer and is even.
+// return +inf
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// return (sign_of_x)inf
+// p12 even
+// return +inf
+// p10 == Y is not an integer
+// return +inf
+//
+
{ .mfi
- getf.sig pow_GR_sig_X = POW_NORM_X
- nop.f 999
- nop.i 999
+ nop.m 999
+ nop.f 999
+ cmp.gt p6,p7 = pow_GR_exp_Y, pow_GR_10033
}
;;
{ .mfi
- and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
- nop.f 999
+ mov pow_GR_tag = 33
+(p7) fcmp.eq.unc.s1 p9,p10 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
}
;;
-{ .mib
- sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
- shl pow_GR_offset = pow_GR_sig_X, 1
- br.cond.sptk L(POW_COMMON)
+{ .mfb
+ nop.m 999
+(p6) frcpa.s0 f8,p13 = f1, f0
+(p6) br.cond.sptk __libm_error_region // x=0, y<0, y large neg int
}
;;
+{ .mfb
+ nop.m 999
+(p10) frcpa.s0 f8,p13 = f1, f0
+(p10) br.cond.sptk __libm_error_region // x=0, y<0, y not int
+}
+;;
-L(POW_X_0_Y_0):
-// When X is +-0 and Y is +-0, IEEE returns 1.0
-// We call error support with this value
+// x=0, y<0, y an int
+{ .mib
+ nop.m 999
+(p9) tbit.nz.unc p11,p12 = pow_GR_sig_int_Y,0
+ nop.b 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) frcpa.s0 f8,p13 = f1,f0
+ nop.i 999
+}
+;;
{ .mfb
- mov pow_GR_tag = 32
- fma.s f8 = f1,f1,f0
- br.cond.sptk __libm_error_region
+ nop.m 999
+(p11) frcpa.s0 f8,p13 = f1,f8
+ br.cond.sptk __libm_error_region
}
;;
+POW_Y_0:
+// Here for y zero, x anything but zero and nan
+// Set flag if x denormal
+// Result is +1.0
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag if x denormal
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s.s0 f8 = f1,f1,f0
+ br.ret.sptk b0
+}
+;;
-L(POW_X_INF):
-// When X is +-inf and Y is +-, IEEE returns
+POW_X_INF:
+// Here when X is +-inf
-// overflow
-// X +inf Y +inf +inf
-// X -inf Y +inf +inf
+// X +inf Y +inf +inf
+// X -inf Y +inf +inf
-// X +inf Y >0 +inf
+// X +inf Y >0 +inf
// X -inf Y >0, !odd integer +inf <== (-inf)^0.5 = +inf !!
-// X -inf Y >0, odd integer -inf
+// X -inf Y >0, odd integer -inf
-// underflow
-// X +inf Y -inf +0
-// X -inf Y -inf +0
+// X +inf Y -inf +0
+// X -inf Y -inf +0
-// X +inf Y <0 +0
-// X -inf Y <0, !odd integer +0
-// X -inf Y <0, odd integer -0
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
// X + inf Y=+0 +1
// X + inf Y=-0 +1
@@ -1892,32 +1878,30 @@ L(POW_X_INF):
// p6 == Y is a floating point number outside the integer.
// Hence it is an integer and is even.
-// p13 == (Y negative)
+// p13 == (Y negative)
// return +inf
// p14 == (Y positive)
// return +0
-
-
// p7 == Y is a floating point number within the integer range.
// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
// p11 odd
-// p13 == (Y negative)
+// p13 == (Y negative)
// return (sign_of_x)inf
-// p14 == (Y positive)
+// p14 == (Y positive)
// return (sign_of_x)0
-// pxx even
-// p13 == (Y negative)
-// return +inf
+// pxx even
+// p13 == (Y negative)
+// return +inf
// p14 == (Y positive)
-// return +0
+// return +0
// pxx == Y is not an integer
-// p13 == (Y negative)
+// p13 == (Y negative)
// return +inf
// p14 == (Y positive)
// return +0
-//
+//
// If x=inf, test y and flag denormal
{ .mfi
@@ -1929,207 +1913,131 @@ L(POW_X_INF):
{ .mfi
nop.m 999
- fcmp.lt p13,p14 = POW_NORM_Y,f0
- cmp.gt.unc p6,p7 = pow_GR_exp_Y, pow_GR_10033
+ fcmp.lt.s0 p13,p14 = POW_NORM_Y,f0
+ cmp.gt p6,p7 = pow_GR_exp_Y, pow_GR_10033
}
{ .mfi
nop.m 999
- fclass.m p12,p0 = f9, 0x23
+ fclass.m p12,p0 = f9, 0x23 //@inf
nop.i 999
}
;;
-
{ .mfi
nop.m 999
- fclass.m p15,p0 = f9, 0x07 //@zero
+ fclass.m p15,p0 = f9, 0x07 //@zero
nop.i 999
}
;;
{ .mfb
nop.m 999
-(p15) fmerge.s f8 = f1,f1
-(p15) br.ret.spnt b0
+(p15) fmerge.s f8 = f1,f1 // Return +1.0 if x=inf, y=0
+(p15) br.ret.spnt b0 // Exit if x=inf, y=0
}
;;
-
{ .mfi
-(p13) mov pow_GR_tag = 31
-(p14) frcpa.s1 f8,p10 = f1,f0
+ nop.m 999
+(p14) frcpa.s1 f8,p10 = f1,f0 // If x=inf, y>0, assume result +inf
nop.i 999
}
{ .mfb
-(p14) mov pow_GR_tag = 30
-(p13) fma.s1 f8 = f0,f0,f0
-(p12) br.ret.spnt b0
-}
-;;
-
-
-
-{ .mfb
nop.m 999
-(p7) fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y, POW_NORM_Y
- nop.b 999
+(p13) fma.s.s0 f8 = f0,f0,f0 // If x=inf, y<0, assume result +0.0
+(p12) br.ret.spnt b0 // Exit if x=inf, y=inf
}
;;
+// Here if x=inf, and 0 < |y| < inf. Need to correct results if y odd integer.
{ .mfi
nop.m 999
- nop.f 999
-(p9) tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0
-}
-;;
-
-{ .mfb
- nop.m 999
-(p11) fmerge.s f8 = POW_NORM_X,f8
- br.ret.sptk b0
+(p7) fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y, POW_NORM_Y // Is y integer?
+ nop.i 999
}
;;
-
-
-L(POW_X_0_Y_NEG):
-// When X is +-0 and Y is negative, IEEE returns
-// X Y answer
-// +0 -odd int +inf
-// -0 -odd int -inf
-
-// +0 !-odd int +inf
-// -0 !-odd int +inf
-
-
-// p6 == Y is a floating point number outside the integer.
-// Hence it is an integer and is even.
-// return +inf
-
-// p7 == Y is a floating point number within the integer range.
-// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
-// p11 odd
-// return (sign_of_x)inf
-// p12 even
-// return +inf
-// p10 == Y is not an integer
-// return +inf
-//
-//
-
{ .mfi
nop.m 999
nop.f 999
- cmp.gt.unc p6,p7 = pow_GR_exp_Y, pow_GR_10033
-}
-;;
-
-
-{ .mfi
- mov pow_GR_tag = 33
-(p7) fcmp.eq.unc.s1 p9,p10 = POW_float_int_Y, POW_NORM_Y
- nop.i 999
-}
-;;
-
-
-{ .mfb
- nop.m 999
-(p6) frcpa.s0 f8,p13 = f1, f0
-(p6) br.cond.sptk __libm_error_region
+(p9) tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0 // Test for y odd integer
}
;;
{ .mfb
nop.m 999
-(p10) frcpa.s0 f8,p13 = f1, f0
-(p10) br.cond.sptk __libm_error_region
+(p11) fmerge.s f8 = POW_NORM_X,f8 // If y odd integer use sign of x
+ br.ret.sptk b0 // Exit for x=inf, 0 < |y| < inf
}
;;
+POW_X_NEG_Y_NONINT:
+// When X is negative and Y is a non-integer, IEEE
+// returns a qnan indefinite.
+// We call error support with this value
-{ .mib
- nop.m 999
-(p9) tbit.nz.unc p11,p12 = pow_GR_sig_int_Y,0
- nop.b 999
+{ .mfb
+ mov pow_GR_tag = 34
+ frcpa.s0 f8,p6 = f0,f0
+ br.cond.sptk __libm_error_region
}
;;
-
-
+POW_X_NAN:
+// Here if x=nan, y not nan
{ .mfi
- nop.m 999
-(p12) frcpa.s0 f8,p13 = f1,f0
- nop.i 999
-}
-;;
-
-{ .mfb
- nop.m 999
-(p11) frcpa f8,p13 = f1,f8
- br.cond.sptk __libm_error_region
+ nop.m 999
+ fclass.m p9,p13 = f9, 0x07 // Test y=zero
+ nop.i 999
}
;;
-
-
-
-L(POW_X_NEG_Y_NONINT):
-// When X is negative and Y is a non-integer, IEEE
-// returns a qnan indefinite.
-// We call error support with this value
-
{ .mfb
- mov pow_GR_tag = 34
- frcpa f8,p6 = f0,f0
- br.cond.sptk __libm_error_region
+ nop.m 999
+(p13) fma.s.s0 f8 = f8,f1,f0
+(p13) br.ret.sptk b0 // Exit if x nan, y anything but zero or nan
}
;;
-
-
-
-L(POW_X_NAN_Y_0):
+POW_X_NAN_Y_0:
// When X is a NAN and Y is zero, IEEE returns 1.
// We call error support with this value.
-
{ .mfi
- nop.m 0
- fma.s.s0 f10 = f8,f1,f0
- nop.i 0
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Dummy op to set invalid on snan
+ nop.i 999
}
{ .mfb
- mov pow_GR_tag = 35
- fma.s.s0 f8 = f0,f0,f1
+ mov pow_GR_tag = 35
+ fma.s.s0 f8 = f0,f0,f1
br.cond.sptk __libm_error_region
}
;;
-L(POW_OVER_UNDER_X_NOT_INF):
+POW_OVER_UNDER_X_NOT_INF:
// p8 is TRUE for overflow
// p9 is TRUE for underflow
// if y is infinity, we should not over/underflow
-
{ .mfi
nop.m 999
- fcmp.eq.unc.s1 p14, p13 = POW_xsq,f1
- cmp.eq.unc p8,p9 = pow_GR_sign_Y_Gpr, r0
+ fcmp.eq.s1 p14, p13 = POW_xsq,f1 // Test |x|=1
+ cmp.eq p8,p9 = pow_GR_sign_Y_Gpr, r0
}
;;
{ .mfi
nop.m 999
-(p14) fclass.m.unc p15, p0 = f9, 0x23
+(p14) fclass.m.unc p15, p0 = f9, 0x23 // If |x|=1, test y=inf
nop.i 999
}
{ .mfi
nop.m 999
-(p13) fclass.m.unc p11,p0 = f9, 0x23
+(p13) fclass.m.unc p11,p0 = f9, 0x23 // If |x| not 1, test y=inf
nop.i 999
}
;;
@@ -2137,31 +2045,33 @@ L(POW_OVER_UNDER_X_NOT_INF):
// p15 = TRUE if |x|=1, y=inf, return +1
{ .mfb
nop.m 999
-(p15) fma.s f8 = f1,f1,f0
-(p15) br.ret.spnt b0
+(p15) fma.s.s0 f8 = f1,f1,f0 // If |x|=1, y=inf, result +1
+(p15) br.ret.spnt b0 // Exit if |x|=1, y=inf
}
;;
.pred.rel "mutex",p8,p9
{ .mfb
-(p8) setf.exp f8 = pow_GR_17ones
-(p9) fmerge.s f8 = f0,f0
-(p11) br.ret.sptk b0
+(p8) setf.exp f8 = pow_GR_17ones // If exp(+big), result inf
+(p9) fmerge.s f8 = f0,f0 // If exp(-big), result 0
+(p11) br.ret.sptk b0 // Exit if |x| not 1, y=inf
}
+;;
{ .mfb
nop.m 999
nop.f 999
- br.cond.sptk L(POW_OVER_UNDER_ERROR)
+ br.cond.sptk POW_OVER_UNDER_ERROR // Branch if y not inf
}
;;
-L(POW_Y_NAN):
-// Is x = +1 then result is +1, else result is quiet Y
+POW_Y_NAN:
+// Here if y=nan, x anything
+// If x = +1 then result is +1, else result is quiet Y
{ .mfi
nop.m 999
- fcmp.eq.s1 p10,p9 = POW_NORM_X, f1
+ fcmp.eq.s1 p10,p9 = POW_NORM_X, f1
nop.i 999
}
;;
@@ -2175,148 +2085,117 @@ L(POW_Y_NAN):
{ .mfi
nop.m 999
-(p10) fma.s f8 = f1,f1,f0
+(p10) fma.s.s0 f8 = f1,f1,f0
nop.i 999
}
{ .mfb
nop.m 999
-(p9) fma.s f8 = f9,f8,f0
- br.ret.sptk b0
+(p9) fma.s.s0 f8 = f9,f8,f0
+ br.ret.sptk b0 // Exit y=nan
}
;;
-L(POW_OVER_UNDER_ERROR):
+POW_OVER_UNDER_ERROR:
+// Here if we have overflow or underflow.
+// Enter with p12 true if x negative and y odd int to force -0 or -inf
{ .mfi
- nop.m 999
- fmerge.s f10 = POW_NORM_X,POW_NORM_X
- nop.i 999
-}
-{ .mfi
- sub pow_GR_17ones_m1 = pow_GR_17ones, r0, 1
- nop.f 999
- mov pow_GR_one = 0x1
+ sub pow_GR_17ones_m1 = pow_GR_17ones, r0, 1
+ nop.f 999
+ mov pow_GR_one = 0x1
}
;;
-// overflow
+// overflow, force inf with O flag
{ .mmb
-(p8) mov pow_GR_tag = 30
-(p8) setf.exp f11 = pow_GR_17ones_m1
+(p8) mov pow_GR_tag = 30
+(p8) setf.exp POW_tmp = pow_GR_17ones_m1
nop.b 999
}
;;
-
-// underflow
+// underflow, force zero with I, U flags
{ .mmi
-(p9) mov pow_GR_tag = 31
-(p9) setf.exp f11 = pow_GR_one
+(p9) mov pow_GR_tag = 31
+(p9) setf.exp POW_tmp = pow_GR_one
nop.i 999
}
;;
-
-// p12 x is negative and y is an odd integer
-
-
{ .mfi
nop.m 999
- fma.s f8 = f11, f11, f0
+ fma.s.s0 f8 = POW_tmp, POW_tmp, f0
nop.i 999
}
;;
+// p12 x is negative and y is an odd integer, change sign of result
{ .mfi
nop.m 999
-(p12) fmerge.ns f8 = f8, f8
+(p12) fnma.s.s0 f8 = POW_tmp, POW_tmp, f0
nop.i 999
}
;;
+GLOBAL_LIBM_END(powf)
-.endp powf
-ASM_SIZE_DIRECTIVE(powf)
-
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
+LOCAL_LIBM_ENTRY(__libm_error_region)
-.proc __libm_error_region
-__libm_error_region:
-
-// Answer is inf for overflow and 0 for underflow.
.prologue
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-// (2)
{ .mmi
stfs [GR_Parameter_Y] = POW_NORM_Y,16 // STORE Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
-// (3)
{ .mib
- stfs [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
+ stfs [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-// (4)
{ .mmi
- ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
diff --git a/sysdeps/ia64/fpu/e_powl.S b/sysdeps/ia64/fpu/e_powl.S
index d286e9abad..0896c19aac 100644
--- a/sysdeps/ia64/fpu/e_powl.S
+++ b/sysdeps/ia64/fpu/e_powl.S
@@ -1,10 +1,10 @@
.file "powl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,61 +20,69 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// *********************************************************************
+//*********************************************************************
//
// Function: powl(x,y), where
-// y
+// y
// powl(x,y) = x , for double extended precision x and y values
//
-// *********************************************************************
+//*********************************************************************
//
-// History:
-// 2/02/00 (Hand Optimized)
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// History:
+// 02/02/00 (Hand Optimized)
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 1/22/01 Corrected results for powl(1,inf), powl(1,nan), and
+// 01/22/01 Corrected results for powl(1,inf), powl(1,nan), and
// powl(snan,0) to be 1 per C99, not nan. Fixed many flag settings.
-// 2/06/01 Call __libm_error support if over/underflow when y=2.
+// 02/06/01 Call __libm_error_support if over/underflow when y=2.
+// 04/17/01 Support added for y close to 1 and x a non-special value.
+// Shared software under/overflow detection for all paths
+// 02/07/02 Corrected sf3 setting to disable traps
+// 05/13/02 Improved performance of all paths
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
+// 04/17/03 Added missing mutex directive
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
-// Floating-Point Registers:
-// f8 (Input and Return Value)
-// f9-f15,f32-f63,f99
+// Floating-Point Registers:
+// f8 (Input x and Return Value)
+// f9 (Input y)
+// f10-f15,f32-f79
//
// General Purpose Registers:
-// Locals r32 - r61
+// Locals r14-24,r32-r65
// Parameters to __libm_error_support r62,r63,r64,r65
//
// Predicate Registers: p6-p15
//
-// *********************************************************************
+//*********************************************************************
//
// Special Cases and IEEE special conditions:
//
// Denormal fault raised on denormal inputs
-// Overflow exceptions raised when appropriate for pow
-// Underflow exceptions raised when appropriate for pow
+// Overflow exceptions raised when appropriate for pow
+// Underflow exceptions raised when appropriate for pow
// (Error Handling Routine called for overflow and Underflow)
// Inexact raised when appropriate by algorithm
//
@@ -102,8 +110,8 @@
// 22. X or Y denorm/unorm and denorm/unorm operand trap is enabled,
// generate denorm/unorm fault except if invalid or div_0 raised.
//
-// *********************************************************************
-//
+//*********************************************************************
+//
// Algorithm
// =========
//
@@ -113,23 +121,23 @@
// If Y = 0.5, return sqrt(X).
//
// Compute log(X) to extra precision.
-//
+//
// ker_log_80( X, logX_hi, logX_lo, Safe );
//
-// ...logX_hi + logX_lo approximates log(X) to roughly 80
+// ...logX_hi + logX_lo approximates log(X) to roughly 80
// ...significant bits of accuracy.
//
// Compute Y*log(X) to extra precision.
//
// P_hi := Y * logX_hi
-// P_lo := Y * logX_hi - P_hi ...using FMA
-// P_lo := Y * logX_lo + P_lo ...using FMA
+// P_lo := Y * logX_hi - P_hi ...using FMA
+// P_lo := Y * logX_lo + P_lo ...using FMA
//
// Compute exp(P_hi + P_lo)
//
-// Flag := 2;
+// Flag := 2;
// Expo_Range := 2; (assuming double-extended power function)
-// ker_exp_64( P_hi, P_lo, Flag, Expo_Range,
+// ker_exp_64( P_hi, P_lo, Flag, Expo_Range,
// Z_hi, Z_lo, scale, Safe )
//
// scale := sgn * scale
@@ -138,7 +146,7 @@
// return scale*Z_hi + (scale*Z_lo)
// quickly
// Else
-// take necessary precaution in computing
+// take necessary precaution in computing
// scale*Z_hi + (scale*Z_lo)
// to set possible exceptions correctly.
// End If
@@ -152,8 +160,8 @@
// If Y is qNaN, return Y without exception.
// If X is qNaN, return X without exception.
//
-// At this point, X is real and Y is +-inf.
-// Thus |X| can only be 1, strictly bigger than 1, or
+// At this point, X is real and Y is +-inf.
+// Thus |X| can only be 1, strictly bigger than 1, or
// strictly less than 1.
//
// If |X| < 1, then
@@ -169,8 +177,8 @@
// ...Note that Y is real, finite, non-zero, and not +1.
//
// If X is qNaN, return X without exception.
-//
-// If X is +-0,
+//
+// If X is +-0,
// return ( Y > 0 ? +0 : +inf )
//
// If X is +inf
@@ -180,11 +188,11 @@
// return -0 ** -Y
// return ( Y > 0 ? +inf : +0 )
//
-// Case_Invalid
+// Case_Invalid
//
// Return 0 * inf to generate a quiet NaN together
// with an invalid exception.
-//
+//
// Implementation
// ==============
//
@@ -193,15 +201,15 @@
//
// STAGE 1
// -------
-// This stage contains two threads.
+// This stage contains two threads.
//
// Stage1.Thread1
//
// fclass.m X_excep, X_ok = X, (NatVal or s/qNaN) or
-// +-0, +-infinity
+// +-0, +-infinity
//
// fclass.nm X_unsupp, X_supp = X, (NatVal or s/qNaN) or
-// +-(0, unnorm, norm, infinity)
+// +-(0, unnorm, norm, infinity)
//
// X_norm := fnorm( X ) with traps disabled
//
@@ -209,26 +217,26 @@
// If (X_unsupp) goto Filtering (Step 2)
//
// Stage1.Thread2
-// ..............
+// ..............
//
// fclass.m Y_excep, Y_ok = Y, (NatVal or s/qNaN) or
-// +-0, +-infinity
+// +-0, +-infinity
//
// fclass.nm Y_unsupp, Y_supp = Y, (NatVal or s/qNaN) or
-// +-(0, unnorm, norm, infinity)
+// +-(0, unnorm, norm, infinity)
//
// Y_norm := fnorm( Y ) with traps disabled
//
// If (Y_excep) goto Filtering (Step 2)
// If (Y_unsupp) goto Filtering (Step 2)
//
-//
+//
// STAGE 2
// -------
// This stage contains two threads.
//
-// Stage2.Thread1
-// ..............
+// Stage2.Thread1
+// ..............
//
// Set X_lt_0 if X < 0 (using fcmp)
// sgn := +1.0
@@ -245,14 +253,14 @@
// This stage contains two threads.
//
//
-// Stage3.Thread1
-// ..............
+// Stage3.Thread1
+// ..............
//
// X := fnorm(X) in prevailing traps
//
//
-// Stage3.Thread2
-// ..............
+// Stage3.Thread2
+// ..............
//
// Y := fnorm(Y) in prevailing traps
//
@@ -262,60 +270,56 @@
// Go to Case_Normal.
//
-#include "libm_support.h"
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
-// Inv_L, L_hi, L_lo
-.align 64
-Constants_exp_64_Arg:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
-data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000
-data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
-data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
-
-.align 64
-Constants_exp_64_Exponents:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
-data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
-data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
-data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
-
-.align 64
-Constants_exp_64_A:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
-// Reversed
-data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
-data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
-data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
-
-.align 64
-Constants_exp_64_P:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
-// Reversed
-data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
-data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
-data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
-data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
-data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
-data4 0x000004C7,0x80000000,0x00003FFE,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
-
-.align 64
-Constants_exp_64_T1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
-data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
-data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
+
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 2^12/ln(2) is needed for the computation of N. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*2^12/ln2 into the rightmost bits of the significand.
+// The result of this fma is N_signif.
+// 2. RSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from N_signif * 2^(-51) to give
+// the integer part of N, N_fix, as a floating-point number.
+// The result of this fms is float_N.
+RODATA
+
+.align 16
+// L_hi, L_lo
+LOCAL_OBJECT_START(Constants_exp_64_Arg)
+data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12
+data8 0xF473DE6AF278ECE6,0x00003FD4 // L_lo = lo part log(2)/2^12
+LOCAL_OBJECT_END(Constants_exp_64_Arg)
+
+LOCAL_OBJECT_START(Constants_exp_64_A)
+// Reversed
+data8 0xAAAAAAABB1B736A0,0x00003FFA
+data8 0xAAAAAAAB90CD6327,0x00003FFC
+data8 0xFFFFFFFFFFFFFFFF,0x00003FFD
+LOCAL_OBJECT_END(Constants_exp_64_A)
+
+LOCAL_OBJECT_START(Constants_exp_64_P)
+// Reversed
+data8 0xD00D6C8143914A8A,0x00003FF2
+data8 0xB60BC4AC30304B30,0x00003FF5
+data8 0x888888887474C518,0x00003FF8
+data8 0xAAAAAAAA8DAE729D,0x00003FFA
+data8 0xAAAAAAAAAAAAAF61,0x00003FFC
+data8 0x80000000000004C7,0x00003FFE
+LOCAL_OBJECT_END(Constants_exp_64_P)
+
+LOCAL_OBJECT_START(Constants_exp_64_T1)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
@@ -330,274 +334,263 @@ data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
-
-.align 64
-Constants_exp_64_T2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
-data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
-data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
-data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
-data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
-data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
-data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
-data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
-data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
-data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
-data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
-data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
-data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
-data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
-data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
-data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
+LOCAL_OBJECT_END(Constants_exp_64_T1)
+
+LOCAL_OBJECT_START(Constants_exp_64_T2)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
-
-.align 64
-Constants_exp_64_W1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
-data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
-data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
-data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
-data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
-data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
-data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
-data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
-data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
-data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
-data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
-data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
-data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
-data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
-data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
-data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
-data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
-data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
-data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
-data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
-data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
-data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
-data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
-data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
-data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
-data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
-data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
-data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
-data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
-data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
-data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
-data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
-data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
-
-.align 64
-Constants_exp_64_W2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
-data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
-data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
-data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
-data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
-data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
-data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
-data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
-data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
-data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
-data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
-data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
-data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
-data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
-data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
-data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
-data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
-data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
-data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
-data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
-data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
-data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
-data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
-data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
-data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
-data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
-data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
-data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
-data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
-data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
-data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
-data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
-data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
-
-.align 64
-Constants_log_80_P:
-ASM_TYPE_DIRECTIVE(Constants_log_80_P,@object)
-// 1/2, P_8, P_7, ..., P_1
-data4 0x00000000, 0x80000000, 0x00003FFE, 0x00000000
-data4 0x3B1042BC, 0xCCCE8B88, 0x0000BFFB, 0x00000000
-data4 0xCADC2149, 0xE38997B7, 0x00003FFB, 0x00000000
-data4 0xB1ACB090, 0xFFFFFFFE, 0x0000BFFB, 0x00000000
-data4 0x06481C81, 0x92492498, 0x00003FFC, 0x00000000
-data4 0xAAAAB0EF, 0xAAAAAAAA, 0x0000BFFC, 0x00000000
-data4 0xCCC91416, 0xCCCCCCCC, 0x00003FFC, 0x00000000
-data4 0x00000000, 0x80000000, 0x0000BFFD, 0x00000000
-data4 0xAAAAAAAB, 0xAAAAAAAA, 0x00003FFD
-ASM_SIZE_DIRECTIVE(Constants_log_80_P)
-
-.align 64
-Constants_log_80_Q:
-ASM_TYPE_DIRECTIVE(Constants_log_80_Q,@object)
-// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
-data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
-data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
-data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000
-data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000
-data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000
-data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000
-data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000
-data4 0x00000000,0x80000000,0x0000BFFE,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_log_80_Q)
-
-.align 64
-Constants_log_80_Z_G_H_h1:
-ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h1,@object)
-// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double
+LOCAL_OBJECT_END(Constants_exp_64_T2)
+
+LOCAL_OBJECT_START(Constants_exp_64_W1)
+data8 0x0000000000000000, 0xBE384454171EC4B4
+data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
+data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
+data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
+data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
+data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
+data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
+data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
+data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
+data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
+data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
+data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
+data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
+data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
+data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
+data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
+data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
+data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
+data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
+data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
+data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
+data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
+data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
+data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
+data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
+data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
+data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
+data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
+data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
+data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
+data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
+data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
+LOCAL_OBJECT_END(Constants_exp_64_W1)
+
+LOCAL_OBJECT_START(Constants_exp_64_W2)
+data8 0x0000000000000000, 0xBE641F2537A3D7A2
+data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
+data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
+data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
+data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
+data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
+data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
+data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
+data8 0xBE56856B49BFF529, 0x3E66DD3300508651
+data8 0x3E51165FC114BC13, 0x3E53333DC453290F
+data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
+data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
+data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
+data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
+data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
+data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
+data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
+data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
+data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
+data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
+data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
+data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
+data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
+data8 0xBE559725ADE45917, 0xBE68C29C042FC476
+data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
+data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
+data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
+data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
+data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
+data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
+data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
+data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
+LOCAL_OBJECT_END(Constants_exp_64_W2)
+
+LOCAL_OBJECT_START(Constants_log_80_P)
+// P_8, P_7, ..., P_1
+data8 0xCCCE8B883B1042BC, 0x0000BFFB // P_8
+data8 0xE38997B7CADC2149, 0x00003FFB // P_7
+data8 0xFFFFFFFEB1ACB090, 0x0000BFFB // P_6
+data8 0x9249249806481C81, 0x00003FFC // P_5
+data8 0x0000000000000000, 0x00000000 // Pad for bank conflicts
+data8 0xAAAAAAAAAAAAB0EF, 0x0000BFFC // P_4
+data8 0xCCCCCCCCCCC91416, 0x00003FFC // P_3
+data8 0x8000000000000000, 0x0000BFFD // P_2
+data8 0xAAAAAAAAAAAAAAAB, 0x00003FFD // P_1
+LOCAL_OBJECT_END(Constants_log_80_P)
+
+LOCAL_OBJECT_START(Constants_log_80_Q)
+// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
+data8 0xB172180000000000,0x00003FFE
+data8 0x82E308654361C4C6,0x0000BFE2
+data8 0x92492453A51BE0AF,0x00003FFC
+data8 0xAAAAAB73A0CFD29F,0x0000BFFC
+data8 0xCCCCCCCCCCCE3872,0x00003FFC
+data8 0xFFFFFFFFFFFFB4FB,0x0000BFFC
+data8 0xAAAAAAAAAAAAAAAB,0x00003FFD
+data8 0x8000000000000000,0x0000BFFE
+LOCAL_OBJECT_END(Constants_log_80_Q)
+
+LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h1)
+// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double
data4 0x00008000,0x3F800000,0x00000000,0x00000000
-data4 0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00000000,0x00000000,0x00000000,0x00000000
data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
-data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
+data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
-data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
-data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
-data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
-data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
+data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
+data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
+data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
+data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
-data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
-data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
-data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
-data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
-data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
-data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
-data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
-data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
-data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
-data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
+data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
+data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
+data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
+data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
+data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
+data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
+data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
+data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
+data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
+data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
-data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
-data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
-data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
-data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
-data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
+data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
+data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
+data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
+data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
+data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
-data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h1)
-
-.align 64
-Constants_log_80_Z_G_H_h2:
-ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h2,@object)
-// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double
-data4 0x00008000,0x3F800000,0x00000000,0x00000000
-data4 0x00000000,0x00000000,0x00000000,0x00000000
-data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
+data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
+LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h1)
+
+LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h2)
+// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double
+data4 0x00008000,0x3F800000,0x00000000,0x00000000
+data4 0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
-data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
-data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
-data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
-data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
-data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
-data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
-data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
+data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
+data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
+data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
+data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
+data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
+data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
+data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
-data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
-data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
+data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
+data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
-data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
-data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
-data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
-data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
-data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
-data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
-data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
-data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
-data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
-data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
-data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
-data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
-data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
-data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
-data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
-data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
+data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
+data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
+data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
+data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
+data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
+data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
+data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
+data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
+data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
+data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
+data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
+data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
+data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
+data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
+data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
+data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h2)
-
-.align 64
-Constants_log_80_h3_G_H:
-ASM_TYPE_DIRECTIVE(Constants_log_80_h3_G_H,@object)
-// h3 IEEE double extended, H3 and G3 IEEE single
-data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
+LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h2)
+
+LOCAL_OBJECT_START(Constants_log_80_h3_G_H)
+// h3 IEEE double extended, H3 and G3 IEEE single
+data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
-data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
-data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
+data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
+data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
-data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
-data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
-data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
-data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
-data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
-data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
+data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
+data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
+data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
+data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
+data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
+data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
-data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
-data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
-data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
-data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
-data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
-data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
-data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
-data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
-data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
-data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
-data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
+data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
+data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
+data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
+data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
+data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
+data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
+data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
+data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
+data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
+data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
+data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
-data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
-data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
-data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
-data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
-data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
-data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
-data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
-data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
-data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
-data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
+data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
+data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
+data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
+data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
+data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
+data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
+data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
+data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
+data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
+data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
-data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
-data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
-data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
-data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
-data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
-ASM_SIZE_DIRECTIVE(Constants_log_80_h3_G_H)
-
-.align 64
-Constant_half:
-ASM_TYPE_DIRECTIVE(Constant_half,@object)
-data4 0x00000000,0x80000000,0x00003FFE
-ASM_SIZE_DIRECTIVE(Constant_half)
-
-GR_Expo_Range = r32
-GR_Flag = r33
+data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
+data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
+data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
+data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
+data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
+LOCAL_OBJECT_END(Constants_log_80_h3_G_H)
+
+GR_sig_inv_ln2 = r14
+GR_rshf_2to51 = r15
+GR_exp_2tom51 = r16
+GR_rshf = r17
+GR_exp_half = r18
+GR_sign_mask = r19
+GR_exp_square_oflow = r20
+GR_exp_square_uflow = r21
+GR_exp_ynear1_oflow = r22
+GR_exp_ynear1_uflow = r23
+GR_signif_Z = r24
+
+GR_signexp_x = r32
+
+GR_exp_x = r33
+
GR_Table_Ptr = r34
GR_Table_Ptr1 = r35
-GR_BIAS = r35
GR_Index1 = r36
-GR_sign_mask = r36
GR_Index2 = r37
GR_Expo_X = r37
-GR_signif_Z = r38
GR_M = r38
GR_X_0 = r39
@@ -620,45 +613,49 @@ GR_k = r44
GR_Big_Pos_Exp = r45
+GR_exp_pos_max = r46
-GR_BIAS_p_k = r47
-GR_BIASed_exp_y = r47
+GR_exp_bias_p_k = r47
-GR_Big_Neg_Exp = r48
GR_Index3 = r48
GR_temp = r48
GR_vsm_expo = r49
-GR_y_sign = r49
GR_T1_ptr = r50
+GR_P_ptr1 = r50
GR_T2_ptr = r51
+GR_P_ptr2 = r51
GR_N_fix = r52
GR_exp_y = r53
GR_signif_y = r54
-GR_exp_and_sign_y = r55
+GR_signexp_y = r55
+GR_fraction_y = r55
GR_low_order_bit = r56
-GR_get_exp_mask = r57
-GR_exponent_zero = r58
-
-// ** Registers for unwind support
+GR_exp_mask = r57
+GR_exp_bias = r58
+GR_y_sign = r59
+GR_table_base = r60
+GR_ptr_exp_Arg = r61
+GR_Delta_Exp = r62
+GR_Special_Exp = r63
+GR_exp_neg_max = r64
+GR_Big_Neg_Exp = r65
+
+//** Registers for unwind support
GR_SAVE_PFS = r59
GR_SAVE_B0 = r60
GR_SAVE_GP = r61
-GR_Parameter_X = r62
-GR_Parameter_Y = r63
-GR_Parameter_RESULT = r64
-GR_Parameter_TAG = r65
-
-FR_X = f8
-FR_Y = f9
-FR_RESULT = f99
+GR_Parameter_X = r62
+GR_Parameter_Y = r63
+GR_Parameter_RESULT = r64
+GR_Parameter_TAG = r65
-// **
+//**
FR_Input_X = f8
-FR_Output = f8
+FR_Result = f8
FR_Input_Y = f9
FR_Neg = f10
@@ -671,7 +668,6 @@ FR_poly_hi = f11
FR_Sgn = f12
-FR_Neg_X = f13
FR_half_W = f13
FR_X_cor = f14
@@ -698,13 +694,11 @@ FR_Scale = f36
FR_G_1 = f37
FR_G = f37
FR_Wsq = f37
-FR_L_Inv = f37
FR_temp = f37
FR_H_1 = f38
FR_H = f38
FR_W4 = f38
-FR_float_N = f38
FR_h = f39
FR_h_1 = f39
@@ -720,9 +714,7 @@ FR_L_lo = f41
FR_A_1 = f41
FR_h_2 = f42
-FR_P_6 = f42
-FR_abs_W = f43
FR_W1 = f43
FR_G_3 = f44
@@ -740,7 +732,6 @@ FR_H_3 = f47
FR_float_N = f48
-FR_P_4 = f49
FR_A_2 = f49
FR_Q_4 = f50
@@ -768,7 +759,6 @@ FR_Two = f56
FR_Big = f57
FR_neg_2_mK = f58
-FR_NBig = f58
FR_r = f59
@@ -777,1652 +767,1253 @@ FR_poly_lo = f60
FR_poly = f61
FR_P_5 = f62
+FR_Result_small = f62
FR_rsq = f63
-FR_Result = f99
-FR_Result_small = f100
-FR_Result_big = f101
+FR_Delta = f64
-.section .text
-.proc powl#
-.global powl#
-.align 64
+FR_save_Input_X = f65
+FR_norm_X = f66
+FR_norm_Y = f67
+FR_Y_lo_2 = f68
-powl:
-{ .mfi
-alloc GR_Expo_Range = ar.pfs,0,30,4,0
-(p0) fclass.m.unc p7, p13 = FR_Input_Y, 0x1E7
-nop.i 0
-}
-{ .mfi
-(p0) getf.exp GR_exp_and_sign_y = FR_Input_Y
+FR_P_6 = f69
+FR_Result_big = f69
+
+FR_RSHF_2TO51 = f70
+FR_INV_LN2_2TO63 = f71
+FR_2TOM51 = f72
+FR_RSHF = f73
+FR_TMP1 = f74
+FR_TMP2 = f75
+FR_TMP3 = f76
+FR_Tscale = f77
+FR_P_4 = f78
+FR_NBig = f79
+
+
+.section .text
+GLOBAL_LIBM_ENTRY(powl)
//
-// Save State
+// Get significand of x. It is the critical path.
//
-(p0) fclass.m.unc p6, p12 = FR_Input_X, 0x1E7
-nop.i 0
-};;
{ .mfi
-(p0) getf.sig GR_signif_y = FR_Input_Y
-(p0) fcmp.eq.unc.s1 p12, p13 = FR_Input_X, f1
-nop.i 0
+ getf.sig GR_signif_Z = FR_Input_X // Get significand of x
+ fclass.m p11, p12 = FR_Input_X, 0x0b // Test x unorm
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Check for y = 1
-// Identify EM unsupporteds.
-// Load FR_half = .5
-//
-(p0) fadd.s1 FR_Two = f1, f1
-//
-// Load 1/2 in GP register
-//
-nop.i 0
+ nop.m 999
+ fnorm.s1 FR_norm_X = FR_Input_X // Normalize x
+ mov GR_exp_half = 0xffff - 1 // Exponent for 0.5
}
;;
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constant_half#), gp
- nop.i 999
+{ .mfi
+ alloc r32 = ar.pfs,0,30,4,0
+ fclass.m p7, p0 = FR_Input_Y, 0x1E7 // Test y natval, nan, inf, zero
+ mov GR_exp_pos_max = 0x13fff // Max exponent for pos oflow test
+}
+{ .mfi
+ addl GR_table_base = @ltoff(Constants_exp_64_Arg#), gp // Ptr to tables
+ fnorm.s1 FR_norm_Y = FR_Input_Y // Normalize y
+ mov GR_exp_neg_max = 0x33fff // Max exponent for neg oflow test
}
;;
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
- nop.m 999
- nop.i 999
+{ .mfi
+ getf.exp GR_signexp_y = FR_Input_Y // Get sign and exp of y
+(p12) fclass.m p11, p0 = FR_Input_Y, 0x0b // Test y unorm
+ mov GR_sign_mask = 0x20000 // Sign mask
+}
+{ .mfi
+ ld8 GR_table_base = [GR_table_base] // Get base address for tables
+ fadd.s1 FR_Two = f1, f1 // Form 2.0 for square test
+ mov GR_exp_mask = 0x1FFFF // Exponent mask
}
;;
-{ .mlx
-(p0) ldfe FR_Half =[GR_Table_Ptr],0
-(p0) movl GR_get_exp_mask = 0x1FFFF ;;
+{ .mfi
+ getf.sig GR_signif_y = FR_Input_Y // Get significand of y
+ fclass.m p6, p0 = FR_Input_X, 0x1E7 // Test x natval, nan, inf, zero
+ nop.i 999
}
+;;
{ .mfi
- nop.m 999
-(p0) fclass.nm.unc p9, p15 = FR_Input_Y, 0x1FF
-//
-// Create FR_Two = 2
-// Get exp and significand of Y
-// Crate Masks
-// sgn = 1
-//
-(p0) and GR_exp_y = GR_get_exp_mask,GR_exp_and_sign_y
+ getf.exp GR_signexp_x = FR_Input_X // Get signexp of x
+ fmerge.s FR_save_Input_X = FR_Input_X, FR_Input_X
+ extr.u GR_Index1 = GR_signif_Z, 59, 4 // Extract upper 4 signif bits of x
}
-{ .mlx
- nop.m 999
-(p0) movl GR_exponent_zero = 0xFFFF ;;
+{ .mfb
+ setf.exp FR_Half = GR_exp_half // Load half
+ nop.f 999
+(p11) br.cond.spnt POWL_DENORM // Branch if x or y denorm/unorm
}
+;;
+
+// Return here from POWL_DENORM
+POWL_COMMON:
{ .mfi
- nop.m 999
-(p0) mov FR_Sgn = f1
- nop.i 999
+ setf.exp FR_Big = GR_exp_pos_max // Form big pos value for oflow test
+ fclass.nm p11, p0 = FR_Input_Y, 0x1FF // Test Y unsupported
+ shl GR_Index1 = GR_Index1,5 // Adjust index1 pointer x 32
}
{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p10, p11 = FR_Input_Y, f1
- nop.i 999 ;;
+ add GR_Table_Ptr = 0x7c0, GR_table_base // Constants_log_80_Z_G_H_h1
+ fma.s1 FR_Sgn = f1,f1,f0 // Assume result positive
+ mov GR_exp_bias = 0xFFFF // Form exponent bias
}
-{ .mfb
- nop.m 999
+;;
+
//
// Identify NatVals, NaNs, Infs, and Zeros.
-// Load Half
//
-(p0) fclass.nm.unc p8, p14 = FR_Input_X, 0x1FF
-//
-// Remove sign bit from exponent of y.
-// Check for x = 1
-//
-(p6) br.cond.spnt L(POWL_64_SPECIAL) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.spnt L(POWL_64_SPECIAL) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p8) br.cond.spnt L(POWL_64_UNSUPPORT) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt L(POWL_64_UNSUPPORT) ;;
-}
-{ .mfi
-(p0) cmp.lt.unc p9, p0 = GR_exp_y,GR_exponent_zero
-(p0) fcmp.lt.unc.s1 p6, p13 = FR_Input_X, f0
//
+// Remove sign bit from exponent of y.
+// Check for x = 1
// Branch on Infs, Nans, Zeros, and Natvals
// Check to see that exponent < 0
//
-(p0) sub GR_exp_y = GR_exp_y,GR_exponent_zero
-}
-// x not zero, is y ==2?
{ .mfi
- nop.m 999
-(p11) fcmp.eq.unc.s1 p7, p14 = FR_Input_Y, FR_Two
- nop.i 999 ;;
+ setf.exp FR_NBig = GR_exp_neg_max // Form big neg value for oflow test
+ fclass.nm p8, p0 = FR_Input_X, 0x1FF // Test X unsupported
+ and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent of y
}
{ .mfb
- nop.m 999
-(p9) fcmp.lt.unc.s1 p9, p0 = FR_Input_X, f0
-(p7) br.cond.spnt L(POWL_64_SQUARE) ;; // Branch if x not zero and y=2
+ add GR_Index1 = GR_Index1,GR_Table_Ptr
+ nop.f 999
+(p6) br.cond.spnt POWL_64_SPECIAL // Branch if x natval, nan, inf, zero
}
-{ .mfi
- nop.m 999
-(p6) fmerge.ns FR_Neg_X = FR_Input_X, FR_Input_X
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p10) fmpy.s0 FR_Result = FR_Input_X, f1
-//
-// For y = 1, compute result = x
-// For x = 1, compute 1
-// When Y is one return X and possible raise
-// denormal operand exception.
-// Remove exponent BIAS
+;;
+
+// load Z_1 from Index1
+
+// There is logic starting here to determine if y is an integer when x < 0.
+// If 0 < |y| < 1 then clearly y is not an integer.
+// If |y| > 1, then the significand of y is shifted left by the size of
+// the exponent of y. This preserves the lsb of the integer part + the
+// fractional bits. The lsb of the integer can be tested to determine if
+// the integer is even or odd. The fractional bits can be tested. If zero,
+// then y is an integer.
//
-(p6) shl GR_exp_and_sign_y= GR_signif_y,GR_exp_y ;;
-}
{ .mfi
-(p9) or GR_exp_and_sign_y = 0xF,GR_signif_y
-(p12) fma.s0 FR_Result = FR_Input_Y, f0, f1
- nop.i 999 ;;
+ ld2 GR_Z_1 =[GR_Index1],4 // Load Z_1
+ fmerge.s FR_Z = f0, FR_norm_X // Z = |x|
+ extr.u GR_X_0 = GR_signif_Z, 49, 15 // Extract X_0 from significand
}
-{ .mii
- nop.m 999
-(p6) extr.u GR_exp_y = GR_exp_and_sign_y,63,1 ;;
-(p6) cmp.ne.unc p9, p0 = GR_exp_y, r0
+{ .mfb
+ cmp.lt p9, p0 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1
+ nop.f 999
+(p7) br.cond.spnt POWL_64_SPECIAL // Branch if y natval, nan, inf, zero
}
-{ .mii
- nop.m 999
-//
-// Both predicates can be set.
-// Don't consider y's < 1.
-//
-(p6) shl GR_signif_y= GR_exp_and_sign_y,1 ;;
-//
-// Is shift off integer part of y.
-// Get y's even or odd bit.
-//
-(p6) cmp.ne.unc p8, p0 = GR_signif_y, r0
+;;
+
+{ .mfb
+ ldfs FR_G_1 = [GR_Index1],4 // Load G_1
+ fcmp.eq.s1 p10, p0 = FR_Input_Y, f1 // Test Y = +1.0
+(p8) br.cond.spnt POWL_64_UNSUPPORT // Branch if x unsupported
}
-{ .mib
- nop.m 999
- nop.i 999
+;;
+
//
-// Is the fractional part of the y = 0?
-// Is the integer even or odd.
+// X_0 = High order 15 bits of Z
//
-(p10) br.cond.spnt L(POWL_64_RETURN) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p12) br.cond.spnt L(POWL_64_RETURN) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p8) br.cond.spnt L(POWL_64_XNEG) ;;
+{ .mfb
+ ldfs FR_H_1 = [GR_Index1],8 // Load H_1
+(p9) fcmp.lt.unc.s1 p9, p0 = FR_Input_X, f0 // Test x<0, 0 <|y|<1
+(p11) br.cond.spnt POWL_64_UNSUPPORT // Branch if y unsupported
}
+;;
+
{ .mfi
- nop.m 999
-(p9) fmerge.ns FR_Sgn = FR_Sgn, FR_Sgn
- nop.i 999
+ ldfe FR_h_1 = [GR_Index1] // Load h_1
+ fcmp.eq.s1 p7, p0 = FR_Input_Y, FR_Two // Test y = 2.0
+ pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // X_1 = X_0 * Z_1 (bits 15-30)
+ // Wait 4 cycles to use result
}
{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s0 p11, p0 = FR_Input_Y, FR_Half
- nop.i 999 ;;
+ add GR_Table_Ptr = 0x9c0, GR_table_base // Constants_log_80_Z_G_H_h2
+ nop.f 999
+ sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent of y
}
+;;
+
//
-// Raise possible denormal operand exception for both
-// X and Y.
+// Branch for (x < 0) and Y not an integer.
//
{ .mfb
- nop.m 999
-//
-// Branch for (x < 0) and Y not an integer.
-//
-(p0) fcmp.eq.unc.s0 p12, p0 = FR_Input_X, f1
-//
-// For x < 0 and y integer, make x positive
-// For x < 0 and y odd integer,, set sign = -1.
-//
-(p11) br.cond.spnt L(POWL_64_SQRT) ;;
-}
-{ .mmf
-(p0) cmp.eq.unc p15, p14 = r0, r0
- nop.m 999
-(p13) fnorm.s1 FR_Z = FR_Input_X ;;
-}
-{ .mfi
- nop.m 999
-(p6) fnorm.s1 FR_Z = FR_Neg_X
- nop.i 999
+ nop.m 999
+ fcmp.lt.s1 p6, p0 = FR_Input_X, f0 // Test x < 0
+(p9) br.cond.spnt POWL_64_XNEG // Branch if x < 0, 0 < |y| < 1
}
;;
-//
-// Branch to embedded sqrt(x)
-//
-//
-// Computes ln( x ) to extra precision
-// Input FR 1: FR_X
-// Output FR 2: FR_Y_hi
-// Output FR 3: FR_Y_lo
-// Output PR 1: PR_Safe
-//
-
-{ .mmi
+{ .mfi
nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Z_G_H_h1#), gp
+ fcmp.eq.s1 p12, p0 = FR_Input_X, f1 // Test x=+1.0
nop.i 999
}
+{ .mfb
+ nop.m 999
+ fsub.s1 FR_W = FR_Z, f1 // W = Z - 1
+(p7) br.cond.spnt POWL_64_SQUARE // Branch if y=2
+}
;;
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
+{ .mfi
nop.m 999
- nop.i 999
+(p10) fmpy.s0 FR_Result = FR_Input_X, f1 // If y=+1.0, result=x
+(p6) shl GR_fraction_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
+ // Wait 4 cycles to use result
}
;;
-
-{ .mlx
- nop.m 999
-(p0) movl GR_BIAS = 0x000000000000FFFF ;;
-}
{ .mfi
- nop.m 999
-(p0) fsub.s1 FR_W = FR_Z, f1
- nop.i 999 ;;
-}
-//
-// Z = Norm(X) - both + and - case
-// Set Safe = True
-//
-{ .mmb
-(p0) getf.sig GR_signif_Z = FR_Z
-(p0) getf.exp GR_N = FR_Z
- nop.b 999 ;;
-}
-{ .mii
- nop.m 999
-//
-// Get significand of Z
-// W = Z - 1
-//
-(p0) extr.u GR_Index1 = GR_signif_Z, 59, 4 ;;
-//
-// Index1 = High order 4 bits of Z
-// X_0 = High order 15 bit of Z
-//
-(p0) shl GR_Index1 = GR_Index1,5 ;;
-}
-{ .mfi
- nop.m 999
-//
-// Add offset to Index1 ptr.
-//
-(p0) fabs FR_abs_W = FR_W
-//
-// BIAS = 0x000...FFFF
-// Adjust Index1 ptr ( x 32) .
-//
-(p0) add GR_Index1 = GR_Index1,GR_Table_Ptr
+ nop.m 999
+(p12) fma.s0 FR_Result = FR_Input_Y, f0, f1 // If x=1.0, result=1, chk denorm
+ extr.u GR_Index2 = GR_X_1, 6, 4 // Extract index2
}
-{ .mmi
- nop.m 999 ;;
-(p0) ld2 GR_Z_1 =[GR_Index1],4
-(p0) extr.u GR_X_0 = GR_signif_Z, 49, 15
+;;
+
+//
+// N = exponent of Z
+//
+{ .mib
+ getf.exp GR_N = FR_Z // Get exponent of Z (also x)
+ shl GR_Index2=GR_Index2,5 // Index2 x 32 bytes
+(p10) br.ret.spnt b0 // Exit if y=+1.0
}
;;
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Z_G_H_h2#), gp
+{ .mib
+ add GR_Index2 = GR_Index2, GR_Table_Ptr // Pointer to table 2
nop.i 999
+(p12) br.ret.spnt b0 // Exit if x=+1.0
}
;;
{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
- nop.m 999
+ ld2 GR_Z_2 =[GR_Index2],4 // Load Z_2
+;;
+ ldfs FR_G_2 = [GR_Index2],4 // Load G_2
nop.i 999
}
;;
-
-{ .mmi
-(p0) ldfs FR_G_1 = [GR_Index1],4 ;;
-(p0) ldfs FR_H_1 = [GR_Index1],8
- nop.i 999 ;;
+{ .mii
+ ldfs FR_H_2 = [GR_Index2],8 // Load H_2
+(p6) tbit.nz.unc p9, p0 = GR_fraction_y, 63 // Test x<0 and y odd integer
+ add GR_Table_Ptr = 0xbcc, GR_table_base // Constants_log_80_h3_G_H, G_3
}
+;;
+
//
-// Adjust Index2 (x 32).
+// For x < 0 and y odd integer, set sign = -1.
//
{ .mfi
-(p0) ldfe FR_h_1 = [GR_Index1],0
- nop.f 999
-(p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 ;;
-}
-{ .mmi
- nop.m 999 ;;
-//
-// load Z_1 from Index1
-// abs_W = |W|
-// Point to Table2
-//
-(p0) getf.exp GR_M = FR_abs_W
-//
-// M = M - BIAS
-// Load G_1
-// N = exponent of Z
-//
- nop.i 999;;
+ getf.exp GR_M = FR_W // Get signexp of W
+ nop.f 999
+ pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // X_2 = X_1 * Z_2 (bits 15-30)
}
-{ .mmi
- nop.m 999
- nop.m 999
- nop.i 999;;
+{ .mfi
+ ldfe FR_h_2 = [GR_Index2] // Load h_2
+(p9) fnma.s1 FR_Sgn = f1, f1, f0 // If x<0, y odd int, result negative
+ sub GR_N = GR_N, GR_exp_bias // Get true exponent of x = N
}
-{ .mmi
- nop.m 999
- nop.m 999
- nop.i 999;;
+;;
+
+{ .mfi
+ add GR_Table_Ptr1 = 0xdc0, GR_table_base // Ptr to H_3
+ fcmp.eq.s0 p11, p0 = FR_Input_Y, FR_Half // Test y=0.5, also set denorm
+(p6) shl GR_fraction_y= GR_fraction_y, 1 // Shift left 1 to get fraction
}
-{ .mmi
- nop.m 999
- nop.m 999
-(p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;;
+;;
+
+{ .mmb
+ setf.sig FR_float_N = GR_N
+(p6) cmp.ne.unc p8, p0 = GR_fraction_y, r0 // Test x<0 and y not integer
+(p8) br.cond.spnt POWL_64_XNEG // Branch if x<0 and y not int
}
-{ .mii
- nop.m 999
-//
-// Extract Index2
-// Load H_1
-// Is -8 > M ?
+;;
+
//
-(p0) shl GR_Index2=GR_Index2,5 ;;
-(p0) add GR_Index2 = GR_Index2, GR_Table_Ptr
-}
+// Raise possible denormal operand exception for both X and Y.
+// Set pointers in case |x| near 1
+// Branch to embedded sqrt(x) if y=0.5
//
-// M = exponent of abs_W
-// X_1 = X_0 * Z_1
-//
-{ .mii
-(p0) sub GR_M = GR_M, GR_BIAS
- nop.i 999 ;;
-(p0) cmp.gt.unc p7, p14 = -8, GR_M
+{ .mfi
+ add GR_P_ptr1 = 0x6b0, GR_table_base // Constants_log_80_P, P8, NEAR path
+ fcmp.eq.s0 p12, p0 = FR_Input_X, FR_Input_Y // Dummy to set denormal
+ add GR_P_ptr2 = 0x700, GR_table_base // Constants_log_80_P, P4, NEAR path
}
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.spnt L(LOGL80_NEAR) ;;
+{ .mfb
+ cmp.eq p15, p14 = r0, r0 // Assume result safe (no over/under)
+ fsub.s1 FR_Delta = FR_Input_Y,f1 // Delta = y - 1.0
+(p11) br.cond.spnt POWL_64_SQRT // Branch if y=0.5
}
+;;
+
//
-// Load h_1
-// Possible branch out.
-// Add offset of table to Index2
+// Computes ln( x ) to extra precision
+// Input FR 1: FR_X
+// Output FR 2: FR_Y_hi
+// Output FR 3: FR_Y_lo
+// Output PR 1: PR_Safe
//
{ .mfi
-(p0) ld2 GR_Z_2 =[GR_Index2],4
-(p0) fmerge.se FR_S = f1,FR_Z
-(p0) sub GR_N = GR_N, GR_BIAS
+ and GR_M = GR_exp_mask, GR_M // Mask to get exponent of W
+ nop.f 999
+ extr.u GR_Index3 = GR_X_2, 1, 5 // Get index3
}
;;
{ .mmi
- nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_h3_G_H#), gp
- nop.i 999
+ shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1 // Ptr to H_3
+ shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr // Ptr to G_3
+ sub GR_M = GR_M, GR_exp_bias // Get true exponent of W
}
;;
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
- nop.m 999
- nop.i 999
+{ .mib
+ ldfs FR_G_3 = [GR_Index3],-12 // Load G_3
+ cmp.gt p7, p14 = -8, GR_M // Test if |x-1| < 2^-8
+(p7) br.cond.spnt LOGL80_NEAR // Branch if |x-1| < 2^-8
}
;;
-//
-// load Z_2
-// N - BIAS
-// Point to Table 3.
-// S = merging of Z and 1.0
-//
-{ .mmi
-(p0) ldfs FR_G_2 = [GR_Index2],4
-(p0) setf.sig FR_float_N = GR_N
-(p0) add GR_Table_Ptr1 = 0x200,GR_Table_Ptr ;;
-}
-//
-// load G_2
-// X_2 = X_1 * Z_2
-// Add offset to Table 2 ptr.
-// float_N = significand of N
-//
-{ .mmi
-(p0) ldfs FR_H_2 = [GR_Index2],8 ;;
-//
-// load H_2
-// G = G * G_2
-//
-(p0) ldfe FR_h_2 = [GR_Index2],0
-(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;;
-}
-{ .mmi
- nop.m 999
- nop.m 999
- nop.i 999;;
-}
-{ .mmi
- nop.m 999
- nop.m 999
- nop.i 999;;
-}
-{ .mmi
- nop.m 999
- nop.m 999
- nop.i 999;;
+// Here if |x-1| >= 2^-8
+{ .mmf
+ ldfs FR_H_3 = [GR_Table_Ptr1] // Load H_3
+ nop.m 999
+ nop.f 999
}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;;
+;;
+
+{ .mfi
+ ldfe FR_h_3 = [GR_Index3] // Load h_3
+ fmerge.se FR_S = f1,FR_Z // S = merge of 1.0 and signif(Z)
+ nop.i 999
}
{ .mfi
-(p0) shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1
- nop.f 999
+ add GR_Table_Ptr = 0x740, GR_table_base // Constants_log_80_Q
+ fmpy.s1 FR_G = FR_G_1, FR_G_2 // G = G_1 * G_2
+ nop.i 999
+}
+;;
+
//
-// h = h_1 + h_2
-// Adjust Index3
+// Begin Loading Q's - load log2_hi part
//
-(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr ;;
-}
-{ .mmb
- nop.m 999
-(p0) ldfe FR_h_3 = [GR_Index3],12
- nop.b 999 ;;
-}
-{ .mmf
-(p0) ldfs FR_H_3 = [GR_Table_Ptr1],0
+{ .mfi
+ ldfe FR_log2_hi = [GR_Table_Ptr],16 // Load log2_hi
+ fadd.s1 FR_H = FR_H_1, FR_H_2 // H = H_1 + H_2
+ nop.i 999
+};;
+
//
-// float_N = Make N a fp number
-// Load h_3
-// Get pointer to Q table.
+// h = h_1 + h_2
//
-(p0) ldfs FR_G_3 = [GR_Index3],0
-(p0) fmpy.s1 FR_G = FR_G_1, FR_G_2
+{ .mfi
+ ldfe FR_log2_lo = [GR_Table_Ptr],16 // Load log2_lo
+ fadd.s1 FR_h = FR_h_1, FR_h_2 // h = h_1 + h_2
+ nop.i 999
}
;;
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Q#), gp
+{ .mfi
+ ldfe FR_Q_6 = [GR_Table_Ptr],16 // Load Q_6
+ fcvt.xf FR_float_N = FR_float_N
nop.i 999
}
;;
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
- nop.m 999
+{ .mfi
+ ldfe FR_Q_5 = [GR_Table_Ptr],16 // Load Q_5
+ nop.f 999
nop.i 999
}
;;
-
-
-{ .mfi
-(p0) ldfe FR_log2_hi = [GR_Table_Ptr],16
-(p0) fadd.s1 FR_H = FR_H_1, FR_H_2
- nop.i 999 ;;
-}
-{ .mmf
- nop.m 999
-//
-// G = G_1 * G_2 * G_3
-//
-(p0) ldfe FR_log2_lo = [GR_Table_Ptr],16
-//
-// load h_2
-// H = H_1 + H_2
-// Get Index3
//
-(p0) fadd.s1 FR_h = FR_h_1, FR_h_2 ;;
-}
-//
-// Load log2_lo part
-// r = G*S -1
+// G = G_1 * G_2 * G_3
//
{ .mfi
-(p0) ldfe FR_Q_6 = [GR_Table_Ptr],16
-//
-// Load H_3
-//
-(p0) fcvt.xf FR_float_N = FR_float_N
- nop.i 999 ;;
+ ldfe FR_Q_4 = [GR_Table_Ptr],16 // Load Q_4
+ fmpy.s1 FR_G = FR_G, FR_G_3
+ nop.i 999
}
+;;
+
//
-// Load Q_6
+// H = H_1 + H_2 + H_3
//
-{ .mmi
-(p0) ldfe FR_Q_5 = [GR_Table_Ptr],16 ;;
-(p0) ldfe FR_Q_4 = [GR_Table_Ptr],16
- nop.i 999 ;;
-}
-{ .mmi
-(p0) ldfe FR_Q_3 = [GR_Table_Ptr],16 ;;
-(p0) ldfe FR_Q_2 = [GR_Table_Ptr],16
- nop.i 999 ;;
+{ .mfi
+ ldfe FR_Q_3 = [GR_Table_Ptr],16 // Load Q_3
+ fadd.s1 FR_H = FR_H, FR_H_3
+ nop.i 999
}
-{ .mmf
- nop.m 999
-//
-// poly_lo = Q_5 + r * Q_6
-// Load Q_2
-// rsq = r * r
+;;
+
//
-(p0) ldfe FR_Q_1 = [GR_Table_Ptr],16
+// Y_lo = poly + Y_lo
//
-// h = h_1 + h_2 + h_3
-// H = H_1 + H_2 + H_3
-// Load G_3.
-// Begin Loading Q's - load log2_hi part
+// h = h_1 + h_2 + h_3
//
-(p0) fmpy.s1 FR_G = FR_G, FR_G_3
-}
{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_H = FR_H, FR_H_3
- nop.i 999
+ ldfe FR_Q_2 = [GR_Table_Ptr],16 // Load Q_2
+ fadd.s1 FR_h = FR_h, FR_h_3
+ nop.i 999
}
;;
//
-// Y_lo = poly + Y_lo
+// GS_hi = G*S
+// r = G*S -1
//
-
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Arg#), gp
+{ .mfi
+ ldfe FR_Q_1 = [GR_Table_Ptr],16 // Load Q_1
+ fmpy.s1 FR_GS_hi = FR_G, FR_S
nop.i 999
}
-;;
-
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
+{ .mfi
nop.m 999
+ fms.s1 FR_r = FR_G, FR_S, f1
nop.i 999
}
;;
-
-{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_h = FR_h, FR_h_3
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
//
-// Load Q_5
+// poly_lo = Q_5 + r * Q_6
//
-(p0) fmpy.s1 FR_GS_hi = FR_G, FR_S
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fms.s1 FR_r = FR_G, FR_S, f1
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5
- nop.i 999
+ getf.exp GR_Delta_Exp = FR_Delta // Get signexp of y-1 for exp calc
+ fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5
+ nop.i 999
}
-{ .mfi
- nop.m 999
//
-// GS_hi = G*S
-// Load Q_4
+// r_cor = GS_hi -1
//
-(p0) fsub.s1 FR_r_cor = FR_GS_hi, f1
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi
- nop.i 999
-}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 FR_r_cor = FR_GS_hi, f1
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// Load Q_3
-// r_cor = GS_hi -1
// GS_lo = G*S - GS_hi
//
-(p0) fmpy.s1 FR_rsq = FR_r, FR_r
- nop.i 999
-}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H
- nop.i 999 ;;
+ nop.m 999
+ fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// poly = poly_hi + rsq * poly_lo
-// Tbl = float_N*log2_hi + H
+// rsq = r * r
//
-(p0) fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-//
-// r_cor = r_cor - r
-// poly_hi = r * Q_2 + Q_1
-//
-(p0) fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4
- nop.i 999
+ nop.m 999
+ fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999
}
-{ .mfi
- nop.m 999
//
-// Load Q_1
+// G = float_N*log2_hi + H
//
-(p0) fsub.s1 FR_r_cor = FR_r_cor, FR_r
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-//
-// Y_lo = float_N*log2_lo + h
-//
-(p0) fadd.s1 FR_Y_hi = FR_G, FR_r
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// poly_lo = Q_4 + r * poly_lo;;
-// r_cor = r_cor + GS_lo;;
+// Y_lo = float_N*log2_lo + h
//
-(p0) fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3
- nop.i 999
-}
{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo
- nop.i 999
+ nop.m 999
+ fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// poly_lo = Q_3 + r * poly_lo;;
+// poly_lo = Q_4 + r * poly_lo
+// r_cor = r_cor - r
//
-(p0) fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p0) fsub.s1 FR_Y_lo = FR_G, FR_Y_hi
- nop.i 999
-}
-{ .mmi
-(p0) ldfe FR_L_Inv = [GR_Table_Ptr],16 ;;
-(p0) ldfe FR_L_hi = [GR_Table_Ptr],16
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4
+ nop.i 999
}
{ .mfi
-(p0) ldfe FR_L_lo = [GR_Table_Ptr],16
- nop.f 999
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 FR_r_cor = FR_r_cor, FR_r
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// Y_hi = Tbl + r
-// r_cor = r_cor + Y_lo
+// poly_hi = r * Q_2 + Q_1
+// Y_hi = G + r
//
-(p0) fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-// Y_lo = Tbl - Y_hi
-// poly = rsq * poly + r_cor
-//
-(p0) fadd.s1 FR_Y_lo = FR_Y_lo, FR_r
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1
+ nop.i 999
}
-{ .mfb
- nop.m 999
-//
-// Y_lo = Y_lo + r
-//
-(p0) fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly
-//
-// Load L_Inv
-// Load L_hi
-// Load L_lo
-// all long before they are needed.
-// They are used in LOGL_RETURN PATH
-//
-br.cond.sptk L(LOGL_RETURN) ;;
+{ .mfi
+ nop.m 999
+ fadd.s1 FR_Y_hi = FR_G, FR_r
+ nop.i 999
}
-L(LOGL80_NEAR):
+;;
+
//
-// Branch LOGL80_NEAR
+// poly_lo = Q_3 + r * poly_lo
+// r_cor = r_cor + GS_lo
//
-
-{ .mmi
+{ .mfi
nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_P#), gp
+ fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3
nop.i 999
}
-;;
-
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
+{ .mfi
nop.m 999
+ fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo
nop.i 999
}
;;
-{ .mfi
- nop.m 999
-(p0) fmpy.s1 FR_Wsq = FR_W, FR_W
-(p0) add GR_Table_Ptr1 = 0x50,GR_Table_Ptr
-}
//
-// Adjust ptr to 1/2
-// Adjust Ptr1 to P_4
+// Y_lo = G - Y_hi
//
-{ .mmi
-(p0) ldfe FR_Half = [GR_Table_Ptr],16 ;;
-(p0) ldfe FR_P_4 = [GR_Table_Ptr1],16
- nop.i 999
+{ .mfi
+ nop.m 999
+ fsub.s1 FR_Y_lo_2 = FR_G, FR_Y_hi
+ nop.i 999
}
+;;
+
//
-// Load 1/2
+// r_cor = r_cor + Y_lo
+// poly = poly_hi + rsq * poly_lo
//
-{ .mmi
-(p0) ldfe FR_P_8 = [GR_Table_Ptr],16 ;;
-(p0) ldfe FR_P_3 = [GR_Table_Ptr1],16
- nop.i 999
+{ .mfi
+ add GR_Table_Ptr = 0x0, GR_table_base // Constants_exp_64_Arg
+ fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo
+ nop.i 999
}
-{ .mmi
-(p0) ldfe FR_P_7 = [GR_Table_Ptr],16 ;;
-(p0) ldfe FR_P_2 = [GR_Table_Ptr1],16
- nop.i 999
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly
+ nop.i 999
}
+;;
+
//
-// Load P_7
-// half_W = .5 * W
-// Load P_3
-//
-{ .mmi
-(p0) ldfe FR_P_6 = [GR_Table_Ptr],16 ;;
-(p0) ldfe FR_P_1 = [GR_Table_Ptr1],16
- nop.i 999 ;;
-}
+// Load L_hi
+// Load L_lo
+// all long before they are needed.
+// They are used in LOGL_RETURN PATH
//
-// Load P_6
-// Wsq = w * w
-// poly = w*P_4 + P_3
-// Load P_2
+// Y_lo = Y_lo + r
+// poly = rsq * poly + r_cor
//
{ .mfi
-(p0) ldfe FR_P_5 = [GR_Table_Ptr],16
-//
-// Load P_5
-// poly_lo = w * P_8 + P_7
-// Y_hi = w - (1/2)w*w
-// Load P_1
-//
-(p0) fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq
- nop.i 999
+ ldfe FR_L_hi = [GR_Table_Ptr],16 // Load L_hi
+ fadd.s1 FR_Y_lo = FR_Y_lo_2, FR_r
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 FR_W3 = FR_Wsq, FR_W
- nop.i 999
+ nop.m 999
+ fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor
+ nop.i 999
}
;;
+{ .mfb
+ ldfe FR_L_lo = [GR_Table_Ptr],16 // Load L_lo
+ fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly
+ br.cond.sptk LOGL_RETURN // Branch to common code
+}
+;;
+
+
+LOGL80_NEAR:
+// Here if |x-1| < 2^-8
//
-// Y_lo = W3 * poly + Y_lo
+// Branch LOGL80_NEAR
//
+{ .mmf
+ ldfe FR_P_8 = [GR_P_ptr1],16 // Load P_8
+ ldfe FR_P_4 = [GR_P_ptr2],16 // Load P_4
+ fmpy.s1 FR_Wsq = FR_W, FR_W
+}
+;;
+
{ .mmi
- nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Arg#), gp
+ ldfe FR_P_7 = [GR_P_ptr1],16 // Load P_7
+ ldfe FR_P_3 = [GR_P_ptr2],16 // Load P_3
nop.i 999
}
;;
{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
- nop.m 999
+ ldfe FR_P_6 = [GR_P_ptr1],16 // Load P_6
+ ldfe FR_P_2 = [GR_P_ptr2],16 // Load P_2
nop.i 999
}
;;
-
{ .mmi
-(p0) ldfe FR_L_Inv = [GR_Table_Ptr],16 ;;
-(p0) ldfe FR_L_hi = [GR_Table_Ptr],16
- nop.i 999 ;;
-}
-{ .mfi
-(p0) ldfe FR_L_lo = [GR_Table_Ptr],16
-//
-// Load P_8
-// Load P_4
-//
-(p0) fmpy.s1 FR_half_W = FR_Half, FR_W
- nop.i 999 ;;
+ ldfe FR_P_5 = [GR_P_ptr1],16 // Load P_5
+ ldfe FR_P_1 = [GR_P_ptr2],16 // Load P_1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7
- nop.i 999
+ getf.exp GR_Delta_Exp = FR_Delta // Get signexp of y-1 for exp calc
+ fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3
- nop.i 999 ;;
+ add GR_Table_Ptr = 0x0, GR_table_base // Constants_exp_64_Arg
+ fmpy.s1 FR_W3 = FR_Wsq, FR_W
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_half_W = FR_Half, FR_W
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// W4 = Wsq * Wsq
-// poly = w *poly + P_2
-//
-(p0) fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6
- nop.i 999
+ ldfe FR_L_hi = [GR_Table_Ptr],16
+ fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly = FR_W, FR_poly, FR_P_2
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fsub.s1 FR_Y_lo = FR_W, FR_Y_hi
- nop.i 999 ;;
+ ldfe FR_L_lo = [GR_Table_Ptr],16
+ fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly = w * poly + P_1
-// w3 = wsq * w
-//
-(p0) fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5
- nop.i 999
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// poly_lo = w * poly_lo + P_6
-// Y_lo = W - Y_hi
-//
-(p0) fma.s1 FR_poly = FR_W, FR_poly, FR_P_1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly = FR_W, FR_poly, FR_P_2
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 FR_Y_lo = FR_W, FR_Y_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly_lo = w * poly_lo +
-// Y_lo = Y_lo - w * (1/2)w
-//
-(p0) fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Y_lo = (W-Y_hi) - w * (1/2)w
-// poly = W4* poly_lo + poly
-//
-(p0) fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly = FR_W, FR_poly, FR_P_1
+ nop.i 999
}
-L(LOGL_RETURN):
+;;
+
{ .mfi
-(p0) add GR_Expo_Range = 0x2,r0
-//
-// Load L_Inv
-// Load L_hi
-// Load L_lo
-// all long before they are needed.
-//
-//
-// kernel_log_80 computed ln(X)
-// and return logX_hi and logX_lo as results.
-// PR_pow_Safe set as well.
-//
-(p0) fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo
-//
-// Compute Y * (logX_hi + logX_lo)
-// P_hi -> X
-// P_lo -> X_cor
-// (Manipulate names so that inputs are in
-// the place kernel_exp expects them)
-// Set GR_Flag to 2
-// Set GR_Expo_Range to Double
-//
-// This function computes exp( x + x_cor)
-// Input FR 1: FR_X
-// Input FR 2: FR_X_cor
-// Input GR 1: GR_Flag
-// Input GR 2: GR_Expo_Range
-// Output FR 3: FR_Y_hi
-// Output FR 4: FR_Y_lo
-// Output FR 5: FR_Scale
-// Output PR 1: PR_Safe
-//
-(p0) cmp.eq.unc p15, p0 = r0, r0
+ nop.m 999
+ fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo
+ nop.i 999
}
;;
-{ .mmi
-(p0) addl GR_W1_ptr = @ltoff(Constants_exp_64_W1#), gp
-(p0) addl GR_W2_ptr = @ltoff(Constants_exp_64_W2#), gp
-(p0) add GR_Flag = 0x2,r0
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly
+ nop.i 999
}
;;
-{ .mmi
- ld8 GR_W1_ptr = [GR_W1_ptr]
- ld8 GR_W2_ptr = [GR_W2_ptr]
-(p0) cmp.ne.unc p7, p0 = 0x1, GR_Flag
+{ .mfi
+ nop.m 999
+ fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo
+ nop.i 999
}
;;
-{ .mlx
- nop.m 999
-(p0) movl GR_Mask = 0x1FFFF ;;
-}
+LOGL_RETURN:
+// Common code for completion of both logx paths
-{ .mlx
- nop.m 999
-(p0) movl GR_BIAS = 0x0FFFF ;;
-}
-{ .mfi
- nop.m 999
//
-// X_lo = Y * logX_lo
+// L_hi, L_lo already loaded.
//
-(p0) fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
//
-// Set Safe=True
-// Flag is always 2 for this routine
+// kernel_log_80 computed ln(X)
+// and return logX_hi and logX_lo as results.
+// PR_pow_Safe set as well.
//
-(p0) fmpy.s1 FR_float_N = FR_X, FR_L_Inv
- nop.i 999
-}
-{ .mfi
- nop.m 999
//
-// X_hi = Y * logX_hi + X_lo
-// Set GR_Flag = 2 for exp(x + xcor)
+// Compute Y * (logX_hi + logX_lo)
+// P_hi -> X
+// P_lo -> X_cor
+// (Manipulate names so that inputs are in
+// the place kernel_exp expects them)
//
-(p0) fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi
- nop.i 999 ;;
+// This function computes exp( x + x_cor)
+// Input FR 1: FR_X
+// Input FR 2: FR_X_cor
+// Output FR 3: FR_Y_hi
+// Output FR 4: FR_Y_lo
+// Output FR 5: FR_Scale
+// Output PR 1: PR_Safe
+//
+// P15 is True
+//
+// Load constants used in computing N using right-shift technique
+{ .mlx
+ mov GR_exp_2tom51 = 0xffff-51
+ movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
-{ .mmi
- nop.m 999 ;;
-(p0) getf.exp GR_Expo_X = FR_X
- nop.i 999 ;;
+{ .mlx
+ add GR_Special_Exp = -50,GR_exp_bias
+ movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
}
-{ .mfi
-(p0) and GR_Expo_X = GR_Expo_X, GR_Mask
+;;
+
//
-// Calculate unBIASed exponent of X
// Point to Table of W1s
// Point to Table of W2s
//
-(p0) fcvt.fx.s1 FR_N = FR_float_N
- nop.i 999 ;;
-}
+{ .mmi
+ add GR_W1_ptr = 0x2b0, GR_table_base // Constants_exp_64_W1
+ add GR_W2_ptr = 0x4b0, GR_table_base // Constants_exp_64_W2
+ cmp.le p6,p0= GR_Delta_Exp,GR_Special_Exp
+};;
+
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand
+
{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo
-//
-// Float_N = X * L_Inv
-// Create exponent BIAS
-// Get BIASed exponent of X
-//
-(p0) sub GR_Expo_X = GR_Expo_X, GR_BIAS ;;
+ setf.sig FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63
+ nop.f 999
+ and GR_Delta_Exp=GR_Delta_Exp,GR_exp_mask // Get exponent of y-1
}
-{ .mib
-(p0) cmp.gt.unc p9, p0 = -6, GR_Expo_X
- nop.i 999
-//
-// N = fcvt.fx(float_N)
-// If -6 > Expo_X, set P9
-//
-(p9) br.cond.spnt L(EXPL_SMALL)
+{ .mlx
+ setf.d FR_RSHF_2TO51 = GR_rshf_2to51 // Form const 1.1000 * 2^(63+51)
+ movl GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
}
;;
-//
-// If expo_X < -6 goto exp_small
-//
-{ .mmi
+{ .mfi
nop.m 999
-(p0) addl GR_T1_ptr = @ltoff(Constants_exp_64_T1#), gp
-(p0) cmp.lt.unc p10, p0 = 14, GR_Expo_X
+ fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo // logx_lo is Y_lo
+ cmp.eq p15, p0= r0, r0 // Set p15, assume safe
+};;
+
+{ .mmi
+ setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N
+ setf.d FR_RSHF = GR_rshf // Form right shift const 1.1000 * 2^63
+ add GR_Table_Ptr1 = 0x50, GR_table_base // Constants_exp_64_P for
+ // EXPL_SMALL path
}
;;
{ .mmi
- ld8 GR_T1_ptr = [GR_T1_ptr]
- nop.m 999
+ ldfe FR_P_6 = [GR_Table_Ptr1],16 // Load P_6 for EXPL_SMALL path
+;;
+ ldfe FR_P_5 = [GR_Table_Ptr1],16 // Load P_5 for EXPL_SMALL path
nop.i 999
}
;;
-{ .mib
- nop.m 999
- nop.i 999
-//
-// If 14 < Expo_X, set P10
-// Create pointer to T1 table
-//
-(p10) br.cond.spnt L(EXPL_HUGE) ;;
+{ .mfi
+ ldfe FR_P_4 = [GR_Table_Ptr1],16 // Load P_4 for EXPL_SMALL path
+ fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo // logx_hi ix Y_hi
+ nop.i 999
}
-
+;;
{ .mmi
-(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Exponents#), gp
-(p0) addl GR_T2_ptr = @ltoff(Constants_exp_64_T2#), gp
+ ldfe FR_P_3 = [GR_Table_Ptr1],16 // Load P_3 for EXPL_SMALL path
+;;
+ ldfe FR_P_2 = [GR_Table_Ptr1],16 // Load P_2 for EXPL_SMALL path
nop.i 999
}
;;
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
- ld8 GR_T2_ptr = [GR_T2_ptr]
+// N = X * Inv_log2_by_2^12
+// By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand.
+// We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing.
+{ .mfi
+ ldfe FR_P_1 = [GR_Table_Ptr1] // Load P_1 for EXPL_SMALL path
+ fma.s1 FR_N = FR_X, FR_INV_LN2_2TO63, FR_RSHF_2TO51
nop.i 999
}
+{ .mfb
+ nop.m 999
+ fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi // P_hi is X
+(p6) br.cond.spnt POWL_Y_ALMOST_1 // Branch if |y-1| < 2^-50
+}
;;
-
{ .mmi
-(p0) shladd GR_Table_Ptr = GR_Expo_Range,4,GR_Table_Ptr ;;
-//
-// Adjust T1_ptr by x 4 for single-precision values
-// Adjust T2_ptr by x 4 for single-precision values
-//
-(p0) ld8 GR_Big_Pos_Exp = [GR_Table_Ptr],8
- nop.i 999 ;;
-}
-//
-// Load double W1
-// Load +max exponent
-//
-{ .mfi
-(p0) ld8 GR_Big_Neg_Exp = [GR_Table_Ptr],0
-//
-// If 14 < Expo_X, goto exp_huge
-//
-(p0) fcvt.xf FR_float_N = FR_N
- nop.i 999
+ getf.exp GR_Expo_X = FR_X
+ add GR_T1_ptr = 0x0b0, GR_table_base // Constants_exp_64_T1
+ add GR_T2_ptr = 0x1b0, GR_table_base // Constants_exp_64_T2
}
;;
-//
-// Load double W2
-// Load -max exponent
-// Load ptr to A's
-//
+// float_N = round_int(N)
+// The significand of N contains the rounded integer part of X * 2^12/ln2,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into GR_N_fix.
-{ .mmi
-(p0) getf.sig GR_N_fix = FR_N
-(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_A#), gp
+// Since N is scaled by 2^51, it must be multiplied by 2^-51
+// before the shift constant 1.10000 * 2^63 is subtracted to yield float_N.
+// Thus, float_N contains the floating point version of N
+
+
+{ .mfi
+ add GR_Table_Ptr = 0x20, GR_table_base // Constants_exp_64_A
+ fms.s1 FR_float_N = FR_N, FR_2TOM51, FR_RSHF // Form float_N
nop.i 999
}
-;;
+// Create low part of Y(ln(x)_hi + ln(x)_lo) as P_lo
+{ .mfi
+ mov GR_Big_Pos_Exp = 0x3ffe // 16382, largest safe exponent
+ fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo
+ mov GR_Big_Neg_Exp = -0x3ffd // -16381 smallest safe exponent
+};;
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
+{ .mfi
nop.m 999
- nop.i 999
+ fmpy.s1 FR_rsq = FR_X, FR_X // rsq = X*X for EXPL_SMALL path
+ mov GR_vsm_expo = -70 // Exponent for very small path
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_P_6, FR_X, FR_P_5 // poly_lo for EXPL_SMALL path
+ add GR_temp = 0x1,r0 // For tiny signif if small path
}
;;
//
-// Load single T1
-// Load single T2
-// W_1_p1 = W_1 + 1
-//
-{ .mmi
-(p0) ldfe FR_A_3 = [GR_Table_Ptr],16 ;;
-//
-// Load A_3
-// if k > big_pos_exp, set p14 and Safe=False
-//
-(p0) ldfe FR_A_2 = [GR_Table_Ptr],16
-(p0) extr.u GR_M1 = GR_N_fix, 6, 6
-}
-{ .mmi
- nop.m 999 ;;
-(p0) shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr
-//
-// float_N = fcvt.xf(N)
-// N_fix = significand of N
-// Create pointer to T2 table
-//
-(p0) extr.u GR_M2 = GR_N_fix, 0, 6
-}
-//
-// r = r + X_cor
-// Adjust W1_ptr by x 8 for double-precision values
-// Adjust W2_ptr by x 8 for double-precision values
-// Adjust Table_ptr by Expo_Rangex16
+// If expo_X < -6 goto exp_small
//
{ .mmi
-(p0) shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr ;;
-(p0) ldfd FR_W1 = [GR_W1_ptr],0
-(p0) shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr
+ getf.sig GR_N_fix = FR_N
+ ldfe FR_A_3 = [GR_Table_Ptr],16 // Load A_3
+ and GR_Expo_X = GR_Expo_X, GR_exp_mask // Get exponent of X
}
-//
-// Load ptr to A's
-//
+;;
+
{ .mfi
-(p0) ldfs FR_T1 = [GR_T1_ptr],0
-(p0) fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X
-(p0) shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr ;;
+ ldfe FR_A_2 = [GR_Table_Ptr],16 // Load A_2
+ nop.f 999
+ sub GR_Expo_X = GR_Expo_X, GR_exp_bias // Get true exponent of X
}
-{ .mmi
-(p0) ldfd FR_W2 = [GR_W2_ptr],0
-(p0) ldfs FR_T2 = [GR_T2_ptr],0
+;;
+
//
-// r = x - L_hi * float_N
-// M2 = extr.u(N_fix,0,6)
-// M1 = extr.u(N_fix,6,6)
+// If -6 > Expo_X, set P9 and branch
//
-(p0) extr GR_k = GR_N_fix, 12, 52 ;;
+{ .mfb
+ cmp.gt p9, p0 = -6, GR_Expo_X
+ fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X // r = X - L_hi * float_N
+(p9) br.cond.spnt EXPL_SMALL // Branch if |X| < 2^-6
}
+;;
+
//
-// Load A_1
-// poly = A_3 * r + A_2
-// rsq = r*r
+// If 14 <= Expo_X, set P10
//
-{ .mii
-(p0) add GR_BIAS_p_k = GR_BIAS, GR_k
-(p0) cmp.gt.unc p14,p15 = GR_k,GR_Big_Pos_Exp ;;
-(p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
+{ .mib
+ cmp.le p10, p0 = 14, GR_Expo_X
+ nop.i 999
+(p10) br.cond.spnt EXPL_HUGE // Branch if |X| >= 2^14
}
+;;
+
//
-// BIAS_p_K = BIAS + k
-// T = T1 * T2
+// Load single T1
+// Load single T2
+// W_1_p1 = W_1 + 1
//
-{ .mfi
-(p0) setf.exp FR_Scale = GR_BIAS_p_k
- nop.f 999
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r
- nop.i 999
+{ .mmi
+ nop.m 999
+ nop.m 999
+ extr.u GR_M1 = GR_N_fix, 6, 6 // Extract index M_1
}
+;;
+
//
-// W = W_1_p1 * W2 + W1
+// M2 = extr.u(N_fix,0,6)
//
-{ .mfi
-(p0) ldfe FR_A_1 = [GR_Table_Ptr],16
- nop.f 999
- nop.i 999 ;;
+{ .mmi
+ shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr // Point to W1
+ shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr // Point to T1
+ extr.u GR_M2 = GR_N_fix, 0, 6 // Extract index M_2
}
-{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_W_1_p1 = FR_W1, f1
- nop.i 999 ;;
+;;
+
+// N_fix is only correct up to 50 bits because of our right shift technique.
+// Actually in the normal path we will have restricted K to about 14 bits.
+// Somewhat arbitrarily we extract 32 bits.
+{ .mmi
+ ldfd FR_W1 = [GR_W1_ptr]
+ shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr // Point to W2
+ extr GR_k = GR_N_fix, 12, 32 // Extract k
}
+;;
+
{ .mfi
- nop.m 999
-//
-// k = extr.u(N_fix,0,6)
-// r = r - N * L_lo
-// Load ptr to Table of exponent thresholds.
-//
-(p0) fadd.s1 FR_r = FR_r, FR_X_cor
- nop.i 999
+ ldfs FR_T1 = [GR_T1_ptr]
+ fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r
+ shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr // Point to T2
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 FR_T = FR_T1, FR_T2
- nop.i 999 ;;
+ add GR_exp_bias_p_k = GR_exp_bias, GR_k
+ nop.f 999
+ cmp.gt p14,p15 = GR_k,GR_Big_Pos_Exp
}
-{ .mfi
- nop.m 999
+;;
+
//
-// if k < big_neg_exp, set p14 and Safe=False
-// Load A_2
+// if k < big_neg_exp, set p14 and Safe=False
//
-(p0) fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1
- nop.i 999 ;;
+{ .mmi
+ ldfs FR_T2 = [GR_T2_ptr]
+(p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
+ nop.i 999
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2
- nop.i 999
+;;
+
+{ .mmi
+ setf.exp FR_Scale = GR_exp_bias_p_k
+ ldfd FR_W2 = [GR_W2_ptr]
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmpy.s1 FR_rsq = FR_r, FR_r
- nop.i 999 ;;
+ ldfe FR_A_1 = [GR_Table_Ptr],16
+ fadd.s1 FR_r = FR_r, FR_X_cor
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) mov FR_Y_hi = FR_T
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 FR_W_1_p1 = FR_W1, f1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Scale = set_exp(BIAS_p_k)
-// poly = r * poly + A_1
-//
-(p0) fadd.s1 FR_Wp1 = FR_W, f1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly = FR_r, FR_poly, FR_A_1
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly = FR_rsq, FR_poly,FR_r
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_T = FR_T1, FR_T2
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Wp1 = W + 1
-// poly = rsq * poly + rk
-//
-(p0) fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-//
-// Y_lo = poly * Wp1 + W
-// Y_hi = T
-//
-(p0) fmpy.s1 FR_Y_lo = FR_Y_lo, FR_T
-//
-// Y_lo = T * Y_lo
-//
-(p0) br.cond.sptk L(EXPL_RETURN) ;;
+ nop.m 999
+ fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1
+ nop.i 999
}
+;;
-L(EXPL_SMALL):
-
-//
-// r4 = rsq * rsq
-//
-
-{ .mmi
+{ .mfi
nop.m 999
-(p0) addl GR_Table_Ptr1 = @ltoff(Constants_exp_64_P), gp
+ fma.s1 FR_TMP1 = FR_Scale, FR_Sgn, f0
nop.i 999
}
;;
-{ .mmi
- ld8 GR_Table_Ptr1 = [GR_Table_Ptr1]
+{ .mfi
nop.m 999
+ fma.s1 FR_poly = FR_r, FR_poly, FR_A_1
nop.i 999
}
;;
-{ .mmf
- nop.m 999
-(p0) ldfe FR_P_6 = [GR_Table_Ptr1],16
-//
-// Return
-//
-(p0) fadd.s1 FR_r = FR_X,f0 ;;
+{ .mfi
+ nop.m 999
+ fma.s1 FR_TMP2 = FR_T, f1, f0 // TMP2 = Y_hi = T
+ nop.i 999
}
+;;
-{ .mmi
+{ .mfi
nop.m 999
-(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Exponents#), gp
+ fadd.s1 FR_Wp1 = FR_W, f1
nop.i 999
}
;;
-{ .mmi
- ld8 GR_Table_Ptr = [GR_Table_Ptr]
-(p0) ldfe FR_P_5 = [GR_Table_Ptr1],16
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_rsq, FR_poly,FR_r
nop.i 999
}
;;
-//
-// Is input very small?
-// Load P_5
-//
-{ .mii
-(p0) ldfe FR_P_4 = [GR_Table_Ptr1],16
-(p0) add GR_Table_Ptr = 0x040,GR_Table_Ptr ;;
-(p0) shladd GR_Table_Ptr = GR_Expo_Range,3,GR_Table_Ptr ;;
-}
-{ .mmb
-(p0) ldfe FR_P_3 = [GR_Table_Ptr1],16
-//
-// Adjust ptr.
-//
-(p0) ld8 GR_vsm_expo = [GR_Table_Ptr],0
- nop.b 999 ;;
-}
{ .mfi
- nop.m 999
-//
-// r = X (don't seem to need X_Cor)
-// Load the threshold exponents
-//
-(p0) fmpy.s1 FR_rsq = FR_r, FR_r
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_Tscale = FR_T, FR_TMP1, f0 // Scale * Sgn * T
+ nop.i 999
}
-//
-// Load the negative integer
-// Load P_5
-//
{ .mfi
-(p0) cmp.lt.unc p12, p0 = GR_Expo_X, GR_vsm_expo
- nop.f 999
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W
+ nop.i 999
}
+;;
+
{ .mfb
- nop.m 999
-//
-// rsq = r * r
-// Offset into exponents
-//
-(p0) fmpy.s1 FR_r4 = FR_rsq, FR_rsq
-(p12) br.cond.spnt L(EXPL_VERY_SMALL) ;;
+ nop.m 999
+ fmpy.s1 FR_TMP3 = FR_Y_lo, FR_Tscale
+ br.cond.sptk POWL_64_SHARED
}
-{ .mfi
-(p0) ldfe FR_P_2 = [GR_Table_Ptr1],16
-//
-// Load p4,p3,p2,p1
-//
-(p0) fma.s1 FR_poly_lo = FR_P_6, FR_r, FR_P_5
+;;
+
+
+EXPL_SMALL:
+// Here if |ylogx| < 2^-6
//
-// Y_lo = r4 * poly_lo + poly_hi
-// Scale = 1.0
+// Begin creating lsb to perturb final result
//
-(p0) add GR_temp = 0x1,r0 ;;
+{ .mfi
+ setf.sig FR_temp = GR_temp
+ fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_4
+ cmp.lt p12, p0 = GR_Expo_X, GR_vsm_expo // Test |ylogx| < 2^-70
}
-{ .mmf
- nop.m 999
-(p0) ldfe FR_P_1 = [GR_Table_Ptr1],0
-(p0) mov FR_Scale = f1
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_hi = FR_P_2, FR_X, FR_P_1
+ nop.i 999
}
-//
-// Begin creating lsb to perturb final result
-//
+;;
+
{ .mfi
-(p0) setf.sig FR_temp = GR_temp
-(p0) mov FR_Y_hi = f1
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_TMP2 = f1, f1
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// poly_lo = p_5 + p_6 * r
-// poly_hi = p_1 + p_2 * r
-//
-(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_4
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_TMP1 = FR_Sgn, f1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly_lo = p_4 + poly_lo * r
-// poly_hi = r + poly_hi * rsq
-//
-(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_3
- nop.i 999
+ nop.m 999
+ fmpy.s1 FR_r4 = FR_rsq, FR_rsq
+(p12) cmp.eq p15, p0 = r0, r0 // Set safe if |ylogx| < 2^-70
}
+{ .mfb
+ nop.m 999
+(p12) fmpy.s1 FR_TMP3 = FR_Sgn, FR_X
+(p12) br.cond.spnt POWL_64_SHARED // Branch if |ylogx| < 2^-70
+}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_hi = FR_P_2, FR_r, FR_P_1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_3
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_r
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_X
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly_lo = p_3 + poly_lo * r
-// Y_hi = 1, always
-//
-(p0) fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Set lsb in fp register
-//
-(p0) for FR_temp = FR_Y_lo,FR_temp
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_TMP3 = FR_Y_lo, FR_TMP1 // Add sign info
+ nop.i 999
}
-{ .mfb
- nop.m 999
+;;
+
//
// Toggle on last bit of Y_lo
-//
-(p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_temp
-//
// Set lsb of Y_lo to 1
//
-(p0) br.cond.sptk L(EXPL_RETURN) ;;
-}
-L(EXPL_VERY_SMALL):
{ .mfi
- nop.m 999
-(p0) mov FR_Y_lo = FR_r
-(p0) cmp.eq.unc p15, p0 = r0, r0
+ nop.m 999
+ for FR_temp = FR_Y_lo,FR_temp
+ nop.i 999
}
-{ .mfi
- nop.m 999
-(p0) mov FR_Scale = f1
- nop.i 999
-};;
+;;
+
{ .mfb
- nop.m 999
-(p0) mov FR_Y_hi = f1
-//
-// If flag_not_1,
-// Y_hi = 1.0
-// Y_lo = X + X_cor
-// PR_Safe = true
-//
-(p0) br.cond.sptk L(EXPL_RETURN) ;;
+ nop.m 999
+ fmerge.se FR_TMP3 = FR_TMP3,FR_temp
+ br.cond.sptk POWL_64_SHARED
}
-L(EXPL_HUGE):
+;;
+
+
+EXPL_HUGE:
+// Here if |ylogx| >= 2^14
{ .mfi
- nop.m 999
-//
-// Return for flag=2
-//
-(p0) fcmp.gt.unc.s1 p12, p13 = FR_X, f0
-(p0) cmp.eq.unc p14, p15 = r0, r0 ;;
+ mov GR_temp = 0x0A1DC // If X < 0, exponent -24100
+ fcmp.gt.s1 p12, p13 = FR_X, f0 // Test X > 0
+ cmp.eq p14, p15 = r0, r0 // Set Safe to false
}
-{ .mlx
- nop.m 999
-//
-// Set Safe to false
-// Is x > 0
-//
-(p12) movl GR_Mask = 0x15DC0 ;;
-}
-{ .mlx
-(p12) setf.exp FR_Y_hi = GR_Mask
-(p13) movl GR_Mask = 0xA240 ;;
+;;
+
+{ .mmi
+(p12) mov GR_Mask = 0x15DC0 // If X > 0, exponent +24000
+(p13) mov GR_Mask = 0x0A240 // If X < 0, exponent -24000
+ nop.i 999
}
-{ .mlx
-(p13) setf.exp FR_Y_hi = GR_Mask
-//
-// x > 0: Create mask for Y_hi = 2**(24,000)
-// x <= 0: Create mask for Y_hi = 2**(-24,000)
-//
-(p13) movl GR_temp = 0xA1DC ;;
+;;
+
+{ .mmf
+ setf.exp FR_TMP2 = GR_Mask // Form Y_hi = TMP2
+(p13) setf.exp FR_Y_lo = GR_temp // If X < 0, Y_lo = 2^-24100
+(p12) mov FR_Y_lo = f1 // IF X > 0, Y_lo = 1.0
}
+;;
+
{ .mfi
-(p13) setf.exp FR_Y_lo = GR_temp
-//
-// x < =0: Create mask for 2**(-24,100)
-// x <= 0: Y_lo = w**(-24,100)
-//
-(p12) mov FR_Y_lo = f1
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_TMP1 = FR_TMP2, FR_Sgn // TMP1 = Y_hi * Sgn
+ nop.i 999
}
-{ .mfi
- nop.m 999
-(p12) mov FR_Scale = FR_Y_hi
- nop.i 999 ;;
+;;
+
+{ .mfb
+ nop.m 999
+ fmpy.s1 FR_TMP3 = FR_Y_lo,FR_TMP1 // TMP3 = Y_lo * (Y_hi * Sgn)
+ br.cond.sptk POWL_64_SHARED
}
-{ .mfi
- nop.m 999
+;;
+
+POWL_Y_ALMOST_1:
+// Here if delta = |y-1| < 2^-50
//
-// x > 0: Y_lo = 1.0
-// x > 0: Scale = 2**(24,000)
+// x**(1 + delta) = x * e^(ln(x)*delta) ~= x * (1 + ln(x)*delta)
//
-(p13) mov FR_Scale = FR_Y_hi
- nop.i 999 ;;
-}
-L(EXPL_RETURN):
+// Computation will be safe for 2^-16381 <= x < 2^16383
+
{ .mfi
- nop.m 999
-//
-// Scale = 2**(24,000)
-//
-//
-// exp(y *ln(x)) almost complete
-// FR_Scale is Scale
-// f34 is Z_hi
-// f35 is Z_lo
-//
-(p0) fmpy.s1 FR_Sgn = FR_Scale, FR_Sgn
- nop.i 999 ;;
+ mov GR_exp_ynear1_oflow = 0xffff + 16383
+ fma.s1 FR_TMP1 = FR_Input_X,FR_Delta,f0
+ and GR_exp_x = GR_exp_mask, GR_signexp_x
}
+;;
+
{ .mfi
- nop.m 999
-//
-// sgn * scale
-//
-(p0) fmpy.s1 FR_Y_lo = FR_Y_lo,FR_Sgn
- nop.i 999 ;;
+ cmp.lt p15, p14 = GR_exp_x, GR_exp_ynear1_oflow
+ fma.s1 FR_TMP2 = FR_logx_hi,f1,FR_X_lo
+ mov GR_exp_ynear1_uflow = 0xffff - 16381
}
+;;
+
{ .mfb
- nop.m 999
-//
-// Z_lo * (sgn * scale)
+(p15) cmp.ge p15, p14 = GR_exp_x, GR_exp_ynear1_uflow
+ fma.s1 FR_TMP3 = FR_Input_X,f1,f0
+ br.cond.sptk POWL_64_SHARED
+};;
+
+POWL_64_SQUARE:
//
-(p0) fma.s0 FR_Result = FR_Y_hi, FR_Sgn, FR_Y_lo
+// Here if x not zero and y=2.
//
-// Z_hi * (sgn * scale) + Z_lo
+// Setup for multipath code
//
-(p15) br.cond.sptk L(POWL_64_RETURN) ;;
-}
{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x01
- nop.i 999
-}
-{ .mlx
- nop.m 999
-//
-// Z_hi * (sgn * scale) + Z_lo with wre & td
-// Z_hi * (sgn * scale) + Z_lo with fz & td
-//
-(p0) movl GR_T1_ptr = 0x00000000013FFF ;;
+ mov GR_exp_square_oflow = 0xffff + 8192 // Exponent where x*x overflows
+ fmerge.se FR_TMP1 = FR_Input_X, FR_Input_X
+ and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s3 FR_Result_small = FR_Y_hi, FR_Sgn, FR_Y_lo
- nop.i 999
+ cmp.lt p15, p14 = GR_exp_x, GR_exp_square_oflow // Decide safe/unsafe
+ fmerge.se FR_TMP2 = FR_Input_X, FR_Input_X
+ mov GR_exp_square_uflow = 0xffff - 8191 // Exponent where x*x underflows
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x40
- nop.i 999 ;;
+(p15) cmp.ge p15, p14 = GR_exp_x, GR_exp_square_uflow // Decide safe/unsafe
+ fma.s1 FR_TMP3 = f0,f0,f0
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// Return if no danger of over of underflow.
+// This is the shared path that will set overflow and underflow.
//
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
-}
-{ .mfi
- nop.m 999
+POWL_64_SHARED:
+
//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
+// Return if no danger of over or underflow.
//
-(p0) fma.s2 FR_Result_big = FR_Y_hi, FR_Sgn, FR_Y_lo
- nop.i 999 ;;
+{ .mfb
+ nop.m 999
+ fma.s0 FR_Result = FR_TMP1, FR_TMP2, FR_TMP3
+(p15) br.ret.sptk b0 // Main path return if certain no over/underflow
}
+;;
+
//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S2 user supplied status + FZ + TD (Underflows)
//
//
// If (Safe) is true, then
@@ -2430,973 +2021,741 @@ L(EXPL_RETURN):
// No overflow or underflow here, but perhaps inexact.
// Return
// Else
-// Determine if overflow or underflow was raised.
-// Fetch +/- overflow threshold for IEEE single, double,
-// double extended
-//
-{ .mfi
-(p0) setf.exp FR_Big = GR_T1_ptr
-(p0) fsetc.s2 0x7F,0x40
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = FR_Result_small, 0x00F
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fmerge.ns FR_NBig = FR_Big, FR_Big
- nop.i 999
-}
-{ .mfi
- nop.m 999
-//
-// Create largest double exponent + 1.
-// Create smallest double exponent - 1.
-// Identify denormals
-//
-(p0) fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
- nop.i 999 ;;
-}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// fcmp: resultS2 <= - overflow threshold
-// fclass: resultS3 is denorm/unorm/0
-//
-(p8) mov GR_Parameter_TAG = 18 ;;
-}
-{ .mfb
- nop.m 999
-//
-// fcmp: resultS2 >= + overflow threshold
-//
-(p0) fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
-(p8) br.cond.spnt __libm_error_region ;;
-}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) mov GR_Parameter_TAG = 18
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt __libm_error_region ;;
-}
-//
-// Report that pow overflowed - either +Inf, or -Inf
-//
-{ .mmb
-(p11) mov GR_Parameter_TAG = 19
- nop.m 999
-(p11) br.cond.spnt __libm_error_region ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Report that pow underflowed
-//
-(p0) br.cond.sptk L(POWL_64_RETURN) ;;
-}
-
+// Determine if overflow or underflow was raised.
+// Fetch +/- overflow threshold for IEEE double extended
-L(POWL_64_SQUARE):
-// Here if x not zero and y=2.
-// Must call __libm_error_support for overflow or underflow
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
-//
{ .mfi
- nop.m 999
-(p0) fma.s0 FR_Result = FR_Input_X, FR_Input_X, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x01
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_T1_ptr = 0x00000000013FFF ;;
-}
-{ .mfi
- nop.m 999
-(p0) fma.s3 FR_Result_small = FR_Input_X, FR_Input_X, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x40
- nop.i 999 ;;
+ nop.m 999
+ fsetc.s2 0x7F,0x41 // For underflow test, set S2=User+TD+FTZ
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Return if no danger of over of underflow.
-//
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
+ nop.m 999
+ fma.s2 FR_Result_small = FR_TMP1, FR_TMP2, FR_TMP3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s2 FR_Result_big = FR_Input_X, FR_Input_X, f0
- nop.i 999 ;;
+ nop.m 999
+ fsetc.s2 0x7F,0x42 // For overflow test, set S2=User+TD+WRE
+ nop.i 999
}
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
-//
-//
-// If (Safe) is true, then
-// Compute result using user supplied status field.
-// No overflow or underflow here, but perhaps inexact.
-// Return
-// Else
-// Determine if overflow or underflow was raised.
-// Fetch +/- overflow threshold for IEEE single, double,
-// double extended
-//
+;;
+
{ .mfi
-(p0) setf.exp FR_Big = GR_T1_ptr
-(p0) fsetc.s2 0x7F,0x40
- nop.i 999 ;;
+ nop.m 999
+ fma.s2 FR_Result_big = FR_TMP1, FR_TMP2,FR_TMP3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = FR_Result_small, 0x00F
- nop.i 999 ;;
+ nop.m 999
+ fsetc.s2 0x7F,0x40 // Reset S2=User
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmerge.ns FR_NBig = FR_Big, FR_Big
- nop.i 999
+ nop.m 999
+ fclass.m p11, p0 = FR_Result_small, 0x00F // Test small result unorm/zero
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Create largest double exponent + 1.
-// Create smallest double exponent - 1.
-// Identify denormals
-//
-(p0) fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
- nop.i 999 ;;
-}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// fcmp: resultS2 <= - overflow threshold
-// fclass: resultS3 is denorm/unorm/0
-//
-(p8) mov GR_Parameter_TAG = 18 ;;
+ nop.m 999
+ fcmp.ge.s1 p8, p0 = FR_Result_big , FR_Big // Test >= + oflow threshold
+ nop.i 999
}
+;;
+
{ .mfb
- nop.m 999
-//
-// fcmp: resultS2 >= + overflow threshold
-//
-(p0) fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
-(p8) br.cond.spnt __libm_error_region ;;
-}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) mov GR_Parameter_TAG = 18
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt __libm_error_region ;;
-}
-//
-// Report that pow overflowed - either +Inf, or -Inf
-//
-{ .mmb
-(p11) mov GR_Parameter_TAG = 19
- nop.m 999
-(p11) br.cond.spnt __libm_error_region ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Report that pow underflowed
-//
-(p0) br.cond.sptk L(POWL_64_RETURN) ;;
+(p11) mov GR_Parameter_TAG = 19 // Set tag for underflow
+ fcmp.le.s1 p9, p0 = FR_Result_big, FR_NBig // Test <= - oflow threshold
+(p11) br.cond.spnt __libm_error_region // Branch if pow underflowed
}
+;;
+{ .mfb
+(p8) mov GR_Parameter_TAG = 18 // Set tag for overflow
+ nop.f 999
+(p8) br.cond.spnt __libm_error_region // Branch if pow +overflow
+}
+;;
+{ .mbb
+(p9) mov GR_Parameter_TAG = 18 // Set tag for overflow
+(p9) br.cond.spnt __libm_error_region // Branch if pow -overflow
+ br.ret.sptk b0 // Branch if result really ok
+}
+;;
-L(POWL_64_SPECIAL):
+POWL_64_SPECIAL:
+// Here if x or y is NatVal, nan, inf, or zero
{ .mfi
- nop.m 999
-(p0) fcmp.eq.s1 p15, p0 = FR_Input_X, f1 // Is x=+1
- nop.i 999 ;;
+ nop.m 999
+ fcmp.eq.s1 p15, p0 = FR_Input_X, f1 // Test x=+1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p14, p0 = FR_Input_Y, 0x023
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p8, p0 = FR_Input_X, 0x143 // Test x natval, snan
+ nop.i 999
}
+;;
{ .mfi
- nop.m 999
-(p15) fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
- nop.i 999
+ nop.m 999
+(p15) fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
+ nop.i 999
}
{ .mfb
- nop.m 999
-(p15) fmpy.s0 FR_Result = f1,f1 // If x=1, result=1
-(p15) br.cond.spnt L(POWL_64_RETURN) ;; // Exit if x=1
+ nop.m 999
+(p15) fmpy.s0 FR_Result = f1,f1 // If x=1, result=1
+(p15) br.ret.spnt b0 // Exit if x=1
}
+;;
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p13, p0 = FR_Input_X, 0x023
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p8, p0 = FR_Input_X, 0x143
- nop.i 999
+ nop.m 999
+ fclass.m p6, p0 = FR_Input_Y, 0x007 // Test y zero
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p9, p0 = FR_Input_Y, 0x143
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p9, p0 = FR_Input_Y, 0x143 // Test y natval, snan
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p10, p0 = FR_Input_X, 0x083
- nop.i 999
+ nop.m 999
+ fclass.m p10, p0 = FR_Input_X, 0x083 // Test x qnan
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = FR_Input_Y, 0x083
- nop.i 999 ;;
+ nop.m 999
+(p8) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If x=snan, result=qnan
+(p6) cmp.ne p8,p0 = r0,r0 // Don't exit if x=snan, y=0 ==> result=+1
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p0 = FR_Input_Y, 0x007
- nop.i 999
+ nop.m 999
+(p6) fclass.m.unc p15, p0 = FR_Input_X,0x007 // Test x=0, y=0
+ nop.i 999
}
-{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p7, p0 = FR_Input_Y, f1
- nop.i 999 ;;
+{ .mfb
+ nop.m 999
+(p9) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If y=snan, result=qnan
+(p8) br.ret.spnt b0 // Exit if x=snan, y not 0,
+ // result=qnan
}
+;;
+
{ .mfi
- nop.m 999
-//
-// set p13 if x +/- Inf
-// set p14 if y +/- Inf
-// set p8 if x Natval or +/-SNaN
-// set p9 if y Natval or +/-SNaN
-// set p10 if x QNaN
-// set p11 if y QNaNs
-// set p6 if y is +/-0
-// set p7 if y is 1
-//
-(p8) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X
-(p6) cmp.ne p8,p0 = r0,r0 ;; // Don't exit if x=snan, y=0 ==> result=+1
-}
-{ .mfb
- nop.m 999
-(p9) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X
-(p8) br.cond.spnt L(POWL_64_RETURN) ;;
+ nop.m 999
+ fcmp.eq.s1 p7, p0 = FR_Input_Y, f1 // Test y +1.0
+ nop.i 999
}
{ .mfb
- nop.m 999
-(p10) fmpy.s0 FR_Result = FR_Input_X, f0
-(p9) br.cond.spnt L(POWL_64_RETURN) ;;
-}
-{ .mfi
- nop.m 999
-//
-// Produce result for SNaN and NatVals and return
-//
-(p6) fclass.m.unc p15, p0 = FR_Input_X,0x007
- nop.i 999
+ nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_X, f0 // If x=qnan, result=qnan
+(p9) br.ret.spnt b0 // Exit if y=snan, result=qnan
}
+;;
+
{ .mfi
- nop.m 999
-//
-// If Y +/- 0, set p15 if x +/- 0
-//
-(p6) fclass.m.unc p8, p0 = FR_Input_X,0x0C3
- nop.i 999 ;;
+ nop.m 999
+(p6) fclass.m.unc p8, p0 = FR_Input_X,0x0C3 // Test x=nan, y=0
+ nop.i 999
}
+;;
{ .mfi
- nop.m 999
-(p6) fcmp.eq.s0 p9,p0 = FR_Input_X, f0 // If y=0, flag if x denormal
- nop.i 999
+ nop.m 999
+(p6) fcmp.eq.s0 p9,p0 = FR_Input_X, f0 // If y=0, flag if x denormal
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p6) fadd.s0 FR_Result = f1, f0
- nop.i 999 ;;
+ nop.m 999
+(p6) fadd.s0 FR_Result = f1, f0 // If y=0, result=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Set p8 if y = +/-0 and X is a QNaN/SNaN
-// If y = +/-0, let result = 1.0
-//
-(p7) fmpy.s0 FR_Result = FR_Input_X,f1
-//
-// If y == 1, result = x * 1
-//
-(p15) mov GR_Parameter_TAG = 20
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p15) br.cond.spnt __libm_error_region ;;
-}
-{ .mib
- nop.m 999
-//
-// If x and y are both zero, result = 1.0 and call error
-// support.
-//
-(p8) mov GR_Parameter_TAG = 23
-(p8) br.cond.spnt __libm_error_region ;;
+ nop.m 999
+ fclass.m p11, p0 = FR_Input_Y, 0x083 // Test y qnan
+ nop.i 999
}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// If y = +/-0 and x is a QNaN, result = 1.0 and call error
-// support.
-//
-(p6) br.cond.spnt L(POWL_64_RETURN) ;;
+{ .mfb
+(p15) mov GR_Parameter_TAG = 20 // Error tag for x=0, y=0
+(p7) fmpy.s0 FR_Result = FR_Input_X,f1 // If y=1, result=x
+(p15) br.cond.spnt __libm_error_region // Branch if x=0, y=0, result=1
}
+;;
-// If x=0, y=-inf, go to the X_IS_ZERO path
{ .mfb
- nop.m 999
-(p14) fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0
-(p7) br.cond.spnt L(POWL_64_RETURN) ;;
+(p8) mov GR_Parameter_TAG = 23 // Error tag for x=nan, y=0
+ fclass.m p14, p0 = FR_Input_Y, 0x023 // Test y inf
+(p8) br.cond.spnt __libm_error_region // Branch if x=nan, y=0,
+ // result=1
}
+;;
-{ .mfi
- nop.m 999
-//
-// Produce all results for x**0 and x**1
-// Let all the result x ** 0 == 1 and return
-// Let all x ** 1 == x and return
-//
-(p10) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
- nop.i 999 ;;
-}
{ .mfb
- nop.m 999
-(p11) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
-(p10) br.cond.spnt L(POWL_64_RETURN) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p11) br.cond.spnt L(POWL_64_RETURN) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Return result for x or y QNaN input with QNaN result
-//
-(p14) br.cond.spnt L(POWL_64_Y_IS_INF) ;;
+ nop.m 999
+ fclass.m p13, p0 = FR_Input_X, 0x023 // Test x inf
+(p6) br.ret.spnt b0 // Exit y=0, x not nan or 0,
+ // result=1
}
-{ .mib
- nop.m 999
- nop.i 999
-(p13) br.cond.spnt L(POWL_64_X_IS_INF) ;;
+;;
+
+{ .mfb
+ nop.m 999
+(p14) fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0 // Test x not 0, y=inf
+(p7) br.ret.spnt b0 // Exit y=1, x not snan,
+ // result=x
}
-L(POWL_64_X_IS_ZERO):
-{ .mmb
-(p0) getf.sig GR_signif_y = FR_Input_Y
-(p0) getf.exp GR_BIASed_exp_y = FR_Input_Y
- nop.b 999 ;;
+;;
+
+{ .mfb
+ nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X // If x=qnan, y not snan,
+ // result=qnan
+(p10) br.ret.spnt b0 // Exit x=qnan, y not snan,
+ // result=qnan
}
-{ .mlx
- nop.m 999
-(p0) movl GR_Mask = 0x1FFFF
+;;
+
+{ .mfb
+ nop.m 999
+(p11) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X // If y=qnan, x not nan or 1,
+ // result=qnan
+(p11) br.ret.spnt b0 // Exit y=qnan, x not nan or 1,
+ // result=qnan
}
-{ .mlx
- nop.m 999
-(p0) movl GR_y_sign = 0x20000 ;;
+;;
+
+{ .mbb
+ nop.m 999
+(p14) br.cond.spnt POWL_64_Y_IS_INF // Branch if y=inf, x not 1 or nan
+(p13) br.cond.spnt POWL_64_X_IS_INF // Branch if x=inf, y not 1 or nan
}
-//
-// Get BIASed exp and significand of y
+;;
+
+
+POWL_64_X_IS_ZERO:
+// Here if x=0, y not nan or 1 or inf or 0
+
+// There is logic starting here to determine if y is an integer when x = 0.
+// If 0 < |y| < 1 then clearly y is not an integer.
+// If |y| > 1, then the significand of y is shifted left by the size of
+// the exponent of y. This preserves the lsb of the integer part + the
+// fractional bits. The lsb of the integer can be tested to determine if
+// the integer is even or odd. The fractional bits can be tested. If zero,
+// then y is an integer.
//
{ .mfi
-(p0) and GR_exp_y = GR_Mask,GR_BIASed_exp_y
- nop.f 999
-(p0) and GR_y_sign = GR_y_sign,GR_BIASed_exp_y
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_BIAS = 0xFFFF ;;
+ and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent of y
+ nop.f 999
+ and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
}
-{ .mfi
-(p0) cmp.lt.unc p9, p8 = GR_exp_y,GR_BIAS
- nop.f 999
+;;
+
//
// Maybe y is < 1 already, so
// can never be an integer.
-// Remove sign bit from exponent.
-//
-(p0) sub GR_exp_y = GR_exp_y,GR_BIAS ;;
-}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// Remove exponent BIAS
//
-(p8) shl GR_exp_y= GR_signif_y,GR_exp_y ;;
-}
{ .mfi
-(p9) or GR_exp_y= 0xF,GR_signif_y
- nop.f 999
- nop.i 999 ;;
+ cmp.lt p9, p8 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1
+ nop.f 999
+ sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent of y
}
-{ .mii
- nop.m 999
+;;
+
//
// Shift significand of y looking for nonzero bits
// For y > 1, shift signif_y exp_y bits to the left
-// For y < 1, turn on 4 low order bits of significand of y
+// For y < 1, turn on 4 low order bits of significand of y
// so that the fraction will always be non-zero
//
-(p0) shl GR_signif_y= GR_exp_y,1 ;;
-(p0) extr.u GR_low_order_bit = GR_exp_y,63,1
+{ .mmi
+(p9) or GR_exp_y= 0xF,GR_signif_y // Force nonzero fraction if y<1
+;;
+ nop.m 999
+(p8) shl GR_exp_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
+ // Wait 4 cycles to use result
+}
+;;
+
+{ .mmi
+ nop.m 999
+;;
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mmi
+ nop.m 999
+;;
+ nop.m 999
+ shl GR_fraction_y= GR_exp_y,1 // Shift left 1 to get fraction
}
+;;
+
//
// Integer part of y shifted off.
// Get y's low even or odd bit - y might not be an int.
//
{ .mii
-(p0) cmp.eq.unc p13,p0 = GR_signif_y, r0
-(p0) cmp.eq.unc p8,p9 = GR_y_sign, r0 ;;
-//
-// Is y an int?
-// Is y positive
-//
-(p13) cmp.ne.unc p13,p0 = GR_low_order_bit, r0 ;;
+ cmp.eq p13,p0 = GR_fraction_y, r0 // Test for y integer
+ cmp.eq p8,p0 = GR_y_sign, r0 // Test for y > 0
+;;
+(p13) tbit.nz.unc p13,p0 = GR_exp_y, 63 // Test if y an odd integer
}
+;;
+
+{ .mfi
+(p13) cmp.eq.unc p13,p14 = GR_y_sign, r0 // Test y pos odd integer
+(p8) fcmp.eq.s0 p12,p0 = FR_Input_Y, f0 // If x=0 and y>0 flag if y denormal
+ nop.i 999
+}
+;;
+
//
-// Is y and int and odd?
+// Return +/-0 when x=+/-0 and y is positive odd integer
//
{ .mfb
-(p13) cmp.eq.unc p13,p14 = GR_y_sign, r0
-(p8) fcmp.eq.s0 p12,p0 = FR_Input_Y, f0 // If x=0 and y>0 flag if y denormal
- nop.b 999 ;;
+ nop.m 999
+(p13) mov FR_Result = FR_Input_X // If x=0, y pos odd int, result=x
+(p13) br.ret.spnt b0 // Exit x=0, y pos odd int, result=x
}
-{ .mfb
- nop.m 999
+;;
+
//
-// Is y and int and odd and positive?
+// Return +/-inf when x=+/-0 and y is negative odd int
//
-(p13) mov FR_Result = FR_Input_X
-(p13) br.cond.sptk L(POWL_64_RETURN) ;;
+{ .mfb
+(p14) mov GR_Parameter_TAG = 21
+(p14) frcpa.s0 FR_Result, p0 = f1, FR_Input_X // Result +-inf, set Z flag
+(p14) br.cond.spnt __libm_error_region
}
-{ .mfi
- nop.m 999
+;;
+
//
-// Return +/-0 when x=+/-0 and y is and odd pos. int
+// Return +0 when x=+/-0 and y positive and not an odd integer
//
-(p14) frcpa.s0 FR_Result, p10 = f1, FR_Input_X
-(p14) mov GR_Parameter_TAG = 21
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p14) br.cond.spnt __libm_error_region ;;
+{ .mfb
+ nop.m 999
+(p8) mov FR_Result = f0 // If x=0, y>0 and not odd integer, result=+0
+(p8) br.ret.sptk b0 // Exit x=0, y>0 and not odd integer, result=+0
}
+;;
-{ .mfb
- nop.m 999
//
-// Return +/-0 when x=+/-Inf and y is and odd neg int
-// and raise dz exception
+// Return +inf when x=+/-0 and y is negative and not odd int
//
-(p8) mov FR_Result = f0
-(p8) br.cond.sptk L(POWL_64_RETURN) ;;
+{ .mfb
+ mov GR_Parameter_TAG = 21
+ frcpa.s0 FR_Result, p10 = f1,f0 // Result +inf, raise Z flag
+ br.cond.sptk __libm_error_region
}
-{ .mfi
- nop.m 999
+;;
+
+
+POWL_64_X_IS_INF:
//
-// Return +0 when x=+/-0 and y > 0 and not odd.
+// Here if x=inf, y not 1 or nan
//
-(p9) frcpa.s0 FR_Result, p10 = f1,f0
-(p9) mov GR_Parameter_TAG = 21
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.sptk __libm_error_region ;;
-}
-L(POWL_64_X_IS_INF):
{ .mfi
-(p0) getf.exp GR_exp_y = FR_Input_Y
-(p0) fclass.m.unc p13, p0 = FR_Input_X,0x022
-(p0) mov GR_Mask = 0x1FFFF ;;
+ and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent y
+ fclass.m p13, p0 = FR_Input_X,0x022 // Test x=-inf
+ nop.i 999
}
+;;
{ .mfi
-(p0) getf.sig GR_signif_y = FR_Input_Y
-(p0) fcmp.eq.s0 p9,p0 = FR_Input_Y, f0 // Flag if y denormal
- nop.i 999 ;;
+ and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
+ fcmp.eq.s0 p9,p0 = FR_Input_Y, f0 // Dummy to set flag if y denorm
+ nop.i 999
}
+;;
//
-// Get exp and significand of y
-// Create exponent mask and sign mask
+// Maybe y is < 1 already, so
+// isn't an int.
//
-{ .mlx
-(p0) and GR_low_order_bit = GR_Mask,GR_exp_y
-(p0) movl GR_BIAS = 0xFFFF
+{ .mfi
+(p13) cmp.lt.unc p9, p8 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1 if x=-inf
+ fclass.m p11, p0 = FR_Input_X,0x021 // Test x=+inf
+ sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent y
}
-{ .mmi
- nop.m 999 ;;
+;;
+
//
-// Remove sign bit from exponent.
+// Shift significand of y looking for nonzero bits
+// For y > 1, shift signif_y exp_y bits to the left
+// For y < 1, turn on 4 low order bits of significand of y
+// so that the fraction will always be non-zero
//
-(p0) cmp.lt.unc p9, p8 = GR_low_order_bit,GR_BIAS
+{ .mmi
+(p9) or GR_exp_y= 0xF,GR_signif_y // Force nonzero fraction if y<1
+;;
+(p11) cmp.eq.unc p14,p12 = GR_y_sign, r0 // Test x=+inf, y>0
+(p8) shl GR_exp_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
+ // Wait 4 cycles to use result
+}
+;;
+
//
-// Maybe y is < 1 already, so
-// isn't an int.
+// Return +inf for x=+inf, y > 0
+// Return +0 for x=+inf, y < 0
//
-(p0) sub GR_low_order_bit = GR_low_order_bit,GR_BIAS
+{ .mfi
+ nop.m 999
+(p12) mov FR_Result = f0 // If x=+inf, y<0, result=+0
+ nop.i 999
}
-{ .mlx
- nop.m 999
-(p0) movl GR_sign_mask = 0x20000 ;;
+{ .mfb
+ nop.m 999
+(p14) fma.s0 FR_Result = FR_Input_X,f1,f0 // If x=+inf, y>0, result=+inf
+(p11) br.ret.sptk b0 // Exit x=+inf
}
-{ .mfi
-(p0) and GR_sign_mask = GR_sign_mask,GR_exp_y
+;;
+
//
-// Return +Inf when x=+/-0 and y < 0 and not odd and raise
-// divide-by-zero exception.
+// Here only if x=-inf. Wait until can use result of shl...
//
-(p0) fclass.m.unc p11, p0 = FR_Input_X,0x021
- nop.i 999 ;;
-}
{ .mmi
- nop.m 999 ;;
-//
-// Is shift off integer part of y.
-// Get y's even or odd bit - y might not be an int.
-//
-(p11) cmp.eq.unc p11,p12 = GR_sign_mask, r0
-//
-// Remove exponent BIAS
-//
-(p8) shl GR_exp_y = GR_signif_y,GR_low_order_bit ;;
+ nop.m 999
+;;
+ nop.m 999
+ nop.i 999
}
+;;
+
{ .mfi
-(p9) or GR_exp_y = 0xF,GR_signif_y
-//
-// Is y positive or negative when x is +Inf?
-// Is y and int when x = -Inf
-//
-(p11) mov FR_Result = FR_Input_X
- nop.i 999 ;;
+ cmp.eq p8,p9 = GR_y_sign, r0 // Test y pos
+ nop.f 999
+ shl GR_fraction_y = GR_exp_y,1 // Shift left 1 to get fraction
}
-{ .mfi
- nop.m 999
-(p12) mov FR_Result = f0
- nop.i 999 ;;
+;;
+
+{ .mmi
+ cmp.eq p13,p0 = GR_fraction_y, r0 // Test y integer
+;;
+ nop.m 999
+(p13) tbit.nz.unc p13,p0 = GR_exp_y, 63 // Test y odd integer
}
-{ .mii
- nop.m 999
+;;
+
//
-// Shift signficand looking for nonzero bits
-// For y non-ints, upset the significand.
+// Is y even or odd?
//
-(p0) shl GR_signif_y = GR_exp_y,1 ;;
-(p13) cmp.eq.unc p13,p0 = GR_signif_y, r0
-}
{ .mii
- nop.m 999
-(p0) extr.u GR_low_order_bit = GR_exp_y,63,1 ;;
-(p13) cmp.ne.unc p13,p0 = GR_low_order_bit, r0
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p11) br.cond.sptk L(POWL_64_RETURN) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p12) br.cond.sptk L(POWL_64_RETURN) ;;
+(p13) cmp.eq.unc p14,p10 = GR_y_sign, r0 // Test x=-inf, y pos odd int
+(p13) cmp.ne.and p8,p9 = r0,r0 // If y odd int, turn off p8,p9
+ nop.i 999
}
+;;
+
//
-// Return Inf for y > 0
-// Return +0 for y < 0
-// Is y even or odd?
+// Return -0 for x = -inf and y < 0 and odd int.
+// Return -Inf for x = -inf and y > 0 and odd int.
//
-{ .mii
-(p13) cmp.eq.unc p13,p10 = GR_sign_mask, r0
-(p0) cmp.eq.unc p8,p9 = GR_sign_mask, r0 ;;
- nop.i 999
+{ .mfi
+ nop.m 999
+(p10) fmerge.ns FR_Result = f0, f0 // If x=-inf, y neg odd int, result=-0
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
+(p14) fmpy.s0 FR_Result = FR_Input_X,f1 // If x=-inf, y pos odd int, result=-inf
+ nop.i 999
+}
+;;
+
//
-// For x = -inf, y is and int, positive
-// and odd
-// Is y positive in general?
+// Return Inf for x = -inf and y > 0 not an odd int.
+// Return +0 for x = -inf and y < 0 not an odd int.
//
-(p13) mov FR_Result = FR_Input_X
- nop.i 999 ;;
+.pred.rel "mutex",p8,p9
+{ .mfi
+ nop.m 999
+(p8) fmerge.ns FR_Result = FR_Input_X, FR_Input_X // If x=-inf, y>0 not odd int
+ // result=+inf
+ nop.i 999
}
{ .mfb
- nop.m 999
-(p10) fmerge.ns FR_Result = f0, f0
-(p13) br.cond.sptk L(POWL_64_RETURN) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p10) br.cond.sptk L(POWL_64_RETURN) ;;
+ nop.m 999
+(p9) fmpy.s0 FR_Result = f0,f0 // If x=-inf, y<0 not odd int
+ // result=+0
+ br.ret.sptk b0 // Exit for x=-inf
}
-{ .mfi
- nop.m 999
+;;
+
+
+POWL_64_Y_IS_INF:
+// Here if y=inf, x not 1 or nan
//
-// Return -Inf for x = -inf and y > 0 and odd int.
-// Return -0 for x = -inf and y < 0 and odd int.
+// For y = +Inf and |x| < 1 returns 0
+// For y = +Inf and |x| > 1 returns Inf
+// For y = -Inf and |x| < 1 returns Inf
+// For y = -Inf and |x| > 1 returns 0
+// For y = Inf and |x| = 1 returns 1
//
-(p8) fmerge.ns FR_Result = FR_Input_X, FR_Input_X
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p9) mov FR_Result = f0
-(p8) br.cond.sptk L(POWL_64_RETURN) ;;
+{ .mfi
+ nop.m 999
+ fclass.m p8, p0 = FR_Input_Y, 0x021 // Test y=+inf
+ nop.i 999
}
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.sptk L(POWL_64_RETURN) ;;
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p9, p0 = FR_Input_Y, 0x022 // Test y=-inf
+ nop.i 999
}
-L(POWL_64_Y_IS_INF):
+;;
+
{ .mfi
- nop.m 999
-//
-// Return Inf for x = -inf and y > 0 not an odd int.
-// Return +0 for x = -inf and y < 0 and not an odd int.
-//
-(p0) fclass.m.unc p8, p0 = FR_Input_Y, 0x021
- nop.i 999
+ nop.m 999
+ fabs FR_X = FR_Input_X // Form |x|
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p9, p0 = FR_Input_Y, 0x022
- nop.i 999 ;;
+ nop.m 999
+ fcmp.eq.s0 p10,p0 = FR_Input_X, f0 // flag if x denormal
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fabs FR_X = FR_Input_X
- nop.i 999 ;;
+ nop.m 999
+(p8) fcmp.lt.unc.s1 p6, p0 = FR_X, f1 // Test y=+inf, |x|<1
+ nop.i 999
}
+;;
{ .mfi
- nop.m 999
-(p0) fcmp.eq.s0 p10,p0 = FR_Input_X, f0 // flag if x denormal
- nop.i 999 ;;
+ nop.m 999
+(p8) fcmp.gt.unc.s1 p7, p0 = FR_X, f1 // Test y=+inf, |x|>1
+ nop.i 999
}
+;;
{ .mfi
- nop.m 999
-//
-// Find y = +/- Inf
-// Compute |x|
-//
-(p8) fcmp.lt.unc.s1 p6, p0 = FR_X, f1
- nop.i 999
+ nop.m 999
+(p9) fcmp.lt.unc.s1 p12, p0 = FR_X, f1 // Test y=-inf, |x|<1
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p8) fcmp.gt.unc.s1 p7, p0 = FR_X, f1
- nop.i 999 ;;
+ nop.m 999
+(p6) fmpy.s0 FR_Result = f0,f0 // If y=+inf, |x|<1, result=+0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p9) fcmp.lt.unc.s1 p12, p0 = FR_X, f1
- nop.i 999
+ nop.m 999
+(p9) fcmp.gt.unc.s1 p13, p0 = FR_X, f1 // Test y=-inf, |x|>1
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p9) fcmp.gt.unc.s1 p13, p0 = FR_X, f1
- nop.i 999 ;;
+ nop.m 999
+(p7) fmpy.s0 FR_Result = FR_Input_Y, f1 // If y=+inf, |x|>1, result=+inf
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// For y = +Inf and |x| < 1 returns 0
-// For y = +Inf and |x| > 1 returns Inf
-// For y = -Inf and |x| < 1 returns Inf
-// For y = -Inf and |x| > 1 returns 0
-//
-(p6) mov FR_Result = f0
- nop.i 999 ;;
+ nop.m 999
+ fcmp.eq.s1 p14, p0 = FR_X, f1 // Test y=inf, |x|=1
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) mov FR_Result = FR_Input_Y
- nop.i 999 ;;
+ nop.m 999
+(p12) fnma.s0 FR_Result = FR_Input_Y, f1, f0 // If y=-inf, |x|<1, result=+inf
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p12) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_Y
- nop.i 999 ;;
+ nop.m 999
+(p13) mov FR_Result = f0 // If y=-inf, |x|>1, result=+0
+ nop.i 999
}
+;;
+
{ .mfb
- nop.m 999
-(p13) mov FR_Result = f0
-//
-// Produce x ** +/- Inf results
-//
-(p6) br.cond.spnt L(POWL_64_RETURN) ;;
+ nop.m 999
+(p14) fmpy.s0 FR_Result = f1,f1 // If y=inf, |x|=1, result=+1
+ br.ret.sptk b0 // Common return for y=inf
}
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.spnt L(POWL_64_RETURN) ;;
+;;
+
+
+// Here if x or y denorm/unorm
+POWL_DENORM:
+{ .mmi
+ getf.sig GR_signif_Z = FR_norm_X // Get significand of x
+;;
+ getf.exp GR_signexp_y = FR_norm_Y // Get sign and exp of y
+ nop.i 999
}
-{ .mib
- nop.m 999
- nop.i 999
-(p12) br.cond.spnt L(POWL_64_RETURN) ;;
+;;
+
+{ .mfi
+ getf.sig GR_signif_y = FR_norm_Y // Get significand of y
+ nop.f 999
+ nop.i 999
}
+;;
+
{ .mib
- nop.m 999
- nop.i 999
-(p13) br.cond.spnt L(POWL_64_RETURN) ;;
+ getf.exp GR_signexp_x = FR_norm_X // Get sign and exp of x
+ extr.u GR_Index1 = GR_signif_Z, 59, 4 // Extract upper 4 signif bits of x
+ br.cond.sptk POWL_COMMON // Branch back to main path
}
-{ .mfb
- nop.m 999
+;;
+
+
+POWL_64_UNSUPPORT:
//
-// +/-1 ** +/-Inf, result is +1
+// Raise exceptions for specific
+// values - pseudo NaN and
+// infinities.
+// Return NaN and raise invalid
//
-(p0) fmpy.s0 FR_Result = f1,f1
-(p0) br.cond.sptk L(POWL_64_RETURN) ;;
-}
-L(POWL_64_UNSUPPORT):
{ .mfb
- nop.m 999
+ nop.m 999
+ fmpy.s0 FR_Result = FR_Input_X,f0
+ br.ret.sptk b0
+}
+;;
+
+POWL_64_XNEG:
//
-// Return NaN and raise invalid
+// Raise invalid for x < 0 and
+// y not an integer
//
-(p0) fmpy.s0 FR_Result = FR_Input_X,f0
-//
-// Raise exceptions for specific
-// values - pseudo NaN and
-// infinities.
-//
-(p0) br.cond.sptk L(POWL_64_RETURN) ;;
-}
-L(POWL_64_XNEG):
{ .mfi
- nop.m 999
-(p0) frcpa.s0 FR_Result, p8 = f0, f0
-//
-// Raise invalid for x < 0 and
-// y not an integer and
-//
-(p0) mov GR_Parameter_TAG = 22
+ nop.m 999
+ frcpa.s0 FR_Result, p8 = f0, f0
+ mov GR_Parameter_TAG = 22
}
{ .mib
- nop.m 999
- nop.i 999
-(p0) br.cond.sptk __libm_error_region ;;
+ nop.m 999
+ nop.i 999
+ br.cond.sptk __libm_error_region
}
-L(POWL_64_SQRT):
+;;
+
+POWL_64_SQRT:
{ .mfi
- nop.m 999
-(p0) frsqrta.s0 FR_Result,p10 = FR_Input_X
- nop.i 999 ;;
+ nop.m 999
+ frsqrta.s0 FR_Result,p10 = FR_save_Input_X
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p10) fma.s1 f62=FR_Half,FR_Input_X,f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 f62=FR_Half,FR_save_Input_X,f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (2)
-// h = 1/2 * a in f9
-//
-(p10) fma.s1 f63=FR_Result,FR_Result,f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 f63=FR_Result,FR_Result,f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (3)
-// t1 = y0 * y0 in f10
-//
-(p10) fnma.s1 f32=f63,f62,f11
- nop.i 999 ;;
+ nop.m 999
+(p10) fnma.s1 f32=f63,f62,FR_Half
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (4)
-// t2 = 1/2 - t1 * h in f10
-//
-(p10) fma.s1 f33=f32,FR_Result,FR_Result
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 f33=f32,FR_Result,FR_Result
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (5)
-// y1 = y0 + t2 * y0 in f13
-//
-(p10) fma.s1 f34=f33,f62,f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 f34=f33,f62,f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (6)
-// t3 = y1 * h in f10
-//
-(p10) fnma.s1 f35=f34,f33,f11
- nop.i 999 ;;
+ nop.m 999
+(p10) fnma.s1 f35=f34,f33,FR_Half
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (7)
-// t4 = 1/2 - t3 * y1 in f10
-//
-(p10) fma.s1 f63=f35,f33,f33
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 f63=f35,f33,f33
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (8)
-// y2 = y1 + t4 * y1 in f13
-//
-(p10) fma.s1 f32=FR_Input_X,f63,f0
- nop.i 999
+ nop.m 999
+(p10) fma.s1 f32=FR_save_Input_X,f63,f0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Step (9)
-// S = a * y2 in f10
-//
-(p10) fma.s1 FR_Result=f63,f62,f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_Result=f63,f62,f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (10)
-// t5 = y2 * h in f9
-//
-(p10) fma.s1 f33=f11,f63,f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 f33=f11,f63,f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (11)
-// H = 1/2 * y2 in f11
-//
-(p10) fnma.s1 f34=f32,f32,f8
- nop.i 999
+ nop.m 999
+(p10) fnma.s1 f34=f32,f32,FR_save_Input_X
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Step (12)
-// d = a - S * S in f12
-//
-(p10) fnma.s1 f35=FR_Result,f63,f11
- nop.i 999 ;;
+ nop.m 999
+(p10) fnma.s1 f35=FR_Result,f63,FR_Half
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (13)
-// t6 = 1/2 - t5 * y2 in f7
-//
-(p10) fma.s1 f62=f33,f34,f32
- nop.i 999
+ nop.m 999
+(p10) fma.s1 f62=f33,f34,f32
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Step (14)
-// S1 = S + d * H in f13
-//
-(p10) fma.s1 f63=f33,f35,f33
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 f63=f33,f35,f33
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
-// Step (15)
-// H1 = H + t6 * h in f7
-//
-(p10) fnma.s1 f32=f62,f62,FR_Input_X
- nop.i 999 ;;
+ nop.m 999
+(p10) fnma.s1 f32=f62,f62,FR_save_Input_X
+ nop.i 999 ;;
}
{ .mfb
- nop.m 999
-//
-// Step (16)
-// d1 = a - S1 * S1
-//
-(p10) fma.s0 FR_Result=f32,f63,f62
-//
-// Step (17)
-// R = S1 + d1 * H1
-//
-(p10) br.cond.sptk L(POWL_64_RETURN) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Do the Newton-Raphson iteration from the EAS.
-//
-(p0) br.cond.sptk L(POWL_64_RETURN) ;;
+ nop.m 999
+(p10) fma.s0 FR_Result=f32,f63,f62
+ br.ret.sptk b0 // Exit for x > 0, y = 0.5
}
-//
-// Take care of the degenerate cases.
-//
+;;
-L(POWL_64_RETURN):
-{ .mfb
- nop.m 999
-(p0) mov FR_Output = FR_Result
-(p0) br.ret.sptk b0 ;;
-}
-.endp powl
-ASM_SIZE_DIRECTIVE(powl)
+GLOBAL_LIBM_END(powl)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -3411,32 +2770,32 @@ __libm_error_region:
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
- stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
+ stfe [GR_Parameter_Y] = FR_Input_Y,16 // Save Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
- stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ stfe [GR_Parameter_X] = FR_save_Input_X // Store Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0 // Parameter 3 address
}
{ .mib
- stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ stfe [GR_Parameter_Y] = FR_Result // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
{ .mmi
- ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
@@ -3444,7 +2803,6 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+.endp
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_remainder.S b/sysdeps/ia64/fpu/e_remainder.S
index d8a27722de..2f6e90f994 100644
--- a/sysdeps/ia64/fpu/e_remainder.S
+++ b/sysdeps/ia64/fpu/e_remainder.S
@@ -1,10 +1,10 @@
- .file "remainder.asm"
-// Copyright (C) 2000, 2001, Intel Corporation
+.file "remainder.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, Bob Norin,
-// Shane Story, and Ping Tak Peter Tang of the Computational Software Lab,
-// Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,17 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
-// 2/02/00 Initial version
-// 3/02/00 New Algorithm
-// 4/04/00 Unwind support added
-// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 03/02/00 New Algorithm
+// 04/04/00 Unwind support added
+// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-//11/29/00 Set FR_Y to f9
+// 11/29/00 Set FR_Y to f9
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//====================================================================
@@ -78,16 +80,12 @@
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
-#include "libm_support.h"
-
// Registers used
//====================================================================
// Predicate registers: p6-p14
// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15,f32
- .section .text
-
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
@@ -103,18 +101,9 @@ FR_Y = f9
FR_RESULT = f8
+.section .text
+GLOBAL_IEEE754_ENTRY(remainder)
- .proc remainder#
- .align 32
- .global remainder#
- .align 32
-
-remainder:
-#ifdef _LIBC
-.global __remainder
-.type __remainder,@function
-__remainder:
-#endif
// inputs in f8, f9
// result in f8
@@ -139,7 +128,7 @@ __remainder:
// Y +-NAN, +-inf, +-0? p11
{ .mfi
setf.exp f32=r28
-(p0) fclass.m.unc p11,p0 = f9, 0xe7
+ fclass.m.unc p11,p0 = f9, 0xe7
nop.i 999
}
// qnan snan inf norm unorm 0 -+
@@ -148,7 +137,7 @@ __remainder:
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p0 = f8, 0xe3
+ fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
}
@@ -167,8 +156,8 @@ __remainder:
}
{.bbb
- (p9) br.cond.spnt L(FREM_X_NAN_INF)
- (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
+ (p9) br.cond.spnt FREM_X_NAN_INF
+ (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
nop.b 0
} {.mfi
nop.m 0
@@ -178,7 +167,7 @@ __remainder:
}
-L(remloop24):
+remloop24:
{ .mfi
nop.m 0
// Step (2)
@@ -200,7 +189,7 @@ L(remloop24):
{.mfi
nop.m 0
// q1=q0*(1+e0)
- fma.s1 f15=f12,f7,f12
+ (p6) fma.s1 f15=f12,f7,f12
nop.i 0
}
{ .mfi
@@ -331,7 +320,7 @@ L(remloop24):
// (p9) set r=r2 (new a, if not last iteration)
// (p10) new a =r
(p10) mov f13=f6
- (p12) br.cond.sptk L(remloop24);;
+ (p12) br.cond.sptk remloop24;;
}
// last iteration
@@ -388,7 +377,7 @@ L(remloop24):
}
-L(FREM_X_NAN_INF):
+FREM_X_NAN_INF:
// Y zero ?
{.mfi
@@ -405,19 +394,19 @@ L(FREM_X_NAN_INF):
nop.m 0
nop.i 0
// if Y zero
- (p11) br.cond.spnt L(FREM_Y_ZERO);;
+ (p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
-(p0) fclass.m.unc p8,p0 = f8, 0x23
+ fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
-(p0) fclass.m.unc p11,p0 = f8, 0x23
+ fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
@@ -445,14 +434,14 @@ L(FREM_X_NAN_INF):
}
{ .mfi
nop.m 999
-(p8) fma.d f8=f8,f1,f0
+(p8) fma.d.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
- (p11) br.cond.spnt L(EXP_ERROR_RETURN);;
+ (p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
@@ -461,35 +450,35 @@ L(FREM_X_NAN_INF):
}
-L(FREM_Y_NAN_INF_ZERO):
+FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x23
+ fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p7) fma.d f8=f8,f1,f0
+(p7) fma.d.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p0 = f9, 0xc3
+ fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p9) fma.d f8=f9,f1,f0
+(p9) fma.d.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
-L(FREM_Y_ZERO):
+FREM_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@@ -497,7 +486,7 @@ L(FREM_Y_ZERO):
// X NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@@ -508,47 +497,41 @@ L(FREM_Y_ZERO):
{.mfi
nop.m 999
- (p9) frcpa f11,p7=f8,f0
+ (p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
-(p10) frcpa f11,p7 = f0,f0
+(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8, f8
+ fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fma.d f8=f11,f1,f0
+ fma.d.s0 f8=f11,f1,f0
nop.i 999
}
-L(EXP_ERROR_RETURN):
+EXP_ERROR_RETURN:
{ .mib
-(p0) mov GR_Parameter_TAG = 124
+ mov GR_Parameter_TAG = 124
nop.i 999
-(p0) br.sptk __libm_error_region;;
+ br.sptk __libm_error_region;;
}
-.endp remainder
-ASM_SIZE_DIRECTIVE(remainder)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__remainder)
-#endif
-
+GLOBAL_IEEE754_END(remainder)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -596,10 +579,11 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
+
diff --git a/sysdeps/ia64/fpu/e_remainderf.S b/sysdeps/ia64/fpu/e_remainderf.S
index 40f9b32921..bbb5fd0e0f 100644
--- a/sysdeps/ia64/fpu/e_remainderf.S
+++ b/sysdeps/ia64/fpu/e_remainderf.S
@@ -1,11 +1,10 @@
- .file "remainderf.asm"
-// Copyright (C) 2000, 2001, Intel Corporation
+.file "remainderf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
-// Software Lab,
-// Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,17 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
-// 2/02/00 Initial version
-// 3/02/00 New algorithm
-// 4/04/00 Unwind support added
-// 7/21/00 Fixed quotient=2^{24*m+23} bug
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 03/02/00 New algorithm
+// 04/04/00 Unwind support added
+// 07/21/00 Fixed quotient=2^{24*m+23} bug
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-//11/29/00 Set FR_Y to f9
+// 11/29/00 Set FR_Y to f9
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//====================================================================
@@ -78,9 +79,6 @@
//====================================================================
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
-
-#include "libm_support.h"
-
//
// Registers used
//====================================================================
@@ -89,8 +87,6 @@
// Floating point registers: f6-f15
//
-.section .text
-
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
@@ -106,17 +102,9 @@ FR_Y = f9
FR_RESULT = f8
- .proc remainderf#
- .align 32
- .global remainderf#
- .align 32
+.section .text
+GLOBAL_IEEE754_ENTRY(remainderf)
-remainderf:
-#ifdef _LIBC
-.global __remainderf
-.type __remainderf,@function
-__remainderf:
-#endif
// inputs in f8, f9
// result in f8
@@ -141,7 +129,7 @@ __remainderf:
// Y +-NAN, +-inf, +-0? p11
{ .mfi
nop.m 999
-(p0) fclass.m.unc p11,p0 = f9, 0xe7
+ fclass.m.unc p11,p0 = f9, 0xe7
nop.i 999
}
// qnan snan inf norm unorm 0 -+
@@ -150,7 +138,7 @@ __remainderf:
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p0 = f8, 0xe3
+ fclass.m.unc p9,p0 = f8, 0xe3
nop.i 999;;
}
@@ -168,8 +156,8 @@ __remainderf:
nop.i 0;;
}
{.bbb
- (p9) br.cond.spnt L(FREM_X_NAN_INF)
- (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
+ (p9) br.cond.spnt FREM_X_NAN_INF
+ (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
nop.b 0
} {.mfi
nop.m 0
@@ -179,7 +167,7 @@ __remainderf:
}
.align 32
-L(remloop24):
+remloop24:
{ .mfi
// f12=2^{24}-2
setf.s f12=r3
@@ -347,7 +335,7 @@ L(remloop24):
// (p9) set r=r2 (new a, if not last iteration)
// (p10) new a =r
(p10) mov f13=f6
- (p12) br.cond.sptk L(remloop24);;
+ (p12) br.cond.sptk remloop24;;
}
// last iteration
@@ -408,7 +396,7 @@ L(remloop24):
}
-L(FREM_X_NAN_INF):
+FREM_X_NAN_INF:
// Y zero ?
{.mfi
@@ -425,19 +413,19 @@ L(FREM_X_NAN_INF):
nop.m 0
nop.i 0
// if Y zero
- (p11) br.cond.spnt L(FREM_Y_ZERO);;
+ (p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
-(p0) fclass.m.unc p8,p0 = f8, 0x23
+ fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
-(p0) fclass.m.unc p11,p0 = f8, 0x23
+ fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
@@ -465,14 +453,14 @@ L(FREM_X_NAN_INF):
}
{ .mfi
nop.m 999
-(p8) fma.s f8=f8,f1,f0
+(p8) fma.s.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
- (p11) br.cond.spnt L(EXP_ERROR_RETURN);;
+ (p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
@@ -481,35 +469,35 @@ L(FREM_X_NAN_INF):
}
-L(FREM_Y_NAN_INF_ZERO):
+FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x23
+ fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p7) fma.s f8=f8,f1,f0
+(p7) fma.s.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p0 = f9, 0xc3
+ fclass.m.unc p9,p0 = f9, 0xc3
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p9) fma.s f8=f9,f1,f0
+(p9) fma.s.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
-L(FREM_Y_ZERO):
+FREM_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@@ -517,7 +505,7 @@ L(FREM_Y_ZERO):
// X NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@@ -528,47 +516,41 @@ L(FREM_Y_ZERO):
{.mfi
nop.m 999
- (p9) frcpa f11,p7=f8,f0
+ (p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
-(p10) frcpa f11,p7 = f0,f0
+(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8, f8
+ fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fma.s f8=f11,f1,f0
+ fma.s.s0 f8=f11,f1,f0
nop.i 999
}
-L(EXP_ERROR_RETURN):
+EXP_ERROR_RETURN:
{ .mib
-(p0) mov GR_Parameter_TAG = 125
+ mov GR_Parameter_TAG = 125
nop.i 999
-(p0) br.sptk __libm_error_region;;
+ br.sptk __libm_error_region;;
}
-.endp remainderf
-ASM_SIZE_DIRECTIVE(remainderf)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__remainderf)
-#endif
-
+GLOBAL_IEEE754_END(remainderf)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -616,9 +598,11 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
+
+
diff --git a/sysdeps/ia64/fpu/e_remainderl.S b/sysdeps/ia64/fpu/e_remainderl.S
index 5856861442..1c1a3c3072 100644
--- a/sysdeps/ia64/fpu/e_remainderl.S
+++ b/sysdeps/ia64/fpu/e_remainderl.S
@@ -1,10 +1,10 @@
-.file "remainderl.asm"
-// Copyright (C) 2000, 2001, Intel Corporation
+.file "remainderl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
-// Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,17 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//====================================================================
-// 2/02/00 Initial version
-// 3/02/00 New algorithm
-// 4/04/00 Unwind support added
-// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 03/02/00 New algorithm
+// 04/04/00 Unwind support added
+// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-//11/29/00 Set FR_Y to f9
+// 11/29/00 Set FR_Y to f9
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//====================================================================
@@ -77,9 +79,6 @@
//====================================================================
// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
// a=NaN or b=NaN: return NaN
-
-#include "libm_support.h"
-
//
// Registers used
//====================================================================
@@ -87,8 +86,6 @@
// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15,f32
//
-.section .text
-
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
@@ -105,19 +102,9 @@ FR_Y = f9
FR_RESULT = f8
+.section .text
+GLOBAL_IEEE754_ENTRY(remainderl)
-
- .proc remainderl#
- .align 32
- .global remainderl#
- .align 32
-
-remainderl:
-#ifdef _LIBC
-.global __remainderl
-.type __remainderl,@function
-__remainderl:
-#endif
// inputs in f8, f9
// result in f8
@@ -159,7 +146,7 @@ cmp.eq p11,p10=r29,r0;;
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p8 = f8, 0xe3
+ fclass.m.unc p9,p8 = f8, 0xe3
nop.i 999;;
}
@@ -196,8 +183,8 @@ cmp.eq p11,p10=r29,r0;;
}
{.bbb
- (p9) br.cond.spnt L(FREM_X_NAN_INF)
- (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
+ (p9) br.cond.spnt FREM_X_NAN_INF
+ (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
nop.b 0
} {.mfi
nop.m 0
@@ -206,7 +193,7 @@ cmp.eq p11,p10=r29,r0;;
nop.i 0;;
}
-L(remloop24):
+remloop24:
{ .mfi
nop.m 0
// Step (2)
@@ -228,7 +215,7 @@ L(remloop24):
{.mfi
nop.m 0
// q1=q0*(1+e0)
- fma.s1 f15=f12,f7,f12
+ (p6) fma.s1 f15=f12,f7,f12
nop.i 0
}
{ .mfi
@@ -358,7 +345,7 @@ L(remloop24):
// (p9) set r=r2 (new a, if not last iteration)
// (p10) new a =r
(p10) mov f13=f6
- (p12) br.cond.sptk L(remloop24);;
+ (p12) br.cond.sptk remloop24;;
}
// last iteration
@@ -416,7 +403,7 @@ L(remloop24):
-L(FREM_X_NAN_INF):
+FREM_X_NAN_INF:
// Y zero ?
{.mfi
@@ -433,19 +420,19 @@ L(FREM_X_NAN_INF):
nop.m 0
nop.i 0
// if Y zero
- (p11) br.cond.spnt L(FREM_Y_ZERO);;
+ (p11) br.cond.spnt FREM_Y_ZERO;;
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
-(p0) fclass.m.unc p8,p0 = f8, 0x23
+ fclass.m.unc p8,p0 = f8, 0x23
nop.i 999
}
// X infinity? Return QNAN indefinite
{ .mfi
nop.m 999
-(p0) fclass.m.unc p11,p0 = f8, 0x23
+ fclass.m.unc p11,p0 = f8, 0x23
nop.i 999;;
}
// Y NaN ?
@@ -473,14 +460,14 @@ L(FREM_X_NAN_INF):
}
{ .mfi
nop.m 999
-(p8) fma f8=f8,f1,f0
+(p8) fma.s0 f8=f8,f1,f0
nop.i 0 ;;
}
{ .mfb
nop.m 999
frcpa.s0 f8,p7=f8,f9
- (p11) br.cond.spnt L(EXP_ERROR_RETURN);;
+ (p11) br.cond.spnt EXP_ERROR_RETURN;;
}
{ .mib
nop.m 0
@@ -489,24 +476,24 @@ L(FREM_X_NAN_INF):
}
-L(FREM_Y_NAN_INF_ZERO):
+FREM_Y_NAN_INF_ZERO:
// Y INF
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x23
+ fclass.m.unc p7,p0 = f9, 0x23
nop.i 999 ;;
}
{ .mfb
nop.m 999
-(p7) fma f8=f8,f1,f0
+(p7) fma.s0 f8=f8,f1,f0
(p7) br.ret.spnt b0 ;;
}
// Y NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p10 = f9, 0xc3
+ fclass.m.unc p9,p10 = f9, 0xc3
nop.i 999 ;;
}
{ .mfi
@@ -517,11 +504,11 @@ L(FREM_Y_NAN_INF_ZERO):
{ .mfb
nop.m 999
-(p9) fma f8=f9,f1,f0
+(p9) fma.s0 f8=f9,f1,f0
(p9) br.ret.spnt b0 ;;
}
-L(FREM_Y_ZERO):
+FREM_Y_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@@ -529,7 +516,7 @@ L(FREM_Y_ZERO):
// X NAN?
{ .mfi
nop.m 999
-(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ fclass.m.unc p9,p10 = f8, 0xc3
nop.i 999 ;;
}
{ .mfi
@@ -540,43 +527,37 @@ L(FREM_Y_ZERO):
{.mfi
nop.m 999
- (p9) frcpa f11,p7=f8,f0
+ (p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
}
{ .mfi
nop.m 999
-(p10) frcpa f11,p7 = f0,f0
+(p10) frcpa.s0 f11,p7 = f0,f0
nop.i 999;;
}
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8, f8
+ fmerge.s f10 = f8, f8
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fma f8=f11,f1,f0
+ fma.s0 f8=f11,f1,f0
nop.i 999;;
}
-L(EXP_ERROR_RETURN):
+EXP_ERROR_RETURN:
{ .mib
-(p0) mov GR_Parameter_TAG = 123
+ mov GR_Parameter_TAG = 123
nop.i 999
-(p0) br.sptk __libm_error_region;;
+ br.sptk __libm_error_region;;
}
-.endp remainderl
-ASM_SIZE_DIRECTIVE(remainderl)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__remainderl)
-#endif
-
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(remainderl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -624,9 +605,12 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
+
+
+
diff --git a/sysdeps/ia64/fpu/e_scalb.S b/sysdeps/ia64/fpu/e_scalb.S
index 7f5b5796de..82e914e259 100644
--- a/sysdeps/ia64/fpu/e_scalb.S
+++ b/sysdeps/ia64/fpu/e_scalb.S
@@ -1,10 +1,10 @@
.file "scalb.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,12 +35,14 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 Scalb completely reworked and now standalone version
+// 02/02/00 Initial version
+// 01/26/01 Scalb completely reworked and now standalone version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -53,8 +55,6 @@
//
//
-#include "libm_support.h"
-
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
@@ -84,19 +84,8 @@ GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
-.align 32
-.global scalb
-
.section .text
-.proc scalb
-.align 32
-
-scalb:
-#ifdef _LIBC
-.global __ieee754_scalb
-.type __ieee754_scalb,@function
-__ieee754_scalb:
-#endif
+GLOBAL_IEEE754_ENTRY(scalb)
//
// Is x NAN, INF, ZERO, +-?
@@ -140,12 +129,12 @@ __ieee754_scalb:
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
-(p6) br.cond.spnt L(SCALB_NAN_INF_ZERO)
+(p6) br.cond.spnt SCALB_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
-(p7) br.cond.spnt L(SCALB_NAN_INF_ZERO)
+(p7) br.cond.spnt SCALB_NAN_INF_ZERO
};;
//
@@ -212,7 +201,7 @@ __ieee754_scalb:
}
{ .mfb
nop.m 0
-(p7) frcpa f8,p11 = f0,f0
+(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
@@ -246,7 +235,7 @@ __ieee754_scalb:
}
{ .mlx
nop.m 999
-(p0) movl GR_Scratch = 0x00000000000303FF
+ movl GR_Scratch = 0x00000000000303FF
};;
{ .mfi
nop.m 0
@@ -255,7 +244,7 @@ __ieee754_scalb:
}
{ .mlx
nop.m 999
-(p0) movl GR_Scratch1= 0x00000000000103FF
+ movl GR_Scratch1= 0x00000000000103FF
};;
// Set up necessary status fields
@@ -266,12 +255,12 @@ __ieee754_scalb:
//
{ .mfi
nop.m 999
-(p0) fsetc.s3 0x7F,0x41
+ fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fsetc.s2 0x7F,0x42
+ fsetc.s2 0x7F,0x42
nop.i 999
};;
@@ -345,7 +334,7 @@ __ieee754_scalb:
{ .mfb
(p6) addl GR_Tag = 54, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(SCALB_UNDERFLOW)
+(p6) br.cond.spnt SCALB_UNDERFLOW
};;
//
@@ -353,8 +342,8 @@ __ieee754_scalb:
//
{ .mbb
nop.m 0
-(p7) br.cond.spnt L(SCALB_OVERFLOW)
-(p9) br.cond.spnt L(SCALB_OVERFLOW)
+(p7) br.cond.spnt SCALB_OVERFLOW
+(p9) br.cond.spnt SCALB_OVERFLOW
};;
//
@@ -366,7 +355,7 @@ __ieee754_scalb:
br.ret.sptk b0;;
}
-L(SCALB_NAN_INF_ZERO):
+SCALB_NAN_INF_ZERO:
//
// Convert N to a fp integer
@@ -471,16 +460,11 @@ L(SCALB_NAN_INF_ZERO):
br.ret.sptk b0
};;
-.endp scalb
-ASM_SIZE_DIRECTIVE(scalb)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__ieee754_scalb)
-#endif
-.proc __libm_error_region
+GLOBAL_IEEE754_END(scalb)
__libm_error_region:
-L(SCALB_OVERFLOW):
-L(SCALB_UNDERFLOW):
+SCALB_OVERFLOW:
+SCALB_UNDERFLOW:
//
// Get stack address of N
@@ -557,8 +541,7 @@ L(SCALB_UNDERFLOW):
br.ret.sptk b0
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalbf.S b/sysdeps/ia64/fpu/e_scalbf.S
index 40af080d38..07acb3297e 100644
--- a/sysdeps/ia64/fpu/e_scalbf.S
+++ b/sysdeps/ia64/fpu/e_scalbf.S
@@ -1,10 +1,10 @@
.file "scalbf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,12 +35,14 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 Scalb completely reworked and now standalone version
+// 02/02/00 Initial version
+// 01/26/01 Scalb completely reworked and now standalone version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -53,8 +55,6 @@
//
//
-#include "libm_support.h"
-
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
@@ -84,19 +84,8 @@ GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
-.align 32
-.global scalbf
-
.section .text
-.proc scalbf
-.align 32
-
-scalbf:
-#ifdef _LIBC
-.global __ieee754_scalbf
-.type __ieee754_scalbf,@function
-__ieee754_scalbf:
-#endif
+GLOBAL_IEEE754_ENTRY(scalbf)
//
// Is x NAN, INF, ZERO, +-?
@@ -140,12 +129,12 @@ __ieee754_scalbf:
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
-(p6) br.cond.spnt L(SCALBF_NAN_INF_ZERO)
+(p6) br.cond.spnt SCALBF_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
-(p7) br.cond.spnt L(SCALBF_NAN_INF_ZERO)
+(p7) br.cond.spnt SCALBF_NAN_INF_ZERO
};;
//
@@ -212,7 +201,7 @@ __ieee754_scalbf:
}
{ .mfb
nop.m 0
-(p7) frcpa f8,p11 = f0,f0
+(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
@@ -246,7 +235,7 @@ __ieee754_scalbf:
}
{ .mlx
nop.m 999
-(p0) movl GR_Scratch = 0x000000000003007F
+ movl GR_Scratch = 0x000000000003007F
};;
{ .mfi
nop.m 0
@@ -255,7 +244,7 @@ __ieee754_scalbf:
}
{ .mlx
nop.m 999
-(p0) movl GR_Scratch1= 0x000000000001007F
+ movl GR_Scratch1= 0x000000000001007F
};;
// Set up necessary status fields
@@ -266,12 +255,12 @@ __ieee754_scalbf:
//
{ .mfi
nop.m 999
-(p0) fsetc.s3 0x7F,0x41
+ fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fsetc.s2 0x7F,0x42
+ fsetc.s2 0x7F,0x42
nop.i 999
};;
@@ -345,7 +334,7 @@ __ieee754_scalbf:
{ .mfb
(p6) addl GR_Tag = 56, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(SCALBF_UNDERFLOW)
+(p6) br.cond.spnt SCALBF_UNDERFLOW
};;
//
@@ -353,8 +342,8 @@ __ieee754_scalbf:
//
{ .mbb
nop.m 0
-(p7) br.cond.spnt L(SCALBF_OVERFLOW)
-(p9) br.cond.spnt L(SCALBF_OVERFLOW)
+(p7) br.cond.spnt SCALBF_OVERFLOW
+(p9) br.cond.spnt SCALBF_OVERFLOW
};;
//
@@ -366,7 +355,7 @@ __ieee754_scalbf:
br.ret.sptk b0;;
}
-L(SCALBF_NAN_INF_ZERO):
+SCALBF_NAN_INF_ZERO:
//
// Convert N to a fp integer
@@ -471,16 +460,11 @@ L(SCALBF_NAN_INF_ZERO):
br.ret.sptk b0
};;
-.endp scalbf
-ASM_SIZE_DIRECTIVE(scalbf)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__ieee754_scalbf)
-#endif
-.proc __libm_error_region
+GLOBAL_IEEE754_END(scalbf)
__libm_error_region:
-L(SCALBF_OVERFLOW):
-L(SCALBF_UNDERFLOW):
+SCALBF_OVERFLOW:
+SCALBF_UNDERFLOW:
//
// Get stack address of N
@@ -557,8 +541,7 @@ L(SCALBF_UNDERFLOW):
br.ret.sptk b0
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalbl.S b/sysdeps/ia64/fpu/e_scalbl.S
index 43eac7a2ad..d22d029155 100644
--- a/sysdeps/ia64/fpu/e_scalbl.S
+++ b/sysdeps/ia64/fpu/e_scalbl.S
@@ -1,10 +1,10 @@
.file "scalbl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,12 +35,14 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 Scalb completely reworked and now standalone version
+// 02/02/00 Initial version
+// 01/26/01 Scalb completely reworked and now standalone version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -53,8 +55,6 @@
//
//
-#include "libm_support.h"
-
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
@@ -84,19 +84,8 @@ GR_Parameter_Y = r36
GR_Parameter_RESULT = r37
GR_Tag = r38
-.align 32
-.global scalbl
-
.section .text
-.proc scalbl
-.align 32
-
-scalbl:
-#ifdef _LIBC
-.global __ieee754_scalbl
-.type __ieee754_scalbl,@function
-__ieee754_scalbl:
-#endif
+GLOBAL_IEEE754_ENTRY(scalbl)
//
// Is x NAN, INF, ZERO, +-?
@@ -140,12 +129,12 @@ __ieee754_scalbl:
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
-(p6) br.cond.spnt L(SCALBL_NAN_INF_ZERO)
+(p6) br.cond.spnt SCALBL_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
-(p7) br.cond.spnt L(SCALBL_NAN_INF_ZERO)
+(p7) br.cond.spnt SCALBL_NAN_INF_ZERO
};;
//
@@ -212,7 +201,7 @@ __ieee754_scalbl:
}
{ .mfb
nop.m 0
-(p7) frcpa f8,p11 = f0,f0
+(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
@@ -246,7 +235,7 @@ __ieee754_scalbl:
}
{ .mlx
nop.m 999
-(p0) movl GR_Scratch = 0x0000000000033FFF
+ movl GR_Scratch = 0x0000000000033FFF
};;
{ .mfi
nop.m 0
@@ -255,7 +244,7 @@ __ieee754_scalbl:
}
{ .mlx
nop.m 999
-(p0) movl GR_Scratch1= 0x0000000000013FFF
+ movl GR_Scratch1= 0x0000000000013FFF
};;
// Set up necessary status fields
@@ -266,12 +255,12 @@ __ieee754_scalbl:
//
{ .mfi
nop.m 999
-(p0) fsetc.s3 0x7F,0x41
+ fsetc.s3 0x7F,0x41
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fsetc.s2 0x7F,0x42
+ fsetc.s2 0x7F,0x42
nop.i 999
};;
@@ -345,7 +334,7 @@ __ieee754_scalbl:
{ .mfb
(p6) addl GR_Tag = 52, r0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(SCALBL_UNDERFLOW)
+(p6) br.cond.spnt SCALBL_UNDERFLOW
};;
//
@@ -353,8 +342,8 @@ __ieee754_scalbl:
//
{ .mbb
nop.m 0
-(p7) br.cond.spnt L(SCALBL_OVERFLOW)
-(p9) br.cond.spnt L(SCALBL_OVERFLOW)
+(p7) br.cond.spnt SCALBL_OVERFLOW
+(p9) br.cond.spnt SCALBL_OVERFLOW
};;
//
@@ -366,7 +355,7 @@ __ieee754_scalbl:
br.ret.sptk b0;;
}
-L(SCALBL_NAN_INF_ZERO):
+SCALBL_NAN_INF_ZERO:
//
// Convert N to a fp integer
@@ -471,16 +460,11 @@ L(SCALBL_NAN_INF_ZERO):
br.ret.sptk b0
};;
-.endp scalbl
-ASM_SIZE_DIRECTIVE(scalbl)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__ieee754_scalbl)
-#endif
-.proc __libm_error_region
+GLOBAL_IEEE754_END(scalbl)
__libm_error_region:
-L(SCALBL_OVERFLOW):
-L(SCALBL_UNDERFLOW):
+SCALBL_OVERFLOW:
+SCALBL_UNDERFLOW:
//
// Get stack address of N
@@ -557,8 +541,7 @@ L(SCALBL_UNDERFLOW):
br.ret.sptk b0
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinh.S b/sysdeps/ia64/fpu/e_sinh.S
index 4415dc7524..84c312c2b7 100644
--- a/sysdeps/ia64/fpu/e_sinh.S
+++ b/sysdeps/ia64/fpu/e_sinh.S
@@ -1,10 +1,10 @@
.file "sinh.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1249 +20,838 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 10/12/00 Update to set denormal operand and underflow flags
-// 1/22/01 Fixed to set inexact flag for small args.
-//
+// 01/22/01 Fixed to set inexact flag for small args.
+// 05/02/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/20/02 Improved speed with new algorithm
+
// API
//==============================================================
-// double = sinh(double)
-// input floating point f8
-// output floating point f8
-//
-// Registers used
-//==============================================================
-// general registers:
-// r32 -> r47
-// predicate registers used:
-// p6 p7 p8 p9
-// floating-point registers used:
-// f9 -> f15; f32 -> f45;
-// f8 has input, then output
-//
+// double sinh(double)
+
// Overview of operation
//==============================================================
-// There are four paths
-// 1. |x| < 0.25 SINH_BY_POLY
-// 2. |x| < 32 SINH_BY_TBL
-// 3. |x| < 2^14 SINH_BY_EXP
-// 4. |x_ >= 2^14 SINH_HUGE
-//
-// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
-// >= 1.0110001.... x 2^13
-// >= 11357.2166
-//
-// But for double we get infinity for x >= 408633ce8fb9f87e
-// >= 1.0110...x 2^9
-// >= +7.10476e+002
-//
-// And for single we get infinity for x >= 42b3a496
-// >= 1.0110... 2^6
-// >= 89.8215
+// Case 1: 0 < |x| < 2^-60
+// Result = x, computed by x+sgn(x)*x^2 to handle flags and rounding
//
-// SAFE: If there is danger of overflow set SAFE to 0
-// NOT implemented: if there is danger of underflow, set SAFE to 0
-// SAFE for all paths listed below
+// Case 2: 2^-60 < |x| < 0.25
+// Evaluate sinh(x) by a 13th order polynomial
+// Care is taken for the order of multiplication; and A1 is not exactly 1/3!,
+// A2 is not exactly 1/5!, etc.
+// sinh(x) = x + (A1*x^3 + A2*x^5 + A3*x^7 + A4*x^9 + A5*x^11 + A6*x^13)
//
-// 1. SINH_BY_POLY
-// ===============
-// If |x| is less than the tiny threshold, then clear SAFE
-// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
-// register-biased, this is fc01
-// For single, the tiny threshold is -126 = -7e => -7e + ffff = ff81
-// If |x| < tiny threshold, set SAFE = 0
+// Case 3: 0.25 < |x| < 710.47586
+// Algorithm is based on the identity sinh(x) = ( exp(x) - exp(-x) ) / 2.
+// The algorithm for exp is described as below. There are a number of
+// economies from evaluating both exp(x) and exp(-x). Although we
+// are evaluating both quantities, only where the quantities diverge do we
+// duplicate the computations. The basic algorithm for exp(x) is described
+// below.
//
-// 2. SINH_BY_TBL
-// =============
-// SAFE: SAFE is always 1 for TBL;
-//
-// 3. SINH_BY_EXP
-// ==============
-// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
-// r34 has N-1; 16382 is in register biased form, 0x13ffd
-// There is danger of double overflow if N-1 > 0x3fe
-// in register biased form, 0x103fd
-// Analagously, there is danger of single overflow if N-1 > 0x7e
-// in register biased form, 0x1007d
-// SAFE: If there is danger of overflow set SAFE to 0
-//
-// 4. SINH_HUGE
-// ============
-// SAFE: SAFE is always 0 for HUGE
+// Take the input x. w is "how many log2/128 in x?"
+// w = x * 128/log2
+// n = int(w)
+// x = n log2/128 + r + delta
-#include "libm_support.h"
+// n = 128M + index_1 + 2^4 index_2
+// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
-//
-// Assembly macros
-//==============================================================
-sinh_FR_X = f44
-sinh_FR_X2 = f9
-sinh_FR_X4 = f10
-sinh_FR_SGNX = f40
-sinh_FR_all_ones = f45
-sinh_FR_tmp = f42
-
-sinh_FR_Inv_log2by64 = f9
-sinh_FR_log2by64_lo = f11
-sinh_FR_log2by64_hi = f10
-
-sinh_FR_A1 = f9
-sinh_FR_A2 = f10
-sinh_FR_A3 = f11
-
-sinh_FR_Rcub = f12
-sinh_FR_M_temp = f13
-sinh_FR_R_temp = f13
-sinh_FR_Rsq = f13
-sinh_FR_R = f14
-
-sinh_FR_M = f38
-
-sinh_FR_B1 = f15
-sinh_FR_B2 = f32
-sinh_FR_B3 = f33
-
-sinh_FR_peven_temp1 = f34
-sinh_FR_peven_temp2 = f35
-sinh_FR_peven = f36
-
-sinh_FR_podd_temp1 = f34
-sinh_FR_podd_temp2 = f35
-sinh_FR_podd = f37
-
-sinh_FR_poly_podd_temp1 = f11
-sinh_FR_poly_podd_temp2 = f13
-sinh_FR_poly_peven_temp1 = f11
-sinh_FR_poly_peven_temp2 = f13
+// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
+// Construct 2^M
+// Get 2^(index_1/128) from table_1;
+// Get 2^(index_2/8) from table_2;
+// Calculate exp(r) by 5th order polynomial
+// r = x - n (log2/128)_high
+// delta = - n (log2/128)_low
+// Calculate exp(delta) as 1 + delta
-sinh_FR_J_temp = f9
-sinh_FR_J = f10
-sinh_FR_Mmj = f39
-
-sinh_FR_N_temp1 = f11
-sinh_FR_N_temp2 = f12
-sinh_FR_N = f13
-
-sinh_FR_spos = f14
-sinh_FR_sneg = f15
-
-sinh_FR_Tjhi = f32
-sinh_FR_Tjlo = f33
-sinh_FR_Tmjhi = f34
-sinh_FR_Tmjlo = f35
-
-sinh_GR_mJ = r35
-sinh_GR_J = r36
-
-sinh_AD_mJ = r38
-sinh_AD_J = r39
-sinh_GR_all_ones = r40
-
-sinh_FR_S_hi = f9
-sinh_FR_S_hi_temp = f10
-sinh_FR_S_lo_temp1 = f11
-sinh_FR_S_lo_temp2 = f12
-sinh_FR_S_lo_temp3 = f13
-
-sinh_FR_S_lo = f38
-sinh_FR_C_hi = f39
+// Special values
+//==============================================================
+// sinh(+0) = +0
+// sinh(-0) = -0
-sinh_FR_C_hi_temp1 = f10
-sinh_FR_Y_hi = f11
-sinh_FR_Y_lo_temp = f12
-sinh_FR_Y_lo = f13
-sinh_FR_SINH = f9
+// sinh(+qnan) = +qnan
+// sinh(-qnan) = -qnan
+// sinh(+snan) = +qnan
+// sinh(-snan) = -qnan
-sinh_FR_P1 = f14
-sinh_FR_P2 = f15
-sinh_FR_P3 = f32
-sinh_FR_P4 = f33
-sinh_FR_P5 = f34
-sinh_FR_P6 = f35
+// sinh(-inf) = -inf
+// sinh(+inf) = +inf
-sinh_FR_TINY_THRESH = f9
+// Overflow and Underflow
+//=======================
+// sinh(x) = largest double normal when
+// |x| = 710.47586 = 0x408633ce8fb9f87d
+//
+// Underflow is handled as described in case 1 above
-sinh_FR_SINH_temp = f10
-sinh_FR_SCALE = f11
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6 -> f15, f32 -> f61
-sinh_FR_signed_hi_lo = f10
+// General registers used:
+// r14 -> r40
+// Predicate registers used:
+// p6 -> p15
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
+// Assembly macros
+//==============================================================
-GR_Parameter_X = r44
-GR_Parameter_Y = r45
-GR_Parameter_RESULT = r46
+rRshf = r14
+rN_neg = r14
+rAD_TB1 = r15
+rAD_TB2 = r16
+rAD_P = r17
+rN = r18
+rIndex_1 = r19
+rIndex_2_16 = r20
+rM = r21
+rBiased_M = r21
+rSig_inv_ln2 = r22
+rIndex_1_neg = r22
+rExp_bias = r23
+rExp_bias_minus_1 = r23
+rExp_mask = r24
+rTmp = r24
+rGt_ln = r24
+rIndex_2_16_neg = r24
+rM_neg = r25
+rBiased_M_neg = r25
+rRshf_2to56 = r26
+rAD_T1_neg = r26
+rExp_2tom56 = r28
+rAD_T2_neg = r28
+rAD_T1 = r29
+rAD_T2 = r30
+rSignexp_x = r31
+rExp_x = r31
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+fRSHF_2TO56 = f6
+fINV_LN2_2TO63 = f7
+fW_2TO56_RSH = f9
+f2TOM56 = f11
+fP5 = f12
+fP4 = f13
+fP3 = f14
+fP2 = f15
+
+fLn2_by_128_hi = f33
+fLn2_by_128_lo = f34
+
+fRSHF = f35
+fNfloat = f36
+fNormX = f37
+fR = f38
+fF = f39
+
+fRsq = f40
+f2M = f41
+fS1 = f42
+fT1 = f42
+fS2 = f43
+fT2 = f43
+fS = f43
+fWre_urm_f8 = f44
+fAbsX = f44
+
+fMIN_DBL_OFLOW_ARG = f45
+fMAX_DBL_NORM_ARG = f46
+fXsq = f47
+fX4 = f48
+fGt_pln = f49
+fTmp = f49
+
+fP54 = f50
+fP5432 = f50
+fP32 = f51
+fP = f52
+fP54_neg = f53
+fP5432_neg = f53
+fP32_neg = f54
+fP_neg = f55
+fF_neg = f56
+
+f2M_neg = f57
+fS1_neg = f58
+fT1_neg = f58
+fS2_neg = f59
+fT2_neg = f59
+fS_neg = f59
+fExp = f60
+fExp_neg = f61
+
+fA6 = f50
+fA65 = f50
+fA6543 = f50
+fA654321 = f50
+fA5 = f51
+fA4 = f52
+fA43 = f52
+fA3 = f53
+fA2 = f54
+fA21 = f54
+fA1 = f55
+fX3 = f56
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
+RODATA
.align 16
-double_sinh_arg_reduction:
-ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
- data8 0xB8AA3B295C17F0BC, 0x00004005
- data8 0xB17217F7D1000000, 0x00003FF8
- data8 0xCF79ABC9E3B39804, 0x00003FD0
-ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
-
-double_sinh_p_table:
-ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
- data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
- data8 0x8888888888888412, 0x00003FF8
- data8 0xD00D00D00D4D39F2, 0x00003FF2
- data8 0xB8EF1D28926D8891, 0x00003FEC
- data8 0xD732377688025BE9, 0x00003FE5
- data8 0xB08AF9AE78C1239F, 0x00003FDE
-ASM_SIZE_DIRECTIVE(double_sinh_p_table)
-
-double_sinh_ab_table:
-ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
- data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
- data8 0x88888888884ECDD5, 0x00003FF8
- data8 0xD00D0C6DCC26A86B, 0x00003FF2
- data8 0x8000000000000002, 0x00003FFE
- data8 0xAAAAAAAAAA402C77, 0x00003FFA
- data8 0xB60B6CC96BDB144D, 0x00003FF5
-ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
-
-double_sinh_j_table:
-ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
- data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
- data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
- data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
- data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
- data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
- data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
- data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
- data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
- data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
- data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
- data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
- data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
- data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
- data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
- data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
- data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
- data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
- data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
- data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
- data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
- data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
- data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
- data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
- data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
- data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
- data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
- data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
- data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
- data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
- data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
- data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
- data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
- data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
- data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
- data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
- data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
- data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
- data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
- data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
- data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
- data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
- data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
- data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
- data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
- data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
- data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
- data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
- data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
- data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
- data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
- data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
- data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
- data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
- data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
- data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
- data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
- data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
- data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
- data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
- data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
- data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
- data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
- data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
- data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
- data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
-ASM_SIZE_DIRECTIVE(double_sinh_j_table)
-
-.align 32
-.global sinh#
-.section .text
-.proc sinh#
-.align 32
-
-sinh:
-#ifdef _LIBC
-.global __ieee754_sinh
-.type __ieee754_sinh,@function
-__ieee754_sinh:
-#endif
-
-// X infinity or NAN?
-// Take invalid fault if enabled
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 128/ln(2) is needed for the computation of w. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63+56)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*128/ln2 into the rightmost bits of the significand.
+// The result of this fma is fW_2TO56_RSH.
+// 2. fRSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
+// the integer part of w, n, as a floating-point number.
+// The result of this fms is fNfloat.
+
+
+LOCAL_OBJECT_START(exp_table_1)
+data8 0x408633ce8fb9f87e // smallest dbl overflow arg
+data8 0x408633ce8fb9f87d // largest dbl arg to give normal dbl result
+data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
+data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
+//
+// Table 1 is 2^(index_1/128) where
+// index_1 goes from 0 to 15
+//
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_1)
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7
+LOCAL_OBJECT_START(exp_table_2)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_2)
+
+
+LOCAL_OBJECT_START(exp_p_table)
+data8 0x3f8111116da21757 //P5
+data8 0x3fa55555d787761c //P4
+data8 0x3fc5555555555414 //P3
+data8 0x3fdffffffffffd6a //P2
+LOCAL_OBJECT_END(exp_p_table)
+
+LOCAL_OBJECT_START(sinh_p_table)
+data8 0xB08AF9AE78C1239F, 0x00003FDE // A6
+data8 0xB8EF1D28926D8891, 0x00003FEC // A4
+data8 0x8888888888888412, 0x00003FF8 // A2
+data8 0xD732377688025BE9, 0x00003FE5 // A5
+data8 0xD00D00D00D4D39F2, 0x00003FF2 // A3
+data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // A1
+LOCAL_OBJECT_END(sinh_p_table)
-{ .mfi
- alloc r32 = ar.pfs,0,12,4,0
-(p0) fclass.m.unc p6,p0 = f8, 0xe3 //@qnan | @snan | @inf
- mov sinh_GR_all_ones = -1
-}
-;;
+.section .text
+GLOBAL_IEEE754_ENTRY(sinh)
-{ .mfb
- nop.m 999
-(p6) fma.d.s0 f8 = f8,f1,f8
-(p6) br.ret.spnt b0 ;;
+{ .mlx
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
-
-// Put 0.25 in f9; p6 true if x < 0.25
-// Make constant that will generate inexact when squared
{ .mlx
- setf.sig sinh_FR_all_ones = sinh_GR_all_ones
-(p0) movl r32 = 0x000000000000fffd ;;
+ addl rAD_TB1 = @ltoff(exp_table_1), gp
+ movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56)
}
+;;
{ .mfi
-(p0) setf.exp f9 = r32
-(p0) fclass.m.unc p7,p0 = f8, 0x07 //@zero
- nop.i 999 ;;
+ ld8 rAD_TB1 = [rAD_TB1]
+ fclass.m p6,p0 = f8,0x0b // Test for x=unorm
+ mov rExp_mask = 0x1ffff
}
-
-{ .mfb
- nop.m 999
-(p0) fmerge.s sinh_FR_X = f0,f8
-(p7) br.ret.spnt b0 ;;
-}
-
-// Identify denormal operands.
{ .mfi
- nop.m 999
- fclass.m.unc p10,p0 = f8, 0x09 // + denorm
- nop.i 999
-};;
-{ .mfi
- nop.m 999
- fclass.m.unc p11,p0 = f8, 0x0a // - denorm
- nop.i 999
+ mov rExp_bias = 0xffff
+ fnorm.s1 fNormX = f8
+ mov rExp_2tom56 = 0xffff-56
}
+;;
+
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
{ .mfi
- nop.m 999
-(p0) fmerge.s sinh_FR_SGNX = f8,f1
- nop.i 999 ;;
+ setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
+ fclass.m p8,p0 = f8,0x07 // Test for x=0
+ nop.i 999
}
+{ .mlx
+ setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
+ movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+}
+;;
{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
- nop.i 999 ;;
+ ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_NORM_ARG = [rAD_TB1],16
+ fclass.m p10,p0 = f8,0x1e3 // Test for x=inf, nan, NaT
+ nop.i 0
}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.sptk L(SINH_BY_TBL) ;;
-}
-
-
-L(SINH_BY_POLY):
-
-// POLY cannot overflow so there is no need to call __libm_error_support
-// Set tiny_SAFE (p7) to 1(0) if answer is not tiny
-// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
-// commented out.
-//(p0) movl r32 = 0x000000000000fc01
-//(p0) setf.exp f10 = r32
-//(p0) fcmp.lt.unc.s1 p6,p7 = f8,f10
-// Here is essentially the algorithm for SINH_BY_POLY. Care is take for the order
-// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
-// Note that ax = |x|
-// sinh(x) = sign * (series(e^x) - series(e^-x))/2
-// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
-// = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
-// + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
-// = sign * (ax + ax*p_odd + (ax*p_even))
-// = sign * (ax + Y_lo)
-// sinh(x) = sign * (Y_hi + Y_lo)
-// Get the values of P_x from the table
{ .mfb
-(p0) addl r34 = @ltoff(double_sinh_p_table), gp
-(p10) fma.d.s0 f8 = f8,f8,f8
-(p10) br.ret.spnt b0
+ setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
+ nop.f 0
+(p6) br.cond.spnt SINH_UNORM // Branch if x=unorm
}
;;
+SINH_COMMON:
+{ .mfi
+ ldfe fLn2_by_128_hi = [rAD_TB1],16
+ nop.f 0
+ nop.i 0
+}
{ .mfb
- ld8 r34 = [r34]
-(p11) fnma.d.s0 f8 = f8,f8,f8
-(p11) br.ret.spnt b0
+ setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63
+ nop.f 0
+(p8) br.ret.spnt b0 // Exit for x=0, result=x
}
;;
-// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
-{ .mmf
- nop.m 999
-(p0) ldfe sinh_FR_P1 = [r34],16
-(p0) fma.s1 sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_P2 = [r34],16 ;;
-(p0) ldfe sinh_FR_P3 = [r34],16
- nop.i 999 ;;
+{ .mfi
+ ldfe fLn2_by_128_lo = [rAD_TB1],16
+ nop.f 0
+ nop.i 0
}
-
-{ .mmi
-(p0) ldfe sinh_FR_P4 = [r34],16 ;;
-(p0) ldfe sinh_FR_P5 = [r34],16
- nop.i 999 ;;
+{ .mfb
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+(p10) fma.d.s0 f8 = f8,f1,f0 // Result if x=inf, nan, NaT
+(p10) br.ret.spnt b0 // quick exit for x=inf, nan, NaT
}
+;;
+// After that last load rAD_TB1 points to the beginning of table 1
{ .mfi
-(p0) ldfe sinh_FR_P6 = [r34],16
-(p0) fma.s1 sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0
- nop.i 999 ;;
+ nop.m 0
+ fcmp.eq.s0 p6,p0 = f8, f0 // Dummy to set D
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
}
+;;
-// Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3
- nop.i 999 ;;
+ nop.m 0
+ fmerge.s fAbsX = f0, fNormX // Form |x|
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1
- nop.i 999
+{ .mfb
+ cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+(p7) br.cond.spnt SINH_SMALL // Branch if 0 < |x| < 2^-2
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4
- nop.i 999 ;;
-}
+// W = X * Inv_log2_by_128
+// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
+// We actually add 1.10...0*2^(63+56) to X * Inv_log2 * 2^63 to do the same thing.
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0
- nop.i 999
+ add rAD_P = 0x180, rAD_TB1
+ fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
+ add rAD_TB2 = 0x100, rAD_TB1
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2
- nop.i 999 ;;
-}
+// Divide arguments into the following categories:
+// Certain Safe - 0.25 <= |x| <= MAX_DBL_NORM_ARG
+// Possible Overflow p14 - MAX_DBL_NORM_ARG < |x| < MIN_DBL_OFLOW_ARG
+// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= |x| < +inf
+//
+// If the input is really a double arg, then there will never be
+// "Possible Overflow" arguments.
+//
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0
- nop.i 999 ;;
+ ldfpd fP5, fP4 = [rAD_P] ,16
+ fcmp.ge.s1 p15,p14 = fAbsX,fMIN_DBL_OFLOW_ARG
+ nop.i 0
}
+;;
-// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_X, sinh_FR_peven, f0
- nop.i 999 ;;
-}
+// Nfloat = round_int(W)
+// The significand of fW_2TO56_RSH contains the rounded integer part of W,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into rN.
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_X, sinh_FR_podd, sinh_FR_Y_lo_temp
- nop.i 999 ;;
-}
+// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
+// Thus, fNfloat contains the floating point version of N
-// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH = sinh_FR_X, f1, sinh_FR_Y_lo
- nop.i 999 ;;
+ ldfpd fP3, fP2 = [rAD_P]
+(p14) fcmp.gt.unc.s1 p14,p0 = fAbsX,fMAX_DBL_NORM_ARG
+ nop.i 0
}
-// Dummy multiply to generate inexact
-{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999
-}
-
-// Calculate f8 = sign * (Y_hi + Y_lo)
-// Go to return
{ .mfb
- nop.m 999
-(p0) fma.d.s0 f8 = sinh_FR_SGNX,sinh_FR_SINH,f0
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
+(p15) br.cond.spnt SINH_CERTAIN_OVERFLOW
}
+;;
-
-L(SINH_BY_TBL):
-
-// Now that we are at TBL; so far all we know is that |x| >= 0.25.
-// The first two steps are the same for TBL and EXP, but if we are HUGE
-// we want to leave now.
-// Double-extended:
-// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
-// Double
-// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
-// Single
-// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010009 ;;
+{ .mfi
+ getf.sig rN = fW_2TO56_RSH
+ nop.f 0
+ mov rExp_bias_minus_1 = 0xfffe
}
+;;
+// rIndex_1 has index_1
+// rIndex_2_16 has index_2 * 16
+// rBiased_M has biased M (M + 0xfffe)
+
+// rM has true M
+// r = x - Nfloat * ln2_by_128_hi
+// f = 1 - Nfloat * ln2_by_128_lo
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ and rIndex_1 = 0x0f, rN
+ fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX
+ shr rM = rN, 0x7
}
-
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p6,p7 = sinh_FR_X,f9
- nop.i 999 ;;
+ and rIndex_2_16 = 0x70, rN
+ fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1
+ sub rN_neg = r0, rN
}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(SINH_HUGE) ;;
-}
-
-// r32 = 1
-// r34 = N-1
-// r35 = N
-// r36 = j
-// r37 = N+1
-
-// TBL can never overflow
-// sinh(x) = sinh(B+R)
-// = sinh(B)cosh(R) + cosh(B)sinh(R)
-//
-// ax = |x| = M*log2/64 + R
-// B = M*log2/64
-// M = 64*N + j
-// We will calcualte M and get N as (M-j)/64
-// The division is a shift.
-// exp(B) = exp(N*log2 + j*log2/64)
-// = 2^N * 2^(j*log2/64)
-// sinh(B) = 1/2(e^B -e^-B)
-// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
-// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
-// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
-// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
-// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
-// R = ax - M*log2/64
-// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
-// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
-// = 1 + p_odd + p_even
-// where the p_even uses the A coefficients and the p_even uses the B coefficients
-// So sinh(R) = 1 + p_odd + p_even -(1 -p_odd -p_even)/2 = p_odd
-// cosh(R) = 1 + p_even
-// sinh(B) = S_hi + S_lo
-// cosh(B) = C_hi
-// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
-// ******************************************************
-// STEP 1 (TBL and EXP)
-// ******************************************************
-// Get the following constants.
-// f9 = Inv_log2by64
-// f10 = log2by64_hi
-// f11 = log2by64_lo
+;;
{ .mmi
-(p0) adds r32 = 0x1,r0
-(p0) addl r34 = @ltoff(double_sinh_arg_reduction), gp
- nop.i 999
+ and rIndex_1_neg = 0x0f, rN_neg
+ add rBiased_M = rExp_bias_minus_1, rM
+ shr rM_neg = rN_neg, 0x7
}
-;;
-
{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+ and rIndex_2_16_neg = 0x70, rN_neg
+ add rAD_T2 = rAD_TB2, rIndex_2_16
+ shladd rAD_T1 = rIndex_1, 4, rAD_TB1
}
;;
-
-// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
-// put them in an exponent.
-// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
-// r39 = 0xffff + (N-1) = 0xffff +N -1
-// r40 = 0xffff - (N +1) = 0xffff -N -1
-
-{ .mlx
- nop.m 999
-(p0) movl r38 = 0x000000000000fffe ;;
-}
+// rAD_T1 has address of T1
+// rAD_T2 has address of T2
{ .mmi
-(p0) ldfe sinh_FR_Inv_log2by64 = [r34],16 ;;
-(p0) ldfe sinh_FR_log2by64_hi = [r34],16
- nop.i 999 ;;
-}
-
-{ .mbb
-(p0) ldfe sinh_FR_log2by64_lo = [r34],16
- nop.b 999
- nop.b 999 ;;
+ setf.exp f2M = rBiased_M
+ ldfe fT2 = [rAD_T2]
+ nop.i 0
}
-
-// Get the A coefficients
-// f9 = A_1
-// f10 = A_2
-// f11 = A_3
-
{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(double_sinh_ab_table), gp
- nop.i 999
+ add rBiased_M_neg = rExp_bias_minus_1, rM_neg
+ add rAD_T2_neg = rAD_TB2, rIndex_2_16_neg
+ shladd rAD_T1_neg = rIndex_1_neg, 4, rAD_TB1
}
;;
+// Create Scale = 2^M
+// Load T1 and T2
{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+ ldfe fT1 = [rAD_T1]
+ nop.m 0
+ nop.i 0
}
-;;
-
-
-// Calculate M and keep it as integer and floating point.
-// f38 = M = round-to-integer(x*Inv_log2by64)
-// sinh_FR_M = M = truncate(ax/(log2/64))
-// Put the significand of M in r35
-// and the floating point representation of M in sinh_FR_M
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_M = sinh_FR_X, sinh_FR_Inv_log2by64, f0
- nop.i 999
-}
-
-{ .mfi
-(p0) ldfe sinh_FR_A1 = [r34],16
- nop.f 999
- nop.i 999 ;;
+{ .mmf
+ setf.exp f2M_neg = rBiased_M_neg
+ ldfe fT2_neg = [rAD_T2_neg]
+ fma.s1 fF_neg = fNfloat, fLn2_by_128_lo, f1
}
+;;
{ .mfi
- nop.m 999
-(p0) fcvt.fx.s1 sinh_FR_M_temp = sinh_FR_M
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fRsq = fR, fR, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fnorm.s1 sinh_FR_M = sinh_FR_M_temp
- nop.i 999 ;;
+ ldfe fT1_neg = [rAD_T1_neg]
+ fma.s1 fP54 = fR, fP5, fP4
+ nop.i 0
}
+;;
{ .mfi
-(p0) getf.sig r35 = sinh_FR_M_temp
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fP32 = fR, fP3, fP2
+ nop.i 0
}
-
-// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
-// has a range of -32 thru 31.
-// r35 = M
-// r36 = j
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) and r36 = 0x3f, r35 ;;
-}
-
-// Calculate R
-// f13 = f44 - f12*f10 = ax - M*log2by64_hi
-// f14 = f13 - f8*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
-
{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X
- nop.i 999
+ nop.m 0
+ fnma.s1 fP54_neg = fR, fP5, fP4
+ nop.i 0
}
+;;
{ .mfi
-(p0) ldfe sinh_FR_A2 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fnma.s1 fP32_neg = fR, fP3, fP2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_R = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp
- nop.i 999
-}
-
-// Get the B coefficients
-// f15 = B_1
-// f32 = B_2
-// f33 = B_3
-
-{ .mmi
-(p0) ldfe sinh_FR_A3 = [r34],16 ;;
-(p0) ldfe sinh_FR_B1 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_B2 = [r34],16 ;;
-(p0) ldfe sinh_FR_B3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) shl r34 = r36, 0x2 ;;
-(p0) sxt1 r37 = r34 ;;
+ nop.m 0
+ fma.s1 fP5432 = fRsq, fP54, fP32
+ nop.i 0
}
-
-// ******************************************************
-// STEP 2 (TBL and EXP)
-// ******************************************************
-// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
-// f12 = R*R*R
-// f13 = R*R
-// f14 = R <== from above
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Rsq = sinh_FR_R, sinh_FR_R, f0
-(p0) shr r36 = r37, 0x2 ;;
-}
-
-// r34 = M-j = r35 - r36
-// r35 = N = (M-j)/64
-
-{ .mii
-(p0) sub r34 = r35, r36
- nop.i 999 ;;
-(p0) shr r35 = r34, 0x6 ;;
-}
-
-{ .mii
-(p0) sub r40 = r38, r35
-(p0) adds r37 = 0x1, r35
-(p0) add r39 = r38, r35 ;;
-}
-
-// Get the address of the J table, add the offset,
-// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
-// f32 = T(j)_hi
-// f33 = T(j)_lo
-// f34 = T(-j)_hi
-// f35 = T(-j)_lo
-
-{ .mmi
-(p0) sub r34 = r35, r32
-(p0) addl r37 = @ltoff(double_sinh_j_table), gp
- nop.i 999
+ nop.m 0
+ fma.s1 fS2 = fF,fT2,f0
+ nop.i 0
}
;;
-{ .mmi
- ld8 r37 = [r37]
- nop.m 999
- nop.i 999
-}
-;;
-
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0
- nop.i 999
-}
-
-// ******************************************************
-// STEP 3 Now decide if we need to branch to EXP
-// ******************************************************
-// Put 32 in f9; p6 true if x < 32
-// Go to EXP if |x| >= 32
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010004 ;;
-}
-
-// Calculate p_even
-// f34 = B_2 + Rsq *B_3
-// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
-// f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3, sinh_FR_B2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fS1 = f2M,fT1,f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1
- nop.i 999
+ nop.m 0
+ fma.s1 fP5432_neg = fRsq, fP54_neg, fP32_neg
+ nop.i 0
}
-
-// Calculate p_odd
-// f34 = A_2 + Rsq *A_3
-// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
-// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd_temp1 = sinh_FR_Rsq, sinh_FR_A3, sinh_FR_A2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fS1_neg = f2M_neg,fT1_neg,f0
+ nop.i 0
}
-
{ .mfi
-(p0) setf.exp sinh_FR_N_temp1 = r39
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fS2_neg = fF_neg,fT2_neg,f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven = sinh_FR_Rsq, sinh_FR_peven_temp2, f0
- nop.i 999
+ nop.m 0
+ fma.s1 fP = fRsq, fP5432, fR
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd_temp2 = sinh_FR_Rsq, sinh_FR_podd_temp1, sinh_FR_A1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fS = fS1,fS2,f0
+ nop.i 0
}
+;;
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fms.s1 fP_neg = fRsq, fP5432_neg, fR
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd = sinh_FR_podd_temp2, sinh_FR_Rcub, sinh_FR_R
- nop.i 999
+ nop.m 0
+ fma.s1 fS_neg = fS1_neg,fS2_neg,f0
+ nop.i 0
}
+;;
-// sinh_GR_mj contains the table offset for -j
-// sinh_GR_j contains the table offset for +j
-// p6 is true when j <= 0
-
-{ .mlx
-(p0) setf.exp sinh_FR_N_temp2 = r40
-(p0) movl r40 = 0x0000000000000020 ;;
+{ .mfb
+ nop.m 0
+ fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
+(p14) br.cond.spnt SINH_POSSIBLE_OVERFLOW
}
+;;
{ .mfi
-(p0) sub sinh_GR_mJ = r40, r36
-(p0) fmerge.se sinh_FR_spos = sinh_FR_N_temp1, f1
-(p0) adds sinh_GR_J = 0x20, r36 ;;
+ nop.m 0
+ fma.s1 fExp = fS, fP, fS
+ nop.i 0
}
-
-{ .mii
- nop.m 999
-(p0) shl sinh_GR_mJ = sinh_GR_mJ, 5 ;;
-(p0) add sinh_AD_mJ = r37, sinh_GR_mJ ;;
-}
-
-{ .mmi
- nop.m 999
-(p0) ldfe sinh_FR_Tmjhi = [sinh_AD_mJ],16
-(p0) shl sinh_GR_J = sinh_GR_J, 5 ;;
-}
-
{ .mfi
-(p0) ldfs sinh_FR_Tmjlo = [sinh_AD_mJ],16
-(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
-(p0) add sinh_AD_J = r37, sinh_GR_J ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_Tjhi = [sinh_AD_J],16 ;;
-(p0) ldfs sinh_FR_Tjlo = [sinh_AD_J],16
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fExp_neg = fS_neg, fP_neg, fS_neg
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
-(p0) fmerge.se sinh_FR_sneg = sinh_FR_N_temp2, f1
-(p7) br.cond.spnt L(SINH_BY_EXP) ;;
+ nop.m 0
+ fms.d.s0 f8 = fExp, f1, fExp_neg
+ br.ret.sptk b0 // Normal path exit
}
+;;
+// Here if 0 < |x| < 0.25
+SINH_SMALL:
{ .mfi
- nop.m 999
- nop.f 999
- nop.i 999 ;;
+ add rAD_T1 = 0x1a0, rAD_TB1
+ fcmp.lt.s1 p7, p8 = fNormX, f0 // Test sign of x
+ cmp.gt p6, p0 = -60, rExp_x // Test |x| < 2^(-60)
}
-
-// ******************************************************
-// If NOT branch to EXP
-// ******************************************************
-// Calculate S_hi and S_lo
-// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
-// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
-// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjlo)
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0
- nop.i 999 ;;
+ add rAD_T2 = 0x1d0, rAD_TB1
+ nop.f 0
+ nop.i 0
}
+;;
-{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi_temp
- nop.i 999
+{ .mmb
+ ldfe fA6 = [rAD_T1],16
+ ldfe fA5 = [rAD_T2],16
+(p6) br.cond.spnt SINH_VERY_SMALL // Branch if |x| < 2^(-60)
}
+;;
-// Calculate C_hi
-// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
-// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0
- nop.i 999 ;;
+{ .mmi
+ ldfe fA4 = [rAD_T1],16
+ ldfe fA3 = [rAD_T2],16
+ nop.i 0
}
+;;
-// sinh_FR_S_lo_temp1 = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
-// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjlo + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
-// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjlo + (sinh_FR_S_lo_temp1 )
-
-{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_lo_temp1 = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi
- nop.i 999
+{ .mmi
+ ldfe fA2 = [rAD_T1]
+ ldfe fA1 = [rAD_T2]
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_C_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fX3 = fNormX, fXsq, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1
- nop.i 999
+ nop.m 0
+ fma.s1 fX4 = fXsq, fXsq, f0
+ nop.i 0
}
-
-// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
-// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
-// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
-// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_lo_temp1 = sinh_FR_sneg, sinh_FR_Tmjlo, f0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA65 = fXsq, fA6, fA5
+ nop.i 0
}
-
-/////////// BUG FIX fma to fms -TK
{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_lo_temp3 = sinh_FR_spos, sinh_FR_Tjlo, sinh_FR_S_lo_temp1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA43 = fXsq, fA4, fA3
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_lo = sinh_FR_S_lo_temp3, f1, sinh_FR_S_lo_temp2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA21 = fXsq, fA2, fA1
+ nop.i 0
}
-
-// Y_hi = S_hi
-// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
-// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
-// sinh_FR_Y_lo = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA6543 = fX4, fA65, fA43
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA654321 = fX4, fA6543, fA21
+ nop.i 0
}
-
-// sinh_FR_SINH = Y_hi + Y_lo
-// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
+;;
// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999
+ nop.m 0
+ fmpy.s0 fTmp = fA6, fA6
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH = sinh_FR_S_hi, f1, sinh_FR_Y_lo
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fA654321, fX3, fNormX
+ br.ret.sptk b0 // Exit if 2^-60 < |x| < 0.25
}
+;;
+SINH_VERY_SMALL:
+// Here if 0 < |x| < 2^-60
+// Compute result by x + sgn(x)*x^2 to get properly rounded result
+.pred.rel "mutex",p7,p8
+{ .mfi
+ nop.m 0
+(p7) fnma.d.s0 f8 = fNormX, fNormX, fNormX // If x<0 result ~ x-x^2
+ nop.i 0
+}
{ .mfb
- nop.m 999
-(p0) fma.d.s0 f8 = sinh_FR_SGNX, sinh_FR_SINH,f0
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+(p8) fma.d.s0 f8 = fNormX, fNormX, fNormX // If x>0 result ~ x+x^2
+ br.ret.sptk b0 // Exit if |x| < 2^-60
}
+;;
-L(SINH_BY_EXP):
+SINH_POSSIBLE_OVERFLOW:
-// When p7 is true, we know that an overflow is not going to happen
-// When p7 is false, we must check for possible overflow
-// p7 is the over_SAFE flag
-// Y_hi = Tjhi
-// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
-// Scale = sign * 2^(N-1)
-// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
-// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_Y_lo_temp )
+// Here if fMAX_DBL_NORM_ARG < |x| < fMIN_DBL_OFLOW_ARG
+// This cannot happen if input is a double, only if input has higher precision.
+// Overflow is a possibility, not a certainty.
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_peven, f1, sinh_FR_podd
- nop.i 999
-}
-
-// Now we are in EXP. This is the only path where an overflow is possible
-// but not for certain. So this is the only path where over_SAFE has any use.
-// r34 still has N-1
-// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
-// There is a danger of double overflow if N-1 > 0x3fe = 1022
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x00000000000003fe ;;
-}
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest double, then we have
+// overflow
{ .mfi
-(p0) cmp.gt.unc p0,p7 = r34, r32
-(p0) fmerge.s sinh_FR_SCALE = sinh_FR_SGNX, sinh_FR_spos
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_Tjhi, sinh_FR_Y_lo_temp, sinh_FR_Tjlo
- nop.i 999 ;;
+ mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
}
+;;
-// f8 = answer = scale * (Y_hi + Y_lo)
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH_temp = sinh_FR_Y_lo, f1, sinh_FR_Tjhi
- nop.i 999 ;;
+ setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp
+ fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.d.s0 f44 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
}
+;;
-// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p7) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999 ;;
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
}
+;;
-// If over_SAFE is set, return
{ .mfb
- nop.m 999
-(p7) fmerge.s f8 = f44,f44
-(p7) br.ret.sptk b0 ;;
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt SINH_CERTAIN_OVERFLOW // Branch if overflow
}
+;;
-// Else see if we overflowed
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// If WRE is set then an overflow will not occur in EXP.
-// The input value that would cause a register (WRE) value to overflow is about 2^15
-// and this input would go into the HUGE path.
-// Answer with WRE is in f43.
-
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS
+ br.ret.sptk b0 // Exit if really no overflow
}
+;;
+SINH_CERTAIN_OVERFLOW:
{ .mfi
- nop.m 999
-(p0) fma.d.s2 f43 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
- nop.i 999 ;;
-}
-
-// 103FF => 103FF -FFFF = 400(true)
-// 400 + 3FF = 7FF, which is 1 more that the exponent of the largest
-// double (7FE). So 0 103FF 8000000000000000 is one ulp more than
-// largest double in register bias
-// Now set p8 if the answer with WRE is greater than or equal this value
-// Also set p9 if the answer with WRE is less than or equal to negative this value
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000103FF ;;
+ sub rTmp = rExp_mask, r0, 1
+ fcmp.lt.s1 p6, p7 = fNormX, f0 // Test for x < 0
+ nop.i 0
}
+;;
{ .mmf
- nop.m 999
-(p0) setf.exp f41 = r32
-(p0) fsetc.s2 0x7F,0x40 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
- nop.i 999
+ alloc r32=ar.pfs,1,4,4,0
+ setf.exp fTmp = rTmp
+ fmerge.s FR_X = f8,f8
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.ns f42 = f41, f41
- nop.i 999 ;;
-}
-
-// The error tag for overflow is 127
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p8) mov r47 = 127 ;;
+ mov GR_Parameter_TAG = 127
+(p6) fnma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and -INF result
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
-(p8) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) mov r47 = 127
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
-}
-
-// Dummy multiply to generate inexact
-{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
}
+;;
+// Here if x unorm
+SINH_UNORM:
{ .mfb
- nop.m 999
-(p0) fmerge.s f8 = f44,f44
-(p0) br.ret.sptk b0 ;;
-}
-
-L(SINH_HUGE):
-
-// for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
-// SAFE: SAFE is always 0 for HUGE
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000015dbf ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk SINH_COMMON
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1
- nop.i 999 ;;
-}
+GLOBAL_IEEE754_END(sinh)
-{ .mfi
- nop.m 999
-(p0) fma.d.s0 f44 = sinh_FR_signed_hi_lo, f9, f0
-(p0) mov r47 = 127
-}
-.endp sinh
-ASM_SIZE_DIRECTIVE(sinh)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__ieee754_sinh)
-#endif
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-.proc __libm_error_region
-__libm_error_region:
-L(SINH_ERROR_SUPPORT):
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-
-// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
@@ -1271,39 +860,32 @@ L(SINH_ERROR_SUPPORT):
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-
-// (2)
{ .mmi
- stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
-
.body
-// (3)
{ .mib
- stfd [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = f44 // STORE Parameter 3 on stack
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-
-// (4)
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
@@ -1316,8 +898,6 @@ L(SINH_ERROR_SUPPORT):
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinhf.S b/sysdeps/ia64/fpu/e_sinhf.S
index d5aa2dca16..4a407b7f3c 100644
--- a/sysdeps/ia64/fpu/e_sinhf.S
+++ b/sysdeps/ia64/fpu/e_sinhf.S
@@ -1,10 +1,10 @@
.file "sinhf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1305 +20,727 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
// History
-//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+//*********************************************************************
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 10/12/00 Update to set denormal operand and underflow flags
-// 1/22/01 Fixed to set inexact flag for small args.
+// 01/22/01 Fixed to set inexact flag for small args.
+// 05/02/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/20/02 Improved algorithm based on expf
//
// API
-//==============================================================
-// float = sinhf(float)
-// input floating point f8
-// output floating point f8
-//
-// Registers used
-//==============================================================
-// general registers:
-// r32 -> r47
-// predicate registers used:
-// p6 p7 p8 p9
-// floating-point registers used:
-// f9 -> f15; f32 -> f45;
-// f8 has input, then output
+//*********************************************************************
+// float sinhf(float)
//
// Overview of operation
-//==============================================================
-// There are four paths
-// 1. |x| < 0.25 SINH_BY_POLY
-// 2. |x| < 32 SINH_BY_TBL
-// 3. |x| < 2^14 SINH_BY_EXP
-// 4. |x_ >= 2^14 SINH_HUGE
-//
-// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
-// >= 1.0110001.... x 2^13
-// >= 11357.2166
+//*********************************************************************
+// Case 1: 0 < |x| < 2^-60
+// Result = x, computed by x+sgn(x)*x^2 to handle flags and rounding
//
-// But for double we get infinity for x >= 408633ce8fb9f87e
-// >= 1.0110...x 2^9
-// >= +7.10476e+002
+// Case 2: 2^-60 < |x| < 0.25
+// Evaluate sinh(x) by a 9th order polynomial
+// Care is taken with the order of multiplication; and A2 is not exactly 1/5!,
+// A3 is not exactly 1/7!, etc.
+// sinh(x) = x + (A1*x^3 + A2*x^5 + A3*x^7 + A4*x^9)
//
-// And for single we get infinity for x >= 42b3a496
-// >= 1.0110... 2^6
-// >= 89.8215
+// Case 3: 0.25 < |x| < 89.41598
+// Algorithm is based on the identity sinh(x) = ( exp(x) - exp(-x) ) / 2.
+// The algorithm for exp is described below. There are a number of
+// economies from evaluating both exp(x) and exp(-x). Although we
+// are evaluating both quantities, only where the quantities diverge do we
+// duplicate the computations. The basic algorithm for exp(x) is described
+// below.
//
-// SAFE: If there is danger of overflow set SAFE to 0
-// NOT implemented: if there is danger of underflow, set SAFE to 0
-// SAFE for all paths listed below
-//
-// 1. SINH_BY_POLY
-// ===============
-// If |x| is less than the tiny threshold, then clear SAFE
-// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
-// register-biased, this is fc01
-// For single, the tiny threshold is -126 = -7e => -7e + ffff = ff81
-// If |x| < tiny threshold, set SAFE = 0
-//
-// 2. SINH_BY_TBL
-// =============
-// SAFE: SAFE is always 1 for TBL;
+// Take the input x. w is "how many log2/64 in x?"
+// w = x * 64/log2
+// NJ = int(w)
+// x = NJ*log2/64 + R
+
+// NJ = 64*n + j
+// x = n*log2 + (log2/64)*j + R
//
-// 3. SINH_BY_EXP
-// ==============
-// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
-// r34 has N-1; 16382 is in register biased form, 0x13ffd
-// There is danger of double overflow if N-1 > 0x3fe
-// in register biased form, 0x103fd
-// Analagously, there is danger of single overflow if N-1 > 0x7e
-// in register biased form, 0x1007d
-// SAFE: If there is danger of overflow set SAFE to 0
+// So, exp(x) = 2^n * 2^(j/64)* exp(R)
//
-// 4. SINH_HUGE
-// ============
-// SAFE: SAFE is always 0 for HUGE
+// T = 2^n * 2^(j/64)
+// Construct 2^n
+// Get 2^(j/64) table
+// In fact, all entries of the 2^(j/64) table are stored in DP with
+// their exponent bits set to 0, so multiplication by 2^n can be
+// performed by a logical "or" with the bits representing 2^n
+
+// exp(R) = 1 + (exp(R) - 1)
+// P = exp(R) - 1 approximated by Taylor series of 3rd degree
+// P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
//
-#include "libm_support.h"
-
-// Assembly macros
-//==============================================================
-sinh_FR_X = f44
-sinh_FR_X2 = f9
-sinh_FR_X4 = f10
-sinh_FR_SGNX = f40
-sinh_FR_all_ones = f45
-sinh_FR_tmp = f42
-
-sinh_FR_Inv_log2by64 = f9
-sinh_FR_log2by64_lo = f11
-sinh_FR_log2by64_hi = f10
-
-sinh_FR_A1 = f9
-sinh_FR_A2 = f10
-sinh_FR_A3 = f11
-
-sinh_FR_Rcub = f12
-sinh_FR_M_temp = f13
-sinh_FR_R_temp = f13
-sinh_FR_Rsq = f13
-sinh_FR_R = f14
-
-sinh_FR_M = f38
-
-sinh_FR_B1 = f15
-sinh_FR_B2 = f32
-sinh_FR_B3 = f33
+// The final result is reconstructed as follows
+// exp(x) = T + T*P
-sinh_FR_peven_temp1 = f34
-sinh_FR_peven_temp2 = f35
-sinh_FR_peven = f36
+// Special values
+//*********************************************************************
+// sinhf(+0) = +0
+// sinhf(-0) = -0
-sinh_FR_podd_temp1 = f34
-sinh_FR_podd_temp2 = f35
-sinh_FR_podd = f37
+// sinhf(+qnan) = +qnan
+// sinhf(-qnan) = -qnan
+// sinhf(+snan) = +qnan
+// sinhf(-snan) = -qnan
-sinh_FR_poly_podd_temp1 = f11
-sinh_FR_poly_podd_temp2 = f13
-sinh_FR_poly_peven_temp1 = f11
-sinh_FR_poly_peven_temp2 = f13
+// sinhf(-inf) = -inf
+// sinhf(+inf) = +inf
-sinh_FR_J_temp = f9
-sinh_FR_J = f10
-
-sinh_FR_Mmj = f39
-
-sinh_FR_N_temp1 = f11
-sinh_FR_N_temp2 = f12
-sinh_FR_N = f13
-
-sinh_FR_spos = f14
-sinh_FR_sneg = f15
-
-sinh_FR_Tjhi = f32
-sinh_FR_Tjlo = f33
-sinh_FR_Tmjhi = f34
-sinh_FR_Tmjlo = f35
-
-sinh_GR_mJ = r35
-sinh_GR_J = r36
-
-sinh_AD_mJ = r38
-sinh_AD_J = r39
-sinh_GR_all_ones = r40
-
-sinh_FR_S_hi = f9
-sinh_FR_S_hi_temp = f10
-sinh_FR_S_lo_temp1 = f11
-sinh_FR_S_lo_temp2 = f12
-sinh_FR_S_lo_temp3 = f13
-
-sinh_FR_S_lo = f38
-sinh_FR_C_hi = f39
-
-sinh_FR_C_hi_temp1 = f10
-sinh_FR_Y_hi = f11
-sinh_FR_Y_lo_temp = f12
-sinh_FR_Y_lo = f13
-sinh_FR_SINH = f9
-
-sinh_FR_P1 = f14
-sinh_FR_P2 = f15
-sinh_FR_P3 = f32
-sinh_FR_P4 = f33
-sinh_FR_P5 = f34
-sinh_FR_P6 = f35
-
-sinh_FR_TINY_THRESH = f9
-
-sinh_FR_SINH_temp = f10
-sinh_FR_SCALE = f11
-
-sinh_FR_signed_hi_lo = f10
-
-
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
+// Overflow and Underflow
+//*********************************************************************
+// sinhf(x) = largest single normal when
+// x = 89.41598 = 0x42b2d4fc
+//
+// Underflow is handled as described in case 1 above
-GR_Parameter_X = r44
-GR_Parameter_Y = r45
-GR_Parameter_RESULT = r46
+// Registers used
+//*********************************************************************
+// Floating Point registers used:
+// f8 input, output
+// f6,f7, f9 -> f15, f32 -> f45
-// Data tables
-//==============================================================
+// General registers used:
+// r2, r3, r16 -> r38
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// Predicate registers used:
+// p6 -> p15
+// Assembly macros
+//*********************************************************************
+// integer registers used
+// scratch
+rNJ = r2
+rNJ_neg = r3
+
+rJ_neg = r16
+rN_neg = r17
+rSignexp_x = r18
+rExp_x = r18
+rExp_mask = r19
+rExp_bias = r20
+rAd1 = r21
+rAd2 = r22
+rJ = r23
+rN = r24
+rTblAddr = r25
+rA3 = r26
+rExpHalf = r27
+rLn2Div64 = r28
+rGt_ln = r29
+r17ones_m1 = r29
+rRightShifter = r30
+rJ_mask = r30
+r64DivLn2 = r31
+rN_mask = r31
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+// scratch
+fRightShifter = f6
+f64DivLn2 = f7
+fNormX = f9
+fNint = f10
+fN = f11
+fR = f12
+fLn2Div64 = f13
+fA2 = f14
+fA3 = f15
+// stacked
+fP = f32
+fT = f33
+fMIN_SGL_OFLOW_ARG = f34
+fMAX_SGL_NORM_ARG = f35
+fRSqr = f36
+fA1 = f37
+fA21 = f37
+fA4 = f38
+fA43 = f38
+fA4321 = f38
+fX4 = f39
+fTmp = f39
+fGt_pln = f39
+fWre_urm_f8 = f40
+fXsq = f40
+fP_neg = f41
+fX3 = f41
+fT_neg = f42
+fExp = f43
+fExp_neg = f44
+fAbsX = f45
+
+
+RODATA
.align 16
-double_sinh_arg_reduction:
-ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
- data8 0xB8AA3B295C17F0BC, 0x00004005
- data8 0xB17217F7D1000000, 0x00003FF8
- data8 0xCF79ABC9E3B39804, 0x00003FD0
-ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
-
-double_sinh_p_table:
-ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
- data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
- data8 0x8888888888888412, 0x00003FF8
- data8 0xD00D00D00D4D39F2, 0x00003FF2
- data8 0xB8EF1D28926D8891, 0x00003FEC
- data8 0xD732377688025BE9, 0x00003FE5
- data8 0xB08AF9AE78C1239F, 0x00003FDE
-ASM_SIZE_DIRECTIVE(double_sinh_p_table)
-
-double_sinh_ab_table:
-ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
- data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
- data8 0x88888888884ECDD5, 0x00003FF8
- data8 0xD00D0C6DCC26A86B, 0x00003FF2
- data8 0x8000000000000002, 0x00003FFE
- data8 0xAAAAAAAAAA402C77, 0x00003FFA
- data8 0xB60B6CC96BDB144D, 0x00003FF5
-ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
-
-double_sinh_j_table:
-ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
- data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
- data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
- data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
- data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
- data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
- data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
- data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
- data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
- data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
- data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
- data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
- data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
- data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
- data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
- data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
- data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
- data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
- data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
- data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
- data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
- data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
- data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
- data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
- data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
- data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
- data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
- data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
- data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
- data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
- data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
- data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
- data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
- data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
- data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
- data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
- data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
- data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
- data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
- data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
- data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
- data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
- data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
- data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
- data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
- data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
- data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
- data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
- data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
- data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
- data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
- data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
- data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
- data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
- data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
- data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
- data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
- data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
- data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
- data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
- data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
- data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
- data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
- data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
- data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
- data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
-ASM_SIZE_DIRECTIVE(double_sinh_j_table)
-
-.align 32
-.global sinhf#
-
-.section .text
-.proc sinhf#
-.align 32
-
-sinhf:
-#ifdef _LIBC
-.global __ieee754_sinhf
-.type __ieee754_sinhf,@function
-__ieee754_sinhf:
-#endif
-
-// X infinity or NAN?
-// Take invalid fault if enabled
-
-{ .mfi
- alloc r32 = ar.pfs,0,12,4,0
-(p0) fclass.m.unc p6,p0 = f8, 0xe3 //@qnan | @snan | @inf
- mov sinh_GR_all_ones = -1
-}
-;;
+LOCAL_OBJECT_START(_sinhf_table)
+data4 0x42b2d4fd // Smallest single arg to overflow single result
+data4 0x42b2d4fc // Largest single arg to give normal single result
+data4 0x00000000 // pad
+data4 0x00000000 // pad
+//
+// 2^(j/64) table, j goes from 0 to 63
+data8 0x0000000000000000 // 2^(0/64)
+data8 0x00002C9A3E778061 // 2^(1/64)
+data8 0x000059B0D3158574 // 2^(2/64)
+data8 0x0000874518759BC8 // 2^(3/64)
+data8 0x0000B5586CF9890F // 2^(4/64)
+data8 0x0000E3EC32D3D1A2 // 2^(5/64)
+data8 0x00011301D0125B51 // 2^(6/64)
+data8 0x0001429AAEA92DE0 // 2^(7/64)
+data8 0x000172B83C7D517B // 2^(8/64)
+data8 0x0001A35BEB6FCB75 // 2^(9/64)
+data8 0x0001D4873168B9AA // 2^(10/64)
+data8 0x0002063B88628CD6 // 2^(11/64)
+data8 0x0002387A6E756238 // 2^(12/64)
+data8 0x00026B4565E27CDD // 2^(13/64)
+data8 0x00029E9DF51FDEE1 // 2^(14/64)
+data8 0x0002D285A6E4030B // 2^(15/64)
+data8 0x000306FE0A31B715 // 2^(16/64)
+data8 0x00033C08B26416FF // 2^(17/64)
+data8 0x000371A7373AA9CB // 2^(18/64)
+data8 0x0003A7DB34E59FF7 // 2^(19/64)
+data8 0x0003DEA64C123422 // 2^(20/64)
+data8 0x0004160A21F72E2A // 2^(21/64)
+data8 0x00044E086061892D // 2^(22/64)
+data8 0x000486A2B5C13CD0 // 2^(23/64)
+data8 0x0004BFDAD5362A27 // 2^(24/64)
+data8 0x0004F9B2769D2CA7 // 2^(25/64)
+data8 0x0005342B569D4F82 // 2^(26/64)
+data8 0x00056F4736B527DA // 2^(27/64)
+data8 0x0005AB07DD485429 // 2^(28/64)
+data8 0x0005E76F15AD2148 // 2^(29/64)
+data8 0x0006247EB03A5585 // 2^(30/64)
+data8 0x0006623882552225 // 2^(31/64)
+data8 0x0006A09E667F3BCD // 2^(32/64)
+data8 0x0006DFB23C651A2F // 2^(33/64)
+data8 0x00071F75E8EC5F74 // 2^(34/64)
+data8 0x00075FEB564267C9 // 2^(35/64)
+data8 0x0007A11473EB0187 // 2^(36/64)
+data8 0x0007E2F336CF4E62 // 2^(37/64)
+data8 0x00082589994CCE13 // 2^(38/64)
+data8 0x000868D99B4492ED // 2^(39/64)
+data8 0x0008ACE5422AA0DB // 2^(40/64)
+data8 0x0008F1AE99157736 // 2^(41/64)
+data8 0x00093737B0CDC5E5 // 2^(42/64)
+data8 0x00097D829FDE4E50 // 2^(43/64)
+data8 0x0009C49182A3F090 // 2^(44/64)
+data8 0x000A0C667B5DE565 // 2^(45/64)
+data8 0x000A5503B23E255D // 2^(46/64)
+data8 0x000A9E6B5579FDBF // 2^(47/64)
+data8 0x000AE89F995AD3AD // 2^(48/64)
+data8 0x000B33A2B84F15FB // 2^(49/64)
+data8 0x000B7F76F2FB5E47 // 2^(50/64)
+data8 0x000BCC1E904BC1D2 // 2^(51/64)
+data8 0x000C199BDD85529C // 2^(52/64)
+data8 0x000C67F12E57D14B // 2^(53/64)
+data8 0x000CB720DCEF9069 // 2^(54/64)
+data8 0x000D072D4A07897C // 2^(55/64)
+data8 0x000D5818DCFBA487 // 2^(56/64)
+data8 0x000DA9E603DB3285 // 2^(57/64)
+data8 0x000DFC97337B9B5F // 2^(58/64)
+data8 0x000E502EE78B3FF6 // 2^(59/64)
+data8 0x000EA4AFA2A490DA // 2^(60/64)
+data8 0x000EFA1BEE615A27 // 2^(61/64)
+data8 0x000F50765B6E4540 // 2^(62/64)
+data8 0x000FA7C1819E90D8 // 2^(63/64)
+LOCAL_OBJECT_END(_sinhf_table)
+
+LOCAL_OBJECT_START(sinh_p_table)
+data8 0x3ec749d84bc96d7d // A4
+data8 0x3f2a0168d09557cf // A3
+data8 0x3f811111326ed15a // A2
+data8 0x3fc55555552ed1e2 // A1
+LOCAL_OBJECT_END(sinh_p_table)
-{ .mfb
- nop.m 999
-(p6) fma.s.s0 f8 = f8,f1,f8
-(p6) br.ret.spnt b0 ;;
-}
+.section .text
+GLOBAL_IEEE754_ENTRY(sinhf)
-// Put 0.25 in f9; p6 true if x < 0.25
-// Make constant that will generate inexact when squared
{ .mlx
- setf.sig sinh_FR_all_ones = sinh_GR_all_ones
-(p0) movl r32 = 0x000000000000fffd ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
-(p0) fclass.m.unc p7,p0 = f8, 0x07 //@zero
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p0) fmerge.s sinh_FR_X = f0,f8
-(p7) br.ret.spnt b0 ;;
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
}
-
-// Identify denormal operands.
-{ .mfi
- nop.m 999
- fclass.m.unc p10,p0 = f8, 0x09 // + denorm
- nop.i 999
-};;
-{ .mfi
- nop.m 999
- fclass.m.unc p11,p0 = f8, 0x0a // - denorm
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fmerge.s sinh_FR_SGNX = f8,f1
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
- nop.i 999 ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.sptk L(SINH_BY_TBL) ;;
-}
-
-
-L(SINH_BY_POLY):
-
-// POLY cannot overflow so there is no need to call __libm_error_support
-// Set tiny_SAFE (p7) to 1(0) if answer is not tiny
-// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
-// commented out.
-//(p0) movl r32 = 0x000000000000fc01
-//(p0) setf.exp f10 = r32
-//(p0) fcmp.lt.unc.s1 p6,p7 = f8,f10
-// Here is essentially the algorithm for SINH_BY_POLY. Care is take for the order
-// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
-// Note that ax = |x|
-// sinh(x) = sign * (series(e^x) - series(e^-x))/2
-// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
-// = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
-// + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
-// = sign * (ax + ax*p_odd + (ax*p_even))
-// = sign * (ax + Y_lo)
-// sinh(x) = sign * (Y_hi + Y_lo)
-// Get the values of P_x from the table
-{ .mfb
-(p0) addl r34 = @ltoff(double_sinh_p_table), gp
-(p10) fma.s.s0 f8 = f8,f8,f8
-(p10) br.ret.spnt b0
-}
-;;
-
-{ .mfb
- ld8 r34 = [r34]
-(p11) fnma.s.s0 f8 = f8,f8,f8
-(p11) br.ret.spnt b0
+{ .mlx
+ addl rTblAddr = @ltoff(_sinhf_table),gp
+ movl rRightShifter = 0x43E8000000000000 // DP Right Shifter
}
;;
-// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
-{ .mmf
- nop.m 999
-(p0) ldfe sinh_FR_P1 = [r34],16
-(p0) fma.s1 sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_P2 = [r34],16 ;;
-(p0) ldfe sinh_FR_P3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_P4 = [r34],16 ;;
-(p0) ldfe sinh_FR_P5 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mfi
-(p0) ldfe sinh_FR_P6 = [r34],16
-(p0) fma.s1 sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0
- nop.i 999 ;;
-}
-
-// Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3
- nop.i 999 ;;
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ fclass.m p6, p0 = f8, 0x0b // Test for x=unorm
+ addl rA3 = 0x3E2AA, r0 // high bits of 1.0/6.0 rounded to SP
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1
- nop.i 999
+ nop.m 0
+ fnorm.s1 fNormX = f8 // normalized x
+ addl rExpHalf = 0xFFFE, r0 // exponent of 1/2
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4
- nop.i 999 ;;
+ setf.d f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
+ fclass.m p15, p0 = f8, 0x1e3 // test for NaT,NaN,Inf
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0
- nop.i 999
+{ .mlx
+ // load Right Shifter to FP reg
+ setf.d fRightShifter = rRightShifter
+ movl rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2
- nop.i 999 ;;
+ mov rExp_mask = 0x1ffff
+ fcmp.eq.s1 p13, p0 = f0, f8 // test for x = 0.0
+ shl rA3 = rA3, 12 // 0x3E2AA000, approx to 1.0/6.0 in SP
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt SINH_UNORM // Branch if x=unorm
}
+;;
-// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
+SINH_COMMON:
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_X, sinh_FR_peven, f0
- nop.i 999 ;;
+ setf.exp fA2 = rExpHalf // load A2 to FP reg
+ nop.f 0
+ mov rExp_bias = 0xffff
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_X, sinh_FR_podd, sinh_FR_Y_lo_temp
- nop.i 999 ;;
+{ .mfb
+ setf.d fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
+(p15) fma.s.s0 f8 = f8, f1, f0 // result if x = NaT,NaN,Inf
+(p15) br.ret.spnt b0 // exit here if x = NaT,NaN,Inf
}
+;;
-// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH = sinh_FR_X, f1, sinh_FR_Y_lo
- nop.i 999 ;;
+ // min overflow and max normal threshold
+ ldfps fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
+ nop.f 0
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
}
-// Dummy multiply to generate inexact
-{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999
-}
-
-// Calculate f8 = sign * (Y_hi + Y_lo)
-// Go to return
{ .mfb
- nop.m 999
-(p0) fma.s.s0 f8 = sinh_FR_SGNX,sinh_FR_SINH,f0
-(p0) br.ret.sptk b0 ;;
-}
-
-
-L(SINH_BY_TBL):
-
-// Now that we are at TBL; so far all we know is that |x| >= 0.25.
-// The first two steps are the same for TBL and EXP, but if we are HUGE
-// we want to leave now.
-// Double-extended:
-// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
-// Double
-// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
-// Single
-// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010006 ;;
+ setf.s fA3 = rA3 // load A3 to FP reg
+ nop.f 0
+(p13) br.ret.spnt b0 // exit here if x=0.0, return x
}
+;;
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
+ fmerge.s fAbsX = f0, fNormX // Form |x|
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p6,p7 = sinh_FR_X,f9
- nop.i 999 ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(SINH_HUGE) ;;
-}
-
-// r32 = 1
-// r34 = N-1
-// r35 = N
-// r36 = j
-// r37 = N+1
-
-// TBL can never overflow
-// sinh(x) = sinh(B+R)
-// = sinh(B)cosh(R) + cosh(B)sinh(R)
-//
-// ax = |x| = M*log2/64 + R
-// B = M*log2/64
-// M = 64*N + j
-// We will calcualte M and get N as (M-j)/64
-// The division is a shift.
-// exp(B) = exp(N*log2 + j*log2/64)
-// = 2^N * 2^(j*log2/64)
-// sinh(B) = 1/2(e^B -e^-B)
-// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
-// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
-// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
-// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
-// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
-// R = ax - M*log2/64
-// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
-// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
-// = 1 + p_odd + p_even
-// where the p_even uses the A coefficients and the p_even uses the B coefficients
-// So sinh(R) = 1 + p_odd + p_even -(1 -p_odd -p_even)/2 = p_odd
-// cosh(R) = 1 + p_even
-// sinh(B) = S_hi + S_lo
-// cosh(B) = C_hi
-// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
-// ******************************************************
-// STEP 1 (TBL and EXP)
-// ******************************************************
-// Get the following constants.
-// f9 = Inv_log2by64
-// f10 = log2by64_hi
-// f11 = log2by64_lo
-
-{ .mmi
-(p0) adds r32 = 0x1,r0
-(p0) addl r34 = @ltoff(double_sinh_arg_reduction), gp
- nop.i 999
+ nop.m 0
+ // x*(64/ln(2)) + Right Shifter
+ fma.s1 fNint = fNormX, f64DivLn2, fRightShifter
+ add rTblAddr = 8, rTblAddr
}
-;;
-
-{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+{ .mfb
+ cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+(p7) br.cond.spnt SINH_SMALL // Branch if 0 < |x| < 2^-2
}
;;
-
-// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
-// put them in an exponent.
-// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
-// r39 = 0xffff + (N-1) = 0xffff +N -1
-// r40 = 0xffff - (N +1) = 0xffff -N -1
-
-{ .mlx
- nop.m 999
-(p0) movl r38 = 0x000000000000fffe ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_Inv_log2by64 = [r34],16 ;;
-(p0) ldfe sinh_FR_log2by64_hi = [r34],16
- nop.i 999 ;;
-}
-
-{ .mbb
-(p0) ldfe sinh_FR_log2by64_lo = [r34],16
- nop.b 999
- nop.b 999 ;;
-}
-
-// Get the A coefficients
-// f9 = A_1
-// f10 = A_2
-// f11 = A_3
-
-{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(double_sinh_ab_table), gp
- nop.i 999
+{ .mfi
+ nop.m 0
+ // check for overflow
+ fcmp.ge.s1 p12, p13 = fAbsX, fMIN_SGL_OFLOW_ARG
+ mov rJ_mask = 0x3f // 6-bit mask for J
}
;;
-{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+{ .mfb
+ nop.m 0
+ fms.s1 fN = fNint, f1, fRightShifter // n in FP register
+ // branch out if overflow
+(p12) br.cond.spnt SINH_CERTAIN_OVERFLOW
}
;;
-
-// Calculate M and keep it as integer and floating point.
-// f38 = M = round-to-integer(x*Inv_log2by64)
-// sinh_FR_M = M = truncate(ax/(log2/64))
-// Put the significand of M in r35
-// and the floating point representation of M in sinh_FR_M
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_M = sinh_FR_X, sinh_FR_Inv_log2by64, f0
- nop.i 999
+ getf.sig rNJ = fNint // bits of n, j
+ // check for possible overflow
+ fcmp.gt.s1 p13, p0 = fAbsX, fMAX_SGL_NORM_ARG
+ nop.i 0
}
+;;
{ .mfi
-(p0) ldfe sinh_FR_A1 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ addl rN = 0xFFBF - 63, rNJ // biased and shifted n-1,j
+ fnma.s1 fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
+ and rJ = rJ_mask, rNJ // bits of j
}
-
{ .mfi
- nop.m 999
-(p0) fcvt.fx.s1 sinh_FR_M_temp = sinh_FR_M
- nop.i 999 ;;
+ sub rNJ_neg = r0, rNJ // bits of n, j for -x
+ nop.f 0
+ andcm rN_mask = -1, rJ_mask // 0xff...fc0 to mask N
}
+;;
{ .mfi
- nop.m 999
-(p0) fnorm.s1 sinh_FR_M = sinh_FR_M_temp
- nop.i 999 ;;
+ shladd rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
+ nop.f 0
+ and rN = rN_mask, rN // biased, shifted n-1
}
-
{ .mfi
-(p0) getf.sig r35 = sinh_FR_M_temp
- nop.f 999
- nop.i 999 ;;
+ addl rN_neg = 0xFFBF - 63, rNJ_neg // -x biased, shifted n-1,j
+ nop.f 0
+ and rJ_neg = rJ_mask, rNJ_neg // bits of j for -x
}
-
-// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
-// has a range of -32 thru 31.
-// r35 = M
-// r36 = j
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) and r36 = 0x3f, r35 ;;
-}
-
-// Calculate R
-// f13 = f44 - f12*f10 = ax - M*log2by64_hi
-// f14 = f13 - f8*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
+;;
{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X
- nop.i 999
+ ld8 rJ = [rJ] // Table value
+ nop.f 0
+ shl rN = rN, 46 // 2^(n-1) bits in DP format
}
-
{ .mfi
-(p0) ldfe sinh_FR_A2 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ shladd rJ_neg = rJ_neg, 3, rTblAddr // addr in 2^(j/64) table -x
+ nop.f 0
+ and rN_neg = rN_mask, rN_neg // biased, shifted n-1 for -x
}
+;;
{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_R = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp
- nop.i 999
+ ld8 rJ_neg = [rJ_neg] // Table value for -x
+ nop.f 0
+ shl rN_neg = rN_neg, 46 // 2^(n-1) bits in DP format for -x
}
-
-// Get the B coefficients
-// f15 = B_1
-// f32 = B_2
-// f33 = B_3
-
-{ .mmi
-(p0) ldfe sinh_FR_A3 = [r34],16 ;;
-(p0) ldfe sinh_FR_B1 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_B2 = [r34],16 ;;
-(p0) ldfe sinh_FR_B3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) shl r34 = r36, 0x2 ;;
-(p0) sxt1 r37 = r34 ;;
-}
-
-// ******************************************************
-// STEP 2 (TBL and EXP)
-// ******************************************************
-// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
-// f12 = R*R*R
-// f13 = R*R
-// f14 = R <== from above
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Rsq = sinh_FR_R, sinh_FR_R, f0
-(p0) shr r36 = r37, 0x2 ;;
-}
-
-// r34 = M-j = r35 - r36
-// r35 = N = (M-j)/64
-
-{ .mii
-(p0) sub r34 = r35, r36
- nop.i 999 ;;
-(p0) shr r35 = r34, 0x6 ;;
-}
-
-{ .mii
-(p0) sub r40 = r38, r35
-(p0) adds r37 = 0x1, r35
-(p0) add r39 = r38, r35 ;;
-}
-
-// Get the address of the J table, add the offset,
-// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
-// f32 = T(j)_hi
-// f33 = T(j)_lo
-// f34 = T(-j)_hi
-// f35 = T(-j)_lo
-
-{ .mmi
-(p0) sub r34 = r35, r32
-(p0) addl r37 = @ltoff(double_sinh_j_table), gp
- nop.i 999
+ or rN = rN, rJ // bits of 2^n * 2^(j/64) in DP format
+ nop.f 0
+ nop.i 0
}
;;
-{ .mmi
- ld8 r37 = [r37]
- nop.m 999
- nop.i 999
+{ .mmf
+ setf.d fT = rN // 2^(n-1) * 2^(j/64)
+ or rN_neg = rN_neg, rJ_neg // -x bits of 2^n * 2^(j/64) in DP
+ fma.s1 fRSqr = fR, fR, f0 // R^2
}
;;
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0
- nop.i 999
-}
-
-// ******************************************************
-// STEP 3 Now decide if we need to branch to EXP
-// ******************************************************
-// Put 32 in f9; p6 true if x < 32
-// Go to EXP if |x| >= 32
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010004 ;;
-}
-
-// Calculate p_even
-// f34 = B_2 + Rsq *B_3
-// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
-// f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3, sinh_FR_B2
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1
- nop.i 999
-}
-
-// Calculate p_odd
-// f34 = A_2 + Rsq *A_3
-// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
-// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd_temp1 = sinh_FR_Rsq, sinh_FR_A3, sinh_FR_A2
- nop.i 999 ;;
-}
-
{ .mfi
-(p0) setf.exp sinh_FR_N_temp1 = r39
- nop.f 999
- nop.i 999 ;;
+ setf.d fT_neg = rN_neg // 2^(n-1) * 2^(j/64) for -x
+ fma.s1 fP = fA3, fR, fA2 // A3*R + A2
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven = sinh_FR_Rsq, sinh_FR_peven_temp2, f0
- nop.i 999
-}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd_temp2 = sinh_FR_Rsq, sinh_FR_podd_temp1, sinh_FR_A1
- nop.i 999 ;;
+ nop.m 0
+ fnma.s1 fP_neg = fA3, fR, fA2 // A3*R + A2 for -x
+ nop.i 0
}
+;;
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fP = fP, fRSqr, fR // P = (A3*R + A2)*R^2 + R
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd = sinh_FR_podd_temp2, sinh_FR_Rcub, sinh_FR_R
- nop.i 999
-}
-
-// sinh_GR_mj contains the table offset for -j
-// sinh_GR_j contains the table offset for +j
-// p6 is true when j <= 0
-
-{ .mlx
-(p0) setf.exp sinh_FR_N_temp2 = r40
-(p0) movl r40 = 0x0000000000000020 ;;
+ nop.m 0
+ fms.s1 fP_neg = fP_neg, fRSqr, fR // P = (A3*R + A2)*R^2 + R, -x
+ nop.i 0
}
+;;
{ .mfi
-(p0) sub sinh_GR_mJ = r40, r36
-(p0) fmerge.se sinh_FR_spos = sinh_FR_N_temp1, f1
-(p0) adds sinh_GR_J = 0x20, r36 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) shl sinh_GR_mJ = sinh_GR_mJ, 5 ;;
-(p0) add sinh_AD_mJ = r37, sinh_GR_mJ ;;
-}
-
-{ .mmi
- nop.m 999
-(p0) ldfe sinh_FR_Tmjhi = [sinh_AD_mJ],16
-(p0) shl sinh_GR_J = sinh_GR_J, 5 ;;
+ nop.m 0
+ fmpy.s0 fTmp = fLn2Div64, fLn2Div64 // Force inexact
+ nop.i 0
}
+;;
{ .mfi
-(p0) ldfs sinh_FR_Tmjlo = [sinh_AD_mJ],16
-(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
-(p0) add sinh_AD_J = r37, sinh_GR_J ;;
+ nop.m 0
+ fma.s1 fExp = fP, fT, fT // exp(x)/2
+ nop.i 0
}
-
-{ .mmi
-(p0) ldfe sinh_FR_Tjhi = [sinh_AD_J],16 ;;
-(p0) ldfs sinh_FR_Tjlo = [sinh_AD_J],16
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ fma.s1 fExp_neg = fP_neg, fT_neg, fT_neg // exp(-x)/2
+ // branch out if possible overflow result
+(p13) br.cond.spnt SINH_POSSIBLE_OVERFLOW
}
+;;
{ .mfb
- nop.m 999
-(p0) fmerge.se sinh_FR_sneg = sinh_FR_N_temp2, f1
-(p7) br.cond.spnt L(SINH_BY_EXP) ;;
+ nop.m 0
+ // final result in the absence of overflow
+ fms.s.s0 f8 = fExp, f1, fExp_neg // result = (exp(x)-exp(-x))/2
+ // exit here in the absence of overflow
+ br.ret.sptk b0 // Exit main path, 0.25 <= |x| < 89.41598
}
+;;
+// Here if 0 < |x| < 0.25. Evaluate 9th order polynomial.
+SINH_SMALL:
{ .mfi
- nop.m 999
- nop.f 999
- nop.i 999 ;;
+ add rAd1 = 0x200, rTblAddr
+ fcmp.lt.s1 p7, p8 = fNormX, f0 // Test sign of x
+ cmp.gt p6, p0 = -60, rExp_x // Test |x| < 2^(-60)
}
-
-// ******************************************************
-// If NOT branch to EXP
-// ******************************************************
-// Calculate S_hi and S_lo
-// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
-// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
-// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjlo)
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0
- nop.i 999 ;;
+ add rAd2 = 0x210, rTblAddr
+ nop.f 0
+ nop.i 0
}
+;;
-{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi_temp
- nop.i 999
+{ .mmb
+ ldfpd fA4, fA3 = [rAd1]
+ ldfpd fA2, fA1 = [rAd2]
+(p6) br.cond.spnt SINH_VERY_SMALL // Branch if |x| < 2^(-60)
}
-
-// Calculate C_hi
-// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
-// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0
- nop.i 999 ;;
-}
-
-// sinh_FR_S_lo_temp1 = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
-// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjlo + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
-// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjlo + (sinh_FR_S_lo_temp1 )
+;;
{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_lo_temp1 = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi
- nop.i 999
+ nop.m 0
+ fma.s1 fX3 = fXsq, fNormX, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_C_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fX4 = fXsq, fXsq, f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1
- nop.i 999
+ nop.m 0
+ fma.s1 fA43 = fXsq, fA4, fA3
+ nop.i 0
}
-
-// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
-// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
-// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
-// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_lo_temp1 = sinh_FR_sneg, sinh_FR_Tmjlo, f0
- nop.i 999 ;;
-}
-
-/////////// BUG FIX fma to fms -TK
-{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_lo_temp3 = sinh_FR_spos, sinh_FR_Tjlo, sinh_FR_S_lo_temp1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA21 = fXsq, fA2, fA1
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_lo = sinh_FR_S_lo_temp3, f1, sinh_FR_S_lo_temp2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA4321 = fX4, fA43, fA21
+ nop.i 0
}
+;;
-// Y_hi = S_hi
-// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
-// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
-// sinh_FR_Y_lo = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
-
+// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo
- nop.i 999 ;;
+ nop.m 0
+ fmpy.s0 fTmp = fA4, fA4
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fA4321, fX3, fNormX
+ br.ret.sptk b0 // Exit if 2^-60 < |x| < 0.25
}
+;;
-// sinh_FR_SINH = Y_hi + Y_lo
-// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
-
-// Dummy multiply to generate inexact
-{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999
-}
+SINH_VERY_SMALL:
+// Here if 0 < |x| < 2^-60
+// Compute result by x + sgn(x)*x^2 to get properly rounded result
+.pred.rel "mutex",p7,p8
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH = sinh_FR_S_hi, f1, sinh_FR_Y_lo
- nop.i 999 ;;
+ nop.m 0
+(p7) fnma.s.s0 f8 = fNormX, fNormX, fNormX // If x<0 result ~ x-x^2
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fma.s.s0 f8 = sinh_FR_SGNX, sinh_FR_SINH,f0
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+(p8) fma.s.s0 f8 = fNormX, fNormX, fNormX // If x>0 result ~ x+x^2
+ br.ret.sptk b0 // Exit if |x| < 2^-60
}
+;;
+SINH_POSSIBLE_OVERFLOW:
-L(SINH_BY_EXP):
+// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
+// This cannot happen if input is a single, only if input is of higher precision.
+// Overflow is a possibility, not a certainty.
-// When p7 is true, we know that an overflow is not going to happen
-// When p7 is false, we must check for possible overflow
-// p7 is the over_SAFE flag
-// Y_hi = Tjhi
-// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
-// Scale = sign * 2^(N-1)
-// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
-// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_Y_lo_temp )
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest single, then we have
+// overflow
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_peven, f1, sinh_FR_podd
- nop.i 999
-}
-
-// Now we are in EXP. This is the only path where an overflow is possible
-// but not for certain. So this is the only path where over_SAFE has any use.
-// r34 still has N-1
-// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
-// There is a danger of double overflow if N-1 > 0x3fe = 1022
-// There is a danger of single overflow if N-1 > 0x7e = 126
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000000007e ;;
-}
-
-{ .mfi
-(p0) cmp.gt.unc p0,p7 = r34, r32
-(p0) fmerge.s sinh_FR_SCALE = sinh_FR_SGNX, sinh_FR_spos
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_Tjhi, sinh_FR_Y_lo_temp, sinh_FR_Tjlo
- nop.i 999 ;;
+ mov rGt_ln = 0x1007f // Exponent for largest single + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
}
+;;
-// f8 = answer = scale * (Y_hi + Y_lo)
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH_temp = sinh_FR_Y_lo, f1, sinh_FR_Tjhi
- nop.i 999 ;;
+ setf.exp fGt_pln = rGt_ln // Create largest single + 1 ulp
+ fma.s.s2 fWre_urm_f8 = fP, fT, fT // Result with wre set
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s.s0 f44 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
}
+;;
-// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p7) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999 ;;
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
}
+;;
-// If over_SAFE is set, return
{ .mfb
- nop.m 999
-(p7) fmerge.s f8 = f44,f44
-(p7) br.ret.sptk b0 ;;
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt SINH_CERTAIN_OVERFLOW // Branch if overflow
}
+;;
-// Else see if we overflowed
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// If WRE is set then an overflow will not occur in EXP.
-// The input value that would cause a register (WRE) value to overflow is about 2^15
-// and this input would go into the HUGE path.
-// Answer with WRE is in f43.
-
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fT
+ br.ret.sptk b0 // Exit if really no overflow
}
+;;
+// here if overflow
+SINH_CERTAIN_OVERFLOW:
{ .mfi
- nop.m 999
-(p0) fma.s.s2 f43 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
- nop.i 999 ;;
-}
-
-// 1007F => 1007F -FFFF = 80(true)
-// 80 + 7F = FF, which is 1 more that the exponent of the largest
-// double (FE). So 0 1007F 8000000000000000 is one ulp more than
-// largest single in register bias
-// Now set p8 if the answer with WRE is greater than or equal this value
-// Also set p9 if the answer with WRE is less than or equal to negative this value
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000001007F ;;
+ addl r17ones_m1 = 0x1FFFE, r0
+ fcmp.lt.s1 p6, p7 = fNormX, f0 // Test for x < 0
+ nop.i 0
}
+;;
{ .mmf
- nop.m 999
-(p0) setf.exp f41 = r32
-(p0) fsetc.s2 0x7F,0x40 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
- nop.i 999
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ setf.exp fTmp = r17ones_m1
+ fmerge.s FR_X = f8,f8
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.ns f42 = f41, f41
- nop.i 999 ;;
-}
-
-// The error tag for overflow is 128
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p8) mov r47 = 128 ;;
+ mov GR_Parameter_TAG = 128
+(p6) fnma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and -INF result
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
-(p8) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) mov r47 = 128
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
-}
-
-// Dummy multiply to generate inexact
-{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
}
+;;
+// Here if x unorm
+SINH_UNORM:
{ .mfb
- nop.m 999
-(p0) fmerge.s f8 = f44,f44
-(p0) br.ret.sptk b0 ;;
-}
-
-L(SINH_HUGE):
-
-// for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
-// SAFE: SAFE is always 0 for HUGE
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000015dbf ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk SINH_COMMON // Return to main path
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1
- nop.i 999 ;;
-}
+GLOBAL_IEEE754_END(sinhf)
-{ .mfi
- nop.m 999
-(p0) fma.s.s0 f44 = sinh_FR_signed_hi_lo, f9, f0
-(p0) mov r47 = 128
-}
-.endp sinhf
-ASM_SIZE_DIRECTIVE(sinhf)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__ieee754_sinhf)
-#endif
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-.proc __libm_error_region
-__libm_error_region:
-L(SINH_ERROR_SUPPORT):
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
- nop.f 0
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-
-// (2)
{ .mmi
- stfs [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
-
.body
-// (3)
-{ .mib
- stfs [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+{ .mfi
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ nop.f 0
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
}
{ .mib
- stfs [GR_Parameter_Y] = f44 // STORE Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-// (4)
{ .mmi
- ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinhl.S b/sysdeps/ia64/fpu/e_sinhl.S
index b880b95b64..ccc996a8cc 100644
--- a/sysdeps/ia64/fpu/e_sinhl.S
+++ b/sysdeps/ia64/fpu/e_sinhl.S
@@ -1,10 +1,10 @@
.file "sinhl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,17 +35,20 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
// 10/12/00 Update to set denormal operand and underflow flags
-// 1/22/01 Fixed to set inexact flag for small args. Fixed incorrect
+// 01/22/01 Fixed to set inexact flag for small args. Fixed incorrect
// call to __libm_error_support for 710.476 < x < 11357.2166.
+// 05/02/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 12/04/02 Improved performance
//
// API
//==============================================================
@@ -56,1269 +59,1058 @@
// Registers used
//==============================================================
// general registers:
-// r32 -> r47
+// r14 -> r40
// predicate registers used:
-// p6 p7 p8 p9
+// p6 -> p11
// floating-point registers used:
-// f9 -> f15; f32 -> f45;
+// f9 -> f15; f32 -> f90;
// f8 has input, then output
//
// Overview of operation
//==============================================================
-// There are four paths
-// 1. |x| < 0.25 SINH_BY_POLY
-// 2. |x| < 32 SINH_BY_TBL
-// 3. |x| < 2^14 SINH_BY_EXP
-// 4. |x_ >= 2^14 SINH_HUGE
-//
-// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
-// >= 1.0110001.... x 2^13
-// >= 11357.2166
+// There are seven paths
+// 1. 0 < |x| < 0.25 SINH_BY_POLY
+// 2. 0.25 <=|x| < 32 SINH_BY_TBL
+// 3. 32 <= |x| < 11357.21655 SINH_BY_EXP (merged path with SINH_BY_TBL)
+// 4. |x| >= 11357.21655 SINH_HUGE
+// 5. x=0 Done with early exit
+// 6. x=inf,nan Done with early exit
+// 7. x=denormal SINH_DENORM
//
-// But for double we get infinity for x >= 408633ce8fb9f87e
-// >= 1.0110...x 2^9
-// >= +7.10476e+002
+// For double extended we get overflow for x >= 400c b174 ddc0 31ae c0ea
+// >= 11357.21655
//
-// And for single we get infinity for x >= 42b3a496
-// >= 1.0110... 2^6
-// >= 89.8215
//
-// SAFE: If there is danger of overflow set SAFE to 0
-// NOT implemented: if there is danger of underflow, set SAFE to 0
-// SAFE for all paths listed below
-//
-// 1. SINH_BY_POLY
+// 1. SINH_BY_POLY 0 < |x| < 0.25
// ===============
-// If |x| is less than the tiny threshold, then clear SAFE
-// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
-// register-biased, this is fc01
-// For single, the tiny threshold is -126 = -7e => -7e + ffff = ff81
-// If |x| < tiny threshold, set SAFE = 0
+// Evaluate sinh(x) by a 13th order polynomial
+// Care is taken for the order of multiplication; and P_1 is not exactly 1/3!,
+// P_2 is not exactly 1/5!, etc.
+// sinh(x) = sign * (series(e^x) - series(e^-x))/2
+// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11!
+// + ax^13/13!)
+// = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
+// + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ))
+// = sign * (ax + ax*p_odd + (ax*p_even))
+// = sign * (ax + Y_lo)
+// sinh(x) = sign * (Y_hi + Y_lo)
+// Note that ax = |x|
//
-// 2. SINH_BY_TBL
+// 2. SINH_BY_TBL 0.25 <= |x| < 32.0
// =============
-// SAFE: SAFE is always 1 for TBL;
+// sinh(x) = sinh(B+R)
+// = sinh(B)cosh(R) + cosh(B)sinh(R)
+//
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j
+// We will calculate M and get N as (M-j)/64
+// The division is a shift.
+// exp(B) = exp(N*log2 + j*log2/64)
+// = 2^N * 2^(j/64)
+// sinh(B) = 1/2(e^B -e^-B)
+// = 1/2(2^N * 2^(j/64) - 2^-N * 2^(-j/64))
+// sinh(B) = (2^(N-1) * 2^(j/64) - 2^(-N-1) * 2^(-j/64))
+// cosh(B) = (2^(N-1) * 2^(j/64) + 2^(-N-1) * 2^(-j/64))
+// 2^(j/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+//
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+// = 1 + p_odd + p_even
+// where the p_odd uses the A coefficients and the p_even uses
+// the B coefficients
+//
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+// cosh(R) = 1 + p_even
+// sinh(B) = S_hi + S_lo
+// cosh(B) = C_hi
+// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
//
-// 3. SINH_BY_EXP
+// 3. SINH_BY_EXP 32.0 <= |x| < 11357.21655 ( 400c b174 ddc0 31ae c0ea )
// ==============
-// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
-// r34 has N-1; 16382 is in register biased form, 0x13ffd
-// There is danger of double overflow if N-1 > 0x3fe
-// in register biased form, 0x103fd
-// Analagously, there is danger of single overflow if N-1 > 0x7e
-// in register biased form, 0x1007d
-// SAFE: If there is danger of overflow set SAFE to 0
+// Can approximate result by exp(x)/2 in this region.
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) + Tjlo
+// sinh(x) = Y_hi + Y_lo
//
-// 4. SINH_HUGE
+// 4. SINH_HUGE |x| >= 11357.21655 ( 400c b174 ddc0 31ae c0ea )
// ============
-// SAFE: SAFE is always 0 for HUGE
+// Set error tag and call error support
+//
//
-
-#include "libm_support.h"
-
// Assembly macros
//==============================================================
-sinh_FR_X = f44
-sinh_FR_X2 = f9
-sinh_FR_X4 = f10
-sinh_FR_SGNX = f40
-sinh_FR_all_ones = f45
-sinh_FR_tmp = f42
-
-sinh_FR_Inv_log2by64 = f9
-sinh_FR_log2by64_lo = f11
-sinh_FR_log2by64_hi = f10
-
-sinh_FR_A1 = f9
-sinh_FR_A2 = f10
-sinh_FR_A3 = f11
-
-sinh_FR_Rcub = f12
-sinh_FR_M_temp = f13
-sinh_FR_R_temp = f13
-sinh_FR_Rsq = f13
-sinh_FR_R = f14
-
-sinh_FR_M = f38
-
-sinh_FR_B1 = f15
-sinh_FR_B2 = f32
-sinh_FR_B3 = f33
+r_ad5 = r14
+r_rshf_2to57 = r15
+r_exp_denorm = r15
+r_ad_mJ_lo = r15
+r_ad_J_lo = r16
+r_2Nm1 = r17
+r_2mNm1 = r18
+r_exp_x = r18
+r_ad_J_hi = r19
+r_ad2o = r19
+r_ad_mJ_hi = r20
+r_mj = r21
+r_ad2e = r22
+r_ad3 = r23
+r_ad1 = r24
+r_Mmj = r24
+r_rshf = r25
+r_M = r25
+r_N = r25
+r_jshf = r26
+r_exp_2tom57 = r26
+r_j = r26
+r_exp_mask = r27
+r_signexp_x = r28
+r_signexp_sgnx_0_5 = r28
+r_exp_0_25 = r29
+r_sig_inv_ln2 = r30
+r_exp_32 = r30
+r_exp_huge = r30
+r_ad4 = r31
+
+GR_SAVE_PFS = r34
+GR_SAVE_B0 = r35
+GR_SAVE_GP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+f_ABS_X = f9
+f_X2 = f10
+f_X4 = f11
+f_tmp = f14
+f_RSHF = f15
+
+f_Inv_log2by64 = f32
+f_log2by64_lo = f33
+f_log2by64_hi = f34
+f_A1 = f35
+
+f_A2 = f36
+f_A3 = f37
+f_Rcub = f38
+f_M_temp = f39
+f_R_temp = f40
+
+f_Rsq = f41
+f_R = f42
+f_M = f43
+f_B1 = f44
+f_B2 = f45
+
+f_B3 = f46
+f_peven_temp1 = f47
+f_peven_temp2 = f48
+f_peven = f49
+f_podd_temp1 = f50
+
+f_podd_temp2 = f51
+f_podd = f52
+f_poly65 = f53
+f_poly6543 = f53
+f_poly6to1 = f53
+f_poly43 = f54
+f_poly21 = f55
+
+f_X3 = f56
+f_INV_LN2_2TO63 = f57
+f_RSHF_2TO57 = f58
+f_2TOM57 = f59
+f_smlst_oflow_input = f60
+
+f_pre_result = f61
+f_huge = f62
+f_spos = f63
+f_sneg = f64
+f_Tjhi = f65
+
+f_Tjlo = f66
+f_Tmjhi = f67
+f_Tmjlo = f68
+f_S_hi = f69
+f_SC_hi_temp = f70
+
+f_S_lo_temp1 = f71
+f_S_lo_temp2 = f72
+f_S_lo_temp3 = f73
+f_S_lo_temp4 = f73
+f_S_lo = f74
+f_C_hi = f75
+
+f_Y_hi = f77
+f_Y_lo_temp = f78
+f_Y_lo = f79
+f_NORM_X = f80
+
+f_P1 = f81
+f_P2 = f82
+f_P3 = f83
+f_P4 = f84
+f_P5 = f85
+
+f_P6 = f86
+f_Tjhi_spos = f87
+f_Tjlo_spos = f88
+f_huge = f89
+f_signed_hi_lo = f90
-sinh_FR_peven_temp1 = f34
-sinh_FR_peven_temp2 = f35
-sinh_FR_peven = f36
-
-sinh_FR_podd_temp1 = f34
-sinh_FR_podd_temp2 = f35
-sinh_FR_podd = f37
-
-sinh_FR_poly_podd_temp1 = f11
-sinh_FR_poly_podd_temp2 = f13
-sinh_FR_poly_peven_temp1 = f11
-sinh_FR_poly_peven_temp2 = f13
-
-sinh_FR_J_temp = f9
-sinh_FR_J = f10
-
-sinh_FR_Mmj = f39
-
-sinh_FR_N_temp1 = f11
-sinh_FR_N_temp2 = f12
-sinh_FR_N = f13
-
-sinh_FR_spos = f14
-sinh_FR_sneg = f15
-
-sinh_FR_Tjhi = f32
-sinh_FR_Tjlo = f33
-sinh_FR_Tmjhi = f34
-sinh_FR_Tmjlo = f35
-
-sinh_GR_mJ = r35
-sinh_GR_J = r36
-
-sinh_AD_mJ = r38
-sinh_AD_J = r39
-sinh_GR_all_ones = r40
-
-sinh_FR_S_hi = f9
-sinh_FR_S_hi_temp = f10
-sinh_FR_S_lo_temp1 = f11
-sinh_FR_S_lo_temp2 = f12
-sinh_FR_S_lo_temp3 = f13
-
-sinh_FR_S_lo = f38
-sinh_FR_C_hi = f39
-
-sinh_FR_C_hi_temp1 = f10
-sinh_FR_Y_hi = f11
-sinh_FR_Y_lo_temp = f12
-sinh_FR_Y_lo = f13
-sinh_FR_SINH = f9
-
-sinh_FR_P1 = f14
-sinh_FR_P2 = f15
-sinh_FR_P3 = f32
-sinh_FR_P4 = f33
-sinh_FR_P5 = f34
-sinh_FR_P6 = f35
-
-sinh_FR_TINY_THRESH = f9
-
-sinh_FR_SINH_temp = f10
-sinh_FR_SCALE = f11
-
-sinh_FR_signed_hi_lo = f10
-
-
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
-
-GR_Parameter_X = r44
-GR_Parameter_Y = r45
-GR_Parameter_RESULT = r46
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// DO NOT CHANGE ORDER OF THESE TABLES
+RODATA
.align 16
-double_sinh_arg_reduction:
-ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
- data8 0xB8AA3B295C17F0BC, 0x00004005
- data8 0xB17217F7D1000000, 0x00003FF8
- data8 0xCF79ABC9E3B39804, 0x00003FD0
-ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
-
-double_sinh_p_table:
-ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
- data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
- data8 0x8888888888888412, 0x00003FF8
- data8 0xD00D00D00D4D39F2, 0x00003FF2
- data8 0xB8EF1D28926D8891, 0x00003FEC
- data8 0xD732377688025BE9, 0x00003FE5
- data8 0xB08AF9AE78C1239F, 0x00003FDE
-ASM_SIZE_DIRECTIVE(double_sinh_p_table)
-
-double_sinh_ab_table:
-ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
- data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
- data8 0x88888888884ECDD5, 0x00003FF8
- data8 0xD00D0C6DCC26A86B, 0x00003FF2
- data8 0x8000000000000002, 0x00003FFE
- data8 0xAAAAAAAAAA402C77, 0x00003FFA
- data8 0xB60B6CC96BDB144D, 0x00003FF5
-ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
-
-double_sinh_j_table:
-ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
- data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
- data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
- data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
- data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
- data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
- data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
- data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
- data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
- data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
- data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
- data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
- data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
- data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
- data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
- data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
- data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
- data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
- data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
- data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
- data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
- data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
- data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
- data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
- data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
- data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
- data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
- data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
- data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
- data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
- data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
- data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
- data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
- data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
- data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
- data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
- data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
- data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
- data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
- data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
- data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
- data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
- data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
- data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
- data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
- data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
- data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
- data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
- data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
- data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
- data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
- data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
- data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
- data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
- data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
- data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
- data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
- data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
- data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
- data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
- data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
- data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
- data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
- data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
- data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
- data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
-ASM_SIZE_DIRECTIVE(double_sinh_j_table)
-
-.align 32
-.global sinhl#
-
-.section .text
-.proc sinhl#
-.align 32
-
-sinhl:
-#ifdef _LIBC
-.global __ieee754_sinhl
-.type __ieee754_sinhl,@function
-__ieee754_sinhl:
-#endif
-
-// X infinity or NAN?
-// Take invalid fault if enabled
-
+LOCAL_OBJECT_START(sinh_arg_reduction)
+// data8 0xB8AA3B295C17F0BC, 0x00004005 // 64/log2 -- signif loaded with setf
+ data8 0xB17217F7D1000000, 0x00003FF8 // log2/64 high part
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // log2/64 low part
+ data8 0xb174ddc031aec0ea, 0x0000400c // Smallest x to overflow (11357.21655)
+LOCAL_OBJECT_END(sinh_arg_reduction)
+
+LOCAL_OBJECT_START(sinh_p_table)
+ data8 0xB08AF9AE78C1239F, 0x00003FDE // P6
+ data8 0xB8EF1D28926D8891, 0x00003FEC // P4
+ data8 0x8888888888888412, 0x00003FF8 // P2
+ data8 0xD732377688025BE9, 0x00003FE5 // P5
+ data8 0xD00D00D00D4D39F2, 0x00003FF2 // P3
+ data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // P1
+LOCAL_OBJECT_END(sinh_p_table)
+
+LOCAL_OBJECT_START(sinh_ab_table)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1
+ data8 0x88888888884ECDD5, 0x00003FF8 // A2
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // A3
+ data8 0x8000000000000002, 0x00003FFE // B1
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // B2
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // B3
+LOCAL_OBJECT_END(sinh_ab_table)
+
+LOCAL_OBJECT_START(sinh_j_hi_table)
+ data8 0xB504F333F9DE6484, 0x00003FFE
+ data8 0xB6FD91E328D17791, 0x00003FFE
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE
+ data8 0xBD08A39F580C36BF, 0x00003FFE
+ data8 0xBF1799B67A731083, 0x00003FFE
+ data8 0xC12C4CCA66709456, 0x00003FFE
+ data8 0xC346CCDA24976407, 0x00003FFE
+ data8 0xC5672A115506DADD, 0x00003FFE
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE
+ data8 0xCE248C151F8480E4, 0x00003FFE
+ data8 0xD06333DAEF2B2595, 0x00003FFE
+ data8 0xD2A81D91F12AE45A, 0x00003FFE
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE
+ data8 0xD99D15C278AFD7B6, 0x00003FFE
+ data8 0xDBFBB797DAF23755, 0x00003FFE
+ data8 0xDE60F4825E0E9124, 0x00003FFE
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE
+ data8 0xE33F8972BE8A5A51, 0x00003FFE
+ data8 0xE5B906E77C8348A8, 0x00003FFE
+ data8 0xE8396A503C4BDC68, 0x00003FFE
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE
+ data8 0xED4F301ED9942B84, 0x00003FFE
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE
+ data8 0xF281773C59FFB13A, 0x00003FFE
+ data8 0xF5257D152486CC2C, 0x00003FFE
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE
+ data8 0xFA83B2DB722A033A, 0x00003FFE
+ data8 0xFD3E0C0CF486C175, 0x00003FFE
+ data8 0x8000000000000000, 0x00003FFF // Center of table
+ data8 0x8164D1F3BC030773, 0x00003FFF
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF
+ data8 0x843A28C3ACDE4046, 0x00003FFF
+ data8 0x85AAC367CC487B15, 0x00003FFF
+ data8 0x871F61969E8D1010, 0x00003FFF
+ data8 0x88980E8092DA8527, 0x00003FFF
+ data8 0x8A14D575496EFD9A, 0x00003FFF
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF
+ data8 0x8EA4398B45CD53C0, 0x00003FFF
+ data8 0x9031DC431466B1DC, 0x00003FFF
+ data8 0x91C3D373AB11C336, 0x00003FFF
+ data8 0x935A2B2F13E6E92C, 0x00003FFF
+ data8 0x94F4EFA8FEF70961, 0x00003FFF
+ data8 0x96942D3720185A00, 0x00003FFF
+ data8 0x9837F0518DB8A96F, 0x00003FFF
+ data8 0x99E0459320B7FA65, 0x00003FFF
+ data8 0x9B8D39B9D54E5539, 0x00003FFF
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF
+ data8 0x9EF5326091A111AE, 0x00003FFF
+ data8 0xA0B0510FB9714FC2, 0x00003FFF
+ data8 0xA27043030C496819, 0x00003FFF
+ data8 0xA43515AE09E6809E, 0x00003FFF
+ data8 0xA5FED6A9B15138EA, 0x00003FFF
+ data8 0xA7CD93B4E965356A, 0x00003FFF
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF
+ data8 0xAB7A39B5A93ED337, 0x00003FFF
+ data8 0xAD583EEA42A14AC6, 0x00003FFF
+ data8 0xAF3B78AD690A4375, 0x00003FFF
+ data8 0xB123F581D2AC2590, 0x00003FFF
+ data8 0xB311C412A9112489, 0x00003FFF
+ data8 0xB504F333F9DE6484, 0x00003FFF
+LOCAL_OBJECT_END(sinh_j_hi_table)
+
+LOCAL_OBJECT_START(sinh_j_lo_table)
+ data4 0x1EB2FB13
+ data4 0x1CE2CBE2
+ data4 0x1DDC3CBC
+ data4 0x1EE9AA34
+ data4 0x9EAEFDC1
+ data4 0x9DBF517B
+ data4 0x1EF88AFB
+ data4 0x1E03B216
+ data4 0x1E78AB43
+ data4 0x9E7B1747
+ data4 0x9EFE3C0E
+ data4 0x9D36F837
+ data4 0x9DEE53E4
+ data4 0x9E24AE8E
+ data4 0x1D912473
+ data4 0x1EB243BE
+ data4 0x1E669A2F
+ data4 0x9BBC610A
+ data4 0x1E761035
+ data4 0x9E0BE175
+ data4 0x1CCB12A1
+ data4 0x1D1BFE90
+ data4 0x1DF2F47A
+ data4 0x1EF22F22
+ data4 0x9E3F4A29
+ data4 0x1EC01A5B
+ data4 0x1E8CAC3A
+ data4 0x9DBB3FAB
+ data4 0x1EF73A19
+ data4 0x9BB795B5
+ data4 0x1EF84B76
+ data4 0x9EF5818B
+ data4 0x00000000 // Center of table
+ data4 0x1F77CACA
+ data4 0x1EF8A91D
+ data4 0x1E57C976
+ data4 0x9EE8DA92
+ data4 0x1EE85C9F
+ data4 0x1F3BF1AF
+ data4 0x1D80CA1E
+ data4 0x9D0373AF
+ data4 0x9F167097
+ data4 0x1EB70051
+ data4 0x1F6EB029
+ data4 0x1DFD6D8E
+ data4 0x9EB319B0
+ data4 0x1EBA2BEB
+ data4 0x1F11D537
+ data4 0x1F0D5A46
+ data4 0x9E5E7BCA
+ data4 0x9F3AAFD1
+ data4 0x9E86DACC
+ data4 0x9F3EDDC2
+ data4 0x1E496E3D
+ data4 0x9F490BF6
+ data4 0x1DD1DB48
+ data4 0x1E65EBFB
+ data4 0x9F427496
+ data4 0x1F283C4A
+ data4 0x1F4B0047
+ data4 0x1F130152
+ data4 0x9E8367C0
+ data4 0x9F705F90
+ data4 0x1EFB3C53
+ data4 0x1F32FB13
+LOCAL_OBJECT_END(sinh_j_lo_table)
-{ .mfi
- alloc r32 = ar.pfs,0,12,4,0
-(p0) fclass.m.unc p6,p0 = f8, 0xe3 //@qnan | @snan | @inf
- mov sinh_GR_all_ones = -1
-}
-;;
+.section .text
+GLOBAL_IEEE754_ENTRY(sinhl)
-{ .mfb
- nop.m 999
-(p6) fma.s0 f8 = f8,f1,f8
-(p6) br.ret.spnt b0 ;;
-}
-
-// Put 0.25 in f9; p6 true if x < 0.25
-// Make constant that will generate inexact when squared
{ .mlx
- setf.sig sinh_FR_all_ones = sinh_GR_all_ones
-(p0) movl r32 = 0x000000000000fffd ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
-(p0) fclass.m.unc p7,p0 = f8, 0x07 //@zero
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p0) fmerge.s sinh_FR_X = f0,f8
-(p7) br.ret.spnt b0 ;;
+ getf.exp r_signexp_x = f8 // Get signexp of x, must redo if unorm
+ movl r_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
-
-// Identify denormal operands.
-{ .mfi
- nop.m 999
- fclass.m.unc p10,p0 = f8, 0x09 // + denorm
- nop.i 999
-};;
-{ .mfi
- nop.m 999
- fclass.m.unc p11,p0 = f8, 0x0a // - denorm
- nop.i 999
+{ .mlx
+ addl r_ad1 = @ltoff(sinh_arg_reduction), gp
+ movl r_rshf_2to57 = 0x4778000000000000 // 1.10000 2^(63+57)
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.s sinh_FR_SGNX = f8,f1
- nop.i 999 ;;
+ ld8 r_ad1 = [r_ad1]
+ fmerge.s f_ABS_X = f0,f8
+ mov r_exp_0_25 = 0x0fffd // Form exponent for 0.25
}
-
{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
- nop.i 999 ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.sptk L(SINH_BY_TBL) ;;
-}
-
-
-L(SINH_BY_POLY):
-
-// POLY cannot overflow so there is no need to call __libm_error_support
-// Set tiny_SAFE (p7) to 1(0) if answer is not tiny
-// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
-// commented out.
-//(p0) movl r32 = 0x000000000000fc01
-//(p0) setf.exp f10 = r32
-//(p0) fcmp.lt.unc.s1 p6,p7 = f8,f10
-// Here is essentially the algorithm for SINH_BY_POLY. Care is take for the order
-// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
-// Note that ax = |x|
-// sinh(x) = sign * (series(e^x) - series(e^-x))/2
-// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
-// = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
-// + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
-// = sign * (ax + ax*p_odd + (ax*p_even))
-// = sign * (ax + Y_lo)
-// sinh(x) = sign * (Y_hi + Y_lo)
-// Get the values of P_x from the table
-{ .mfb
-(p0) addl r34 = @ltoff(double_sinh_p_table), gp
-(p10) fma.s0 f8 = f8,f8,f8
-(p10) br.ret.spnt b0
-}
-;;
-
-{ .mfb
- ld8 r34 = [r34]
-(p11) fnma.s0 f8 = f8,f8,f8
-(p11) br.ret.spnt b0
+ nop.m 0
+ fnorm.s1 f_NORM_X = f8
+ mov r_exp_2tom57 = 0xffff-57
}
;;
-// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
-{ .mmf
- nop.m 999
-(p0) ldfe sinh_FR_P1 = [r34],16
-(p0) fma.s1 sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_P2 = [r34],16 ;;
-(p0) ldfe sinh_FR_P3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_P4 = [r34],16 ;;
-(p0) ldfe sinh_FR_P5 = [r34],16
- nop.i 999 ;;
-}
-
{ .mfi
-(p0) ldfe sinh_FR_P6 = [r34],16
-(p0) fma.s1 sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0
- nop.i 999 ;;
+ setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
+ fclass.m p10,p0 = f8, 0x0b // Test for denorm
+ mov r_exp_mask = 0x1ffff
}
-
-// Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1
- nop.i 999
+{ .mlx
+ setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
+ movl r_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4
- nop.i 999 ;;
+ nop.m 0
+ fclass.m p7,p0 = f8, 0x07 // Test if x=0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0
- nop.i 999
+ setf.exp f_2TOM57 = r_exp_2tom57 // Form 2^-57 for scaling
+ nop.f 0
+ add r_ad3 = 0x90, r_ad1 // Point to ab_table
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2
- nop.i 999 ;;
+ setf.d f_RSHF = r_rshf // Form right shift const 1.100 * 2^63
+ fclass.m p6,p0 = f8, 0xe3 // Test if x nan, inf
+ add r_ad4 = 0x2f0, r_ad1 // Point to j_hi_table midpoint
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0
- nop.i 999 ;;
+{ .mib
+ add r_ad2e = 0x20, r_ad1 // Point to p_table
+ nop.i 0
+(p10) br.cond.spnt SINH_DENORM // Branch if x denorm
}
+;;
-// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
+// Common path -- return here from SINH_DENORM if x is unnorm
+SINH_COMMON:
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_X, sinh_FR_peven, f0
- nop.i 999 ;;
+ ldfe f_smlst_oflow_input = [r_ad2e],16
+ nop.f 0
+ add r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_X, sinh_FR_podd, sinh_FR_Y_lo_temp
- nop.i 999 ;;
+{ .mib
+ ldfe f_log2by64_hi = [r_ad1],16
+ and r_exp_x = r_exp_mask, r_signexp_x
+(p7) br.ret.spnt b0 // Exit if x=0
}
+;;
-// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH = sinh_FR_X, f1, sinh_FR_Y_lo
- nop.i 999 ;;
-}
-// Dummy multiply to generate inexact
+// Get the A coefficients for SINH_BY_TBL
{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999
+ ldfe f_A1 = [r_ad3],16
+ fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0
+ cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test x < 0.25
}
-
-// Calculate f8 = sign * (Y_hi + Y_lo)
-// Go to return
{ .mfb
- nop.m 999
-(p0) fma.s0 f8 = sinh_FR_SGNX,sinh_FR_SINH,f0
-(p0) br.ret.sptk b0 ;;
-}
-
-
-L(SINH_BY_TBL):
-
-// Now that we are at TBL; so far all we know is that |x| >= 0.25.
-// The first two steps are the same for TBL and EXP, but if we are HUGE
-// we want to leave now.
-// Double-extended:
-// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
-// Double
-// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
-// Single
-// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x000000000001000d ;;
-}
-
-{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs
+(p6) fma.s0 f8 = f8,f1,f0 // Result for x nan, inf
+(p6) br.ret.spnt b0 // Exit for x nan, inf
}
+;;
+// Calculate X2 = ax*ax for SINH_BY_POLY
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p6,p7 = sinh_FR_X,f9
- nop.i 999 ;;
+ ldfe f_log2by64_lo = [r_ad1],16
+ nop.f 0
+ nop.i 0
}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(SINH_HUGE) ;;
+{ .mfb
+ ldfe f_A2 = [r_ad3],16
+ fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0
+(p7) br.cond.spnt SINH_BY_POLY
}
+;;
-// r32 = 1
-// r34 = N-1
-// r35 = N
-// r36 = j
-// r37 = N+1
-
-// TBL can never overflow
-// sinh(x) = sinh(B+R)
-// = sinh(B)cosh(R) + cosh(B)sinh(R)
-//
-// ax = |x| = M*log2/64 + R
-// B = M*log2/64
-// M = 64*N + j
-// We will calcualte M and get N as (M-j)/64
-// The division is a shift.
-// exp(B) = exp(N*log2 + j*log2/64)
-// = 2^N * 2^(j*log2/64)
-// sinh(B) = 1/2(e^B -e^-B)
-// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
-// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
-// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
-// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
-// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
-// R = ax - M*log2/64
-// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
-// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
-// = 1 + p_odd + p_even
-// where the p_even uses the A coefficients and the p_even uses the B coefficients
-// So sinh(R) = 1 + p_odd + p_even -(1 -p_odd -p_even)/2 = p_odd
-// cosh(R) = 1 + p_even
-// sinh(B) = S_hi + S_lo
-// cosh(B) = C_hi
-// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
+// Here if |x| >= 0.25
+SINH_BY_TBL:
// ******************************************************
-// STEP 1 (TBL and EXP)
+// STEP 1 (TBL and EXP) - Argument reduction
// ******************************************************
// Get the following constants.
-// f9 = Inv_log2by64
-// f10 = log2by64_hi
-// f11 = log2by64_lo
-
-{ .mmi
-(p0) adds r32 = 0x1,r0
-(p0) addl r34 = @ltoff(double_sinh_arg_reduction), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
-}
-;;
+// Inv_log2by64
+// log2by64_hi
+// log2by64_lo
// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
// put them in an exponent.
-// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
-// r39 = 0xffff + (N-1) = 0xffff +N -1
-// r40 = 0xffff - (N +1) = 0xffff -N -1
-
-{ .mlx
- nop.m 999
-(p0) movl r38 = 0x000000000000fffe ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_Inv_log2by64 = [r34],16 ;;
-(p0) ldfe sinh_FR_log2by64_hi = [r34],16
- nop.i 999 ;;
-}
-
-{ .mbb
-(p0) ldfe sinh_FR_log2by64_lo = [r34],16
- nop.b 999
- nop.b 999 ;;
-}
+// f_spos = 2^(N-1) and f_sneg = 2^(-N-1)
+// 0xffff + (N-1) = 0xffff +N -1
+// 0xffff - (N +1) = 0xffff -N -1
-// Get the A coefficients
-// f9 = A_1
-// f10 = A_2
-// f11 = A_3
-{ .mmi
- nop.m 999
-(p0) addl r34 = @ltoff(double_sinh_ab_table), gp
- nop.i 999
-}
-;;
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(ax*Inv_log2by64)
+// f_M = M = round-to-integer(ax/(log2/64))
+// Put the integer representation of M in r_M
+// and the floating point representation of M in f_M
+// Get the remaining A,B coefficients
{ .mmi
- ld8 r34 = [r34]
- nop.m 999
- nop.i 999
+ ldfe f_A3 = [r_ad3],16
+ nop.m 0
+ nop.i 0
}
;;
-
-// Calculate M and keep it as integer and floating point.
-// f38 = M = round-to-integer(x*Inv_log2by64)
-// sinh_FR_M = M = truncate(ax/(log2/64))
-// Put the significand of M in r35
-// and the floating point representation of M in sinh_FR_M
-
+.pred.rel "mutex",p8,p9
+// Use constant (1.100*2^(63-6)) to get rounded M into rightmost significand
+// |x| * 64 * 1/ln2 * 2^(63-6) + 1.1000 * 2^(63+(63-6))
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_M = sinh_FR_X, sinh_FR_Inv_log2by64, f0
- nop.i 999
+(p8) mov r_signexp_sgnx_0_5 = 0x2fffe // signexp of -0.5
+ fma.s1 f_M_temp = f_ABS_X, f_INV_LN2_2TO63, f_RSHF_2TO57
+(p9) mov r_signexp_sgnx_0_5 = 0x0fffe // signexp of +0.5
}
+;;
+// Test for |x| >= overflow limit
{ .mfi
-(p0) ldfe sinh_FR_A1 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ ldfe f_B1 = [r_ad3],16
+ fcmp.ge.s1 p6,p0 = f_ABS_X, f_smlst_oflow_input
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fcvt.fx.s1 sinh_FR_M_temp = sinh_FR_M
- nop.i 999 ;;
+ ldfe f_B2 = [r_ad3],16
+ nop.f 0
+ mov r_exp_32 = 0x10004
}
+;;
-{ .mfi
- nop.m 999
-(p0) fnorm.s1 sinh_FR_M = sinh_FR_M_temp
- nop.i 999 ;;
+// Subtract RSHF constant to get rounded M as a floating point value
+// M_temp * 2^-(63-6) - 1.1000 * 2^63
+{ .mfb
+ ldfe f_B3 = [r_ad3],16
+ fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF
+(p6) br.cond.spnt SINH_HUGE // Branch if result will overflow
}
+;;
{ .mfi
-(p0) getf.sig r35 = sinh_FR_M_temp
- nop.f 999
- nop.i 999 ;;
+ getf.sig r_M = f_M_temp
+ nop.f 0
+ cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
}
+;;
-// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// Calculate j. j is the signed extension of the six lsb of M. It
// has a range of -32 thru 31.
-// r35 = M
-// r36 = j
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) and r36 = 0x3f, r35 ;;
-}
// Calculate R
-// f13 = f44 - f12*f10 = ax - M*log2by64_hi
-// f14 = f13 - f8*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
-
-{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X
- nop.i 999
-}
+// ax - M*log2by64_hi
+// R = (ax - M*log2by64_hi) - M*log2by64_lo
{ .mfi
-(p0) ldfe sinh_FR_A2 = [r34],16
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fnma.s1 f_R_temp = f_M, f_log2by64_hi, f_ABS_X
+ and r_j = 0x3f, r_M
}
+;;
-{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_R = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp
- nop.i 999
+{ .mii
+ nop.m 0
+ shl r_jshf = r_j, 0x2 // Shift j so can sign extend it
+;;
+ sxt1 r_jshf = r_jshf
}
+;;
-// Get the B coefficients
-// f15 = B_1
-// f32 = B_2
-// f33 = B_3
-
-{ .mmi
-(p0) ldfe sinh_FR_A3 = [r34],16 ;;
-(p0) ldfe sinh_FR_B1 = [r34],16
- nop.i 999 ;;
+{ .mii
+ nop.m 0
+ shr r_j = r_jshf, 0x2 // Now j has range -32 to 31
+ nop.i 0
}
+;;
{ .mmi
-(p0) ldfe sinh_FR_B2 = [r34],16 ;;
-(p0) ldfe sinh_FR_B3 = [r34],16
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) shl r34 = r36, 0x2 ;;
-(p0) sxt1 r37 = r34 ;;
+ shladd r_ad_J_hi = r_j, 4, r_ad4 // pointer to Tjhi
+ sub r_Mmj = r_M, r_j // M-j
+ sub r_mj = r0, r_j // Form -j
}
+;;
-// ******************************************************
-// STEP 2 (TBL and EXP)
-// ******************************************************
-// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
-// f12 = R*R*R
-// f13 = R*R
-// f14 = R <== from above
-
+// The TBL and EXP branches are merged and predicated
+// If TBL, p6 true, 0.25 <= |x| < 32
+// If EXP, p7 true, 32 <= |x| < overflow_limit
+//
+// N = (M-j)/64
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Rsq = sinh_FR_R, sinh_FR_R, f0
-(p0) shr r36 = r37, 0x2 ;;
+ ldfe f_Tjhi = [r_ad_J_hi]
+ fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp
+ shr r_N = r_Mmj, 0x6 // N = (M-j)/64
}
-
-// r34 = M-j = r35 - r36
-// r35 = N = (M-j)/64
-
-{ .mii
-(p0) sub r34 = r35, r36
- nop.i 999 ;;
-(p0) shr r35 = r34, 0x6 ;;
+{ .mfi
+ shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
+ nop.f 0
+ shladd r_ad_mJ_lo = r_mj, 2, r_ad5 // pointer to Tmjlo
}
+;;
-{ .mii
-(p0) sub r40 = r38, r35
-(p0) adds r37 = 0x1, r35
-(p0) add r39 = r38, r35 ;;
+{ .mfi
+ sub r_2mNm1 = r_signexp_sgnx_0_5, r_N // signexp sgnx*2^(-N-1)
+ nop.f 0
+ shladd r_ad_J_lo = r_j, 2, r_ad5 // pointer to Tjlo
}
-
-// Get the address of the J table, add the offset,
-// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
-// f32 = T(j)_hi
-// f33 = T(j)_lo
-// f34 = T(-j)_hi
-// f35 = T(-j)_lo
-
-{ .mmi
-(p0) sub r34 = r35, r32
-(p0) addl r37 = @ltoff(double_sinh_j_table), gp
- nop.i 999
+{ .mfi
+ ldfe f_Tmjhi = [r_ad_mJ_hi]
+ nop.f 0
+ add r_2Nm1 = r_signexp_sgnx_0_5, r_N // signexp sgnx*2^(N-1)
}
;;
-{ .mmi
- ld8 r37 = [r37]
- nop.m 999
- nop.i 999
+{ .mmf
+ ldfs f_Tmjlo = [r_ad_mJ_lo]
+ setf.exp f_sneg = r_2mNm1 // Form sgnx * 2^(-N-1)
+ nop.f 0
}
;;
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0
- nop.i 999
+{ .mmf
+ ldfs f_Tjlo = [r_ad_J_lo]
+ setf.exp f_spos = r_2Nm1 // Form sgnx * 2^(N-1)
+ nop.f 0
}
+;;
// ******************************************************
-// STEP 3 Now decide if we need to branch to EXP
+// STEP 2 (TBL and EXP)
// ******************************************************
-// Put 32 in f9; p6 true if x < 32
-// Go to EXP if |x| >= 32
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000010004 ;;
+{ .mmf
+ nop.m 0
+ nop.m 0
+ fma.s1 f_Rsq = f_R, f_R, f0
}
+;;
-// Calculate p_even
-// f34 = B_2 + Rsq *B_3
-// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
-// f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3, sinh_FR_B2
- nop.i 999 ;;
-}
+// Calculate p_even
+// B_2 + Rsq *B_3
+// B_1 + Rsq * (B_2 + Rsq *B_3)
+// p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1
- nop.i 999
+ nop.m 0
+ fma.s1 f_peven_temp1 = f_Rsq, f_B3, f_B2
+ nop.i 0
}
-
// Calculate p_odd
-// f34 = A_2 + Rsq *A_3
-// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
-// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
-
+// A_2 + Rsq *A_3
+// A_1 + Rsq * (A_2 + Rsq *A_3)
+// podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd_temp1 = sinh_FR_Rsq, sinh_FR_A3, sinh_FR_A2
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_podd_temp1 = f_Rsq, f_A3, f_A2
+ nop.i 0
}
+;;
{ .mfi
-(p0) setf.exp sinh_FR_N_temp1 = r39
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_Rcub = f_Rsq, f_R, f0
+ nop.i 0
}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_peven = sinh_FR_Rsq, sinh_FR_peven_temp2, f0
- nop.i 999
-}
+//
+// If TBL,
+// Calculate S_hi and S_lo, and C_hi
+// SC_hi_temp = sneg * Tmjhi
+// S_hi = spos * Tjhi - SC_hi_temp
+// S_hi = spos * Tjhi - (sneg * Tmjhi)
+// C_hi = spos * Tjhi + SC_hi_temp
+// C_hi = spos * Tjhi + (sneg * Tmjhi)
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd_temp2 = sinh_FR_Rsq, sinh_FR_podd_temp1, sinh_FR_A1
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
+ nop.i 0
}
+;;
+// If TBL,
+// S_lo_temp3 = sneg * Tmjlo
+// S_lo_temp4 = spos * Tjlo - S_lo_temp3
+// S_lo_temp4 = spos * Tjlo -(sneg * Tmjlo)
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s1 f_S_lo_temp3 = f_sneg, f_Tmjlo, f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_podd = sinh_FR_podd_temp2, sinh_FR_Rcub, sinh_FR_R
- nop.i 999
-}
-
-// sinh_GR_mj contains the table offset for -j
-// sinh_GR_j contains the table offset for +j
-// p6 is true when j <= 0
-
-{ .mlx
-(p0) setf.exp sinh_FR_N_temp2 = r40
-(p0) movl r40 = 0x0000000000000020 ;;
+ nop.m 0
+ fma.s1 f_peven_temp2 = f_Rsq, f_peven_temp1, f_B1
+ nop.i 0
}
-
{ .mfi
-(p0) sub sinh_GR_mJ = r40, r36
-(p0) fmerge.se sinh_FR_spos = sinh_FR_N_temp1, f1
-(p0) adds sinh_GR_J = 0x20, r36 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) shl sinh_GR_mJ = sinh_GR_mJ, 5 ;;
-(p0) add sinh_AD_mJ = r37, sinh_GR_mJ ;;
-}
-
-{ .mmi
- nop.m 999
-(p0) ldfe sinh_FR_Tmjhi = [sinh_AD_mJ],16
-(p0) shl sinh_GR_J = sinh_GR_J, 5 ;;
+ nop.m 0
+ fma.s1 f_podd_temp2 = f_Rsq, f_podd_temp1, f_A1
+ nop.i 0
}
+;;
+// If EXP,
+// Compute sgnx * 2^(N-1) * Tjhi and sgnx * 2^(N-1) * Tjlo
{ .mfi
-(p0) ldfs sinh_FR_Tmjlo = [sinh_AD_mJ],16
-(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
-(p0) add sinh_AD_J = r37, sinh_GR_J ;;
-}
-
-{ .mmi
-(p0) ldfe sinh_FR_Tjhi = [sinh_AD_J],16 ;;
-(p0) ldfs sinh_FR_Tjlo = [sinh_AD_J],16
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p0) fmerge.se sinh_FR_sneg = sinh_FR_N_temp2, f1
-(p7) br.cond.spnt L(SINH_BY_EXP) ;;
+ nop.m 0
+(p7) fma.s1 f_Tjhi_spos = f_Tjhi, f_spos, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s1 f_Tjlo_spos = f_Tjlo, f_spos, f0
+ nop.i 0
}
-
-// ******************************************************
-// If NOT branch to EXP
-// ******************************************************
-// Calculate S_hi and S_lo
-// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
-// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
-// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjlo)
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0
- nop.i 999 ;;
+ nop.m 0
+(p6) fms.s1 f_S_hi = f_spos, f_Tjhi, f_SC_hi_temp
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi_temp
- nop.i 999
+ nop.m 0
+(p6) fma.s1 f_C_hi = f_spos, f_Tjhi, f_SC_hi_temp
+ nop.i 0
}
-
-// Calculate C_hi
-// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
-// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0
- nop.i 999 ;;
+ nop.m 0
+(p6) fms.s1 f_S_lo_temp4 = f_spos, f_Tjlo, f_S_lo_temp3
+ nop.i 0
}
-
-// sinh_FR_S_lo_temp1 = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
-// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjlo + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
-// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjlo + (sinh_FR_S_lo_temp1 )
+;;
{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_lo_temp1 = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi
- nop.i 999
+ nop.m 0
+ fma.s1 f_peven = f_Rsq, f_peven_temp2, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_C_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_podd = f_podd_temp2, f_Rcub, f_R
+ nop.i 0
}
+;;
+
+// If TBL,
+// S_lo_temp1 = spos * Tjhi - S_hi
+// S_lo_temp2 = -sneg * Tmjhi + S_lo_temp1
+// S_lo_temp2 = -sneg * Tmjhi + (spos * Tjhi - S_hi)
{ .mfi
- nop.m 999
-(p0) fnma.s1 sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1
- nop.i 999
+ nop.m 0
+(p6) fms.s1 f_S_lo_temp1 = f_spos, f_Tjhi, f_S_hi
+ nop.i 0
}
-
-// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
-// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
-// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
-// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_lo_temp1 = sinh_FR_sneg, sinh_FR_Tmjlo, f0
- nop.i 999 ;;
+ nop.m 0
+(p6) fnma.s1 f_S_lo_temp2 = f_sneg, f_Tmjhi, f_S_lo_temp1
+ nop.i 0
}
+;;
-/////////// BUG FIX fma to fms -TK
+// If EXP,
+// Y_hi = sgnx * 2^(N-1) * Tjhi
+// Y_lo = sgnx * 2^(N-1) * Tjhi * (p_odd + p_even) + sgnx * 2^(N-1) * Tjlo
{ .mfi
- nop.m 999
-(p0) fms.s1 sinh_FR_S_lo_temp3 = sinh_FR_spos, sinh_FR_Tjlo, sinh_FR_S_lo_temp1
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s1 f_Y_lo_temp = f_peven, f1, f_podd
+ nop.i 0
}
+;;
+// If TBL,
+// S_lo = S_lo_temp4 + S_lo_temp2
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_S_lo = sinh_FR_S_lo_temp3, f1, sinh_FR_S_lo_temp2
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s1 f_S_lo = f_S_lo_temp4, f1, f_S_lo_temp2
+ nop.i 0
}
+;;
+// If TBL,
// Y_hi = S_hi
// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
-// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
-// sinh_FR_Y_lo = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s1 f_Y_lo_temp = f_S_hi, f_peven, f_S_lo
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s1 f_Y_lo = f_Tjhi_spos, f_Y_lo_temp, f_Tjlo_spos
+ nop.i 0
}
-
-// sinh_FR_SINH = Y_hi + Y_lo
-// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
+;;
// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999
+ nop.m 0
+ fmpy.s0 f_tmp = f_B2, f_B2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_Y_lo = f_C_hi, f_podd, f_Y_lo_temp
+ nop.i 0
}
+;;
+
+// If EXP, f8 = answer = Y_hi + Y_lo
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH = sinh_FR_S_hi, f1, sinh_FR_Y_lo
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s0 f8 = f_Y_lo, f1, f_Tjhi_spos
+ nop.i 0
}
+;;
+// If TBL, f8 = answer = Y_hi + Y_lo
{ .mfb
- nop.m 999
-(p0) fma.s0 f8 = sinh_FR_SGNX, sinh_FR_SINH,f0
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+(p6) fma.s0 f8 = f_Y_lo, f1, f_S_hi
+ br.ret.sptk b0 // Exit for SINH_BY_TBL and SINH_BY_EXP
}
+;;
-L(SINH_BY_EXP):
-
-// When p7 is true, we know that an overflow is not going to happen
-// When p7 is false, we must check for possible overflow
-// p7 is the over_SAFE flag
-// Y_hi = Tjhi
-// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
-// Scale = sign * 2^(N-1)
-// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
-// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_Y_lo_temp )
+// Here if 0 < |x| < 0.25
+SINH_BY_POLY:
+{ .mmf
+ ldfe f_P6 = [r_ad2e],16
+ ldfe f_P5 = [r_ad2o],16
+ nop.f 0
+}
+;;
-{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_peven, f1, sinh_FR_podd
- nop.i 999
+{ .mmi
+ ldfe f_P4 = [r_ad2e],16
+ ldfe f_P3 = [r_ad2o],16
+ nop.i 0
}
+;;
-// Now we are in EXP. This is the only path where an overflow is possible
-// but not for certain. So this is the only path where over_SAFE has any use.
-// r34 still has N-1
-// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
-// There is a danger of double overflow if N-1 > 0x3fe = 1022
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000003ffe ;;
+{ .mmi
+ ldfe f_P2 = [r_ad2e],16
+ ldfe f_P1 = [r_ad2o],16
+ nop.i 0
}
+;;
{ .mfi
-(p0) cmp.gt.unc p0,p7 = r34, r32
-(p0) fmerge.s sinh_FR_SCALE = sinh_FR_SGNX, sinh_FR_spos
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_X3 = f_NORM_X, f_X2, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_Tjhi, sinh_FR_Y_lo_temp, sinh_FR_Tjlo
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_X4 = f_X2, f_X2, f0
+ nop.i 0
}
+;;
-// f8 = answer = scale * (Y_hi + Y_lo)
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_SINH_temp = sinh_FR_Y_lo, f1, sinh_FR_Tjhi
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_poly65 = f_X2, f_P6, f_P5
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s0 f44 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_poly43 = f_X2, f_P4, f_P3
+ nop.i 0
}
+;;
-// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p7) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 f_poly21 = f_X2, f_P2, f_P1
+ nop.i 0
}
+;;
-// If over_SAFE is set, return
-{ .mfb
- nop.m 999
-(p7) fmerge.s f8 = f44,f44
-(p7) br.ret.sptk b0 ;;
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly6543 = f_X4, f_poly65, f_poly43
+ nop.i 0
}
-
-// Else see if we overflowed
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// If WRE is set then an overflow will not occur in EXP.
-// The input value that would cause a register (WRE) value to overflow is about 2^15
-// and this input would go into the HUGE path.
-// Answer with WRE is in f43.
+;;
{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
+ nop.m 0
+ fma.s1 f_poly6to1 = f_X4, f_poly6543, f_poly21
+ nop.i 0
}
+;;
+// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fma.s2 f43 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
- nop.i 999 ;;
+ nop.m 0
+ fmpy.s0 f_tmp = f_P6, f_P6
+ nop.i 0
}
-
-// 13FFF => 13FFF -FFFF = 4000(true)
-// 4000 + 3FFF = 7FFF, which is 1 more that the exponent of the largest
-// long double (7FFE). So 0 13FFF 8000000000000000 is one ulp more than
-// largest long double in register bias
-// Now set p8 if the answer with WRE is greater than or equal this value
-// Also set p9 if the answer with WRE is less than or equal to negative this value
-
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x00000000013FFF ;;
+{ .mfb
+ nop.m 0
+ fma.s0 f8 = f_poly6to1, f_X3, f_NORM_X
+ br.ret.sptk b0 // Exit SINH_BY_POLY
}
+;;
-{ .mmf
- nop.m 999
-(p0) setf.exp f41 = r32
-(p0) fsetc.s2 0x7F,0x40 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
- nop.i 999
+// Here if x denorm or unorm
+SINH_DENORM:
+// Determine if x is really a denorm and not a unorm
+{ .mmf
+ getf.exp r_signexp_x = f_NORM_X
+ mov r_exp_denorm = 0x0c001 // Real denorms have exp < this
+ fmerge.s f_ABS_X = f0, f_NORM_X
}
+;;
{ .mfi
- nop.m 999
-(p0) fmerge.ns f42 = f41, f41
- nop.i 999 ;;
+ nop.m 0
+ fcmp.eq.s0 p10,p0 = f8, f0 // Set denorm flag
+ nop.i 0
}
+;;
-// The error tag for overflow is 126
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p8) mov r47 = 126 ;;
+// Set p8 if really a denorm
+{ .mmi
+ and r_exp_x = r_exp_mask, r_signexp_x
+;;
+ cmp.lt p8,p9 = r_exp_x, r_exp_denorm
+ nop.i 0
}
+;;
+// Identify denormal operands.
{ .mfb
- nop.m 999
-(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
-(p8) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) mov r47 = 126
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+ nop.m 0
+(p8) fcmp.ge.unc.s1 p6,p7 = f8, f0 // Test sign of denorm
+(p9) br.cond.sptk SINH_COMMON // Return to main path if x unorm
}
+;;
-// Dummy multiply to generate inexact
{ .mfi
- nop.m 999
-(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s0 f8 = f8,f8,f8 // If x +denorm, result=x+x^2
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fmerge.s f8 = f44,f44
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+(p7) fnma.s0 f8 = f8,f8,f8 // If x -denorm, result=x-x^2
+ br.ret.sptk b0 // Exit if x denorm
}
+;;
-L(SINH_HUGE):
-
-// for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
-// SAFE: SAFE is always 0 for HUGE
-{ .mlx
- nop.m 999
-(p0) movl r32 = 0x0000000000015dbf ;;
+// Here if |x| >= overflow limit
+SINH_HUGE:
+// for SINH_HUGE, put 24000 in exponent; take sign from input
+{ .mmi
+ mov r_exp_huge = 0x15dbf
+;;
+ setf.exp f_huge = r_exp_huge
+ nop.i 0
}
+;;
+.pred.rel "mutex",p8,p9
{ .mfi
-(p0) setf.exp f9 = r32
- nop.f 999
- nop.i 999 ;;
+ alloc r32 = ar.pfs,0,5,4,0
+(p8) fnma.s1 f_signed_hi_lo = f_huge, f1, f1
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 f_signed_hi_lo = f_huge, f1, f1
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s0 f44 = sinh_FR_signed_hi_lo, f9, f0
-(p0) mov r47 = 126
-}
-.endp sinhl
-ASM_SIZE_DIRECTIVE(sinhl)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__ieee754_sinhl)
-#endif
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-.proc __libm_error_region
-__libm_error_region:
-L(SINH_ERROR_SUPPORT):
+ nop.m 0
+ fma.s0 f_pre_result = f_signed_hi_lo, f_huge, f0
+ mov GR_Parameter_TAG = 126
+}
+;;
+
+GLOBAL_IEEE754_END(sinhl)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-// (2)
{ .mmi
- stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
-// (3)
{ .mib
- stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
- stfe [GR_Parameter_Y] = f44 // STORE Parameter 3 on stack
+ stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-// (4)
{ .mmi
- ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sqrt.S b/sysdeps/ia64/fpu/e_sqrt.S
index dd057f58ee..0e208b3de1 100644
--- a/sysdeps/ia64/fpu/e_sqrt.S
+++ b/sysdeps/ia64/fpu/e_sqrt.S
@@ -1,11 +1,11 @@
.file "sqrt.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
-//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,27 +35,28 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// ********************************************************************
+//********************************************************************
// History
-// ********************************************************************
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+//********************************************************************
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
-// ********************************************************************
+//********************************************************************
//
// Function: Combined sqrt(x), where
// _
// sqrt(x) = |x, for double precision x values
//
-// ********************************************************************
+//********************************************************************
//
// Accuracy: Correctly Rounded
//
-// ********************************************************************
+//********************************************************************
//
// Resources Used:
//
@@ -68,7 +69,7 @@
//
// Predicate Registers: p6, p7, p8
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
@@ -78,15 +79,13 @@
// sqrt(+/-0) = +/-0
// sqrt(negative) = QNaN and error handling is called
//
-// *********************************************************************
+//*********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
-// *********************************************************************
-
-#include "libm_support.h"
+//*********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@@ -98,19 +97,7 @@ GR_Parameter_RESULT = r39
.section .text
-.proc sqrt#
-.global sqrt#
-.align 64
-
-sqrt:
-#ifdef _LIBC
-.global __sqrt
-.type __sqrt,@function
-__sqrt:
-.global __ieee754_sqrt
-.type __ieee754_sqrt,@function
-__ieee754_sqrt:
-#endif
+GLOBAL_IEEE754_ENTRY(sqrt)
{ .mfi
alloc r32= ar.pfs,0,5,4,0
frsqrta.s0 f7,p6=f8
@@ -255,7 +242,7 @@ __ieee754_sqrt:
{ .mfb
nop.m 0
- (p0) mov f8 = f7
+ mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
{ .mfb
@@ -264,13 +251,7 @@ __ieee754_sqrt:
(p7) br.cond.sptk __libm_error_region ;;
}
// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
-.endp sqrt#
-ASM_SIZE_DIRECTIVE(sqrt)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__sqrt)
-ASM_SIZE_DIRECTIVE(__ieee754_sqrt)
-#endif
-
+GLOBAL_IEEE754_END(sqrt)
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +
@@ -286,8 +267,7 @@ ASM_SIZE_DIRECTIVE(__ieee754_sqrt)
// save gp restore ar.pfs
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// This branch includes all those special values that are not negative,
@@ -352,8 +332,9 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
+
.type __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/e_sqrtf.S b/sysdeps/ia64/fpu/e_sqrtf.S
index 1799845d6d..bee0df7414 100644
--- a/sysdeps/ia64/fpu/e_sqrtf.S
+++ b/sysdeps/ia64/fpu/e_sqrtf.S
@@ -1,10 +1,10 @@
.file "sqrtf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,27 +35,29 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// *********************************************************************
+//*********************************************************************
// History:
//
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
-// *********************************************************************
+//*********************************************************************
//
// Function: Combined sqrtf(x), where
// _
// sqrtf(x) = |x, for single precision x values
//
-// ********************************************************************
+//********************************************************************
//
// Accuracy: Correctly Rounded
//
-// ********************************************************************
+//********************************************************************
//
// Resources Used:
//
@@ -68,7 +70,7 @@
//
// Predicate Registers: p6, p7, p8
//
-// ********************************************************************
+//********************************************************************
//
// IEEE Special Conditions:
//
@@ -78,15 +80,14 @@
// sqrtf(+/-0) = +/-0
// sqrtf(negative) = QNaN and error handling is called
//
-// ********************************************************************
+//********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
-// ********************************************************************
+//********************************************************************
-#include "libm_support.h"
GR_SAVE_B0 = r34
GR_SAVE_PFS = r33
@@ -102,21 +103,8 @@ FR_Y = f0
FR_RESULT = f8
-
.section .text
-.proc sqrtf#
-.global sqrtf#
-.align 64
-
-sqrtf:
-#ifdef _LIBC
-.global __sqrtf
-.type __sqrtf,@function
-__sqrtf:
-.global __ieee754_sqrtf
-.type __ieee754_sqrtf,@function
-__ieee754_sqrtf:
-#endif
+GLOBAL_IEEE754_ENTRY(sqrtf)
{ .mlx
// BEGIN SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
alloc r32= ar.pfs,0,5,4,0
@@ -197,7 +185,7 @@ __ieee754_sqrtf:
// Step (10)
// d1 = a - S1 * S1 in f9
(p6) fnma.s1 f9=f7,f7,f8
- nop.i 0;;;
+ nop.i 0;;
} { .mfb
nop.m 0
// Step (11)
@@ -207,27 +195,20 @@ __ieee754_sqrtf:
// END SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
} { .mfb
nop.m 0
- (p0) mov f8 = f7
+ mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
//
// This branch includes all those special values that are not negative,
// with the result equal to frcpa(x)
//
-.endp sqrtf
-ASM_SIZE_DIRECTIVE(sqrtf)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__sqrtf)
-ASM_SIZE_DIRECTIVE(__ieee754_sqrtf)
-#endif
-
+GLOBAL_IEEE754_END(sqrtf)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii
add GR_Parameter_Y=-32,sp // Parameter 2 value
-(p0) mov GR_Parameter_TAG = 50
+ mov GR_Parameter_TAG = 50
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
@@ -271,8 +252,7 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/e_sqrtl.S b/sysdeps/ia64/fpu/e_sqrtl.S
index e41148243a..ec1475626d 100644
--- a/sysdeps/ia64/fpu/e_sqrtl.S
+++ b/sysdeps/ia64/fpu/e_sqrtl.S
@@ -1,10 +1,10 @@
.file "sqrtl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,23 +35,25 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// ********************************************************************
+//********************************************************************
//
// History:
-// 2/02/00 (hand-optimized)
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 (hand-optimized)
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
-// ********************************************************************
+//********************************************************************
//
// Function: Combined sqrtl(x), where
// _
// sqrtl(x) = |x, for double-extended precision x values
//
-// ********************************************************************
+//********************************************************************
//
// Resources Used:
//
@@ -64,7 +66,7 @@
//
// Predicate Registers: p6, p7, p8
//
-// ********************************************************************
+//********************************************************************
//
// IEEE Special Conditions:
//
@@ -74,15 +76,13 @@
// sqrtl(+/-0) = +/-0
// sqrtl(negative) = QNaN and error handling is called
//
-// ********************************************************************
+//********************************************************************
//
// Implementation:
//
// Modified Newton-Raphson Algorithm
//
-// ********************************************************************
-
-#include "libm_support.h"
+//********************************************************************
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
@@ -97,19 +97,7 @@ FR_Y = f0
FR_RESULT = f8
.section .text
-.proc sqrtl#
-.global sqrtl#
-.align 64
-
-sqrtl:
-#ifdef _LIBC
-.global __sqrtl
-.type __sqrtl,@function
-__sqrtl:
-.global __ieee754_sqrtl
-.type __ieee754_sqrtl,@function
-__ieee754_sqrtl:
-#endif
+GLOBAL_IEEE754_ENTRY(sqrtl)
{ .mlx
alloc r32= ar.pfs,0,5,4,0
// exponent of +1/2 in r2
@@ -151,7 +139,7 @@ alloc r32= ar.pfs,0,5,4,0
}
{ .mfi
nop.m 0
- (p0) mov f15=f8
+ mov f15=f8
nop.i 0;;
} { .mfi
nop.m 0
@@ -221,8 +209,8 @@ alloc r32= ar.pfs,0,5,4,0
(p6) br.ret.sptk b0 ;;
}
{ .mfb
- (p0) mov GR_Parameter_TAG = 48
- (p0) mov f8 = f7
+ mov GR_Parameter_TAG = 48
+ mov f8 = f7
(p8) br.ret.sptk b0 ;;
}
//
@@ -232,15 +220,8 @@ alloc r32= ar.pfs,0,5,4,0
// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
-.endp sqrtl#
-ASM_SIZE_DIRECTIVE(sqrtl)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__sqrtl)
-ASM_SIZE_DIRECTIVE(__ieee754_sqrtl)
-#endif
-
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(sqrtl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -288,7 +269,6 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/libm_atan2_reg.S b/sysdeps/ia64/fpu/libm_atan2_reg.S
deleted file mode 100644
index 5649670d19..0000000000
--- a/sysdeps/ia64/fpu/libm_atan2_reg.S
+++ /dev/null
@@ -1,1234 +0,0 @@
-.file "libm_atan2_reg.s"
-
-// Copyright (C) 2000, 2001, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
-//
-// History
-//==============================================================
-// 2/02/00: Initial version
-// 4/04/00 Unwind support added
-
-#include "libm_support.h"
-
-.data
-
-.align 64
-ASM_TYPE_DIRECTIVE(Constants_atan#,@object)
-Constants_atan:
-data4 0x54442D18, 0x3FF921FB, 0x248D3132, 0x3E000000
-// double pi/2, single lo_pi/2, two**(-3)
-data4 0xAAAAAAA3, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // P_1
-data4 0xCCCC54B2, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // P_2
-data4 0x47E4D0C2, 0x92492492, 0x0000BFFC, 0x00000000 // P_3
-data4 0x58870889, 0xE38E38E0, 0x00003FFB, 0x00000000 // P_4
-data4 0x290149F8, 0xBA2E895B, 0x0000BFFB, 0x00000000 // P_5
-data4 0x250F733D, 0x9D88E6D4, 0x00003FFB, 0x00000000 // P_6
-data4 0xFB8745A0, 0x884E51FF, 0x0000BFFB, 0x00000000 // P_7
-data4 0x394396BD, 0xE1C7412B, 0x00003FFA, 0x00000000 // P_8
-data4 0xAAAAA52F, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // Q_1
-data4 0xC75B60D3, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // Q_2
-data4 0x011F1940, 0x924923AD, 0x0000BFFC, 0x00000000 // Q_3
-data4 0x2A5F89BD, 0xE36F716D, 0x00003FFB, 0x00000000 // Q_4
-// Entries Tbl_hi (double precision)
-// B = 1+Index/16+1/32 Index = 0
-// Entries Tbl_lo (single precision)
-// B = 1+Index/16+1/32 Index = 0
-data4 0xA935BD8E, 0x3FE9A000, 0x23ACA08F, 0x00000000
-// Entries Tbl_hi (double precision) Index = 0,1,...,15
-// B = 2^(-1)*(1+Index/16+1/32)
-// Entries Tbl_lo (single precision)
-// Index = 0,1,...,15 B = 2^(-1)*(1+Index/16+1/32)
-data4 0x7F175A34, 0x3FDE77EB, 0x238729EE, 0x00000000
-data4 0x73C1A40B, 0x3FE0039C, 0x249334DB, 0x00000000
-data4 0x5B5B43DA, 0x3FE0C614, 0x22CBA7D1, 0x00000000
-data4 0x88BE7C13, 0x3FE1835A, 0x246310E7, 0x00000000
-data4 0xE2CC9E6A, 0x3FE23B71, 0x236210E5, 0x00000000
-data4 0x8406CBCA, 0x3FE2EE62, 0x2462EAF5, 0x00000000
-data4 0x1CD41719, 0x3FE39C39, 0x24B73EF3, 0x00000000
-data4 0x5B795B55, 0x3FE44506, 0x24C11260, 0x00000000
-data4 0x5BB6EC04, 0x3FE4E8DE, 0x242519EE, 0x00000000
-data4 0x1F732FBA, 0x3FE587D8, 0x24D4346C, 0x00000000
-data4 0x115D7B8D, 0x3FE6220D, 0x24ED487B, 0x00000000
-data4 0x920B3D98, 0x3FE6B798, 0x2495FF1E, 0x00000000
-data4 0x8FBA8E0F, 0x3FE74897, 0x223D9531, 0x00000000
-data4 0x289FA093, 0x3FE7D528, 0x242B0411, 0x00000000
-data4 0x576CC2C5, 0x3FE85D69, 0x2335B374, 0x00000000
-data4 0xA99CC05D, 0x3FE8E17A, 0x24C27CFB, 0x00000000
-//
-// Entries Tbl_hi (double precision) Index = 0,1,...,15
-// B = 2^(-2)*(1+Index/16+1/32)
-// Entries Tbl_lo (single precision)
-// Index = 0,1,...,15 B = 2^(-2)*(1+Index/16+1/32)
-//
-data4 0x510665B5, 0x3FD025FA, 0x24263482, 0x00000000
-data4 0x362431C9, 0x3FD1151A, 0x242C8DC9, 0x00000000
-data4 0x67E47C95, 0x3FD20255, 0x245CF9BA, 0x00000000
-data4 0x7A823CFE, 0x3FD2ED98, 0x235C892C, 0x00000000
-data4 0x29271134, 0x3FD3D6D1, 0x2389BE52, 0x00000000
-data4 0x586890E6, 0x3FD4BDEE, 0x24436471, 0x00000000
-data4 0x175E0F4E, 0x3FD5A2E0, 0x2389DBD4, 0x00000000
-data4 0x9F5FA6FD, 0x3FD68597, 0x2476D43F, 0x00000000
-data4 0x52817501, 0x3FD76607, 0x24711774, 0x00000000
-data4 0xB8DF95D7, 0x3FD84422, 0x23EBB501, 0x00000000
-data4 0x7CD0C662, 0x3FD91FDE, 0x23883A0C, 0x00000000
-data4 0x66168001, 0x3FD9F930, 0x240DF63F, 0x00000000
-data4 0x5422058B, 0x3FDAD00F, 0x23FE261A, 0x00000000
-data4 0x378624A5, 0x3FDBA473, 0x23A8CD0E, 0x00000000
-data4 0x0AAD71F8, 0x3FDC7655, 0x2422D1D0, 0x00000000
-data4 0xC9EC862B, 0x3FDD45AE, 0x2344A109, 0x00000000
-//
-// Entries Tbl_hi (double precision) Index = 0,1,...,15
-// B = 2^(-3)*(1+Index/16+1/32)
-// Entries Tbl_lo (single precision)
-// Index = 0,1,...,15 B = 2^(-3)*(1+Index/16+1/32)
-//
-data4 0x84212B3D, 0x3FC068D5, 0x239874B6, 0x00000000
-data4 0x41060850, 0x3FC16465, 0x2335E774, 0x00000000
-data4 0x171A535C, 0x3FC25F6E, 0x233E36BE, 0x00000000
-data4 0xEDEB99A3, 0x3FC359E8, 0x239680A3, 0x00000000
-data4 0xC6092A9E, 0x3FC453CE, 0x230FB29E, 0x00000000
-data4 0xBA11570A, 0x3FC54D18, 0x230C1418, 0x00000000
-data4 0xFFB3AA73, 0x3FC645BF, 0x23F0564A, 0x00000000
-data4 0xE8A7D201, 0x3FC73DBD, 0x23D4A5E1, 0x00000000
-data4 0xE398EBC7, 0x3FC8350B, 0x23D4ADDA, 0x00000000
-data4 0x7D050271, 0x3FC92BA3, 0x23BCB085, 0x00000000
-data4 0x601081A5, 0x3FCA217E, 0x23BC841D, 0x00000000
-data4 0x574D780B, 0x3FCB1696, 0x23CF4A8E, 0x00000000
-data4 0x4D768466, 0x3FCC0AE5, 0x23BECC90, 0x00000000
-data4 0x4E1D5395, 0x3FCCFE65, 0x2323DCD2, 0x00000000
-data4 0x864C9D9D, 0x3FCDF110, 0x23F53F3A, 0x00000000
-data4 0x451D980C, 0x3FCEE2E1, 0x23CCB11F, 0x00000000
-data4 0x54442D18, 0x400921FB, 0x33145C07, 0x3CA1A626 // I two doubles
-data4 0x54442D18, 0x3FF921FB, 0x33145C07, 0x3C91A626 // I_by_2 two dbls
-data4 0x54442D18, 0x3FE921FB, 0x33145C07, 0x3C81A626 // I_by_4 two dbls
-data4 0x7F3321D2, 0x4002D97C, 0x4C9E8A0A, 0x3C9A7939 // 3I_by_4 two dbls
-ASM_SIZE_DIRECTIVE(Constants_atan#)
-.section .text
-
-.proc __libm_atan2_reg#
-.global __libm_atan2_reg#
-.align 64
-__libm_atan2_reg:
-
-
-{ .mfi
- alloc r32 = ar.pfs,0,20,4,0
-(p0) mov f32 = f8
- nop.i 0
-}
-{ .mmi
- nop.m 0
-(p0) addl r39 = @ltoff(Constants_atan#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 r39 = [r39]
- nop.m 999
- nop.i 999
-}
-;;
-
-{ .mfi
- nop 999 // EMbo added ...
-(p0) mov f33 = f9
- nop.i 0
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.nm.unc p9,p0 = f32 ,0x1FF
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.nm.unc p8,p0 = f33 ,0x1FF
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p6,p0 = f33 ,0x103
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p7,p0 = f32 ,0x103
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p12,p0 = f33 ,0x0C3
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-//
-// Check for NatVals.
-// Check for EM Unsupporteds
-// Check for NaNs.
-//
-(p0) fclass.m.unc p13,p0 = f32 ,0x0C3
-(p6) br.cond.sptk L(ATAN_NATVAL);;
- } { .mbb
- nop 999 // EMbo added ...
-(p7) br.cond.sptk L(ATAN_NATVAL)
-(p8) br.cond.sptk L(ATAN_UNSUPPORTED);;
- } { .mib
-(p0) add r40 = 96, r39
- nop 999 // EMbo added ...
-(p9) br.cond.sptk L(ATAN_UNSUPPORTED);;
- } { .mib
-(p0) ldfd f50 = [r39],8
- nop 999 // EMbo added ...
-(p12) br.cond.sptk L(ATAN_NAN);;
- } { .mfb
- nop 999 // EMbo added ...
-(p0) fnorm.s1 f33 = f33
-(p13) br.cond.sptk L(ATAN_NAN);;
- } { .mfi
-(p0) ldfs f51 = [r39],4
-//
-// Remove sign bits from exponents
-// Load 2**(-3)
-// Normalize the input argument.
-//
-(p0) fnorm.s1 f32 = f32
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) mov f82 = f1
- nop 999;; // EMbo added ...
- } { .mmi
- nop 999;; // EMbo added ...
-(p0) ldfs f78 = [r39],180
- nop 999;; // EMbo added ...
- } { .mmi
-(p0) getf.exp r36 = f33;;
-//
-// Get exp and sign of ArgX
-// Get exp and sign of ArgY
-// Load 2**(-3) and increment ptr to Q_4.
-//
-(p0) getf.exp r37 = f32
-(p0) shr.u r36 = r36,17;;
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmerge.s f84 = f1,f32
-(p0) shr.u r37 = r37,17;;
- } { .mfi
- nop 999 // EMbo added ...
-//
-// ArgX_abs = |ArgX|
-// ArgY_abs = |ArgY|
-// sign_X is sign bit of ArgX
-// sign_Y is sign bit of ArgY
-//
-(p0) fmerge.s f83 = f1,f33
-(p0) cmp.eq.unc p8,p9 = 0x00000, r37;;
- } { .mfi
- nop 999 // EMbo added ...
-(p8) fadd.s1 f34 = f0, f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p9) fsub.s1 f34 = f0, f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmin.s1 f36 = f83, f84
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmax.s1 f35 = f83, f84
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// Is ArgX_abs >= ArgY_abs
-// Is sign_Y == 0?
-//
-(p0) fcmp.ge.s1 p6,p7 = f83,f84
- nop 999;; // EMbo added ...
- } { .mii
-(p6) cmp.eq.unc p10, p11 = 0x00000, r36
-(p6) add r38 = r0, r0;;
-//
-// U = max(ArgX_abs,ArgY_abs)
-// V = min(ArgX_abs,ArgY_abs)
-// if p6, swap = 0
-// if p7, swap = 1
-//
-//
-// Let M = 1.0
-// if p8, s_Y = 1.0
-// if p9, s_Y = -1.0
-//
-(p7) add r38 = 1,r0;;
- } { .mfi
- nop 999 // EMbo added ...
-(p0) frcpa.s1 f37, p6 = f36, f35
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-//
-// E = frcpa(V,U)
-//
-(p10) fsub.s1 f82 = f82, f1
-(p6) br.cond.sptk L(ATAN_STEP2);;
- } { .mib
- nop 999 // EMbo added ...
- nop 999 // EMbo added ...
-// /**************************************************/
-// /********************* STEP2 **********************/
-// /**************************************************/
-(p0) br.cond.spnt L(ATAN_SPECIAL_HANDLING);;
- }
-L(ATAN_STEP2):
- { .mlx
- nop 999 // EMbo added ...
-(p0) movl r47 = 0x8400000000000000
- } { .mlx
- nop 999 // EMbo added ...
-(p0) movl r48 = 0x0000000000000100;;
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmpy.s1 f38 = f37, f36
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fcmp.lt.unc.s0 p0,p9 = f9,f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fcmp.lt.unc.s0 p0,p8 = f8,f1
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// Q = E * V
-//
-(p11) fadd.s1 f82 = f82, f1
- nop 999;; // EMbo added ...
- } { .mfi
-(p0) getf.sig r46 = f38
-(p0) fcmp.lt.unc p6,p7 = f38,f78
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmpy.s1 f38 = f37, f36
-(p0) extr.u r42 = r46, 59, 4;;
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmpy.s1 f50 = f82, f50
-(p0) dep r47 = r42, r47, 59, 4
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmpy.s1 f51 = f82, f51
- nop 999;; // EMbo added ...
- } { .mmi
- nop 999;; // EMbo added ...
-//
-// Is Q < 2**(-3)?
-//
-//
-// Do fcmp to raise any denormal operand
-// exceptions.
-//
-(p0) getf.exp r45 = f38
- nop 999;; // EMbo added ...
- } { .mib
-//
-// lookup = b_1 b_2 b_3 B_4
-//
-//
-// Generate 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
-//
-(p0) andcm r41 = 0x0003, r45
- nop 999 // EMbo added ...
-//
-// We waited a few extra cycles so P_lo and P_hi could be calculated.
-// Load the constant 256 for loading up table entries.
-//
-// /**************************************************/
-// /********************* STEP3 **********************/
-// /**************************************************/
-(p6) br.cond.spnt L(ATAN_POLY);;
- } { .mii
-(p0) setf.sig f39 = r47
-(p0) cmp.eq.unc p8, p9 = 0x0000, r41
-//
-// z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
-// point to beginning of Tbl_hi entries - k = 0.
-//
-(p0) add r40 = 16, r39
- } { .mmi
-(p0) ldfe f73 = [r39],-16;;
-(p9) sub r41 = r41,r0,1
-(p9) add r40 = 16,r40
- } { .mfi
-(p8) ldfd f48 = [r40],8
-(p0) fmpy.s1 f50 = f34, f50
-(p0) xor r38 = r36,r38;;
- } { .mmi
-(p0) ldfe f71 = [r39],-16;;
-(p8) ldfs f49 = [r40],8
-(p9) pmpy2.r r41 = r41,r48;;
- } { .mfi
-(p0) ldfe f69 = [r39],-16
-//
-// Let z_hi have exponent and sign of original Q
-// Load the Tbl_hi(0) else, increment pointer.
-//
-(p0) fmerge.se f39 = f38,f39
-(p9) shladd r42 = r42,0x0004,r41;;
- } { .mmi
-(p9) add r40 = r40, r42;;
-(p9) ldfd f48 = [r40],8
- nop 999;; // EMbo added ...
- } { .mmi
-(p0) ldfe f67 = [r39],-16;;
-(p9) ldfs f49 = [r40],8
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// U_prime_hi = U + V * z_hi
-// Load the Tbl_lo(0)
-//
-(p0) fma.s1 f40 = f36, f39, f35
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fnma.s1 f42 = f35, f39, f36
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) mov f52 = f48
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) frcpa.s1 f43, p6 = f1, f40
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// U_prime_lo = U - U_prime_hi
-// k = k * 256 - result can be 0, 256, or 512.
-//
-(p0) fsub.s1 f41 = f35, f40
-(p0) cmp.eq.unc p7, p6 = 0x00000, r38
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmpy.s1 f52 = f34, f52
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p7) fadd.s1 f54 = f0, f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p6) fsub.s1 f54 = f0, f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fnma.s1 f80 = f43, f40, f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fadd.s1 f79 = f41, f40
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f41 = f36, f39, f41
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f56 = f54, f52, f50
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f43 = f80, f43, f43
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// U_prime_lo = U - U_hold
-// lookup -> lookup * 16 + k
-//
-//
-// V_prime = V - U * z_hi
-// U_prime_lo = V * z_hi + U_prime_lo
-//
-(p0) fsub.s1 f79 = f35, f79
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fnma.s1 f80 = f43, f40, f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// C_hi = frcpa(1,U_prime_hi)
-// U_prime_lo = U_prime_lo + U_hold
-//
-//
-// C_hi_hold = 1 - C_hi * U_prime_hi (1)
-//
-//
-// C_hi = C_hi + C_hi * C_hi_hold (1)
-//
-//
-// C_hi_hold = 1 - C_hi * U_prime_hi (2)
-//
-(p0) fadd.s1 f41 = f41, f79
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// C_hi = C_hi + C_hi * C_hi_hold (2)
-//
-(p0) fma.s1 f43 = f80, f43, f43
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// C_hi_hold = 1 - C_hi * U_prime_hi (3)
-//
-(p0) fnma.s1 f80 = f43, f40, f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// C_hi = C_hi + C_hi * C_hi_hold (3)
-//
-(p0) fma.s1 f43 = f80, f43, f43
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// w_hi = V_prime * C_hi
-//
-(p0) fmpy.s1 f44 = f42, f43
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmpy.s1 f46 = f44, f44
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// wsq = w_hi * w_hi
-// w_lo = = V_prime - w_hi * U_prime_hi
-//
-(p0) fnma.s1 f45 = f44, f40, f42
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f47 = f46, f73, f71
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// poly = Q_3 + wsq * Q_4
-// w_lo = = w_lo - w_hi * U_prime_lo
-//
-(p0) fnma.s1 f45 = f44, f41, f45
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f47 = f46, f47, f69
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// poly = Q_2 + wsq * poly
-// w_lo = = w_lo * C_hi
-//
-(p0) fmpy.s1 f45 = f43, f45
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f47 = f46, f47, f67
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// poly = Q_1 + wsq * poly
-// A_lo = Tbl_lo + w_lo
-// swap = xor(swap,sign_X)
-//
-(p0) fadd.s1 f53 = f49, f45
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// Is (swap) != 0 ?
-// poly = wsq * poly
-// A_hi = Tbl_hi
-//
-(p0) fmpy.s1 f47 = f46, f47
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// poly = wsq * poly
-//
-//
-// if (p6) sigma = -1.0
-// if (p7) sigma = 1.0
-//
-(p0) fmpy.s1 f47 = f44, f47
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// P_hi = s_Y * P_hi
-// A_lo = A_lo + poly
-//
-(p0) fadd.s1 f53 = f53, f47
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// A_lo = A_lo + w_hi
-// A_hi = s_Y * A_hi
-//
-(p0) fadd.s1 f53 = f53, f44
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-//
-// result_hi = P_hi + sigma * A_hi
-// result_lo = P_lo + sigma * A_lo
-//
-(p0) fma.s1 f55 = f54, f53, f51
-(p0) br.cond.sptk L(RETURN_ATAN);;
-}
-//
-// result = result_hi + result_lo * s_Y (User Supplied Rounding Mode)
-//
-// (p0) fma.d.s0 f57 = f55, f34, f56
-//
-// /**************************************************/
-// /********************* STEP4 **********************/
-// /**************************************************/
-//
-L(ATAN_POLY):
-{ .mmi
-(p0) xor r38 = r36,r38
-(p0) addl r39 = @ltoff(Constants_atan#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 r39 = [r39]
- nop.m 999
- nop.i 999
-}
-;;
-
-
-{ .mlx
- nop 999 // EMbo added ...
-(p0) movl r47 = 0x24005;;
- } { .mfi
-(p0) add r39 = 128, r39
-(p0) fnma.s1 f81 = f37, f35, f1
-(p0) cmp.eq.unc p7, p6 = 0x00000, r38;;
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfe f77 = [r39],-16
-//
-// Iterate 3 times E = E + E*(1.0 - E*U)
-// Also load P_8, P_7, P_6, P_5, P_4
-// E_hold = 1.0 - E * U (1)
-// A_temp = Q
-//
-(p0) mov f85 = f38;;
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfe f76 = [r39],-16
-(p6) fsub.s1 f54 = f0, f1;;
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfe f75 = [r39],-16
-//
-// E = E + E_hold*E (1)
-// Point to P_8.
-//
-(p0) fma.s1 f37 = f37, f81, f37;;
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfe f74 = [r39],-16
-(p0) fnma.s1 f64 = f85, f35, f36;;
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfe f72 = [r39],-16
-(p7) fadd.s1 f54 = f0, f1;;
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfe f70 = [r39],-16
-//
-// E_hold = 1.0 - E * U (2)
-//
-(p0) fnma.s1 f81 = f37, f35, f1;;
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfe f68 = [r39],-16
-(p0) fmpy.s1 f50 = f34, f50;;
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfe f66 = [r39],-16
-(p0) fmpy.d.s0 f67 = f67, f67
- } { .mfi
- nop 999 // EMbo added ...
-//
-// E = E + E_hold*E (2)
-//
-(p0) fma.s1 f37 = f37, f81, f37
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// E_hold = 1.0 - E * U (3)
-//
-(p0) fnma.s1 f81 = f37, f35, f1
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// E = E + E_hold*E (3)
-// At this point E approximates 1/U to roughly working precision
-// z = V*E approximates V/U
-//
-(p0) fma.s1 f37 = f37, f81, f37
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// z = V * E
-//
-(p0) fmpy.s1 f59 = f36, f37
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmpy.s1 f64 = f64, f37
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// zsq = z * z
-// Also load P_3
-//
-(p0) fmpy.s1 f60 = f59, f59
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fadd.s1 f52 = f85, f64
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f62 = f60, f77, f76
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f63 = f60, f70, f68
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// z8 = zsq * zsq
-// Also load P_2
-//
-(p0) fmpy.s1 f61 = f60, f60
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fsub.s1 f85 = f85, f52
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmerge.s f65 = f52,f52
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f62 = f60, f62, f75
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f63 = f60, f63, f66
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// z8 = z8 * z8
-// Also load P_1
-// poly1 = _4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
-// poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
-//
-//
-// poly1 = P_7 + zsq * P_8
-// poly2 = P_2 + zsq * P_3
-// poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*poly1))
-// poly2 = zsq*(P_1 + zsq*poly2)
-//
-//
-// poly1 = P_6 + zsq * poly1
-// poly2 = P_1 + zsq * poly2
-// poly1 = P_4 + zsq*(P_5 + zsq*poly1)
-// poly2 = zsq*poly2
-//
-(p0) fmpy.s1 f61 = f61, f61
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fadd.s1 f64 = f85, f64
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f62 = f60, f62, f74
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// poly1 = P_5 + zsq * poly1
-// poly2 = zsq * poly2
-// poly1 = P_4 + zsq*poly1
-//
-(p0) fmpy.s1 f63 = f63, f60
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// poly1 = P_4 + zsq * poly1
-// swap = xor(swap,sign_X)
-//
-(p0) fma.s1 f62 = f60, f62, f72
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// poly = z8*poly1 + poly2 (Typo in writeup)
-// Is (swap) != 0 ?
-//
-//
-// z_lo = V - A_temp * U
-// if (p7) sigma = 1.0
-// Writeup shows A_temp as A_hi
-//
-//
-// z_lo = z_lo * E
-// if (p6) sigma = -1.0
-// z_lo = (V - A_temp * U) *E
-//
-//
-// Fixup added to force inexact later -
-// A_hi = A_temp + z_lo
-// z_lo = (A_temp - A_hi) + z_lo
-// z_lo = A_hi - z_lo -A_hi + z_lo = about 0
-//
-(p0) fma.s1 f47 = f61, f62, f63
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// A_lo = z * poly + z_lo
-//
-(p0) fma.s1 f53 = f59, f47, f64
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fadd.s1 f52 = f65, f53
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fsub.s1 f65 = f65, f52
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fmpy.s1 f52 = f34, f52
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fadd.s1 f53 = f65, f53
- nop 999 // EMbo added ...
- } { .mfi
-(p0) setf.exp f65 = r47
-(p0) fma.s1 f56 = f54, f52, f50
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p6,p0 = f53,0x007
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// P_hi = s_Y * P_hi
-// A_hi = s_Y * A_hi
-//
-//
-// result_hi = P_hi + sigma * A_hi
-//
-(p6) mov f53 = f65
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// tmp = P_hi - result_hi
-//
-(p0) fsub.s1 f65 = f50, f56
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fma.s1 f65 = f52, f54, f65
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// tmp = sigma * A_hi + tmp
-// sigma = A_lo * sigma + P_lo
-//
-(p0) fma.s1 f54 = f53, f54, f51
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// result_lo = s_Y * sigma + tmp
-//
-(p0) fma.s1 f55 = f34, f54, f65
- nop 999;; // EMbo added ...
- } { .mfb
- nop.m 0
- mov f34 = f1
-(p0) br.cond.sptk L(RETURN_ATAN);;
-}
-//
-// result = result_hi + result_lo (User Supplied Rounding Mode)
-//
-// (p0) fadd.d.s0 f57 = f55, f56
-L(ATAN_UNSUPPORTED):
-L(ATAN_NATVAL):
- { .mfb
- nop 999 // EMbo added ...
-//
-// Deal with the NatVal and unsupported cases.
-// Raise invalid if warrented.
-//
-(p0) fmpy.d.s0 f57 = f8, f9
-br.cond.sptk L(RETURN_ATAN);;
- }
-L(ATAN_NAN):
- { .mfb
- nop 999 // EMbo added ...
-//
-// If only one NaN, then generate the resulting
-// NaN and return - may raise invalid.
-//
-(p0) fmpy.d.s0 f57 = f8, f9
-(p0) br.cond.sptk L(RETURN_ATAN);;
- }
-L(ATAN_SPECIAL_HANDLING):
-
- { .mmf
-(p0) addl r39 = @ltoff(Constants_atan#), gp
- nop.m 999
-(p0) fcmp.lt.s0 p0,p7 = f8,f1
- }
-;;
-
-//
-// Raise denormal operand faults if necessary
-//
-
-{ .mfi
- ld8 r39 = [r39]
-(p0) fcmp.lt.s0 p0,p6 = f9,f1
- nop 999;; // EMbo added ...
-}
-;;
-
-
-
-{ .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p6,p7 = f32,0x007
- nop 999;; // EMbo added ...
- } { .mlx
- nop 999 // EMbo added ...
-(p0) movl r47 = 992;;
- } { .mib
-(p0) add r39 = r39, r47
- nop 999 // EMbo added ...
-(p7) br.cond.sptk L(ATAN_ArgY_Not_ZERO);;
- } { .mfi
- nop 999 // EMbo added ...
-(p6) fclass.m.unc p14,p0 = f33,0x035
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p6) fclass.m.unc p15,p0 = f33,0x036
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p6) fclass.m.unc p13,p0 = f33,0x007
- nop 999 // EMbo added ...
- } { .mfi
-(p0) ldfd f56 = [r39],8
- nop 999 // EMbo added ...
- nop 999;; // EMbo added ...
- } { .mfi
-(p0) ldfd f55 = [r39],-8
-(p14) fmerge.s f56 = f32,f0
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// Return sign_Y * 0 when Y = +/-0 and X > 0
-//
-(p14) fmerge.s f55 = f32,f0
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p15) fmerge.s f56 = f32,f56
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// Return sign_Y * PI when X < -0
-//
-//
-(p15) fmerge.s f55 = f32,f55
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fadd.d.s0 f57 = f56,f55
- nop.i 0
- } { .bbb
-//
-// Call error support function for atan(0,0)
-// - expected value already computed.
-//
- nop.b 0
- nop.b 0
-(p0) br.cond.sptk L(RETURN_ATAN)
- }
-L(ATAN_ArgY_Not_ZERO):
- { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p9,p10 = f32,0x023
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-(p9) fclass.m.unc p6,p0 = f33,0x017
-(p10) br.cond.sptk L(ATAN_ArgY_Not_INF);;
- } { .mfi
-(p6) add r39 = 16,r39
-(p9) fclass.m.unc p7,p0 = f33,0x021
- nop 999;; // EMbo added ...
- } { .mmf
- nop 999 // EMbo added ...
-(p0) ldfd f56 = [r39],8
-(p9) fclass.m.unc p8,p0 = f33,0x022;;
- } { .mbb
-(p0) ldfd f55 = [r39],-8
- nop 999 // EMbo added ...
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p6) fmerge.s f56 = f32,f56
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p6) fmerge.s f55 = f32,f55
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-//
-// Load I/2 and adjust its sign.
-// Return +I/2 when ArgY = +Inf and ArgX = +/-0,normal
-// Return -I/2 when ArgY = -Inf and ArgX = +/-0,normal
-//
-(p6) fadd.d.s0 f57 = f56, f55
-(p6) br.cond.sptk L(RETURN_ATAN);;
- } { .mmi
-(p7) add r39 = 32,r39;;
-(p7) ldfd f56 = [r39],8
- nop 999;; // EMbo added ...
- } { .mmi
- nop 999;; // EMbo added ...
-(p7) ldfd f55 = [r39],-8
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p7) fmerge.s f56 = f32,f56
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p7) fmerge.s f55 = f32,f55
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-//
-// Load PI/4 and adjust its sign.
-// Return +PI/4 when ArgY = +Inf and ArgX = +Inf
-// Return -PI/4 when ArgY = -Inf and ArgX = +Inf
-//
-(p7) fadd.d.s0 f57 = f56, f55
-(p7) br.cond.sptk L(RETURN_ATAN);;
- } { .mmi
-(p8) add r39 = 48,r39;;
-(p8) ldfd f56 =[r39],8
- nop 999;; // EMbo added ...
- } { .mmi
- nop 999;; // EMbo added ...
-(p8) ldfd f55 =[r39],-8
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p8) fmerge.s f56 = f32,f56
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p8) fmerge.s f55 = f32,f55
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-//
-// Load I/4 and adjust its sign.
-// Return +3I/4 when ArgY = +Inf and ArgX = -Inf
-// Return -3I/4 when ArgY = -Inf and ArgX = -Inf
-//
-(p8) fadd.d.s0 f57 = f56, f55
-(p8) br.cond.sptk L(RETURN_ATAN);;
- }
-L(ATAN_ArgY_Not_INF):
- { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p6,p0 = f33,0x007
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p7,p0 = f33,0x021
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p0) fclass.m.unc p8,p0 = f33,0x022
-(p6) add r39 = 16,r39;;
- } { .mfi
-(p6) ldfd f56 =[r39],8
- nop 999 // EMbo added ...
- nop 999;; // EMbo added ...
- } { .mmi
- nop 999;; // EMbo added ...
-(p6) ldfd f55 =[r39],-8
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p6) fmerge.s f56 = f32,f56
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p6) fmerge.s f55 = f32,f55
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-//
-// return = sign_Y * I/2 when ArgX = +/-0
-//
-(p6) fadd.d.s0 f57 = f56, f55
-(p6) br.cond.sptk L(RETURN_ATAN);;
- } { .mfi
- nop 999 // EMbo added ...
-(p7) fmerge.s f56 = f32,f0
- nop 999 // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p7) fmerge.s f55 = f32,f0
- nop 999;; // EMbo added ...
- } { .mfb
- nop 999 // EMbo added ...
-//
-// return = sign_Y * 0 when ArgX = Inf
-//
-(p7) fadd.d.s0 f57 = f56, f55
-(p7) br.cond.sptk L(RETURN_ATAN);;
- } { .mfi
-(p8) ldfd f56 = [r39],8
- nop 999 // EMbo added ...
- nop 999;; // EMbo added ...
- } { .mmi
- nop 999;; // EMbo added ...
-(p8) ldfd f55 = [r39],-8
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p8) fmerge.s f56 = f32,f56
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-(p8) fmerge.s f55 = f32,f55
- nop 999;; // EMbo added ...
- } { .mfi
- nop 999 // EMbo added ...
-//
-// return = sign_Y * I when ArgX = -Inf
-//
-(p8) fadd.d.s0 f57 = f56, f55
- nop 999 // EMbo added ...
- };;
-L(RETURN_ATAN):
-// mov f8 = f57 ;;
-// The answer is in f57.
-// But Z_hi is f56
-// Z_lo is f55
-// s_Y is f34
-// W is in f9 and untouched
-
-{ .mfi
- nop 999
-mov f8 = f56
- nop.i 0
-};;
-
-{ .mfi
- nop 999
-mov f10 = f55
- nop.i 999
-}
-{ .mfb
- nop 999
-mov f11 = f34
-br.ret.sptk b0
-};;
-
-.endp __libm_atan2_reg
-ASM_SIZE_DIRECTIVE(__libm_atan2_reg)
diff --git a/sysdeps/ia64/fpu/libm_error.c b/sysdeps/ia64/fpu/libm_error.c
index ebbaad02ad..42ca36d98f 100644
--- a/sysdeps/ia64/fpu/libm_error.c
+++ b/sysdeps/ia64/fpu/libm_error.c
@@ -1,9 +1,10 @@
-//
-// Copyright (C) 2000, 2001, Intel Corporation
+/* file: libm_error.c */
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, James
-// Edwards, and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -19,14 +20,15 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
+
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -34,19 +36,39 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 2/02/00: Initial version
-// 3/22/00: Updated to support flexible and dynamic error handling.
-// 8/16/00: Changed all matherr function-calls to use the pmatherr
+// 3/22/00: Updated to support flexible and dynamic error handling.
+// 8/16/00: Changed all matherr function-calls to use the pmatherr
// function-pointers.
// 10/03/00: Corrected a scalb type.
// 11/28/00: Changed INPUT_XL to INPUT_XD for scalb_underflow case.
// 12/07/00: Added code to make scalbn error support equivalent to ldexp.
// 2/07/01: Added __declspec(align(16)) to long double constants to correct
// alignment problem.
+// 4/23/01: Added code for remquo
+// 6/07/01: Added code for fdim, lrint, lround, llrint, llround
+// Deleted code for remquo
+// 8/15/01: Added code for scalbln, nexttoward
+// 12/10/01: Added code for erfc
+// 12/27/01: Added code for degree argument functions
+// 01/02/02: Added code for tand, cotd
+// 01/15/02: Corrected SVID/XOPEN code for log1p, pow, and acosh
+// 01/25/02: Corrected ISOC for lgamma and gamma to return EDOM for neg ints
+// 01/28/02: Corrected SVID/XOPEN stderr message for log2
+// 05/20/02: Added code for cot
+// 07/01/02: Added code for sinhcosh
+// 10/04/02: Underflow detection in ISOC path redefined to
+// be zero rather than tiny and inexact
+// 12/06/02: Added code for annuity and compound
+// 01/30/03: Corrected test for underflow in ISOC path to not set denormal
+// 04/10/03: Corrected ISOC branch for gamma/lgamma to return ERANGE for neg ints.
+// Added code for tgamma
+// 04/11/03: Corrected POSIX/SVID/XOPEN branches for gamma/lgamma
+// to return EDOM for neg ints.
//
#include <errno.h>
@@ -54,38 +76,41 @@
#include <stdlib.h>
#include "libm_support.h"
-#ifndef _LIBC
+#ifdef _LIBC
+# define pmatherr matherr
+# define pmatherrf matherrf
+# define pmatherrl matherrl
+#else
_LIB_VERSION_TYPE
#if defined( __POSIX__ )
-_LIB_VERSION = _POSIX_;
+_LIB_VERSIONIMF = _POSIX_;
#elif defined( __XOPEN__ )
-_LIB_VERSION = _XOPEN_;
+_LIB_VERSIONIMF = _XOPEN_;
#elif defined( __SVID__ )
-_LIB_VERSION = _SVID_;
+_LIB_VERSIONIMF = _SVID_;
#elif defined( __IEEE__ )
-_LIB_VERSION = _IEEE_;
+_LIB_VERSIONIMF = _IEEE_;
#else
-_LIB_VERSION = _ISOC_;
-#endif
+_LIB_VERSIONIMF = _ISOC_;
#endif
/************************************************************/
/* matherrX function pointers and setusermatherrX functions */
/************************************************************/
-#if 0
int (*pmatherrf)(struct exceptionf*) = MATHERR_F;
int (*pmatherr)(struct EXC_DECL_D*) = MATHERR_D;
int (*pmatherrl)(struct exceptionl*) = matherrl;
void __libm_setusermatherrf( int(*user_merrf)(struct exceptionf*) )
-{ pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) ); }
+{ pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) ); }
void __libm_setusermatherr( int(*user_merr)(struct EXC_DECL_D*) )
-{ pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) ); }
+{ pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) ); }
void __libm_setusermatherrl( int(*user_merrl)(struct exceptionl*) )
-{ pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) ); }
-#endif
+{ pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) ); }
+
+#endif /* !_LIBC */
/***********************************************/
/* error-handling function, libm_error_support */
@@ -93,22 +118,27 @@ void __libm_setusermatherrl( int(*user_merrl)(struct exceptionl*) )
void __libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
{
-
# ifdef __cplusplus
struct __exception exc;
-# else
+# else
struct exception exc;
-# endif
+# endif
struct exceptionf excf;
struct exceptionl excl;
-# if defined opensource || defined _LIBC
+# if defined(__GNUC__)
+#define ALIGNIT __attribute__ ((__aligned__ (16)))
+# elif defined opensource
#define ALIGNIT
-#define ALIGNATTR __attribute__ ((__aligned__ (16)))
# else
#define ALIGNIT __declspec(align(16))
-#define ALIGNATTR
+# endif
+
+# ifdef SIZE_LONG_INT_64
+#define __INT_64__ signed long
+# else
+#define __INT_64__ __int64
# endif
const char float_inf[4] = {0x00,0x00,0x80,0x7F};
@@ -118,66 +148,74 @@ const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
ALIGNIT
-const char double_inf[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
+const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
+#if 0 /* unused */
ALIGNIT
-//const char double_huge[8] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
+const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
+#endif
ALIGNIT
-const char double_zero[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
-const char double_neg_inf[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
+const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
+#if 0 /* unused */
ALIGNIT
-//const char double_neg_huge[8] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
+const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
+#endif
ALIGNIT
-const char double_neg_zero[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
+const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
ALIGNIT
-const char long_double_inf[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
+const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
+#if 0 /* unused */
ALIGNIT
-//const char long_double_huge[16] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
+const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
+#endif
ALIGNIT
-const char long_double_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
-const char long_double_neg_inf[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
+const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
+#if 0 /* unused */
ALIGNIT
-//const char long_double_neg_huge[16] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, 0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
+const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
+#endif
ALIGNIT
-const char long_double_neg_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
+const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
-#define RETVAL_HUGE_VALL *(long double *)retval = *(long double *)long_double_inf
-#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf
-#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge
-#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge
+#define RETVAL_HUGE_VALL *(long double *)retval = *(long double *)long_double_inf
+#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf
+#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge
+#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge
#define RETVAL_HUGE_VALD *(double *)retval = *(double *) double_inf
#define RETVAL_NEG_HUGE_VALD *(double *)retval = *(double *) double_neg_inf
#define RETVAL_HUGED *(double *)retval = (double) *(float *)float_huge
-#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge
+#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge
#define RETVAL_HUGE_VALF *(float *)retval = *(float *) float_inf
#define RETVAL_NEG_HUGE_VALF *(float *)retval = *(float *) float_neg_inf
#define RETVAL_HUGEF *(float *)retval = *(float *) float_huge
-#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge
+#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge
-#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero
-#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero
-#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero
+#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero
+#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero
+#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero
-#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero
-#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero
-#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero
+#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero
+#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero
+#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero
-#define RETVAL_ONEL *(long double *)retval = (long double) 1.0
-#define RETVAL_ONED *(double *)retval = 1.0
-#define RETVAL_ONEF *(float *)retval = 1.0f
+#define RETVAL_ONEL *(long double *)retval = (long double) 1.0
+#define RETVAL_ONED *(double *)retval = 1.0
+#define RETVAL_ONEF *(float *)retval = 1.0f
-#define NOT_MATHERRL excl.arg1=*(long double *)arg1;excl.arg2=*(long double *)arg2;excl.retval=*(long double *)retval;if(!matherrl(&excl))
-#define NOT_MATHERRD exc.arg1=*(double *)arg1;exc.arg2=*(double *)arg2;exc.retval=*(double *)retval;if(!MATHERR_D(&exc))
-#define NOT_MATHERRF excf.arg1=*(float *)arg1;excf.arg2=*(float *)arg2;excf.retval=*(float *)retval;if(!MATHERR_F(&excf))
+#define NOT_MATHERRL excl.arg1=*(long double *)arg1;excl.arg2=*(long double *)arg2;excl.retval=*(long double *)retval;if(!pmatherrl(&excl))
+#define NOT_MATHERRD exc.arg1=*(double *)arg1;exc.arg2=*(double *)arg2;exc.retval=*(double *)retval;if(!pmatherr(&exc))
+#define NOT_MATHERRF excf.arg1=*(float *)arg1;excf.arg2=*(float *)arg2;excf.retval=*(float *)retval;if(!pmatherrf(&excf))
-#define ifSVID if(_LIB_VERSION==_SVID_)
+#define ifSVID if(_LIB_VERSIONIMF==_SVID_)
-#define NAMEL excl.name
-#define NAMED exc.name
-#define NAMEF excf.name
+#define NAMEL excl.name
+#define NAMED exc.name
+#define NAMEF excf.name
//
// These should work OK for MS because they are ints -
@@ -192,28 +230,28 @@ const char long_double_neg_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0
#define PLOSS 6
#define SINGL excl.type = SING
-#define DOMAINL excl.type = DOMAIN
-#define OVERFLOWL excl.type = OVERFLOW
-#define UNDERFLOWL excl.type = UNDERFLOW
-#define TLOSSL excl.type = TLOSS
+#define DOMAINL excl.type = DOMAIN
+#define OVERFLOWL excl.type = OVERFLOW
+#define UNDERFLOWL excl.type = UNDERFLOW
+#define TLOSSL excl.type = TLOSS
#define SINGD exc.type = SING
-#define DOMAIND exc.type = DOMAIN
-#define OVERFLOWD exc.type = OVERFLOW
-#define UNDERFLOWD exc.type = UNDERFLOW
-#define TLOSSD exc.type = TLOSS
+#define DOMAIND exc.type = DOMAIN
+#define OVERFLOWD exc.type = OVERFLOW
+#define UNDERFLOWD exc.type = UNDERFLOW
+#define TLOSSD exc.type = TLOSS
#define SINGF excf.type = SING
-#define DOMAINF excf.type = DOMAIN
-#define OVERFLOWF excf.type = OVERFLOW
-#define UNDERFLOWF excf.type = UNDERFLOW
-#define TLOSSF excf.type = TLOSS
+#define DOMAINF excf.type = DOMAIN
+#define OVERFLOWF excf.type = OVERFLOW
+#define UNDERFLOWF excf.type = UNDERFLOW
+#define TLOSSF excf.type = TLOSS
#define INPUT_XL (excl.arg1=*(long double*)arg1)
#define INPUT_XD (exc.arg1=*(double*)arg1)
#define INPUT_XF (excf.arg1=*(float*)arg1)
-#define INPUT_YL (excl.arg1=*(long double*)arg2)
-#define INPUT_YD (exc.arg1=*(double*)arg2)
-#define INPUT_YF (excf.arg1=*(float*)arg2)
-#define INPUT_RESL (*(long double *)retval)
+#define INPUT_YL (excl.arg2=*(long double*)arg2)
+#define INPUT_YD (exc.arg2=*(double*)arg2)
+#define INPUT_YF (excf.arg2=*(float*)arg2)
+#define INPUT_RESL (*(long double *)retval)
#define INPUT_RESD (*(double *)retval)
#define INPUT_RESF (*(float *)retval)
@@ -248,11 +286,17 @@ const char long_double_neg_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0
#define WRITED_LOG1P_NEGATIVE fputs("log1p: DOMAIN error\n",stderr)
#define WRITEF_LOG1P_NEGATIVE fputs("log1pf: DOMAIN error\n",stderr)
#define WRITEL_LOG10_ZERO fputs("log10l: SING error\n",stderr)
-#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr)
+#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr)
#define WRITEF_LOG10_ZERO fputs("log10f: SING error\n",stderr)
#define WRITEL_LOG10_NEGATIVE fputs("log10l: DOMAIN error\n",stderr)
#define WRITED_LOG10_NEGATIVE fputs("log10: DOMAIN error\n",stderr)
#define WRITEF_LOG10_NEGATIVE fputs("log10f: DOMAIN error\n",stderr)
+#define WRITEL_LOG2_ZERO fputs("log2l: SING error\n",stderr)
+#define WRITED_LOG2_ZERO fputs("log2: SING error\n",stderr)
+#define WRITEF_LOG2_ZERO fputs("log2f: SING error\n",stderr)
+#define WRITEL_LOG2_NEGATIVE fputs("log2l: DOMAIN error\n",stderr)
+#define WRITED_LOG2_NEGATIVE fputs("log2: DOMAIN error\n",stderr)
+#define WRITEF_LOG2_NEGATIVE fputs("log2f: DOMAIN error\n",stderr)
#define WRITEL_POW_ZERO_TO_ZERO fputs("powl(0,0): DOMAIN error\n",stderr)
#define WRITED_POW_ZERO_TO_ZERO fputs("pow(0,0): DOMAIN error\n",stderr)
#define WRITEF_POW_ZERO_TO_ZERO fputs("powf(0,0): DOMAIN error\n",stderr)
@@ -295,6 +339,9 @@ const char long_double_neg_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0
#define WRITEL_GAMMA_NEGATIVE fputs("gammal: SING error\n",stderr)
#define WRITED_GAMMA_NEGATIVE fputs("gamma: SING error\n",stderr)
#define WRITEF_GAMMA_NEGATIVE fputs("gammaf: SING error\n",stderr)
+#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: DOMAIN error\n",stderr)
+#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: DOMAIN error\n",stderr)
+#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: DOMAIN error\n",stderr)
#define WRITEL_J0_TLOSS fputs("j0l: TLOSS error\n",stderr)
#define WRITEL_Y0_TLOSS fputs("y0l: TLOSS error\n",stderr)
#define WRITEL_J1_TLOSS fputs("j1l: TLOSS error\n",stderr)
@@ -313,16 +360,26 @@ const char long_double_neg_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0
#define WRITEF_Y1_TLOSS fputs("y1f: TLOSS error\n",stderr)
#define WRITEF_JN_TLOSS fputs("jnf: TLOSS error\n",stderr)
#define WRITEF_YN_TLOSS fputs("ynf: TLOSS error\n",stderr)
+#define WRITEL_ACOSD fputs("acosdl: DOMAIN error\n",stderr)
+#define WRITED_ACOSD fputs("acosd: DOMAIN error\n",stderr)
+#define WRITEF_ACOSD fputs("acosdf: DOMAIN error\n",stderr)
+#define WRITEL_ASIND fputs("asindl: DOMAIN error\n",stderr)
+#define WRITED_ASIND fputs("asind: DOMAIN error\n",stderr)
+#define WRITEF_ASIND fputs("asindf: DOMAIN error\n",stderr)
+#define WRITEL_ATAN2D_ZERO_BY_ZERO fputs("atan2dl: DOMAIN error\n",stderr)
+#define WRITED_ATAN2D_ZERO_BY_ZERO fputs("atan2d: DOMAIN error\n",stderr)
+#define WRITEF_ATAN2D_ZERO_BY_ZERO fputs("atan2df: DOMAIN error\n",stderr)
+
/***********************/
/* IEEE Path */
/***********************/
-if(_LIB_VERSION==_IEEE_) return;
+if(_LIB_VERSIONIMF==_IEEE_) return;
/***********************/
/* C9X Path */
/***********************/
-else if(_LIB_VERSION==_ISOC_)
+else if(_LIB_VERSIONIMF==_ISOC_)
{
switch(input_tag)
{
@@ -339,80 +396,146 @@ else if(_LIB_VERSION==_ISOC_)
case log1p_zero:
case log1pf_zero:
case powl_overflow:
- case pow_overflow:
- case powf_overflow:
- case powl_underflow:
- case pow_underflow:
- case powf_underflow:
+ case pow_overflow:
+ case powf_overflow:
case expl_overflow:
- case exp_overflow:
- case expf_overflow:
- case expl_underflow:
- case exp_underflow:
- case expf_underflow:
+ case exp_overflow:
+ case expf_overflow:
case exp2l_overflow:
- case exp2_overflow:
- case exp2f_overflow:
- case exp2l_underflow:
- case exp2_underflow:
- case exp2f_underflow:
+ case exp2_overflow:
+ case exp2f_overflow:
case exp10l_overflow:
- case exp10_overflow:
- case exp10f_overflow:
+ case exp10_overflow:
+ case exp10f_overflow:
case expm1l_overflow:
- case expm1_overflow:
- case expm1f_overflow:
+ case expm1_overflow:
+ case expm1f_overflow:
case hypotl_overflow:
case hypot_overflow:
case hypotf_overflow:
- case sinhl_overflow:
- case sinh_overflow:
- case sinhf_overflow:
- case atanhl_eq_one:
- case atanh_eq_one:
- case atanhf_eq_one:
+ case sinhl_overflow:
+ case sinh_overflow:
+ case sinhf_overflow:
+ case atanhl_eq_one:
+ case atanh_eq_one:
+ case atanhf_eq_one:
case scalbl_overflow:
case scalb_overflow:
case scalbf_overflow:
- case scalbl_underflow:
- case scalb_underflow:
- case scalbf_underflow:
case coshl_overflow:
case cosh_overflow:
case coshf_overflow:
case nextafterl_overflow:
case nextafter_overflow:
case nextafterf_overflow:
+ case nexttowardl_overflow:
+ case nexttoward_overflow:
+ case nexttowardf_overflow:
case scalbnl_overflow:
case scalbn_overflow:
case scalbnf_overflow:
- case scalbnl_underflow:
- case scalbn_underflow:
- case scalbnf_underflow:
+ case scalblnl_overflow:
+ case scalbln_overflow:
+ case scalblnf_overflow:
case ldexpl_overflow:
case ldexp_overflow:
case ldexpf_overflow:
- case ldexpl_underflow:
- case ldexp_underflow:
- case ldexpf_underflow:
case lgammal_overflow:
case lgamma_overflow:
case lgammaf_overflow:
- case lgammal_negative:
- case lgamma_negative:
- case lgammaf_negative:
case gammal_overflow:
case gamma_overflow:
case gammaf_overflow:
+ case lgammal_negative:
+ case lgamma_negative:
+ case lgammaf_negative:
case gammal_negative:
case gamma_negative:
case gammaf_negative:
case ilogbl_zero:
- case ilogb_zero:
+ case ilogb_zero:
case ilogbf_zero:
+ case fdiml_overflow:
+ case fdim_overflow:
+ case fdimf_overflow:
+ case llrintl_large:
+ case llrint_large:
+ case llrintf_large:
+ case llroundl_large:
+ case llround_large:
+ case llroundf_large:
+ case lrintl_large:
+ case lrint_large:
+ case lrintf_large:
+ case lroundl_large:
+ case lround_large:
+ case lroundf_large:
+ case tandl_overflow:
+ case tand_overflow:
+ case tandf_overflow:
+ case cotdl_overflow:
+ case cotd_overflow:
+ case cotdf_overflow:
+ case cotl_overflow:
+ case cot_overflow:
+ case cotf_overflow:
+ case sinhcoshl_overflow:
+ case sinhcosh_overflow:
+ case sinhcoshf_overflow:
+ case annuityl_overflow:
+ case annuity_overflow:
+ case annuityf_overflow:
+ case compoundl_overflow:
+ case compound_overflow:
+ case compoundf_overflow:
+ case tgammal_overflow:
+ case tgamma_overflow:
+ case tgammaf_overflow:
{
ERRNO_RANGE; break;
}
+ case powl_underflow:
+ case expl_underflow:
+ case exp2l_underflow:
+ case scalbl_underflow:
+ case scalbnl_underflow:
+ case scalblnl_underflow:
+ case ldexpl_underflow:
+ case erfcl_underflow:
+ case annuityl_underflow:
+ case compoundl_underflow:
+ {
+ if ( *(__INT_64__*)retval == 0 ) ERRNO_RANGE;
+ break;
+ }
+ case pow_underflow:
+ case exp_underflow:
+ case exp2_underflow:
+ case scalb_underflow:
+ case scalbn_underflow:
+ case scalbln_underflow:
+ case ldexp_underflow:
+ case erfc_underflow:
+ case annuity_underflow:
+ case compound_underflow:
+ {
+ if ( ((*(__INT_64__*)retval)<<1) == 0 ) ERRNO_RANGE;
+ break;
+ }
+ case powf_underflow:
+ case expf_underflow:
+ case exp2f_underflow:
+ case scalbf_underflow:
+ case scalbnf_underflow:
+ case scalblnf_underflow:
+ case ldexpf_underflow:
+ case erfcf_underflow:
+ case annuityf_underflow:
+ case compoundf_underflow:
+ {
+ if ( ((*(__INT_64__*)retval)<<33) == 0 ) ERRNO_RANGE;
+ break;
+ }
case logl_negative:
case log_negative:
case logf_negative:
@@ -440,17 +563,17 @@ else if(_LIB_VERSION==_ISOC_)
case fmodl_by_zero:
case fmod_by_zero:
case fmodf_by_zero:
- case atanhl_gt_one:
- case atanh_gt_one:
- case atanhf_gt_one:
- case acosl_gt_one:
- case acos_gt_one:
- case acosf_gt_one:
- case asinl_gt_one:
- case asin_gt_one:
- case asinf_gt_one:
+ case atanhl_gt_one:
+ case atanh_gt_one:
+ case atanhf_gt_one:
+ case acosl_gt_one:
+ case acos_gt_one:
+ case acosf_gt_one:
+ case asinl_gt_one:
+ case asin_gt_one:
+ case asinf_gt_one:
case logbl_zero:
- case logb_zero:
+ case logb_zero:
case logbf_zero:
case acoshl_lt_one:
case acosh_lt_one:
@@ -473,6 +596,30 @@ else if(_LIB_VERSION==_ISOC_)
case ynl_negative:
case yn_negative:
case ynf_negative:
+ case acosdl_gt_one:
+ case acosd_gt_one:
+ case acosdf_gt_one:
+ case asindl_gt_one:
+ case asind_gt_one:
+ case asindf_gt_one:
+ case atan2dl_zero:
+ case atan2d_zero:
+ case atan2df_zero:
+ case annuityl_by_zero:
+ case annuity_by_zero:
+ case annuityf_by_zero:
+ case annuityl_less_m1:
+ case annuity_less_m1:
+ case annuityf_less_m1:
+ case compoundl_by_zero:
+ case compound_by_zero:
+ case compoundf_by_zero:
+ case compoundl_less_m1:
+ case compound_less_m1:
+ case compoundf_less_m1:
+ case tgammal_negative:
+ case tgamma_negative:
+ case tgammaf_negative:
{
ERRNO_DOMAIN; break;
}
@@ -486,31 +633,37 @@ else if(_LIB_VERSION==_ISOC_)
/* _POSIX_ Path */
/***********************/
-else if(_LIB_VERSION==_POSIX_)
+else if(_LIB_VERSIONIMF==_POSIX_)
{
switch(input_tag)
{
case gammal_overflow:
case lgammal_overflow:
+ case tgammal_overflow:
{
RETVAL_HUGE_VALL; ERRNO_RANGE; break;
}
case gamma_overflow:
case lgamma_overflow:
+ case tgamma_overflow:
{
RETVAL_HUGE_VALD; ERRNO_RANGE; break;
}
case gammaf_overflow:
case lgammaf_overflow:
+ case tgammaf_overflow:
{
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
case gammal_negative:
- case gamma_negative:
- case gammaf_negative:
case lgammal_negative:
+ case gamma_negative:
case lgamma_negative:
+ case gammaf_negative:
case lgammaf_negative:
+ case tgammal_negative:
+ case tgamma_negative:
+ case tgammaf_negative:
{
ERRNO_DOMAIN; break;
}
@@ -526,38 +679,56 @@ switch(input_tag)
case scalbn_underflow:
case scalbnf_overflow:
case scalbnf_underflow:
+ case scalblnl_overflow:
+ case scalblnl_underflow:
+ case scalbln_overflow:
+ case scalbln_underflow:
+ case scalblnf_overflow:
+ case scalblnf_underflow:
+ case tandl_overflow:
+ case tand_overflow:
+ case tandf_overflow:
+ case cotdl_overflow:
+ case cotd_overflow:
+ case cotdf_overflow:
+ case cotl_overflow:
+ case cot_overflow:
+ case cotf_overflow:
+ case sinhcoshl_overflow:
+ case sinhcosh_overflow:
+ case sinhcoshf_overflow:
{
ERRNO_RANGE; break;
}
- case atanhl_gt_one:
- case atanhl_eq_one:
+ case atanhl_gt_one:
+ case atanhl_eq_one:
/* atanhl(|x| >= 1) */
{
ERRNO_DOMAIN; break;
}
- case atanh_gt_one:
- case atanh_eq_one:
+ case atanh_gt_one:
+ case atanh_eq_one:
/* atanh(|x| >= 1) */
{
ERRNO_DOMAIN; break;
}
- case atanhf_gt_one:
- case atanhf_eq_one:
+ case atanhf_gt_one:
+ case atanhf_eq_one:
/* atanhf(|x| >= 1) */
{
ERRNO_DOMAIN; break;
}
- case sqrtl_negative:
+ case sqrtl_negative:
/* sqrtl(x < 0) */
{
ERRNO_DOMAIN; break;
}
- case sqrt_negative:
+ case sqrt_negative:
/* sqrt(x < 0) */
{
ERRNO_DOMAIN; break;
}
- case sqrtf_negative:
+ case sqrtf_negative:
/* sqrtf(x < 0) */
{
ERRNO_DOMAIN; break;
@@ -606,7 +777,7 @@ switch(input_tag)
/* yn(x < 0) */
{
RETVAL_NEG_HUGE_VALD; ERRNO_DOMAIN; break;
- }
+ }
case y0f_negative:
case y1f_negative:
case ynf_negative:
@@ -615,10 +786,11 @@ switch(input_tag)
/* ynf(x < 0) */
{
RETVAL_NEG_HUGE_VALF; ERRNO_DOMAIN; break;
- }
+ }
case logl_zero:
case log1pl_zero:
case log10l_zero:
+ case log2l_zero:
/* logl(0) */
/* log1pl(0) */
/* log10l(0) */
@@ -628,7 +800,7 @@ switch(input_tag)
case log_zero:
case log1p_zero:
case log10_zero:
- case log2l_zero:
+ case log2_zero:
/* log(0) */
/* log1p(0) */
/* log10(0) */
@@ -638,6 +810,7 @@ switch(input_tag)
case logf_zero:
case log1pf_zero:
case log10f_zero:
+ case log2f_zero:
/* logf(0) */
/* log1pf(0) */
/* log10f(0) */
@@ -652,6 +825,9 @@ switch(input_tag)
/* log1pl(x < 0) */
/* log10l(x < 0) */
{
+#ifndef _LIBC
+ RETVAL_NEG_HUGE_VALL;
+#endif
ERRNO_DOMAIN; break;
}
case log_negative:
@@ -662,8 +838,11 @@ switch(input_tag)
/* log1p(x < 0) */
/* log10(x < 0) */
{
+#ifndef _LIBC
+ RETVAL_NEG_HUGE_VALD;
+#endif
ERRNO_DOMAIN; break;
- }
+ }
case logf_negative:
case log1pf_negative:
case log10f_negative:
@@ -672,34 +851,46 @@ switch(input_tag)
/* log1pf(x < 0) */
/* log10f(x < 0) */
{
+#ifndef _LIBC
+ RETVAL_NEG_HUGE_VALF;
+#endif
ERRNO_DOMAIN; break;
- }
+ }
case expl_overflow:
+ case exp2l_overflow:
+ case exp10l_overflow:
/* expl overflow */
{
RETVAL_HUGE_VALL; ERRNO_RANGE; break;
}
case exp_overflow:
+ case exp2_overflow:
+ case exp10_overflow:
/* exp overflow */
{
RETVAL_HUGE_VALD; ERRNO_RANGE; break;
}
case expf_overflow:
+ case exp2f_overflow:
+ case exp10f_overflow:
/* expf overflow */
{
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
case expl_underflow:
+ case exp2l_underflow:
/* expl underflow */
{
RETVAL_ZEROL; ERRNO_RANGE; break;
}
case exp_underflow:
+ case exp2_underflow:
/* exp underflow */
{
RETVAL_ZEROD; ERRNO_RANGE; break;
}
case expf_underflow:
+ case exp2f_underflow:
/* expf underflow */
{
RETVAL_ZEROF; ERRNO_RANGE; break;
@@ -750,13 +941,17 @@ switch(input_tag)
break;
}
case powl_overflow:
+ case annuityl_overflow:
+ case compoundl_overflow:
/* powl(x,y) overflow */
{
if (INPUT_RESL < 0) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
- ERRNO_RANGE; break;
+ ERRNO_RANGE; break;
}
case pow_overflow:
+ case annuity_overflow:
+ case compound_overflow:
/* pow(x,y) overflow */
{
if (INPUT_RESD < 0) RETVAL_NEG_HUGE_VALD;
@@ -764,6 +959,8 @@ switch(input_tag)
ERRNO_RANGE; break;
}
case powf_overflow:
+ case annuityf_overflow:
+ case compoundf_overflow:
/* powf(x,y) overflow */
{
if (INPUT_RESF < 0) RETVAL_NEG_HUGE_VALF;
@@ -771,20 +968,41 @@ switch(input_tag)
ERRNO_RANGE; break;
}
case powl_underflow:
+ case annuityl_underflow:
+ case compoundl_underflow:
/* powl(x,y) underflow */
{
RETVAL_ZEROL; ERRNO_RANGE; break;
}
case pow_underflow:
+ case annuity_underflow:
+ case compound_underflow:
/* pow(x,y) underflow */
{
RETVAL_ZEROD; ERRNO_RANGE; break;
}
- case powf_underflow:
+ case powf_underflow:
+ case annuityf_underflow:
+ case compoundf_underflow:
/* powf(x,y) underflow */
{
RETVAL_ZEROF; ERRNO_RANGE; break;
}
+ case annuityl_by_zero:
+ case annuityl_less_m1:
+ case compoundl_by_zero:
+ case compoundl_less_m1:
+ case annuity_by_zero:
+ case annuity_less_m1:
+ case compound_by_zero:
+ case compound_less_m1:
+ case annuityf_by_zero:
+ case annuityf_less_m1:
+ case compoundf_by_zero:
+ case compoundf_less_m1:
+ {
+ ERRNO_DOMAIN; break;
+ }
case powl_zero_to_negative:
/* 0**neg */
{
@@ -820,7 +1038,7 @@ switch(input_tag)
/* Special Error */
{
break;
- }
+ }
case pow_nan_to_zero:
/* pow(NaN,0.0) */
{
@@ -832,36 +1050,51 @@ switch(input_tag)
break;
}
case atan2l_zero:
- /* atan2l(0,0) */
+ case atan2dl_zero:
+ /* atan2dl(0,0) */
{
- /* XXX arg1 and arg2 are switched!!!! */
+#ifndef _LIBC
+ RETVAL_ZEROL;
+#else
+ /* XXX arg1 and arg2 are switched!!!! */
if (signbit (*(long double *) arg1))
/* y == -0 */
- *(long double *) retval = copysignl (M_PIl, *(long double *) arg2);
+ *(long double *) retval = __libm_copysignl (M_PIl, *(long double *) arg2);
else
*(long double *) retval = *(long double *) arg2;
+#endif
ERRNO_DOMAIN; break;
}
case atan2_zero:
- /* atan2(0,0) */
+ case atan2d_zero:
+ /* atan2d(0,0) */
{
- /* XXX arg1 and arg2 are switched!!!! */
+#ifndef _LIBC
+ RETVAL_ZEROD;
+#else
+ /* XXX arg1 and arg2 are switched!!!! */
if (signbit (*(double *) arg1))
/* y == -0 */
- *(double *) retval = copysign (M_PI, *(double *) arg2);
+ *(double *) retval = __libm_copysign (M_PI, *(double *) arg2);
else
*(double *) retval = *(double *) arg2;
+#endif
ERRNO_DOMAIN; break;
}
- case
- atan2f_zero:
+ case atan2f_zero:
+ case atan2df_zero:
/* atan2f(0,0) */
+ /* atan2df(0,0) */
{
+#ifndef _LIBC
+ RETVAL_ZEROF;
+#else
if (signbit (*(float *) arg2))
/* y == -0 */
- *(float *) retval = copysignf (M_PI, *(float *) arg1);
+ *(float *) retval = __libm_copysignf (M_PI, *(float *) arg1);
else
*(float *) retval = *(float *) arg1;
+#endif
ERRNO_DOMAIN; break;
}
case expm1l_overflow:
@@ -912,42 +1145,42 @@ switch(input_tag)
case scalbl_underflow:
/* scalbl underflow */
{
- if (INPUT_XL < 0) RETVAL_NEG_ZEROL;
+ if (INPUT_XL < 0) RETVAL_NEG_ZEROL;
else RETVAL_ZEROL;
ERRNO_RANGE; break;
}
case scalb_underflow:
/* scalb underflow */
{
- if (INPUT_XD < 0) RETVAL_NEG_ZEROD;
+ if (INPUT_XD < 0) RETVAL_NEG_ZEROD;
else RETVAL_ZEROD;
ERRNO_RANGE; break;
}
case scalbf_underflow:
/* scalbf underflow */
{
- if (INPUT_XF < 0) RETVAL_NEG_ZEROF;
+ if (INPUT_XF < 0) RETVAL_NEG_ZEROF;
else RETVAL_ZEROF;
ERRNO_RANGE; break;
}
case scalbl_overflow:
/* scalbl overflow */
{
- if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+ if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
ERRNO_RANGE; break;
}
case scalb_overflow:
/* scalb overflow */
{
- if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+ if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
ERRNO_RANGE; break;
}
case scalbf_overflow:
/* scalbf overflow */
{
- if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+ if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
ERRNO_RANGE; break;
}
@@ -967,33 +1200,62 @@ switch(input_tag)
ERRNO_DOMAIN; break;
}
case acosl_gt_one:
+ case acosdl_gt_one:
/* acosl(x > 1) */
+ /* acosdl(x > 1) */
{
+#ifndef _LIBC
+ RETVAL_ZEROL;
+#endif
ERRNO_DOMAIN; break;
}
case acos_gt_one:
+ case acosd_gt_one:
/* acos(x > 1) */
+ /* acosd(x > 1) */
{
- ERRNO_DOMAIN; break;
+#ifndef _LIBC
+ RETVAL_ZEROD;
+#endif
+ ERRNO_DOMAIN; break;
}
case acosf_gt_one:
+ case acosdf_gt_one:
/* acosf(x > 1) */
+ /* acosdf(x > 1) */
{
- ERRNO_DOMAIN; break;
+#ifndef _LIBC
+ RETVAL_ZEROF;
+#endif
+ ERRNO_DOMAIN; break;
}
case asinl_gt_one:
+ case asindl_gt_one:
/* asinl(x > 1) */
+ /* asindl(x > 1) */
{
+#ifndef _LIBC
+ RETVAL_ZEROL;
+#endif
ERRNO_DOMAIN; break;
}
case asin_gt_one:
+ case asind_gt_one:
/* asin(x > 1) */
+ /* asind(x > 1) */
{
+#ifndef _LIBC
+ RETVAL_ZEROD;
+#endif
ERRNO_DOMAIN; break;
}
case asinf_gt_one:
- /* asinf(x > 1) */
+ case asindf_gt_one:
+ /* asindf(x > 1) */
{
+#ifndef _LIBC
+ RETVAL_ZEROF;
+#endif
ERRNO_DOMAIN; break;
}
case remainderl_by_zero:
@@ -1029,6 +1291,15 @@ switch(input_tag)
{
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
+ case nextafterl_overflow:
+ case nextafter_overflow:
+ case nextafterf_overflow:
+ case nexttowardl_overflow:
+ case nexttoward_overflow:
+ case nexttowardf_overflow:
+ {
+ ERRNO_RANGE; break;
+ }
case sinhl_overflow:
/* sinhl overflows */
{
@@ -1090,7 +1361,7 @@ return;
/*******************************/
/* __SVID__ and __XOPEN__ Path */
/*******************************/
-else
+else
{
switch(input_tag)
{
@@ -1106,15 +1377,57 @@ else
case scalbn_underflow:
case scalbnf_overflow:
case scalbnf_underflow:
+ case scalblnl_overflow:
+ case scalblnl_underflow:
+ case scalbln_overflow:
+ case scalbln_underflow:
+ case scalblnf_overflow:
+ case scalblnf_underflow:
+ case tandl_overflow:
+ case tand_overflow:
+ case tandf_overflow:
+ case cotdl_overflow:
+ case cotd_overflow:
+ case cotdf_overflow:
+ case cotl_overflow:
+ case cot_overflow:
+ case cotf_overflow:
+ case annuityl_overflow:
+ case annuityl_underflow:
+ case annuity_overflow:
+ case annuity_underflow:
+ case annuityf_overflow:
+ case annuityf_underflow:
+ case compoundl_overflow:
+ case compoundl_underflow:
+ case compound_overflow:
+ case compound_underflow:
+ case compoundf_overflow:
+ case compoundf_underflow:
{
ERRNO_RANGE; break;
}
- case sqrtl_negative:
+ case annuityl_by_zero:
+ case annuityl_less_m1:
+ case annuity_by_zero:
+ case annuity_less_m1:
+ case annuityf_by_zero:
+ case annuityf_less_m1:
+ case compoundl_by_zero:
+ case compoundl_less_m1:
+ case compound_by_zero:
+ case compound_less_m1:
+ case compoundf_by_zero:
+ case compoundf_less_m1:
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case sqrtl_negative:
/* sqrtl(x < 0) */
{
DOMAINL; NAMEL = (char *) "sqrtl";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROL;
NOT_MATHERRL
{
@@ -1122,22 +1435,22 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{ /* NaN already computed */
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
- case sqrt_negative:
+ case sqrt_negative:
/* sqrt(x < 0) */
{
DOMAIND; NAMED = (char *) "sqrt";
- ifSVID
+ ifSVID
{
-
+
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_SQRT;
ERRNO_DOMAIN;
@@ -1146,18 +1459,18 @@ else
else
{ /* NaN already computed */
NOT_MATHERRD {ERRNO_DOMAIN;}
- }
- *(double *)retval = exc.retval;
+ }
+ *(double *)retval = exc.retval;
break;
}
- case sqrtf_negative:
+ case sqrtf_negative:
/* sqrtf(x < 0) */
{
DOMAINF; NAMEF = (char *) "sqrtf";
- ifSVID
+ ifSVID
{
RETVAL_ZEROF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_SQRT;
ERRNO_DOMAIN;
@@ -1166,62 +1479,59 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
case logl_zero:
- case log2l_zero:
/* logl(0) */
{
SINGL; NAMEL = (char *) "logl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_LOG_ZERO;
ERRNO_DOMAIN;
- }
+ }
}
else
{
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
- }
- *(long double *)retval = excl.retval;
+ }
+ *(long double *)retval = excl.retval;
break;
}
case log_zero:
- case log2_zero:
/* log(0) */
{
SINGD; NAMED = (char *) "log";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_LOG_ZERO;
ERRNO_DOMAIN;
- }
+ }
}
else
{
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case logf_zero:
- case log2f_zero:
/* logf(0) */
{
SINGF; NAMEF = (char *) "logf";
- ifSVID
+ ifSVID
{
- RETVAL_NEG_HUGEF;
+ RETVAL_NEG_HUGEF;
NOT_MATHERRF
{
WRITEF_LOG_ZERO;
@@ -1230,22 +1540,21 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case logl_negative:
- case log2l_negative:
/* logl(x < 0) */
{
DOMAINL; NAMEL = (char *) "logl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_LOG_NEGATIVE;
ERRNO_DOMAIN;
@@ -1253,21 +1562,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case log_negative:
- case log2_negative:
/* log(x < 0) */
{
DOMAIND; NAMED = (char *) "log";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_LOG_NEGATIVE;
ERRNO_DOMAIN;
@@ -1275,39 +1583,38 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
- }
+ }
case logf_negative:
- case log2f_negative:
/* logf(x < 0) */
{
DOMAINF; NAMEF = (char *) "logf";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_LOG_NEGATIVE;
ERRNO_DOMAIN;
}
- }
+ }
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF{ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case log1pl_zero:
/* log1pl(-1) */
{
SINGL; NAMEL = (char *) "log1pl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
NOT_MATHERRL
@@ -1328,7 +1635,7 @@ else
/* log1p(-1) */
{
SINGD; NAMED = (char *) "log1p";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
NOT_MATHERRD
@@ -1349,7 +1656,7 @@ else
/* log1pf(-1) */
{
SINGF; NAMEF = (char *) "log1pf";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
NOT_MATHERRF
@@ -1361,11 +1668,11 @@ else
else
{
RETVAL_NEG_HUGE_VALF;
- NOT_MATHERRF {}ERRNO_DOMAIN;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
}
*(float *)retval = excf.retval;
break;
- }
+ }
case log1pl_negative:
/* log1pl(x < -1) */
{
@@ -1379,7 +1686,7 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
@@ -1400,7 +1707,7 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
@@ -1421,7 +1728,7 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
@@ -1433,7 +1740,7 @@ else
/* log10l(0) */
{
SINGL; NAMEL = (char *) "log10l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
NOT_MATHERRL
@@ -1447,14 +1754,14 @@ else
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case log10_zero:
/* log10(0) */
{
SINGD; NAMED = (char *) "log10";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
NOT_MATHERRD
@@ -1468,14 +1775,14 @@ else
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case log10f_zero:
/* log10f(0) */
{
SINGF; NAMEF = (char *) "log10f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
NOT_MATHERRF
@@ -1489,17 +1796,17 @@ else
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case log10l_negative:
/* log10l(x < 0) */
{
DOMAINL; NAMEL = (char *) "log10l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_LOG10_NEGATIVE;
ERRNO_DOMAIN;
@@ -1510,38 +1817,38 @@ else
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case log10_negative:
/* log10(x < 0) */
{
DOMAIND; NAMED = (char *) "log10";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_LOG10_NEGATIVE;
ERRNO_DOMAIN;
}
- }
+ }
else
{
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case log10f_negative:
/* log10f(x < 0) */
{
DOMAINF; NAMEF = (char *) "log10f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_LOG10_NEGATIVE;
ERRNO_DOMAIN;
@@ -1552,14 +1859,119 @@ else
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case log2_zero:
+ /* log2(0) */
+ {
+ SINGD; NAMED = (char *) "log2";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LOG2_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case log2f_zero:
+ /* log2f(0) */
+ {
+ SINGF; NAMEF = (char *) "log2f";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LOG2_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case log2l_negative:
+ /* log2l(x < 0) */
+ {
+ DOMAINL; NAMEL = (char *) "log2l";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LOG2_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case log2_negative:
+ /* log2(x < 0) */
+ {
+ DOMAIND; NAMED = (char *) "log2";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LOG2_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case log2f_negative:
+ /* log2f(x < 0) */
+ {
+ DOMAINF; NAMEF = (char *) "log2f";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LOG2_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
break;
}
case expl_overflow:
/* expl overflow */
{
OVERFLOWL; NAMEL = (char *) "expl";
- ifSVID
+ ifSVID
{
RETVAL_HUGEL;
}
@@ -1568,14 +1980,14 @@ else
RETVAL_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case exp_overflow:
/* exp overflow */
{
OVERFLOWD; NAMED = (char *) "exp";
- ifSVID
+ ifSVID
{
RETVAL_HUGED;
}
@@ -1584,14 +1996,14 @@ else
RETVAL_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case expf_overflow:
/* expf overflow */
{
OVERFLOWF; NAMEF = (char *) "expf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
}
@@ -1600,7 +2012,7 @@ else
RETVAL_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case expl_underflow:
@@ -1608,7 +2020,7 @@ else
{
UNDERFLOWL; NAMEL = (char *) "expl"; RETVAL_ZEROL;
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case exp_underflow:
@@ -1616,7 +2028,7 @@ else
{
UNDERFLOWD; NAMED = (char *) "exp"; RETVAL_ZEROD;
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case expf_underflow:
@@ -1624,22 +2036,22 @@ else
{
UNDERFLOWF; NAMEF = (char *) "expf"; RETVAL_ZEROF;
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_zero_to_zero:
/* powl 0**0 */
{
DOMAINL; NAMEL = (char *) "powl";
- ifSVID
+ ifSVID
{
RETVAL_ZEROL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_POW_ZERO_TO_ZERO;
- ERRNO_RANGE;
+ ERRNO_DOMAIN;
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
}
else RETVAL_ONEL;
break;
@@ -1648,15 +2060,15 @@ else
/* pow 0**0 */
{
DOMAIND; NAMED = (char *) "pow";
- ifSVID
+ ifSVID
{
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_POW_ZERO_TO_ZERO;
- ERRNO_RANGE;
+ ERRNO_DOMAIN;
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
}
else RETVAL_ONED;
break;
@@ -1665,15 +2077,15 @@ else
/* powf 0**0 */
{
DOMAINF; NAMEF = (char *) "powf";
- ifSVID
+ ifSVID
{
RETVAL_ZEROF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_POW_ZERO_TO_ZERO;
- ERRNO_RANGE;
+ ERRNO_DOMAIN;
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
}
else RETVAL_ONEF;
break;
@@ -1682,54 +2094,54 @@ else
/* powl(x,y) overflow */
{
OVERFLOWL; NAMEL = (char *) "powl";
- ifSVID
+ ifSVID
{
if (INPUT_XL < 0) RETVAL_NEG_HUGEL;
else RETVAL_HUGEL;
}
else
- {
+ {
if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case pow_overflow:
/* pow(x,y) overflow */
{
OVERFLOWD; NAMED = (char *) "pow";
- ifSVID
+ ifSVID
{
if (INPUT_XD < 0) RETVAL_NEG_HUGED;
else RETVAL_HUGED;
}
else
- {
+ {
if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_overflow:
/* powf(x,y) overflow */
{
OVERFLOWF; NAMEF = (char *) "powf";
- ifSVID
+ ifSVID
{
if (INPUT_XF < 0) RETVAL_NEG_HUGEF;
- else RETVAL_HUGEF;
+ else RETVAL_HUGEF;
}
else
- {
+ {
if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_underflow:
@@ -1737,7 +2149,7 @@ else
{
UNDERFLOWL; NAMEL = (char *) "powl"; RETVAL_ZEROL;
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case pow_underflow:
@@ -1745,7 +2157,7 @@ else
{
UNDERFLOWD; NAMED = (char *) "pow"; RETVAL_ZEROD;
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_underflow:
@@ -1753,17 +2165,17 @@ else
{
UNDERFLOWF; NAMEF = (char *) "powf"; RETVAL_ZEROF;
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_zero_to_negative:
/* 0 to neg */
{
DOMAINL; NAMEL = (char *) "powl";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_POW_ZERO_TO_NEGATIVE;
ERRNO_DOMAIN;
@@ -1774,17 +2186,17 @@ else
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case pow_zero_to_negative:
/* 0**neg */
{
DOMAIND; NAMED = (char *) "pow";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_POW_ZERO_TO_NEGATIVE;
ERRNO_DOMAIN;
@@ -1795,7 +2207,7 @@ else
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_zero_to_negative:
@@ -1803,10 +2215,10 @@ else
{
DOMAINF; NAMEF = (char *) "powf";
RETVAL_NEG_HUGE_VALF;
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_POW_ZERO_TO_NEGATIVE;
ERRNO_DOMAIN;
@@ -1817,17 +2229,17 @@ else
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_neg_to_non_integer:
/* neg**non_integral */
{
DOMAINL; NAMEL = (char *) "powl";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROF;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_POW_NEG_TO_NON_INTEGER;
ERRNO_DOMAIN;
@@ -1837,17 +2249,17 @@ else
{
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case pow_neg_to_non_integer:
/* neg**non_integral */
{
DOMAIND; NAMED = (char *) "pow";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_POW_NEG_TO_NON_INTEGER;
ERRNO_DOMAIN;
@@ -1857,17 +2269,17 @@ else
{
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_neg_to_non_integer:
/* neg**non-integral */
{
DOMAINF; NAMEF = (char *) "powf";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_POW_NEG_TO_NON_INTEGER;
ERRNO_DOMAIN;
@@ -1877,37 +2289,37 @@ else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_nan_to_zero:
/* pow(NaN,0.0) */
/* Special Error */
{
- DOMAINL; NAMEL = (char *) "powl"; INPUT_XL; INPUT_YL;
- excl.retval = *(long double *)arg1;
+ DOMAINL; NAMEL = (char *) "powl";
+ *(long double *)retval = *(long double *)arg1;
NOT_MATHERRL {ERRNO_DOMAIN;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
- }
+ }
case pow_nan_to_zero:
/* pow(NaN,0.0) */
/* Special Error */
{
- DOMAIND; NAMED = (char *) "pow"; INPUT_XD; INPUT_YD;
- exc.retval = *(double *)arg1;
+ DOMAIND; NAMED = (char *) "pow";
+ *(double *)retval = *(double *)arg1;
NOT_MATHERRD {ERRNO_DOMAIN;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_nan_to_zero:
/* powf(NaN,0.0) */
/* Special Error */
{
- DOMAINF; NAMEF = (char *) "powf"; INPUT_XF; INPUT_YF;
- excf.retval = *(float *)arg1;
+ DOMAINF; NAMEF = (char *) "powf";
+ *(float *)retval = *(float *)arg1;
NOT_MATHERRF {ERRNO_DOMAIN;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case atan2l_zero:
@@ -1915,15 +2327,15 @@ else
{
DOMAINL; NAMEL = (char *) "atan2l";
RETVAL_ZEROL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
- ifSVID
+ ifSVID
{
WRITEL_ATAN2_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case atan2_zero:
@@ -1931,15 +2343,15 @@ else
{
DOMAIND; NAMED = (char *) "atan2";
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
- ifSVID
- {
+ ifSVID
+ {
WRITED_ATAN2_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case atan2f_zero:
@@ -1947,13 +2359,59 @@ else
{
DOMAINF; NAMEF = (char *) "atan2f";
RETVAL_ZEROF;
- NOT_MATHERRF
- ifSVID
+ NOT_MATHERRF
+ ifSVID
{
WRITEF_ATAN2_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case atan2dl_zero:
+ /* atan2dl(0.0,0.0) */
+ {
+ DOMAINL; NAMEL = (char *) "atan2dl";
+ RETVAL_ZEROL;
+ NOT_MATHERRL
+ {
+ ifSVID
+ {
+ WRITEL_ATAN2D_ZERO_BY_ZERO;
+ }
+ ERRNO_DOMAIN;
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case atan2d_zero:
+ /* atan2d(0.0,0.0) */
+ {
+ DOMAIND; NAMED = (char *) "atan2d";
+ RETVAL_ZEROD;
+ NOT_MATHERRD
+ {
+ ifSVID
+ {
+ WRITED_ATAN2D_ZERO_BY_ZERO;
+ }
+ ERRNO_DOMAIN;
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case atan2df_zero:
+ /* atan2df(0.0,0.0) */
+ {
+ DOMAINF; NAMEF = (char *) "atan2df";
+ RETVAL_ZEROF;
+ NOT_MATHERRF
+ ifSVID
+ {
+ WRITEF_ATAN2D_ZERO_BY_ZERO;
+ }
+ ERRNO_DOMAIN;
+ *(float *)retval = excf.retval;
break;
}
case expm1_overflow:
@@ -1990,8 +2448,8 @@ else
UNDERFLOWL; NAMEL = (char *) "scalbl";
if (INPUT_XL < 0.0L) RETVAL_NEG_ZEROL;
else RETVAL_ZEROL;
- NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excf.retval;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
break;
}
case scalb_underflow:
@@ -2000,8 +2458,8 @@ else
UNDERFLOWD; NAMED = (char *) "scalb";
if (INPUT_XD < 0.0) RETVAL_NEG_ZEROD;
else RETVAL_ZEROD;
- NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
break;
}
case scalbf_underflow:
@@ -2010,8 +2468,8 @@ else
UNDERFLOWF; NAMEF = (char *) "scalbf";
if (INPUT_XF < 0.0) RETVAL_NEG_ZEROF;
else RETVAL_ZEROF;
- NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
break;
}
case scalbl_overflow:
@@ -2020,8 +2478,8 @@ else
OVERFLOWL; NAMEL = (char *) "scalbl";
if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
- NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
break;
}
case scalb_overflow:
@@ -2030,8 +2488,8 @@ else
OVERFLOWD; NAMED = (char *) "scalb";
if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
- NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
break;
}
case scalbf_overflow:
@@ -2040,8 +2498,8 @@ else
OVERFLOWF; NAMEF = (char *) "scalbf";
if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
- NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
break;
}
case hypotl_overflow:
@@ -2049,7 +2507,7 @@ else
{
OVERFLOWL; NAMEL = (char *) "hypotl";
ifSVID
- {
+ {
RETVAL_HUGEL;
}
else
@@ -2057,7 +2515,7 @@ else
RETVAL_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case hypot_overflow:
@@ -2065,7 +2523,7 @@ else
{
OVERFLOWD; NAMED = (char *) "hypot";
ifSVID
- {
+ {
RETVAL_HUGED;
}
else
@@ -2073,14 +2531,14 @@ else
RETVAL_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case hypotf_overflow:
/* hypotf overflow */
- {
+ {
OVERFLOWF; NAMEF = (char *) "hypotf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
}
@@ -2089,7 +2547,7 @@ else
RETVAL_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case acosl_gt_one:
@@ -2097,7 +2555,7 @@ else
{
DOMAINL; NAMEL = (char *) "acosl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2117,7 +2575,7 @@ else
{
DOMAIND; NAMED = (char *) "acos";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2137,9 +2595,9 @@ else
{
DOMAINF; NAMEF = (char *) "acosf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_ACOS;
ERRNO_DOMAIN;
@@ -2148,8 +2606,8 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
case asinl_gt_one:
@@ -2157,7 +2615,7 @@ else
{
DOMAINL; NAMEL = (char *) "asinl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2177,7 +2635,7 @@ else
{
DOMAIND; NAMED = (char *) "asin";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2197,9 +2655,9 @@ else
{
DOMAINF; NAMEF = (char *) "asinf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_ASIN;
ERRNO_DOMAIN;
@@ -2208,8 +2666,128 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case acosdl_gt_one:
+ /* acosdl(x > 1) */
+ {
+ DOMAINL; NAMEL = (char *) "acosdl";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_ACOSD;
+ ERRNO_DOMAIN;
+ }
}
- *(float *)retval = excf.retval;
+ else
+ {
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case acosd_gt_one:
+ /* acosd(x > 1) */
+ {
+ DOMAIND; NAMED = (char *) "acosd";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_ACOSD;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case acosdf_gt_one:
+ /* acosdf(x > 1) */
+ {
+ DOMAINF; NAMEF = (char *) "acosdf";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_ACOSD;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case asindl_gt_one:
+ /* asindl(x > 1) */
+ {
+ DOMAINL; NAMEL = (char *) "asindl";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_ASIND;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case asind_gt_one:
+ /* asind(x > 1) */
+ {
+ DOMAIND; NAMED = (char *) "asind";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_ASIND;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case asindf_gt_one:
+ /* asindf(x > 1) */
+ {
+ DOMAINF; NAMEF = (char *) "asindf";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_ASIND;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
break;
}
case coshl_overflow:
@@ -2220,7 +2798,7 @@ else
{
RETVAL_HUGEL;
}
- else
+ else
{
RETVAL_HUGE_VALL;
}
@@ -2236,7 +2814,7 @@ else
{
RETVAL_HUGED;
}
- else
+ else
{
RETVAL_HUGE_VALD;
}
@@ -2252,7 +2830,7 @@ else
{
RETVAL_HUGEF;
}
- else
+ else
{
RETVAL_HUGE_VALF;
}
@@ -2269,7 +2847,7 @@ else
if (INPUT_XL > 0.0) RETVAL_HUGEL;
else RETVAL_NEG_HUGEL;
}
- else
+ else
{
if (INPUT_XL > 0.0) RETVAL_HUGE_VALL;
else RETVAL_NEG_HUGE_VALL;
@@ -2287,7 +2865,7 @@ else
if (INPUT_XD > 0.0) RETVAL_HUGED;
else RETVAL_NEG_HUGED;
}
- else
+ else
{
if (INPUT_XD > 0.0) RETVAL_HUGE_VALD;
else RETVAL_NEG_HUGE_VALD;
@@ -2305,7 +2883,7 @@ else
if( INPUT_XF > 0.0) RETVAL_HUGEF;
else RETVAL_NEG_HUGEF;
}
- else
+ else
{
if (INPUT_XF > 0.0) RETVAL_HUGE_VALF;
else RETVAL_NEG_HUGE_VALF;
@@ -2318,7 +2896,7 @@ else
/* acoshl(x < 1) */
{
DOMAINL; NAMEL = (char *) "acoshl";
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2326,7 +2904,10 @@ else
ERRNO_DOMAIN;
}
}
- else NOT_MATHERRL {ERRNO_DOMAIN;}
+ else
+ {
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
*(long double *)retval = excl.retval;
break;
}
@@ -2334,7 +2915,7 @@ else
/* acosh(x < 1) */
{
DOMAIND; NAMED = (char *) "acosh";
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2342,7 +2923,10 @@ else
ERRNO_DOMAIN;
}
}
- else NOT_MATHERRD {ERRNO_DOMAIN;}
+ else
+ {
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
*(double *)retval = exc.retval;
break;
}
@@ -2350,7 +2934,7 @@ else
/* acoshf(x < 1) */
{
DOMAINF; NAMEF = (char *) "acoshf";
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -2369,7 +2953,7 @@ else
/* atanhl(|x| > 1) */
{
DOMAINL; NAMEL = (char *) "atanhl";
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2387,7 +2971,7 @@ else
/* atanh(|x| > 1) */
{
DOMAIND; NAMED = (char *) "atanh";
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2405,7 +2989,7 @@ else
/* atanhf(|x| > 1) */
{
DOMAINF; NAMEF = (char *) "atanhf";
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -2422,8 +3006,8 @@ else
case atanhl_eq_one:
/* atanhl(|x| == 1) */
{
- SINGL; NAMEL = (char *)"atanhl";
- ifSVID
+ SINGL; NAMEL = (char *) "atanhl";
+ ifSVID
{
NOT_MATHERRL
{
@@ -2441,7 +3025,7 @@ else
/* atanh(|x| == 1) */
{
SINGD; NAMED = (char *) "atanh";
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2459,7 +3043,7 @@ else
/* atanhf(|x| == 1) */
{
SINGF; NAMEF = (char *) "atanhf";
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -2477,7 +3061,7 @@ else
/* gammal overflow */
{
OVERFLOWL; NAMEL = (char *) "gammal";
- ifSVID
+ ifSVID
{
RETVAL_HUGEL;
}
@@ -2485,15 +3069,15 @@ else
{
RETVAL_HUGE_VALL;
}
- NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ NOT_MATHERRL{ERRNO_RANGE;}
+ *(long double*)retval = excl.retval;
break;
}
case gamma_overflow:
/* gamma overflow */
{
OVERFLOWD; NAMED = (char *) "gamma";
- ifSVID
+ ifSVID
{
RETVAL_HUGED;
}
@@ -2501,31 +3085,94 @@ else
{
RETVAL_HUGE_VALD;
}
- NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ NOT_MATHERRD{ERRNO_RANGE;}
+ *(double*)retval = exc.retval;
break;
}
case gammaf_overflow:
/* gammaf overflow */
{
OVERFLOWF; NAMEF = (char *) "gammaf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ }
+ NOT_MATHERRF{ERRNO_RANGE;}
+ *(float*)retval = excf.retval;
+ break;
+ }
+ case gammal_negative:
+ /* gammal -int or 0 */
+ {
+ SINGL; NAMEL = (char *) "gammal";
ifSVID
{
+ RETVAL_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_GAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ NOT_MATHERRL{ERRNO_DOMAIN;}
+ }
+ *(long double*)retval = excl.retval;
+ break;
+ }
+ case gamma_negative:
+ /* gamma -int or 0 */
+ {
+ SINGD; NAMED = (char *) "gamma";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_GAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ NOT_MATHERRD{ERRNO_DOMAIN;}
+ }
+ *(double*)retval = exc.retval;
+ break;
+ }
+ case gammaf_negative:
+ /* gammaf -int or 0 */
+ {
+ SINGF; NAMEF = (char *) "gammaf";
+ ifSVID
+ {
RETVAL_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_GAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
}
else
{
RETVAL_HUGE_VALF;
+ NOT_MATHERRF{ERRNO_DOMAIN;}
}
- NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float*)retval = excf.retval;
break;
}
case lgammal_overflow:
/* lgammal overflow */
{
OVERFLOWL; NAMEL = (char *) "lgammal";
- ifSVID
+ ifSVID
{
RETVAL_HUGEL;
}
@@ -2533,15 +3180,15 @@ else
{
RETVAL_HUGE_VALL;
}
- NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ NOT_MATHERRL{ERRNO_RANGE;}
+ *(long double*)retval = excl.retval;
break;
}
case lgamma_overflow:
/* lgamma overflow */
{
OVERFLOWD; NAMED = (char *) "lgamma";
- ifSVID
+ ifSVID
{
RETVAL_HUGED;
}
@@ -2549,15 +3196,15 @@ else
{
RETVAL_HUGE_VALD;
}
- NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ NOT_MATHERRD{ERRNO_RANGE;}
+ *(double*)retval = exc.retval;
break;
}
case lgammaf_overflow:
/* lgammaf overflow */
{
OVERFLOWF; NAMEF = (char *) "lgammaf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
}
@@ -2565,8 +3212,8 @@ else
{
RETVAL_HUGE_VALF;
}
- NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ NOT_MATHERRF{ERRNO_RANGE;}
+ *(float*)retval = excf.retval;
break;
}
case lgammal_negative:
@@ -2578,16 +3225,16 @@ else
RETVAL_HUGEL;
NOT_MATHERRL
{
- WRITEL_LGAMMA_NEGATIVE;
- ERRNO_DOMAIN;
+ WRITEL_GAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
}
}
else
{
RETVAL_HUGE_VALL;
- NOT_MATHERRL {ERRNO_DOMAIN;}
+ NOT_MATHERRL{ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double*)retval = excl.retval;
break;
}
case lgamma_negative:
@@ -2606,16 +3253,16 @@ else
else
{
RETVAL_HUGE_VALD;
- NOT_MATHERRD {ERRNO_DOMAIN;}
+ NOT_MATHERRD{ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double*)retval = exc.retval;
break;
}
case lgammaf_negative:
/* lgammaf -int or 0 */
{
SINGF; NAMEF = (char *) "lgammaf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
NOT_MATHERRF
@@ -2627,72 +3274,114 @@ else
else
{
RETVAL_HUGE_VALF;
- NOT_MATHERRF {ERRNO_DOMAIN;}
+ NOT_MATHERRF{ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float*)retval = excf.retval;
break;
}
- case gammal_negative:
- /* gammal -int or 0 */
+ case tgammal_overflow:
+ /* tgammal overflow */
{
- SINGL; NAMEL = (char *) "gammal";
- ifSVID
+ OVERFLOWL; NAMEL = (char *) "tgammal";
+ ifSVID
{
RETVAL_HUGEL;
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ }
+ NOT_MATHERRL{ERRNO_RANGE;}
+ *(long double*)retval = excl.retval;
+ break;
+ }
+ case tgamma_overflow:
+ /* tgamma overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "tgamma";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ }
+ NOT_MATHERRD{ERRNO_RANGE;}
+ *(double*)retval = exc.retval;
+ break;
+ }
+ case tgammaf_overflow:
+ /* tgammaf overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "tgammaf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ }
+ NOT_MATHERRF{ERRNO_RANGE;}
+ *(float*)retval = excf.retval;
+ break;
+ }
+ case tgammal_negative:
+ /* tgammal -int or 0 */
+ {
+ SINGL; NAMEL = (char *) "tgammal";
+ ifSVID
+ {
NOT_MATHERRL
{
- WRITEL_GAMMA_NEGATIVE;
- ERRNO_DOMAIN;
+ WRITEL_TGAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
}
}
else
{
- RETVAL_HUGE_VALL;
- NOT_MATHERRL {ERRNO_DOMAIN;}
+ NOT_MATHERRL{ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double*)retval = excl.retval;
break;
}
- case gamma_negative:
- /* gamma -int or 0 */
+ case tgamma_negative:
+ /* tgamma -int or 0 */
{
- SINGD; NAMED = (char *) "gamma";
- ifSVID
+ SINGD; NAMED = (char *) "tgamma";
+ ifSVID
{
- RETVAL_HUGED;
NOT_MATHERRD
{
- WRITED_GAMMA_NEGATIVE;
- ERRNO_DOMAIN;
+ WRITED_TGAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
}
}
else
{
- RETVAL_HUGE_VALD;
- NOT_MATHERRD {ERRNO_DOMAIN;}
+ NOT_MATHERRD{ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double*)retval = exc.retval;
break;
}
- case gammaf_negative:
- /* gammaf -int or 0 */
+ case tgammaf_negative:
+ /* tgammaf -int or 0 */
{
- SINGF; NAMEF = (char *) "gammaf";
- ifSVID
+ SINGF; NAMEF = (char *) "tgammaf";
+ ifSVID
{
- RETVAL_HUGEF;
NOT_MATHERRF
{
- WRITEF_GAMMA_NEGATIVE;
- ERRNO_DOMAIN;
+ WRITEF_TGAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
}
}
else
{
- RETVAL_HUGE_VALF;
- NOT_MATHERRF {ERRNO_DOMAIN;}
+ NOT_MATHERRF{ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float*)retval = excf.retval;
break;
}
case j0l_gt_loss:
@@ -2700,7 +3389,7 @@ else
{
TLOSSL; NAMEL = (char *) "j0l";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2712,7 +3401,7 @@ else
{
NOT_MATHERRL {ERRNO_RANGE;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case j0_gt_loss:
@@ -2720,7 +3409,7 @@ else
{
TLOSSD; NAMED = (char *) "j0";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2732,7 +3421,7 @@ else
{
NOT_MATHERRD {ERRNO_RANGE;}
}
- *(double*)retval = exc.retval;
+ *(double*)retval = exc.retval;
break;
}
case j0f_gt_loss:
@@ -2740,7 +3429,7 @@ else
{
TLOSSF; NAMEF = (char *) "j0f";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -2760,7 +3449,7 @@ else
{
TLOSSL; NAMEL = (char *) "j1l";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2772,7 +3461,7 @@ else
{
NOT_MATHERRL {ERRNO_RANGE;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case j1_gt_loss:
@@ -2780,7 +3469,7 @@ else
{
TLOSSD; NAMED = (char *) "j1";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2792,7 +3481,7 @@ else
{
NOT_MATHERRD {ERRNO_RANGE;}
}
- *(double*)retval = exc.retval;
+ *(double*)retval = exc.retval;
break;
}
case j1f_gt_loss:
@@ -2800,7 +3489,7 @@ else
{
TLOSSF; NAMEF = (char *) "j1f";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -2820,7 +3509,7 @@ else
{
TLOSSL; NAMEL = (char *) "jnl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2832,7 +3521,7 @@ else
{
NOT_MATHERRL {ERRNO_RANGE;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case jn_gt_loss:
@@ -2840,7 +3529,7 @@ else
{
TLOSSD; NAMED = (char *) "jn";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2852,7 +3541,7 @@ else
{
NOT_MATHERRD {ERRNO_RANGE;}
}
- *(double*)retval = exc.retval;
+ *(double*)retval = exc.retval;
break;
}
case jnf_gt_loss:
@@ -2860,7 +3549,7 @@ else
{
TLOSSF; NAMEF = (char *) "jnf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -2880,7 +3569,7 @@ else
{
TLOSSL; NAMEL = (char *) "y0l";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2900,7 +3589,7 @@ else
{
TLOSSD; NAMED = (char *) "y0";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2920,7 +3609,7 @@ else
{
TLOSSF; NAMEF = (char *) "y0f";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -2939,10 +3628,10 @@ else
/* y0l(0) */
{
DOMAINL; NAMEL = (char *) "y0l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_Y0_ZERO;
ERRNO_DOMAIN;
@@ -2950,20 +3639,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case y0_zero:
/* y0(0) */
{
DOMAIND; NAMED = (char *) "y0";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_Y0_ZERO;
ERRNO_DOMAIN;
@@ -2971,20 +3660,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case y0f_zero:
/* y0f(0) */
{
DOMAINF; NAMEF = (char *) "y0f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_Y0_ZERO;
ERRNO_DOMAIN;
@@ -2992,10 +3681,10 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case y1l_gt_loss:
@@ -3003,7 +3692,7 @@ else
{
TLOSSL; NAMEL = (char *) "y1l";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3023,7 +3712,7 @@ else
{
TLOSSD; NAMED = (char *) "y1";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3043,7 +3732,7 @@ else
{
TLOSSF; NAMEF = (char *) "y1f";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3062,10 +3751,10 @@ else
/* y1l(0) */
{
DOMAINL; NAMEL = (char *) "y1l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_Y1_ZERO;
ERRNO_DOMAIN;
@@ -3073,20 +3762,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case y1_zero:
/* y1(0) */
{
DOMAIND; NAMED = (char *) "y1";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_Y1_ZERO;
ERRNO_DOMAIN;
@@ -3094,30 +3783,30 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case y1f_zero:
/* y1f(0) */
{
DOMAINF; NAMEF = (char *) "y1f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_Y1_ZERO;
ERRNO_DOMAIN;
}
}else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case ynl_gt_loss:
@@ -3125,7 +3814,7 @@ else
{
TLOSSL; NAMEL = (char *) "ynl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3145,7 +3834,7 @@ else
{
TLOSSD; NAMED = (char *) "yn";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3165,7 +3854,7 @@ else
{
TLOSSF; NAMEF = (char *) "ynf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3184,10 +3873,10 @@ else
/* ynl(0) */
{
DOMAINL; NAMEL = (char *) "ynl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_YN_ZERO;
ERRNO_DOMAIN;
@@ -3195,20 +3884,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case yn_zero:
/* yn(0) */
{
DOMAIND; NAMED = (char *) "yn";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_YN_ZERO;
ERRNO_DOMAIN;
@@ -3216,20 +3905,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case ynf_zero:
/* ynf(0) */
{
DOMAINF; NAMEF = (char *) "ynf";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_YN_ZERO;
ERRNO_DOMAIN;
@@ -3237,20 +3926,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case y0l_negative:
/* y0l(x<0) */
{
DOMAINL; NAMEL = (char *) "y0l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_Y0_NEGATIVE;
ERRNO_DOMAIN;
@@ -3258,20 +3947,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case y0_negative:
/* y0(x<0) */
{
DOMAIND; NAMED = (char *) "y0";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_Y0_NEGATIVE;
ERRNO_DOMAIN;
@@ -3279,20 +3968,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case y0f_negative:
/* y0f(x<0) */
{
DOMAINF; NAMEF = (char *) "y0f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_Y0_NEGATIVE;
ERRNO_DOMAIN;
@@ -3300,20 +3989,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case y1l_negative:
/* y1l(x<0) */
{
DOMAINL; NAMEL = (char *) "y1l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_Y1_NEGATIVE;
ERRNO_DOMAIN;
@@ -3321,20 +4010,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case y1_negative:
/* y1(x<0) */
{
DOMAIND; NAMED = (char *) "y1";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_Y1_NEGATIVE;
ERRNO_DOMAIN;
@@ -3342,20 +4031,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case y1f_negative:
/* y1f(x<0) */
{
DOMAINF; NAMEF = (char *) "y1f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_Y1_NEGATIVE;
ERRNO_DOMAIN;
@@ -3363,20 +4052,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case ynl_negative:
/* ynl(x<0) */
{
DOMAINL; NAMEL = (char *) "ynl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_YN_NEGATIVE;
ERRNO_DOMAIN;
@@ -3384,20 +4073,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case yn_negative:
/* yn(x<0) */
{
DOMAIND; NAMED = (char *) "yn";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_YN_NEGATIVE;
ERRNO_DOMAIN;
@@ -3405,20 +4094,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case ynf_negative:
/* ynf(x<0) */
{
DOMAINF; NAMEF = (char *) "ynf";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_YN_NEGATIVE;
ERRNO_DOMAIN;
@@ -3426,18 +4115,18 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
- case fmodl_by_zero:
+ case fmodl_by_zero:
/* fmodl(x,0) */
{
DOMAINL; NAMEL = (char *) "fmodl";
- ifSVID
- {
+ ifSVID
+ {
*(long double *)retval = *(long double *)arg1;
NOT_MATHERRL
{
@@ -3445,21 +4134,21 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{ /* NaN already computed */
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
- case fmod_by_zero:
+ case fmod_by_zero:
/* fmod(x,0) */
{
DOMAIND; NAMED = (char *) "fmod";
- ifSVID
+ ifSVID
{
*(double *)retval = *(double *)arg1;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_FMOD;
ERRNO_DOMAIN;
@@ -3468,18 +4157,18 @@ else
else
{ /* NaN already computed */
NOT_MATHERRD {ERRNO_DOMAIN;}
- }
- *(double *)retval = exc.retval;
+ }
+ *(double *)retval = exc.retval;
break;
}
- case fmodf_by_zero:
+ case fmodf_by_zero:
/* fmodf(x,0) */
{
DOMAINF; NAMEF = (char *) "fmodf";
- ifSVID
+ ifSVID
{
*(float *)retval = *(float *)arg1;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_FMOD;
ERRNO_DOMAIN;
@@ -3488,36 +4177,36 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
- case remainderl_by_zero:
+ case remainderl_by_zero:
/* remainderl(x,0) */
{
DOMAINL; NAMEL = (char *) "remainderl";
- ifSVID
- {
+ ifSVID
+ {
NOT_MATHERRL
{
WRITEL_REM;
ERRNO_DOMAIN;
}
}
- else
+ else
{ /* NaN already computed */
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
- case remainder_by_zero:
+ case remainder_by_zero:
/* remainder(x,0) */
{
DOMAIND; NAMED = (char *) "remainder";
- ifSVID
+ ifSVID
{
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_REM;
ERRNO_DOMAIN;
@@ -3526,17 +4215,17 @@ else
else
{ /* NaN already computed */
NOT_MATHERRD {ERRNO_DOMAIN;}
- }
- *(double *)retval = exc.retval;
+ }
+ *(double *)retval = exc.retval;
break;
}
- case remainderf_by_zero:
+ case remainderf_by_zero:
/* remainderf(x,0) */
{
DOMAINF; NAMEF = (char *) "remainderf";
- ifSVID
+ ifSVID
{
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_REM;
ERRNO_DOMAIN;
@@ -3545,12 +4234,14 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
default:
- abort();
+ /* We don't want to abort () since SVID doesn't cover all math
+ library functions. */
+ break;
}
return;
}
diff --git a/sysdeps/ia64/fpu/libm_reduce.S b/sysdeps/ia64/fpu/libm_reduce.S
index 1c7f4e1e88..8bdf91d6de 100644
--- a/sysdeps/ia64/fpu/libm_reduce.S
+++ b/sysdeps/ia64/fpu/libm_reduce.S
@@ -1,10 +1,10 @@
.file "libm_reduce.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,304 +20,310 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// History: 02/02/00 Initial Version
+// History:
+// 02/02/00 Initial Version
+// 05/13/02 Rescheduled for speed, changed interface to pass
+// parameters in fp registers
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double data storage
//
-// *********************************************************************
-// *********************************************************************
+//*********************************************************************
+//*********************************************************************
//
// Function: __libm_pi_by_two_reduce(x) return r, c, and N where
// x = N * pi/4 + (r+c) , where |r+c| <= pi/4.
// This function is not designed to be used by the
// general user.
//
-// *********************************************************************
+//*********************************************************************
//
// Accuracy: Returns double-precision values
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
-// Floating-Point Registers: f32-f70
+// Floating-Point Registers:
+// f8 = Input x, return value r
+// f9 = return value c
+// f32-f70
//
// General Purpose Registers:
// r8 = return value N
-// r32 = Address of x
-// r33 = Address of where to place r and then c
// r34-r64
//
// Predicate Registers: p6-p14
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
-// No condions should be raised.
+// No condions should be raised.
//
-// *********************************************************************
+//*********************************************************************
//
// I. Introduction
// ===============
//
// For the forward trigonometric functions sin, cos, sincos, and
-// tan, the original algorithms for IA 64 handle arguments up to
+// tan, the original algorithms for IA 64 handle arguments up to
// 1 ulp less than 2^63 in magnitude. For double-extended arguments x,
-// |x| >= 2^63, this routine returns CASE, N and r_hi, r_lo where
-//
+// |x| >= 2^63, this routine returns N and r_hi, r_lo where
+//
// x is accurately approximated by
// 2*K*pi + N * pi/2 + r_hi + r_lo, |r_hi+r_lo| <= pi/4.
// CASE = 1 or 2.
// CASE is 1 unless |r_hi + r_lo| < 2^(-33).
-//
+//
// The exact value of K is not determined, but that information is
// not required in trigonometric function computations.
-//
-// We first assume the argument x in question satisfies x >= 2^(63).
+//
+// We first assume the argument x in question satisfies x >= 2^(63).
// In particular, it is positive. Negative x can be handled by symmetry:
-//
+//
// -x is accurately approximated by
// -2*K*pi + (-N) * pi/2 - (r_hi + r_lo), |r_hi+r_lo| <= pi/4.
-//
+//
// The idea of the reduction is that
-//
-// x * 2/pi = N_big + N + f, |f| <= 1/2
-//
+//
+// x * 2/pi = N_big + N + f, |f| <= 1/2
+//
// Moreover, for double extended x, |f| >= 2^(-75). (This is an
// non-obvious fact found by enumeration using a special algorithm
-// involving continued fraction.) The algorithm described below
+// involving continued fraction.) The algorithm described below
// calculates N and an accurate approximation of f.
-//
-// Roughly speaking, an appropriate 256-bit (4 X 64) portion of
+//
+// Roughly speaking, an appropriate 256-bit (4 X 64) portion of
// 2/pi is multiplied with x to give the desired information.
-//
+//
// II. Representation of 2/PI
// ==========================
-//
+//
// The value of 2/pi in binary fixed-point is
-//
+//
// .101000101111100110......
-//
+//
// We store 2/pi in a table, starting at the position corresponding
-// to bit position 63
-//
+// to bit position 63
+//
// bit position 63 62 ... 0 -1 -2 -3 -4 -5 -6 -7 .... -16576
-//
-// 0 0 ... 0 . 1 0 1 0 1 0 1 .... X
-//
+//
+// 0 0 ... 0 . 1 0 1 0 1 0 1 .... X
+//
// ^
-// |__ implied binary pt
-//
+// |__ implied binary pt
+//
// III. Algorithm
// ==============
-//
+//
// This describes the algorithm in the most natural way using
-// unsigned interger multiplication. The implementation section
+// unsigned interger multiplication. The implementation section
// describes how the integer arithmetic is simulated.
-//
+//
// STEP 0. Initialization
// ----------------------
-//
-// Let the input argument x be
-//
+//
+// Let the input argument x be
+//
// x = 2^m * ( 1. b_1 b_2 b_3 ... b_63 ), 63 <= m <= 16383.
-//
-// The first crucial step is to fetch four 64-bit portions of 2/pi.
+//
+// The first crucial step is to fetch four 64-bit portions of 2/pi.
// To fulfill this goal, we calculate the bit position L of the
// beginning of these 256-bit quantity by
-//
+//
// L := 62 - m.
-//
-// Note that -16321 <= L <= -1 because 63 <= m <= 16383; and that
+//
+// Note that -16321 <= L <= -1 because 63 <= m <= 16383; and that
// the storage of 2/pi is adequate.
-//
+//
// Fetch P_1, P_2, P_3, P_4 beginning at bit position L thus:
-//
+//
// bit position L L-1 L-2 ... L-63
-//
+//
// P_1 = b b b ... b
-//
+//
// each b can be 0 or 1. Also, let P_0 be the two bits correspoding to
// bit positions L+2 and L+1. So, when each of the P_j is interpreted
// with appropriate scaling, we have
//
// 2/pi = P_big + P_0 + (P_1 + P_2 + P_3 + P_4) + P_small
-//
+//
// Note that P_big and P_small can be ignored. The reasons are as follow.
// First, consider P_big. If P_big = 0, we can certainly ignore it.
-// Otherwise, P_big >= 2^(L+3). Now,
-//
+// Otherwise, P_big >= 2^(L+3). Now,
+//
// P_big * ulp(x) >= 2^(L+3) * 2^(m-63)
-// >= 2^(65-m + m-63 )
-// >= 2^2
-//
+// >= 2^(65-m + m-63 )
+// >= 2^2
+//
// Thus, P_big * x is an integer of the form 4*K. So
-//
-// x = 4*K * (pi/2) + x*(P_0 + P_1 + P_2 + P_3 + P_4)*(pi/2)
+//
+// x = 4*K * (pi/2) + x*(P_0 + P_1 + P_2 + P_3 + P_4)*(pi/2)
// + x*P_small*(pi/2).
-//
+//
// Hence, P_big*x corresponds to information that can be ignored for
// trigonometic function evaluation.
-//
+//
// Next, we must estimate the effect of ignoring P_small. The absolute
// error made by ignoring P_small is bounded by
-//
+//
// |P_small * x| <= ulp(P_4) * x
-// <= 2^(L-255) * 2^(m+1)
-// <= 2^(62-m-255 + m + 1)
-// <= 2^(-192)
-//
-// Since for double-extended precision, x * 2/pi = integer + f,
+// <= 2^(L-255) * 2^(m+1)
+// <= 2^(62-m-255 + m + 1)
+// <= 2^(-192)
+//
+// Since for double-extended precision, x * 2/pi = integer + f,
// 0.5 >= |f| >= 2^(-75), the relative error introduced by ignoring
// P_small is bounded by 2^(-192+75) <= 2^(-117), which is acceptable.
-//
+//
// Further note that if x is split into x_hi + x_lo where x_lo is the
// two bits corresponding to bit positions 2^(m-62) and 2^(m-63); then
-//
-// P_0 * x_hi
-//
+//
+// P_0 * x_hi
+//
// is also an integer of the form 4*K; and thus can also be ignored.
// Let M := P_0 * x_lo which is a small integer. The main part of the
// calculation is really the multiplication of x with the four pieces
// P_1, P_2, P_3, and P_4.
-//
+//
// Unless the reduced argument is extremely small in magnitude, it
// suffices to carry out the multiplication of x with P_1, P_2, and
-// P_3. x*P_4 will be carried out and added on as a correction only
+// P_3. x*P_4 will be carried out and added on as a correction only
// when it is found to be needed. Note also that x*P_4 need not be
// computed exactly. A straightforward multiplication suffices since
// the rounding error thus produced would be bounded by 2^(-3*64),
// that is 2^(-192) which is small enough as the reduced argument
// is bounded from below by 2^(-75).
-//
+//
// Now that we have four 64-bit data representing 2/pi and a
// 64-bit x. We first need to calculate a highly accurate product
// of x and P_1, P_2, P_3. This is best understood as integer
// multiplication.
-//
-//
+//
+//
// STEP 1. Multiplication
// ----------------------
-//
-//
+//
+//
// --------- --------- ---------
-// | P_1 | | P_2 | | P_3 |
-// --------- --------- ---------
-//
+// | P_1 | | P_2 | | P_3 |
+// --------- --------- ---------
+//
+// ---------
+// X | X |
// ---------
-// X | X |
-// ---------
// ----------------------------------------------------
//
// --------- ---------
-// | A_hi | | A_lo |
-// --------- ---------
+// | A_hi | | A_lo |
+// --------- ---------
//
//
// --------- ---------
-// | B_hi | | B_lo |
-// --------- ---------
+// | B_hi | | B_lo |
+// --------- ---------
//
//
-// --------- ---------
-// | C_hi | | C_lo |
-// --------- ---------
+// --------- ---------
+// | C_hi | | C_lo |
+// --------- ---------
//
// ====================================================
// --------- --------- --------- ---------
-// | S_0 | | S_1 | | S_2 | | S_3 |
-// --------- --------- --------- ---------
+// | S_0 | | S_1 | | S_2 | | S_3 |
+// --------- --------- --------- ---------
//
//
//
// STEP 2. Get N and f
// -------------------
-//
+//
// Conceptually, after the individual pieces S_0, S_1, ..., are obtained,
// we have to sum them and obtain an integer part, N, and a fraction, f.
// Here, |f| <= 1/2, and N is an integer. Note also that N need only to
// be known to module 2^k, k >= 2. In the case when |f| is small enough,
// we would need to add in the value x*P_4.
-//
-//
+//
+//
// STEP 3. Get reduced argument
// ----------------------------
-//
+//
// The value f is not yet the reduced argument that we seek. The
// equation
-//
-// x * 2/pi = 4K + N + f
-//
+//
+// x * 2/pi = 4K + N + f
+//
// says that
-//
+//
// x = 2*K*pi + N * pi/2 + f * (pi/2).
-//
+//
// Thus, the reduced argument is given by
-//
-// reduced argument = f * pi/2.
-//
+//
+// reduced argument = f * pi/2.
+//
// This multiplication must be performed to extra precision.
-//
+//
// IV. Implementation
// ==================
-//
+//
// Step 0. Initialization
// ----------------------
-//
+//
// Set sgn_x := sign(x); x := |x|; x_lo := 2 lsb of x.
-//
+//
// In memory, 2/pi is stored contigously as
-//
+//
// 0x00000000 0x00000000 0xA2F....
// ^
// |__ implied binary bit
-//
+//
// Given x = 2^m * 1.xxxx...xxx; we calculate L := 62 - m. Thus
// -1 <= L <= -16321. We fetch from memory 5 integer pieces of data.
-//
+//
// P_0 is the two bits corresponding to bit positions L+2 and L+1
// P_1 is the 64-bit starting at bit position L
// P_2 is the 64-bit starting at bit position L-64
// P_3 is the 64-bit starting at bit position L-128
// P_4 is the 64-bit starting at bit position L-192
-//
+//
// For example, if m = 63, P_0 would be 0 and P_1 would look like
// 0xA2F...
-//
+//
// If m = 65, P_0 would be the two msb of 0xA, thus, P_0 is 10 in binary.
-// P_1 in binary would be 1 0 0 0 1 0 1 1 1 1 ....
-//
+// P_1 in binary would be 1 0 0 0 1 0 1 1 1 1 ....
+//
// Step 1. Multiplication
// ----------------------
-//
+//
// At this point, P_1, P_2, P_3, P_4 are integers. They are
// supposed to be interpreted as
-//
+//
// 2^(L-63) * P_1;
// 2^(L-63-64) * P_2;
// 2^(L-63-128) * P_3;
// 2^(L-63-192) * P_4;
-//
+//
// Since each of them need to be multiplied to x, we would scale
// both x and the P_j's by some convenient factors: scale each
// of P_j's up by 2^(63-L), and scale x down by 2^(L-63).
-//
+//
// p_1 := fcvt.xf ( P_1 )
// p_2 := fcvt.xf ( P_2 ) * 2^(-64)
// p_3 := fcvt.xf ( P_3 ) * 2^(-128)
@@ -325,30 +331,30 @@
// x := replace exponent of x by -1
// because 2^m * 1.xxxx...xxx * 2^(L-63)
// is 2^(-1) * 1.xxxx...xxx
-//
+//
// We are now faced with the task of computing the following
-//
+//
// --------- --------- ---------
-// | P_1 | | P_2 | | P_3 |
-// --------- --------- ---------
-//
+// | P_1 | | P_2 | | P_3 |
+// --------- --------- ---------
+//
// ---------
-// X | X |
-// ---------
+// X | X |
+// ---------
// ----------------------------------------------------
-//
+//
// --------- ---------
-// | A_hi | | A_lo |
-// --------- ---------
-//
+// | A_hi | | A_lo |
+// --------- ---------
+//
// --------- ---------
-// | B_hi | | B_lo |
-// --------- ---------
-//
-// --------- ---------
-// | C_hi | | C_lo |
-// --------- ---------
-//
+// | B_hi | | B_lo |
+// --------- ---------
+//
+// --------- ---------
+// | C_hi | | C_lo |
+// --------- ---------
+//
// ====================================================
// ----------- --------- --------- ---------
// | S_0 | | S_1 | | S_2 | | S_3 |
@@ -357,108 +363,108 @@
// | |___ binary point
// |
// |___ possibly one more bit
-//
+//
// Let FPSR3 be set to round towards zero with widest precision
-// and exponent range. Unless an explicit FPSR is given,
+// and exponent range. Unless an explicit FPSR is given,
// round-to-nearest with widest precision and exponent range is
// used.
-//
+//
// Define sigma_C := 2^63; sigma_B := 2^(-1); sigma_C := 2^(-65).
-//
+//
// Tmp_C := fmpy.fpsr3( x, p_1 );
// If Tmp_C >= sigma_C then
// C_hi := Tmp_C;
// C_lo := x*p_1 - C_hi ...fma, exact
// Else
// C_hi := fadd.fpsr3(sigma_C, Tmp_C) - sigma_C
-// ...subtraction is exact, regardless
-// ...of rounding direction
+// ...subtraction is exact, regardless
+// ...of rounding direction
// C_lo := x*p_1 - C_hi ...fma, exact
// End If
-//
+//
// Tmp_B := fmpy.fpsr3( x, p_2 );
// If Tmp_B >= sigma_B then
// B_hi := Tmp_B;
// B_lo := x*p_2 - B_hi ...fma, exact
// Else
// B_hi := fadd.fpsr3(sigma_B, Tmp_B) - sigma_B
-// ...subtraction is exact, regardless
-// ...of rounding direction
+// ...subtraction is exact, regardless
+// ...of rounding direction
// B_lo := x*p_2 - B_hi ...fma, exact
// End If
-//
+//
// Tmp_A := fmpy.fpsr3( x, p_3 );
// If Tmp_A >= sigma_A then
// A_hi := Tmp_A;
// A_lo := x*p_3 - A_hi ...fma, exact
// Else
// A_hi := fadd.fpsr3(sigma_A, Tmp_A) - sigma_A
-// ...subtraction is exact, regardless
-// ...of rounding direction
+// ...subtraction is exact, regardless
+// ...of rounding direction
// A_lo := x*p_3 - A_hi ...fma, exact
// End If
-//
+//
// ...Note that C_hi is of integer value. We need only the
-// ...last few bits. Thus we can ensure C_hi is never a big
+// ...last few bits. Thus we can ensure C_hi is never a big
// ...integer, freeing us from overflow worry.
-//
+//
// Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);
// ...Tmp_C is the upper portion of C_hi
// C_hi := C_hi - Tmp_C
// ...0 <= C_hi < 2^7
-//
+//
// Step 2. Get N and f
// -------------------
-//
-// At this point, we have all the components to obtain
+//
+// At this point, we have all the components to obtain
// S_0, S_1, S_2, S_3 and thus N and f. We start by adding
// C_lo and B_hi. This sum together with C_hi gives a good
-// estimation of N and f.
-//
+// estimation of N and f.
+//
// A := fadd.fpsr3( B_hi, C_lo )
// B := max( B_hi, C_lo )
// b := min( B_hi, C_lo )
-//
-// a := (B - A) + b ...exact. Note that a is either 0
-// ...or 2^(-64).
-//
+//
+// a := (B - A) + b ...exact. Note that a is either 0
+// ...or 2^(-64).
+//
// N := round_to_nearest_integer_value( A );
-// f := A - N; ...exact because lsb(A) >= 2^(-64)
-// ...and |f| <= 1/2.
-//
-// f := f + a ...exact because a is 0 or 2^(-64);
-// ...the msb of the sum is <= 1/2
-// ...lsb >= 2^(-64).
-//
+// f := A - N; ...exact because lsb(A) >= 2^(-64)
+// ...and |f| <= 1/2.
+//
+// f := f + a ...exact because a is 0 or 2^(-64);
+// ...the msb of the sum is <= 1/2
+// ...lsb >= 2^(-64).
+//
// N := convert to integer format( C_hi + N );
// M := P_0 * x_lo;
// N := N + M;
-//
+//
// If sgn_x == 1 (that is original x was negative)
// N := 2^10 - N
// ...this maintains N to be non-negative, but still
// ...equivalent to the (negated N) mod 4.
// End If
-//
+//
// If |f| >= 2^(-33)
-//
+//
// ...Case 1
// CASE := 1
// g := A_hi + B_lo;
// s_hi := f + g;
// s_lo := (f - s_hi) + g;
-//
+//
// Else
-//
+//
// ...Case 2
// CASE := 2
// A := fadd.fpsr3( A_hi, B_lo )
// B := max( A_hi, B_lo )
// b := min( A_hi, B_lo )
-//
-// a := (B - A) + b ...exact. Note that a is either 0
-// ...or 2^(-128).
-//
+//
+// a := (B - A) + b ...exact. Note that a is either 0
+// ...or 2^(-128).
+//
// f_hi := A + f;
// f_lo := (f - f_hi) + A;
// ...this is exact.
@@ -468,9 +474,9 @@
// ...If f = 2^(-64), f-f_hi involves cancellation and is
// ...exact. If f = -2^(-64), then A + f is exact. Hence
// ...f-f_hi is -A exactly, giving f_lo = 0.
-//
+//
// f_lo := f_lo + a;
-//
+//
// If |f| >= 2^(-50) then
// s_hi := f_hi;
// s_lo := f_lo;
@@ -479,117 +485,111 @@
// s_hi := f_hi + f_lo
// s_lo := (f_hi - s_hi) + f_lo
// End If
-//
+//
// End If
-//
+//
// Step 3. Get reduced argument
// ----------------------------
-//
+//
// If sgn_x == 0 (that is original x is positive)
-//
+//
// D_hi := Pi_by_2_hi
// D_lo := Pi_by_2_lo
// ...load from table
-//
+//
// Else
-//
+//
// D_hi := neg_Pi_by_2_hi
// D_lo := neg_Pi_by_2_lo
// ...load from table
// End If
-//
+//
// r_hi := s_hi*D_hi
-// r_lo := s_hi*D_hi - r_hi ...fma
+// r_lo := s_hi*D_hi - r_hi ...fma
// r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi
-//
-// Return CASE, N, r_hi, r_lo
-//
-
-#include "libm_support.h"
-
-FR_X = f32
-FR_N = f33
-FR_p_1 = f34
-FR_TWOM33 = f35
-FR_TWOM50 = f36
-FR_g = f37
-FR_p_2 = f38
-FR_f = f39
-FR_s_lo = f40
-FR_p_3 = f41
-FR_f_abs = f42
-FR_D_lo = f43
-FR_p_4 = f44
-FR_D_hi = f45
-FR_Tmp2_C = f46
-FR_s_hi = f47
-FR_sigma_A = f48
-FR_A = f49
-FR_sigma_B = f50
-FR_B = f51
-FR_sigma_C = f52
-FR_b = f53
-FR_ScaleP2 = f54
-FR_ScaleP3 = f55
-FR_ScaleP4 = f56
-FR_Tmp_A = f57
-FR_Tmp_B = f58
-FR_Tmp_C = f59
-FR_A_hi = f60
-FR_f_hi = f61
-FR_r_hi = f62
-FR_A_lo = f63
-FR_B_hi = f64
-FR_a = f65
-FR_B_lo = f66
+//
+// Return N, r_hi, r_lo
+//
+FR_input_X = f8
+FR_r_hi = f8
+FR_r_lo = f9
+
+FR_X = f32
+FR_N = f33
+FR_p_1 = f34
+FR_TWOM33 = f35
+FR_TWOM50 = f36
+FR_g = f37
+FR_p_2 = f38
+FR_f = f39
+FR_s_lo = f40
+FR_p_3 = f41
+FR_f_abs = f42
+FR_D_lo = f43
+FR_p_4 = f44
+FR_D_hi = f45
+FR_Tmp2_C = f46
+FR_s_hi = f47
+FR_sigma_A = f48
+FR_A = f49
+FR_sigma_B = f50
+FR_B = f51
+FR_sigma_C = f52
+FR_b = f53
+FR_ScaleP2 = f54
+FR_ScaleP3 = f55
+FR_ScaleP4 = f56
+FR_Tmp_A = f57
+FR_Tmp_B = f58
+FR_Tmp_C = f59
+FR_A_hi = f60
+FR_f_hi = f61
+FR_RSHF = f62
+FR_A_lo = f63
+FR_B_hi = f64
+FR_a = f65
+FR_B_lo = f66
FR_f_lo = f67
-FR_r_lo = f68
-FR_C_hi = f69
-FR_C_lo = f70
+FR_N_fix = f68
+FR_C_hi = f69
+FR_C_lo = f70
GR_N = r8
-GR_Address_of_Input = r32
-GR_Address_of_Outputs = r33
-GR_Exp_x = r36
-GR_Temp = r37
-GR_BIASL63 = r38
+GR_Exp_x = r36
+GR_Temp = r37
+GR_BIASL63 = r38
GR_CASE = r39
-GR_x_lo = r40
-GR_sgn_x = r41
+GR_x_lo = r40
+GR_sgn_x = r41
GR_M = r42
GR_BASE = r43
GR_LENGTH1 = r44
GR_LENGTH2 = r45
GR_ASUB = r46
GR_P_0 = r47
-GR_P_1 = r48
-GR_P_2 = r49
-GR_P_3 = r50
-GR_P_4 = r51
+GR_P_1 = r48
+GR_P_2 = r49
+GR_P_3 = r50
+GR_P_4 = r51
GR_START = r52
GR_SEGMENT = r53
GR_A = r54
-GR_B = r55
+GR_B = r55
GR_C = r56
GR_D = r57
GR_E = r58
-GR_TEMP1 = r59
-GR_TEMP2 = r60
-GR_TEMP3 = r61
-GR_TEMP4 = r62
+GR_TEMP1 = r59
+GR_TEMP2 = r60
+GR_TEMP3 = r61
+GR_TEMP4 = r62
GR_TEMP5 = r63
GR_TEMP6 = r64
+GR_rshf = r64
+RODATA
.align 64
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
-Constants_Bits_of_2_by_pi:
-ASM_TYPE_DIRECTIVE(Constants_Bits_of_2_by_pi,@object)
+LOCAL_OBJECT_START(Constants_Bits_of_2_by_pi)
data8 0x0000000000000000,0xA2F9836E4E441529
data8 0xFC2757D1F534DDC0,0xDB6295993C439041
data8 0xFE5163ABDEBBC561,0xB7246E3A424DD2E0
@@ -721,34 +721,33 @@ data8 0xB5D6DF8261DD9602,0x36169F3AC4A1A283
data8 0x6DED727A8D39A9B8,0x825C326B5B2746ED
data8 0x34007700D255F4FC,0x4D59018071E0E13F
data8 0x89B295F364A8F1AE,0xA74B38FC4CEAB2BB
-ASM_SIZE_DIRECTIVE(Constants_Bits_of_2_by_pi)
+LOCAL_OBJECT_END(Constants_Bits_of_2_by_pi)
-Constants_Bits_of_pi_by_2:
-ASM_TYPE_DIRECTIVE(Constants_Bits_of_pi_by_2,@object)
-data4 0x2168C234,0xC90FDAA2,0x00003FFF,0x00000000
-data4 0x80DC1CD1,0xC4C6628B,0x00003FBF,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_Bits_of_pi_by_2)
+LOCAL_OBJECT_START(Constants_Bits_of_pi_by_2)
+data8 0xC90FDAA22168C234,0x00003FFF
+data8 0xC4C6628B80DC1CD1,0x00003FBF
+LOCAL_OBJECT_END(Constants_Bits_of_pi_by_2)
.section .text
-.proc __libm_pi_by_2_reduce#
.global __libm_pi_by_2_reduce#
-.align 64
+.proc __libm_pi_by_2_reduce#
+.align 32
-__libm_pi_by_2_reduce:
+__libm_pi_by_2_reduce:
-// X is at the address in Address_of_Input
-// Place the two-piece result at the address in Address_of_Outputs
-// r followed by c
-// N is returned
+// X is in f8
+// Place the two-piece result r (r_hi) in f8 and c (r_lo) in f9
+// N is returned in r8
-{ .mmf
-alloc r34 = ar.pfs,2,34,0,0
-(p0) ldfe FR_X = [GR_Address_of_Input]
-(p0) fsetc.s3 0x00,0x7F ;;
+{ .mfi
+ alloc r34 = ar.pfs,2,34,0,0
+ fsetc.s3 0x00,0x7F // Set sf3 to round to zero, 82-bit prec, td, ftz
+ nop.i 999
}
-{ .mlx
- nop.m 999
-(p0) movl GR_BIASL63 = 0x1003E
+{ .mfi
+ addl GR_BASE = @ltoff(Constants_Bits_of_2_by_pi#), gp
+ nop.f 999
+ mov GR_BIASL63 = 0x1003E
}
;;
@@ -765,73 +764,61 @@ alloc r34 = ar.pfs,2,34,0,0
// Address_BASE = shladd(SEGMENT,3) + BASE
-
{ .mmi
- nop.m 999
-(p0) addl GR_BASE = @ltoff(Constants_Bits_of_2_by_pi#), gp
- nop.i 999
+ getf.exp GR_Exp_x = FR_input_X
+ ld8 GR_BASE = [GR_BASE]
+ mov GR_TEMP5 = 0x0FFFE
}
;;
+// Define sigma_C := 2^63; sigma_B := 2^(-1); sigma_A := 2^(-65).
{ .mmi
- ld8 GR_BASE = [GR_BASE]
- nop.m 999
+ getf.sig GR_x_lo = FR_input_X
+ mov GR_TEMP6 = 0x0FFBE
nop.i 999
}
;;
-
-{ .mlx
- nop.m 999
-(p0) movl GR_TEMP5 = 0x000000000000FFFE
-}
-{ .mmi
- nop.m 999 ;;
-(p0) setf.exp FR_sigma_B = GR_TEMP5
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_TEMP6 = 0x000000000000FFBE ;;
-}
-// Define sigma_C := 2^63; sigma_B := 2^(-1); sigma_A := 2^(-65).
-{ .mfi
-(p0) setf.exp FR_sigma_A = GR_TEMP6
- nop.f 999
- nop.i 999 ;;
-}
-// Special Code for testing DE arguments
-// (p0) movl GR_BIASL63 = 0x0000000000013FFE
-// (p0) movl GR_x_lo = 0xFFFFFFFFFFFFFFFF
-// (p0) setf.exp FR_X = GR_BIASL63
-// (p0) setf.sig FR_ScaleP3 = GR_x_lo
-// (p0) fmerge.se FR_X = FR_X,FR_ScaleP3
+// Special Code for testing DE arguments
+// movl GR_BIASL63 = 0x0000000000013FFE
+// movl GR_x_lo = 0xFFFFFFFFFFFFFFFF
+// setf.exp FR_X = GR_BIASL63
+// setf.sig FR_ScaleP3 = GR_x_lo
+// fmerge.se FR_X = FR_X,FR_ScaleP3
// Set sgn_x := sign(x); x := |x|; x_lo := 2 lsb of x.
// 2/pi is stored contiguously as
// 0x00000000 0x00000000.0xA2F....
// M = EXP - BIAS ( M >= 63)
// Given x = 2^m * 1.xxxx...xxx; we calculate L := 62 - m.
// Thus -16321 <= L <= -1.
-{ .mmf
-(p0) getf.exp GR_Exp_x = FR_X
-(p0) getf.sig GR_x_lo = FR_X
-(p0) fabs FR_X = FR_X ;;
+{ .mmi
+ setf.exp FR_sigma_B = GR_TEMP5
+ setf.exp FR_sigma_A = GR_TEMP6
+ extr.u GR_M = GR_Exp_x,0,17
}
+;;
+
{ .mii
-(p0) and GR_x_lo = 0x03,GR_x_lo
-(p0) extr.u GR_M = GR_Exp_x,0,17 ;;
-(p0) sub GR_START = GR_M,GR_BIASL63
+ and GR_x_lo = 0x03,GR_x_lo
+ sub GR_START = GR_M,GR_BIASL63
+ add GR_BASE = 8,GR_BASE // To effectively add 1 to SEGMENT
}
-{ .mmi
- nop.m 999 ;;
-(p0) and GR_LENGTH1 = 0x3F,GR_START
-(p0) shr.u GR_SEGMENT = GR_START,6
+;;
+
+{ .mii
+ and GR_LENGTH1 = 0x3F,GR_START
+ shr.u GR_SEGMENT = GR_START,6
+ nop.i 999
}
+;;
+
{ .mmi
- nop.m 999 ;;
-(p0) add GR_SEGMENT = 0x1,GR_SEGMENT
-(p0) sub GR_LENGTH2 = 0x40,GR_LENGTH1
+ shladd GR_BASE = GR_SEGMENT,3,GR_BASE
+ sub GR_LENGTH2 = 0x40,GR_LENGTH1
+ cmp.le p6,p7 = 0x2,GR_LENGTH1
}
+;;
+
// P_0 is the two bits corresponding to bit positions L+2 and L+1
// P_1 is the 64-bit starting at bit position L
// P_2 is the 64-bit starting at bit position L-64
@@ -849,13 +836,13 @@ alloc r34 = ar.pfs,2,34,0,0
// P_4 is made up of Clo and Dhi
// P_4 = deposit Dlo, position 0, length2 into P_4, position length1
// deposit Ehi, position length2, length1 into P_4, position 0
-{ .mmi
-(p0) cmp.le.unc p6,p7 = 0x2,GR_LENGTH1 ;;
-(p0) shladd GR_BASE = GR_SEGMENT,3,GR_BASE
-(p7) cmp.eq.unc p8,p9 = 0x1,GR_LENGTH1 ;;
+{ .mfi
+ ld8 GR_A = [GR_BASE],8
+ fabs FR_X = FR_input_X
+(p7) cmp.eq.unc p8,p9 = 0x1,GR_LENGTH1
}
-{ .mmi
- nop.m 999
+;;
+
// ld_64 A at Base and increment Base by 8
// ld_64 B at Base and increment Base by 8
// ld_64 C at Base and increment Base by 8
@@ -866,31 +853,35 @@ alloc r34 = ar.pfs,2,34,0,0
// A, B, C, D, and E look like | length1 | length2 |
// ---------------------
// hi lo
-(p0) ld8 GR_A = [GR_BASE],8
-(p0) extr.u GR_sgn_x = GR_Exp_x,17,1 ;;
-}
-{ .mmf
- nop.m 999
-(p0) ld8 GR_B = [GR_BASE],8
-(p0) fmerge.se FR_X = FR_sigma_B,FR_X ;;
+{ .mlx
+ ld8 GR_B = [GR_BASE],8
+ movl GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift N_fix
}
-{ .mii
-(p0) ld8 GR_C = [GR_BASE],8
-(p8) extr.u GR_Temp = GR_A,63,1 ;;
-(p0) shl GR_TEMP1 = GR_A,GR_LENGTH1
+;;
+
+{ .mmi
+ ld8 GR_C = [GR_BASE],8
+ nop.m 999
+(p8) extr.u GR_Temp = GR_A,63,1
}
-{ .mii
-(p0) ld8 GR_D = [GR_BASE],8
+;;
+
// If length1 >= 2,
// P_0 = deposit Ahi, position length2, 2 bit into P_0 at position 0.
-(p6) shr.u GR_P_0 = GR_A,GR_LENGTH2 ;;
-(p0) shl GR_TEMP2 = GR_B,GR_LENGTH1
+{ .mii
+ ld8 GR_D = [GR_BASE],8
+ shl GR_TEMP1 = GR_A,GR_LENGTH1 // MM instruction
+(p6) shr.u GR_P_0 = GR_A,GR_LENGTH2 // MM instruction
}
+;;
+
{ .mii
-(p0) ld8 GR_E = [GR_BASE],-40
-(p0) shr.u GR_P_1 = GR_B,GR_LENGTH2 ;;
-(p0) shr.u GR_P_2 = GR_C,GR_LENGTH2
+ ld8 GR_E = [GR_BASE],-40
+ shl GR_TEMP2 = GR_B,GR_LENGTH1 // MM instruction
+ shr.u GR_P_1 = GR_B,GR_LENGTH2 // MM instruction
}
+;;
+
// Else
// Load 16 bit of ASUB from (Base_Address_of_A - 2)
// P_0 = ASUB & 0x3
@@ -900,43 +891,56 @@ alloc r34 = ar.pfs,2,34,0,0
// Deposit element 63 from Ahi and place in element 0 of P_0.
// Endif
// Endif
+
{ .mii
(p7) ld2 GR_ASUB = [GR_BASE],8
-(p0) shl GR_TEMP3 = GR_C,GR_LENGTH1 ;;
-(p0) shl GR_TEMP4 = GR_D,GR_LENGTH1
+ shl GR_TEMP3 = GR_C,GR_LENGTH1 // MM instruction
+ shr.u GR_P_2 = GR_C,GR_LENGTH2 // MM instruction
}
+;;
+
{ .mii
- nop.m 999
-(p0) shr.u GR_P_3 = GR_D,GR_LENGTH2 ;;
-(p0) shr.u GR_P_4 = GR_E,GR_LENGTH2
+ setf.d FR_RSHF = GR_rshf // Form right shift const 1.100 * 2^63
+ shl GR_TEMP4 = GR_D,GR_LENGTH1 // MM instruction
+ shr.u GR_P_3 = GR_D,GR_LENGTH2 // MM instruction
}
-{ .mii
+;;
+
+{ .mmi
(p7) and GR_P_0 = 0x03,GR_ASUB
-(p6) and GR_P_0 = 0x03,GR_P_0 ;;
-(p0) or GR_P_1 = GR_P_1,GR_TEMP1
+(p6) and GR_P_0 = 0x03,GR_P_0
+ shr.u GR_P_4 = GR_E,GR_LENGTH2 // MM instruction
}
+;;
+
{ .mmi
-(p8) and GR_P_0 = 0x1,GR_P_0 ;;
-(p0) or GR_P_2 = GR_P_2,GR_TEMP2
-(p8) shl GR_P_0 = GR_P_0,0x1 ;;
-}
-{ .mii
- nop.m 999
-(p0) or GR_P_3 = GR_P_3,GR_TEMP3
-(p8) or GR_P_0 = GR_P_0,GR_Temp
+ nop.m 999
+ or GR_P_1 = GR_P_1,GR_TEMP1
+(p8) and GR_P_0 = 0x1,GR_P_0
}
+;;
+
{ .mmi
-(p0) setf.sig FR_p_1 = GR_P_1 ;;
-(p0) setf.sig FR_p_2 = GR_P_2
-(p0) or GR_P_4 = GR_P_4,GR_TEMP4 ;;
+ setf.sig FR_p_1 = GR_P_1
+ or GR_P_2 = GR_P_2,GR_TEMP2
+(p8) shladd GR_P_0 = GR_P_0,1,GR_Temp
}
+;;
+
+{ .mmf
+ setf.sig FR_p_2 = GR_P_2
+ or GR_P_3 = GR_P_3,GR_TEMP3
+ fmerge.se FR_X = FR_sigma_B,FR_X
+}
+;;
+
{ .mmi
- nop.m 999 ;;
-(p0) setf.sig FR_p_3 = GR_P_3
-(p0) pmpy2.r GR_M = GR_P_0,GR_x_lo
+ setf.sig FR_p_3 = GR_P_3
+ or GR_P_4 = GR_P_4,GR_TEMP4
+ pmpy2.r GR_M = GR_P_0,GR_x_lo
}
-{ .mlx
-(p0) setf.sig FR_p_4 = GR_P_4
+;;
+
// P_1, P_2, P_3, P_4 are integers. They should be
// 2^(L-63) * P_1;
// 2^(L-63-64) * P_2;
@@ -954,18 +958,18 @@ alloc r34 = ar.pfs,2,34,0,0
// | P_1 | | P_2 | | P_3 |
// --------- --------- ---------
// ---------
-// X | X |
-// ---------
+// X | X |
+// ---------
// ----------------------------------------------------
// --------- ---------
-// | A_hi | | A_lo |
-// --------- ---------
+// | A_hi | | A_lo |
+// --------- ---------
// --------- ---------
-// | B_hi | | B_lo |
-// --------- ---------
+// | B_hi | | B_lo |
+// --------- ---------
+// --------- ---------
+// | C_hi | | C_lo |
// --------- ---------
-// | C_hi | | C_lo |
-// --------- ---------
// ====================================================
// ----------- --------- --------- ---------
// | S_0 | | S_1 | | S_2 | | S_3 |
@@ -977,52 +981,55 @@ alloc r34 = ar.pfs,2,34,0,0
// and exponent range. Unless an explicit FPSR is given,
// round-to-nearest with widest precision and exponent range is
// used.
-(p0) movl GR_TEMP1 = 0x000000000000FFBF
-}
{ .mmi
- nop.m 999 ;;
-(p0) setf.exp FR_ScaleP2 = GR_TEMP1
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_TEMP4 = 0x000000000001003E
+ setf.sig FR_p_4 = GR_P_4
+ mov GR_TEMP1 = 0x0FFBF
+ nop.i 999
}
+;;
+
{ .mmi
- nop.m 999 ;;
-(p0) setf.exp FR_sigma_C = GR_TEMP4
- nop.i 999
+ setf.exp FR_ScaleP2 = GR_TEMP1
+ mov GR_TEMP2 = 0x0FF7F
+ nop.i 999
}
-{ .mlx
- nop.m 999
-(p0) movl GR_TEMP2 = 0x000000000000FF7F ;;
+;;
+
+{ .mmi
+ setf.exp FR_ScaleP3 = GR_TEMP2
+ mov GR_TEMP4 = 0x1003E
+ nop.i 999
}
+;;
+
{ .mmf
- nop.m 999
-(p0) setf.exp FR_ScaleP3 = GR_TEMP2
-(p0) fcvt.xuf.s1 FR_p_1 = FR_p_1 ;;
+ setf.exp FR_sigma_C = GR_TEMP4
+ mov GR_Temp = 0x0FFDE
+ fcvt.xuf.s1 FR_p_1 = FR_p_1
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fcvt.xuf.s1 FR_p_2 = FR_p_2
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Temp = 0x000000000000FFDE ;;
-}
-{ .mmf
- nop.m 999
-(p0) setf.exp FR_TWOM33 = GR_Temp
-(p0) fcvt.xuf.s1 FR_p_3 = FR_p_3 ;;
+ setf.exp FR_TWOM33 = GR_Temp
+ fcvt.xuf.s1 FR_p_2 = FR_p_2
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fcvt.xuf.s1 FR_p_4 = FR_p_4
- nop.i 999 ;;
+ nop.m 999
+ fcvt.xuf.s1 FR_p_3 = FR_p_3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
+ nop.m 999
+ fcvt.xuf.s1 FR_p_4 = FR_p_4
+ nop.i 999
+}
+;;
+
// Tmp_C := fmpy.fpsr3( x, p_1 );
// Tmp_B := fmpy.fpsr3( x, p_2 );
// Tmp_A := fmpy.fpsr3( x, p_3 );
@@ -1048,55 +1055,62 @@ alloc r34 = ar.pfs,2,34,0,0
// Exact, regardless ...of rounding direction
// A_lo := x*p_3 - A_hi ...fma, exact
// Endif
-(p0) fmpy.s3 FR_Tmp_C = FR_X,FR_p_1
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 FR_p_2 = FR_p_2,FR_ScaleP2
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Temp = 0x0000000000000400
+ nop.m 999
+ fmpy.s3 FR_Tmp_C = FR_X,FR_p_1
+ nop.i 999
}
-{ .mlx
- nop.m 999
-(p0) movl GR_TEMP3 = 0x000000000000FF3F ;;
+;;
+
+{ .mfi
+ mov GR_TEMP3 = 0x0FF3F
+ fmpy.s1 FR_p_2 = FR_p_2,FR_ScaleP2
+ nop.i 999
}
+;;
+
{ .mmf
- nop.m 999
-(p0) setf.exp FR_ScaleP4 = GR_TEMP3
-(p0) fmpy.s1 FR_p_3 = FR_p_3,FR_ScaleP3 ;;
+ setf.exp FR_ScaleP4 = GR_TEMP3
+ mov GR_TEMP4 = 0x10045
+ fmpy.s1 FR_p_3 = FR_p_3,FR_ScaleP3
}
-{ .mlx
- nop.m 999
-(p0) movl GR_TEMP4 = 0x0000000000010045 ;;
+;;
+
+{ .mfi
+ nop.m 999
+ fadd.s3 FR_C_hi = FR_sigma_C,FR_Tmp_C // For Tmp_C < sigma_C case
+ nop.i 999
}
+;;
+
{ .mmf
- nop.m 999
-(p0) setf.exp FR_Tmp2_C = GR_TEMP4
-(p0) fmpy.s3 FR_Tmp_B = FR_X,FR_p_2 ;;
+ setf.exp FR_Tmp2_C = GR_TEMP4
+ nop.m 999
+ fmpy.s3 FR_Tmp_B = FR_X,FR_p_2
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p12, p9 = FR_Tmp_C,FR_sigma_C
- nop.i 999 ;;
+ addl GR_BASE = @ltoff(Constants_Bits_of_pi_by_2#), gp
+ fcmp.ge.s1 p12, p9 = FR_Tmp_C,FR_sigma_C
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s3 FR_Tmp_A = FR_X,FR_p_3
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s3 FR_Tmp_A = FR_X,FR_p_3
+ nop.i 99
}
+;;
+
{ .mfi
- nop.m 999
+ ld8 GR_BASE = [GR_BASE]
(p12) mov FR_C_hi = FR_Tmp_C
- nop.i 999 ;;
+ nop.i 999
}
{ .mfi
-(p0) addl GR_BASE = @ltoff(Constants_Bits_of_pi_by_2#), gp
-(p9) fadd.s3 FR_C_hi = FR_sigma_C,FR_Tmp_C
- nop.i 999
+ nop.m 999
+(p9) fsub.s1 FR_C_hi = FR_C_hi,FR_sigma_C
+ nop.i 999
}
;;
@@ -1114,97 +1128,106 @@ alloc r34 = ar.pfs,2,34,0,0
// Load from table
// End If
-
-{ .mmi
- ld8 GR_BASE = [GR_BASE]
+{ .mfi
nop.m 999
+ fmpy.s1 FR_p_4 = FR_p_4,FR_ScaleP4
nop.i 999
}
-;;
-
-
{ .mfi
-(p0) ldfe FR_D_hi = [GR_BASE],16
-(p0) fmpy.s1 FR_p_4 = FR_p_4,FR_ScaleP4
- nop.i 999 ;;
+ nop.m 999
+ fadd.s3 FR_B_hi = FR_sigma_B,FR_Tmp_B // For Tmp_B < sigma_B case
+ nop.i 999
}
+;;
+
{ .mfi
-(p0) ldfe FR_D_lo = [GR_BASE],0
-(p0) fcmp.ge.unc.s1 p13, p10 = FR_Tmp_B,FR_sigma_B
- nop.i 999 ;;
+ nop.m 999
+ fadd.s3 FR_A_hi = FR_sigma_A,FR_Tmp_A // For Tmp_A < sigma_A case
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p13) mov FR_B_hi = FR_Tmp_B
- nop.i 999
+ nop.m 999
+ fcmp.ge.s1 p13, p10 = FR_Tmp_B,FR_sigma_B
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p12) fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi
- nop.i 999 ;;
+ nop.m 999
+ fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fadd.s3 FR_B_hi = FR_sigma_B,FR_Tmp_B
- nop.i 999
+ ldfe FR_D_hi = [GR_BASE],16
+ fcmp.ge.s1 p14, p11 = FR_Tmp_A,FR_sigma_A
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p9) fsub.s1 FR_C_hi = FR_C_hi,FR_sigma_C
- nop.i 999 ;;
+ ldfe FR_D_lo = [GR_BASE]
+(p13) mov FR_B_hi = FR_Tmp_B
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p14, p11 = FR_Tmp_A,FR_sigma_A
- nop.i 999 ;;
+ nop.m 999
+(p10) fsub.s1 FR_B_hi = FR_B_hi,FR_sigma_B
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
+ nop.m 999
(p14) mov FR_A_hi = FR_Tmp_A
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p11) fadd.s3 FR_A_hi = FR_sigma_A,FR_Tmp_A
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p9) fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi
-(p0) cmp.eq.unc p12,p9 = 0x1,GR_sgn_x
-}
-{ .mfi
- nop.m 999
-(p13) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi
- nop.i 999 ;;
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p10) fsub.s1 FR_B_hi = FR_B_hi,FR_sigma_B
- nop.i 999
+ nop.m 999
+(p11) fsub.s1 FR_A_hi = FR_A_hi,FR_sigma_A
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
// Note that C_hi is of integer value. We need only the
// last few bits. Thus we can ensure C_hi is never a big
// integer, freeing us from overflow worry.
// Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);
// Tmp_C is the upper portion of C_hi
-(p0) fadd.s3 FR_Tmp_C = FR_C_hi,FR_Tmp2_C
- nop.i 999 ;;
+{ .mfi
+ nop.m 999
+ fadd.s3 FR_Tmp_C = FR_C_hi,FR_Tmp2_C
+ tbit.z p12,p9 = GR_Exp_x, 17
}
+;;
+
{ .mfi
- nop.m 999
-(p14) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi
- nop.i 999
+ nop.m 999
+ fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p11) fsub.s1 FR_A_hi = FR_A_hi,FR_sigma_A
- nop.i 999 ;;
+ nop.m 999
+ fadd.s3 FR_A = FR_B_hi,FR_C_lo
+ nop.i 999
}
+;;
+
+{ .mfi
+ nop.m 999
+ fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi
+ nop.i 999
+}
+;;
+
{ .mfi
- nop.m 999
+ nop.m 999
+ fsub.s1 FR_Tmp_C = FR_Tmp_C,FR_Tmp2_C
+ nop.i 999
+}
+;;
+
// *******************
// Step 2. Get N and f
// *******************
@@ -1215,168 +1238,213 @@ alloc r34 = ar.pfs,2,34,0,0
// A := fadd.fpsr3( B_hi, C_lo )
// B := max( B_hi, C_lo )
// b := min( B_hi, C_lo )
-(p0) fadd.s3 FR_A = FR_B_hi,FR_C_lo
- nop.i 999
-}
{ .mfi
- nop.m 999
-(p10) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi
- nop.i 999 ;;
+ nop.m 999
+ fmax.s1 FR_B = FR_B_hi,FR_C_lo
+ nop.i 999
}
+;;
+
+// We use a right-shift trick to get the integer part of A into the rightmost
+// bits of the significand by adding 1.1000..00 * 2^63. This operation is good
+// if |A| < 2^61, which it is in this case. We are doing this to save a few
+// cycles over using fcvt.fx followed by fnorm. The second step of the trick
+// is to subtract the same constant to float the rounded integer into a fp reg.
+
{ .mfi
- nop.m 999
-(p0) fsub.s1 FR_Tmp_C = FR_Tmp_C,FR_Tmp2_C
- nop.i 999 ;;
+ nop.m 999
+// N := round_to_nearest_integer_value( A );
+ fma.s1 FR_N_fix = FR_A, f1, FR_RSHF
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmax.s1 FR_B = FR_B_hi,FR_C_lo
- nop.i 999 ;;
+ nop.m 999
+ fmin.s1 FR_b = FR_B_hi,FR_C_lo
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmin.s1 FR_b = FR_B_hi,FR_C_lo
- nop.i 999
+ nop.m 999
+// C_hi := C_hi - Tmp_C ...0 <= C_hi < 2^7
+ fsub.s1 FR_C_hi = FR_C_hi,FR_Tmp_C
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi
- nop.i 999 ;;
+ nop.m 999
+// a := (B - A) + b: Exact - note that a is either 0 or 2^(-64).
+ fsub.s1 FR_a = FR_B,FR_A
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-// N := round_to_nearest_integer_value( A );
-(p0) fcvt.fx.s1 FR_N = FR_A
- nop.i 999 ;;
+ nop.m 999
+ fms.s1 FR_N = FR_N_fix, f1, FR_RSHF
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-// C_hi := C_hi - Tmp_C ...0 <= C_hi < 2^7
-(p0) fsub.s1 FR_C_hi = FR_C_hi,FR_Tmp_C
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 FR_a = FR_a,FR_b
+ nop.i 999
}
+;;
+
+// f := A - N; Exact because lsb(A) >= 2^(-64) and |f| <= 1/2.
+// N := convert to integer format( C_hi + N );
+// M := P_0 * x_lo;
+// N := N + M;
{ .mfi
- nop.m 999
-// a := (B - A) + b: Exact - note that a is either 0 or 2^(-64).
-(p0) fsub.s1 FR_a = FR_B,FR_A
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 FR_f = FR_A,FR_N
+ nop.i 999
}
{ .mfi
- nop.m 999
-// f := A - N; Exact because lsb(A) >= 2^(-64) and |f| <= 1/2.
-(p0) fnorm.s1 FR_N = FR_N
- nop.i 999
+ nop.m 999
+ fadd.s1 FR_N = FR_N,FR_C_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_a = FR_a,FR_b
- nop.i 999 ;;
+ nop.m 999
+(p9) fsub.s1 FR_D_hi = f0, FR_D_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fsub.s1 FR_f = FR_A,FR_N
- nop.i 999
+ nop.m 999
+(p9) fsub.s1 FR_D_lo = f0, FR_D_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-// N := convert to integer format( C_hi + N );
-// M := P_0 * x_lo;
-// N := N + M;
-(p0) fadd.s1 FR_N = FR_N,FR_C_hi
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 FR_g = FR_A_hi,FR_B_lo // For Case 1, g=A_hi+B_lo
+ nop.i 999
}
{ .mfi
- nop.m 999
-// f = f + a Exact because a is 0 or 2^(-64);
-// the msb of the sum is <= 1/2 and lsb >= 2^(-64).
-(p0) fadd.s1 FR_f = FR_f,FR_a
- nop.i 999
+ nop.m 999
+ fadd.s3 FR_A = FR_A_hi,FR_B_lo // For Case 2, A=A_hi+B_lo w/ sf3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Create 2**(-33)
-//
-(p0) fcvt.fx.s1 FR_N = FR_N
- nop.i 999 ;;
+ mov GR_Temp = 0x0FFCD // For Case 2, exponent of 2^-50
+ fmax.s1 FR_B = FR_A_hi,FR_B_lo // For Case 2, B=max(A_hi,B_lo)
+ nop.i 999
}
+;;
+
+// f = f + a Exact because a is 0 or 2^(-64);
+// the msb of the sum is <= 1/2 and lsb >= 2^(-64).
{ .mfi
- nop.m 999
-(p0) fabs FR_f_abs = FR_f
- nop.i 999 ;;
+ setf.exp FR_TWOM50 = GR_Temp // For Case 2, form 2^-50
+ fcvt.fx.s1 FR_N = FR_N
+ nop.i 999
}
{ .mfi
-(p0) getf.sig GR_N = FR_N
- nop.f 999
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 FR_f = FR_f,FR_a
+ nop.i 999
}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) add GR_N = GR_N,GR_M ;;
+;;
+
+{ .mfi
+ nop.m 999
+ fmin.s1 FR_b = FR_A_hi,FR_B_lo // For Case 2, b=min(A_hi,B_lo)
+ nop.i 999
}
-// If sgn_x == 1 (that is original x was negative)
-// N := 2^10 - N
-// this maintains N to be non-negative, but still
-// equivalent to the (negated N) mod 4.
-// End If
-{ .mii
-(p12) sub GR_N = GR_Temp,GR_N
-(p0) cmp.eq.unc p12,p9 = 0x0,GR_sgn_x ;;
- nop.i 999
+;;
+
+{ .mfi
+ nop.m 999
+ fsub.s1 FR_a = FR_B,FR_A // For Case 2, a=B-A
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fcmp.ge.unc.s1 p13, p10 = FR_f_abs,FR_TWOM33
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 FR_s_hi = FR_f,FR_g // For Case 1, s_hi=f+g
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p9) fsub.s1 FR_D_hi = f0, FR_D_hi
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 FR_f_hi = FR_A,FR_f // For Case 2, f_hi=A+f
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fadd.s3 FR_A = FR_A_hi,FR_B_lo
- nop.i 999
+ nop.m 999
+ fabs FR_f_abs = FR_f
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p13) fadd.s1 FR_g = FR_A_hi,FR_B_lo
- nop.i 999 ;;
+ getf.sig GR_N = FR_N
+ fsetc.s3 0x7F,0x40 // Reset sf3 to user settings + td
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fmax.s1 FR_B = FR_A_hi,FR_B_lo
- nop.i 999
+ nop.m 999
+ fsub.s1 FR_s_lo = FR_f,FR_s_hi // For Case 1, s_lo=f-s_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p9) fsub.s1 FR_D_lo = f0, FR_D_lo
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 FR_f_lo = FR_f,FR_f_hi // For Case 2, f_lo=f-f_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fmin.s1 FR_b = FR_A_hi,FR_B_lo
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi // For Case 1, r_hi=s_hi*D_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.m 999
+ fadd.s1 FR_a = FR_a,FR_b // For Case 2, a=a+b
+ nop.i 999
}
-{ .mlx
- nop.m 999
-(p10) movl GR_Temp = 0x000000000000FFCD ;;
+;;
+
+
+// If sgn_x == 1 (that is original x was negative)
+// N := 2^10 - N
+// this maintains N to be non-negative, but still
+// equivalent to the (negated N) mod 4.
+// End If
+{ .mfi
+ add GR_N = GR_N,GR_M
+ fcmp.ge.s1 p13, p10 = FR_f_abs,FR_TWOM33
+ mov GR_Temp = 0x00400
}
-{ .mmf
- nop.m 999
-(p10) setf.exp FR_TWOM50 = GR_Temp
-(p10) fadd.s1 FR_f_hi = FR_A,FR_f ;;
+;;
+
+{ .mfi
+(p9) sub GR_N = GR_Temp,GR_N
+ fadd.s1 FR_s_lo = FR_s_lo,FR_g // For Case 1, s_lo=s_lo+g
+ nop.i 999
}
{ .mfi
- nop.m 999
-// a := (B - A) + b Exact.
+ nop.m 999
+ fadd.s1 FR_f_lo = FR_f_lo,FR_A // For Case 2, f_lo=f_lo+A
+ nop.i 999
+}
+;;
+
+// a := (B - A) + b Exact.
// Note that a is either 0 or 2^(-128).
// f_hi := A + f;
// f_lo := (f - f_hi) + A
@@ -1387,68 +1455,32 @@ alloc r34 = ar.pfs,2,34,0,0
// exact. If f = -2^(-64), then A + f is exact. Hence
// f-f_hi is -A exactly, giving f_lo = 0.
// f_lo := f_lo + a;
-(p10) fsub.s1 FR_a = FR_B,FR_A
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p13) fadd.s1 FR_s_hi = FR_f,FR_g
- nop.i 999 ;;
-}
-{ .mlx
- nop.m 999
+
// If |f| >= 2^(-33)
// Case 1
// CASE := 1
// g := A_hi + B_lo;
// s_hi := f + g;
// s_lo := (f - s_hi) + g;
-(p13) movl GR_CASE = 0x1 ;;
-}
-{ .mlx
- nop.m 999
// Else
// Case 2
// CASE := 2
// A := fadd.fpsr3( A_hi, B_lo )
// B := max( A_hi, B_lo )
// b := min( A_hi, B_lo )
-(p10) movl GR_CASE = 0x2
-}
-{ .mfi
- nop.m 999
-(p10) fsub.s1 FR_f_lo = FR_f,FR_f_hi
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p10) fadd.s1 FR_a = FR_a,FR_b
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p13) fsub.s1 FR_s_lo = FR_f,FR_s_hi
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p13) fadd.s1 FR_s_lo = FR_s_lo,FR_g
- nop.i 999 ;;
-}
+
{ .mfi
- nop.m 999
-(p10) fcmp.ge.unc.s1 p14, p11 = FR_f_abs,FR_TWOM50
- nop.i 999 ;;
+ nop.m 999
+(p10) fcmp.ge.unc.s1 p14, p11 = FR_f_abs,FR_TWOM50
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Create 2**(-50)
-(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_A
- nop.i 999 ;;
+ nop.m 999
+(p13) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi //For Case 1, r_lo=s_hi*D_hi-r_hi
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
// If |f| >= 2^(-50) then
// s_hi := f_hi;
// s_lo := f_lo;
@@ -1457,84 +1489,90 @@ alloc r34 = ar.pfs,2,34,0,0
// s_hi := f_hi + f_lo
// s_lo := (f_hi - s_hi) + f_lo
// End If
-(p14) mov FR_s_hi = FR_f_hi
- nop.i 999 ;;
+{ .mfi
+ nop.m 999
+(p14) mov FR_s_hi = FR_f_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_a
- nop.i 999 ;;
+ nop.m 999
+(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_a
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p14) mov FR_s_lo = FR_f_lo
- nop.i 999
+ nop.m 999
+(p14) mov FR_s_lo = FR_f_lo
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p11) fadd.s1 FR_f_lo = FR_f_lo,FR_A_lo
- nop.i 999 ;;
+ nop.m 999
+(p11) fadd.s1 FR_f_lo = FR_f_lo,FR_A_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fma.s1 FR_f_lo = FR_X,FR_p_4,FR_f_lo
- nop.i 999 ;;
+ nop.m 999
+(p11) fma.s1 FR_f_lo = FR_X,FR_p_4,FR_f_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fadd.s1 FR_s_hi = FR_f_hi,FR_f_lo
- nop.i 999 ;;
+ nop.m 999
+(p13) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo //For Case 1, r_lo=s_hi*D_lo+r_lo
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
+(p11) fadd.s1 FR_s_hi = FR_f_hi,FR_f_lo
+ nop.i 999
+}
+;;
+
// r_hi := s_hi*D_hi
// r_lo := s_hi*D_hi - r_hi with fma
// r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi
-(p0) fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi
- nop.i 999
-}
{ .mfi
- nop.m 999
-(p11) fsub.s1 FR_s_lo = FR_f_hi,FR_s_hi
- nop.i 999 ;;
+ nop.m 999
+(p10) fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi
- nop.i 999
+ nop.m 999
+(p11) fsub.s1 FR_s_lo = FR_f_hi,FR_s_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fadd.s1 FR_s_lo = FR_s_lo,FR_f_lo
- nop.i 999 ;;
-}
-{ .mmi
- nop.m 999 ;;
-// Return N, r_hi, r_lo
-// We do not return CASE
-(p0) stfe [GR_Address_of_Outputs] = FR_r_hi,16
- nop.i 999 ;;
+ nop.m 999
+(p10) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo
- nop.i 999 ;;
+ nop.m 999
+(p11) fadd.s1 FR_s_lo = FR_s_lo,FR_f_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_r_lo = FR_s_lo,FR_D_hi,FR_r_lo
- nop.i 999 ;;
-}
-{ .mmi
- nop.m 999 ;;
-(p0) stfe [GR_Address_of_Outputs] = FR_r_lo,-16
- nop.i 999
+ nop.m 999
+(p10) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo
+ nop.i 999
}
-{ .mib
- nop.m 999
- nop.i 999
-(p0) br.ret.sptk b0 ;;
+;;
+
+// Return N, r_hi, r_lo
+// We do not return CASE
+{ .mfb
+ nop.m 999
+ fma.s1 FR_r_lo = FR_s_lo,FR_D_hi,FR_r_lo
+ br.ret.sptk b0
}
+;;
-.endp __libm_pi_by_2_reduce
-ASM_SIZE_DIRECTIVE(__libm_pi_by_2_reduce)
+.endp __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/libm_support.h b/sysdeps/ia64/fpu/libm_support.h
index 5d3498dfc9..50dac33133 100644
--- a/sysdeps/ia64/fpu/libm_support.h
+++ b/sysdeps/ia64/fpu/libm_support.h
@@ -1,9 +1,10 @@
-//
-// Copyright (C) 2000, 2001, Intel Corporation
+/* file: libm_support.h */
+
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -19,14 +20,14 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -34,45 +35,51 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// History: 02/02/2000 Initial version
+// History: 02/02/2000 Initial version
// 2/28/2000 added tags for logb and nextafter
-// 3/22/2000 Changes to support _LIB_VERSION variable
-// and filled some enum gaps. Added support for C99.
+// 3/22/2000 Changes to support _LIB_VERSIONIMF variable
+// and filled some enum gaps. Added support for C99.
// 5/31/2000 added prototypes for __libm_frexp_4l/8l
-// 8/10/2000 Changed declaration of _LIB_VERSION to work for library
+// 8/10/2000 Changed declaration of _LIB_VERSIONIMF to work for library
// builds and other application builds (precompiler directives).
// 8/11/2000 Added pointers-to-matherr-functions declarations to allow
// for user-defined matherr functions in the dll build.
// 12/07/2000 Added scalbn error_types values.
+// 5/01/2001 Added error_types values for C99 nearest integer
+// functions.
+// 6/07/2001 Added error_types values for fdim.
+// 6/18/2001 Added include of complex_support.h.
+// 8/03/2001 Added error_types values for nexttoward, scalbln.
+// 8/23/2001 Corrected tag numbers from 186 and higher.
+// 8/27/2001 Added check for long int and long long int definitions.
+// 12/10/2001 Added error_types for erfc.
+// 12/27/2001 Added error_types for degree argument functions.
+// 01/02/2002 Added error_types for tand, cotd.
+// 01/04/2002 Delete include of complex_support.h
+// 01/23/2002 Deleted prototypes for __libm_frexp*. Added check for
+// multiple int, long int, and long long int definitions.
+// 05/20/2002 Added error_types for cot.
+// 06/27/2002 Added error_types for sinhcosh.
+// 12/05/2002 Added error_types for annuity and compound
+// 04/10/2003 Added error_types for tgammal/tgamma/tgammaf
//
-#ifndef __ASSEMBLER__
-#include <math.h>
-
-float __libm_frexp_4f( float x, int* exp);
-float _GI___libm_frexp_4f( float x, int* exp);
-float __libm_frexp_8f( float x, int* exp);
-double __libm_frexp_4( double x, int* exp);
-double _GI___libm_frexp_4( double x, int* exp);
-double __libm_frexp_8( double x, int* exp);
-long double __libm_frexp_4l( long double x, int* exp);
-long double _GI___libm_frexp_4l( long double x, int* exp);
-long double __libm_frexp_8l( long double x, int* exp);
void __libm_sincos_pi4(double,double*,double*,int);
void __libm_y0y1(double , double *, double *);
void __libm_j0j1(double , double *, double *);
-double __libm_lgamma_kernel(double,int*,int,int);
double __libm_j0(double);
double __libm_j1(double);
double __libm_jn(int,double);
double __libm_y0(double);
double __libm_y1(double);
double __libm_yn(int,double);
+double __libm_copysign (double, double);
+float __libm_copysignf (float, float);
+long double __libm_copysignl (long double, long double);
-extern double rint(double);
extern double sqrt(double);
extern double fabs(double);
extern double log(double);
@@ -112,24 +119,31 @@ extern long double log1pl(long double);
extern long double logl(long double);
extern long double sqrtl(long double);
extern long double expl(long double);
-
-extern long lround(double);
-extern long lroundf(float);
-extern long lroundl(long double);
+extern long double fabsl(long double);
#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64))
- #error integer size not established; define SIZE_INT_32 or SIZE_INT_64
+#error integer size not established; define SIZE_INT_32 or SIZE_INT_64
#endif
-struct fp64 { /*/ sign:1 exponent:11 significand:52 (implied leading 1)*/
- unsigned lo_significand:32;
- unsigned hi_significand:20;
- unsigned exponent:11;
- unsigned sign:1;
-};
+#if (defined(SIZE_INT_32) && defined(SIZE_INT_64))
+#error multiple integer size definitions; define SIZE_INT_32 or SIZE_INT_64
+#endif
-#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
-#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
+#if !(defined(SIZE_LONG_INT_32) || defined(SIZE_LONG_INT_64))
+#error long int size not established; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64
+#endif
+
+#if (defined(SIZE_LONG_INT_32) && defined(SIZE_LONG_INT_64))
+#error multiple long int size definitions; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64
+#endif
+
+#if !(defined(SIZE_LONG_LONG_INT_32) || defined(SIZE_LONG_LONG_INT_64))
+#error long long int size not established; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64
+#endif
+
+#if (defined(SIZE_LONG_LONG_INT_32) && defined(SIZE_LONG_LONG_INT_64))
+#error multiple long long int size definitions; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64
+#endif
typedef enum
{
@@ -148,14 +162,14 @@ typedef enum
powl_neg_to_non_integer, /* 22 */
powl_nan_to_zero, /* 23 */
pow_overflow, pow_underflow, /* 24, 25 */
- pow_zero_to_zero, /* 26 */
+ pow_zero_to_zero, /* 26 */
pow_zero_to_negative, /* 27 */
pow_neg_to_non_integer, /* 28 */
pow_nan_to_zero, /* 29 */
powf_overflow, powf_underflow, /* 30, 31 */
powf_zero_to_zero, /* 32 */
- powf_zero_to_negative, /* 33 */
- powf_neg_to_non_integer, /* 34 */
+ powf_zero_to_negative, /* 33 */
+ powf_neg_to_non_integer, /* 34 */
powf_nan_to_zero, /* 35 */
atan2l_zero, /* 36 */
atan2_zero, /* 37 */
@@ -181,13 +195,13 @@ typedef enum
y0l_zero, y0l_negative,y0l_gt_loss, /* 66, 67, 68 */
y0_zero, y0_negative,y0_gt_loss, /* 69, 70, 71 */
y0f_zero, y0f_negative,y0f_gt_loss, /* 72, 73, 74 */
- y1l_zero, y1l_negative,y1l_gt_loss, /* 75, 76, 77 */
- y1_zero, y1_negative,y1_gt_loss, /* 78, 79, 80 */
- y1f_zero, y1f_negative,y1f_gt_loss, /* 81, 82, 83 */
+ y1l_zero, y1l_negative,y1l_gt_loss, /* 75, 76, 77 */
+ y1_zero, y1_negative,y1_gt_loss, /* 78, 79, 80 */
+ y1f_zero, y1f_negative,y1f_gt_loss, /* 81, 82, 83 */
ynl_zero, ynl_negative,ynl_gt_loss, /* 84, 85, 86 */
yn_zero, yn_negative,yn_gt_loss, /* 87, 88, 89 */
ynf_zero, ynf_negative,ynf_gt_loss, /* 90, 91, 92 */
- j0l_gt_loss, /* 93 */
+ j0l_gt_loss, /* 93 */
j0_gt_loss, /* 94 */
j0f_gt_loss, /* 95 */
j1l_gt_loss, /* 96 */
@@ -201,7 +215,7 @@ typedef enum
lgammaf_overflow, lgammaf_negative, lgammaf_reserve,/* 108, 109, 110 */
gammal_overflow,gammal_negative, gammal_reserve, /* 111, 112, 113 */
gamma_overflow, gamma_negative, gamma_reserve, /* 114, 115, 116 */
- gammaf_overflow,gammaf_negative,gammaf_reserve, /* 117, 118, 119 */
+ gammaf_overflow,gammaf_negative,gammaf_reserve, /* 117, 118, 119 */
fmodl_by_zero, /* 120 */
fmod_by_zero, /* 121 */
fmodf_by_zero, /* 122 */
@@ -222,7 +236,7 @@ typedef enum
ldexp_overflow, ldexp_underflow, /* 146, 147 */
ldexpf_overflow, ldexpf_underflow, /* 148, 149 */
logbl_zero, logb_zero, logbf_zero, /* 150, 151, 152 */
- nextafterl_overflow, nextafter_overflow,
+ nextafterl_overflow, nextafter_overflow,
nextafterf_overflow, /* 153, 154, 155 */
ilogbl_zero, ilogb_zero, ilogbf_zero, /* 156, 157, 158 */
exp2l_overflow, exp2l_underflow, /* 159, 160 */
@@ -235,18 +249,406 @@ typedef enum
log2f_zero, log2f_negative, /* 172, 173 */
scalbnl_overflow, scalbnl_underflow, /* 174, 175 */
scalbn_overflow, scalbn_underflow, /* 176, 177 */
- scalbnf_overflow, scalbnf_underflow /* 178, 179 */
+ scalbnf_overflow, scalbnf_underflow, /* 178, 179 */
+ remquol_by_zero, /* 180 */
+ remquo_by_zero, /* 181 */
+ remquof_by_zero, /* 182 */
+ lrintl_large, lrint_large, lrintf_large, /* 183, 184, 185 */
+ llrintl_large, llrint_large, llrintf_large, /* 186, 187, 188 */
+ lroundl_large, lround_large, lroundf_large, /* 189, 190, 191 */
+ llroundl_large, llround_large, llroundf_large, /* 192, 193, 194 */
+ fdiml_overflow, fdim_overflow, fdimf_overflow, /* 195, 196, 197 */
+ nexttowardl_overflow, nexttoward_overflow,
+ nexttowardf_overflow, /* 198, 199, 200 */
+ scalblnl_overflow, scalblnl_underflow, /* 201, 202 */
+ scalbln_overflow, scalbln_underflow, /* 203, 204 */
+ scalblnf_overflow, scalblnf_underflow, /* 205, 206 */
+ erfcl_underflow, erfc_underflow, erfcf_underflow, /* 207, 208, 209 */
+ acosdl_gt_one, acosd_gt_one, acosdf_gt_one, /* 210, 211, 212 */
+ asindl_gt_one, asind_gt_one, asindf_gt_one, /* 213, 214, 215 */
+ atan2dl_zero, atan2d_zero, atan2df_zero, /* 216, 217, 218 */
+ tandl_overflow, tand_overflow, tandf_overflow, /* 219, 220, 221 */
+ cotdl_overflow, cotd_overflow, cotdf_overflow, /* 222, 223, 224 */
+ cotl_overflow, cot_overflow, cotf_overflow, /* 225, 226, 227 */
+ sinhcoshl_overflow, sinhcosh_overflow, sinhcoshf_overflow, /* 228, 229, 230 */
+ annuityl_by_zero, annuity_by_zero, annuityf_by_zero, /* 231, 232, 233 */
+ annuityl_less_m1, annuity_less_m1, annuityf_less_m1, /* 234, 235, 236 */
+ annuityl_overflow, annuity_overflow, annuityf_overflow, /* 237, 238, 239 */
+ annuityl_underflow, annuity_underflow, annuityf_underflow, /* 240, 241, 242 */
+ compoundl_by_zero, compound_by_zero, compoundf_by_zero, /* 243, 244, 245 */
+ compoundl_less_m1, compound_less_m1, compoundf_less_m1, /* 246, 247, 248 */
+ compoundl_overflow, compound_overflow, compoundf_overflow, /* 249, 250, 251 */
+ compoundl_underflow, compound_underflow, compoundf_underflow, /* 252, 253, 254 */
+ tgammal_overflow, tgammal_negative, tgammal_reserve, /* 255, 256, 257 */
+ tgamma_overflow, tgamma_negative, tgamma_reserve, /* 258, 259, 260 */
+ tgammaf_overflow, tgammaf_negative, tgammaf_reserve, /* 261, 262, 263 */
} error_types;
void __libm_error_support(void*,void*,void*,error_types);
+#ifdef _LIBC
libc_hidden_proto(__libm_error_support)
+#endif
+
+#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
+#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
+
+#if !defined(__USE_EXTERNAL_FPMEMTYP_H__)
+
+#define BIAS_32 0x007F
+#define BIAS_64 0x03FF
+#define BIAS_80 0x3FFF
+
+#define MAXEXP_32 0x00FE
+#define MAXEXP_64 0x07FE
+#define MAXEXP_80 0x7FFE
+
+#define EXPINF_32 0x00FF
+#define EXPINF_64 0x07FF
+#define EXPINF_80 0x7FFF
+
+struct fp32 { /*// sign:1 exponent:8 significand:23 (implied leading 1)*/
+#if defined(SIZE_INT_32)
+ unsigned significand:23;
+ unsigned exponent:8;
+ unsigned sign:1;
+#elif defined(SIZE_INT_64)
+ unsigned significand:23;
+ unsigned exponent:8;
+ unsigned sign:1;
+#endif
+};
+
+struct fp64 { /*/ sign:1 exponent:11 significand:52 (implied leading 1)*/
+#if defined(SIZE_INT_32)
+ unsigned lo_significand:32;
+ unsigned hi_significand:20;
+ unsigned exponent:11;
+ unsigned sign:1;
+#elif defined(SIZE_INT_64)
+ unsigned significand:52;
+ unsigned exponent:11;
+ unsigned sign:1;
+#endif
+};
+
+struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
+#if defined(SIZE_INT_32)
+ unsigned lo_significand;
+ unsigned hi_significand;
+ unsigned exponent:15;
+ unsigned sign:1;
+#elif defined(SIZE_INT_64)
+ unsigned significand;
+ unsigned exponent:15;
+ unsigned sign:1;
+#endif
+};
+
+#endif /*__USE_EXTERNAL_FPMEMTYP_H__*/
+
+/* macros to form a double value in hex representation (unsigned int type) */
+
+#define DOUBLE_HEX(hi,lo) 0x##lo,0x##hi /*LITTLE_ENDIAN*/
+
+/* macros to form a long double value in hex representation (unsigned short type) */
+
+#if defined(_WIN32) || defined(_WIN64)
+#define LDOUBLE_ALIGN 16
+#else
+#define LDOUBLE_ALIGN 12
+#endif
+
+#if (LDOUBLE_ALIGN == 16)
+#define _XPD_ ,0x0000,0x0000,0x0000
+#else /*12*/
+#define _XPD_ ,0x0000
+#endif
+
+#define LDOUBLE_HEX(w4,w3,w2,w1,w0) 0x##w0,0x##w1,0x##w2,0x##w3,0x##w4 _XPD_ /*LITTLE_ENDIAN*/
+
+/* macros to sign-expand low 'num' bits of 'val' to native integer */
-#define BIAS_64 1023
-#define EXPINF_64 2047
+#if defined(SIZE_INT_32)
+# define SIGN_EXPAND(val,num) (((int)(val) << (32-(num))) >> (32-(num))) /* sign expand of 'num' LSBs; outer parentheses keep the shift pair intact inside larger expressions */
+#elif defined(SIZE_INT_64)
+# define SIGN_EXPAND(val,num) (((int)(val) << (64-(num))) >> (64-(num))) /* sign expand of 'num' LSBs; outer parentheses keep the shift pair intact inside larger expressions */
+#endif
+
+/* macros to form pointers to FP number on-the-fly */
+
+#define FP32(f) ((struct fp32 *)&f)
+#define FP64(d) ((struct fp64 *)&d)
+#define FP80(ld) ((struct fp80 *)&ld)
+
+/* macros to extract signed low and high doubleword of long double */
+
+#if defined(SIZE_INT_32)
+# define HI_DWORD_80(ld) ((((FP80(ld)->sign << 15) | FP80(ld)->exponent) << 16) | \
+ ((FP80(ld)->hi_significand >> 16) & 0xFFFF))
+# define LO_DWORD_80(ld) SIGN_EXPAND(FP80(ld)->lo_significand, 32)
+#elif defined(SIZE_INT_64)
+# define HI_DWORD_80(ld) ((((FP80(ld)->sign << 15) | FP80(ld)->exponent) << 16) | \
+ ((FP80(ld)->significand >> 48) & 0xFFFF))
+# define LO_DWORD_80(ld) SIGN_EXPAND(FP80(ld)->significand, 32)
+#endif
+
+/* macros to extract the high bits of the significand.
+ * note that the explicit high bit is not counted in NBITS (it is returned as is)
+ */
+
+#if defined(SIZE_INT_32)
+# define HI_SIGNIFICAND_80(X,NBITS) ((X)->hi_significand >> (31 - (NBITS)))
+#elif defined(SIZE_INT_64)
+# define HI_SIGNIFICAND_80(X,NBITS) ((X)->significand >> (63 - (NBITS)))
+#endif
+
+/* macros to check whether the significand bits are all zero, or whether some of them are non-zero.
+ * note that SIGNIFICAND_ZERO_80 also tests the high bit, but SIGNIFICAND_NONZERO_80 does not
+ */
+
+#define SIGNIFICAND_ZERO_32(X) ((X)->significand == 0)
+#define SIGNIFICAND_NONZERO_32(X) ((X)->significand != 0)
-#define DOUBLE_HEX(HI, LO) 0x ## LO, 0x ## HI
+#if defined(SIZE_INT_32)
+# define SIGNIFICAND_ZERO_64(X) (((X)->hi_significand == 0) && ((X)->lo_significand == 0))
+# define SIGNIFICAND_NONZERO_64(X) (((X)->hi_significand != 0) || ((X)->lo_significand != 0))
+#elif defined(SIZE_INT_64)
+# define SIGNIFICAND_ZERO_64(X) ((X)->significand == 0)
+# define SIGNIFICAND_NONZERO_64(X) ((X)->significand != 0)
+#endif
+
+#if defined(SIZE_INT_32)
+# define SIGNIFICAND_ZERO_80(X) (((X)->hi_significand == 0x00000000) && ((X)->lo_significand == 0))
+# define SIGNIFICAND_NONZERO_80(X) (((X)->hi_significand != 0x80000000) || ((X)->lo_significand != 0))
+#elif defined(SIZE_INT_64)
+# define SIGNIFICAND_ZERO_80(X) ((X)->significand == 0x0000000000000000)
+# define SIGNIFICAND_NONZERO_80(X) ((X)->significand != 0x8000000000000000)
+#endif
+
+/* macros to compare a floating-point value (32-, 64- or 80-bit layout) with a constant value, represented as hex */
+
+#define SIGNIFICAND_EQ_HEX_32(X,BITS) ((X)->significand == 0x ## BITS)
+#define SIGNIFICAND_GT_HEX_32(X,BITS) ((X)->significand > 0x ## BITS)
+#define SIGNIFICAND_GE_HEX_32(X,BITS) ((X)->significand >= 0x ## BITS)
+#define SIGNIFICAND_LT_HEX_32(X,BITS) ((X)->significand < 0x ## BITS)
+#define SIGNIFICAND_LE_HEX_32(X,BITS) ((X)->significand <= 0x ## BITS)
+
+#if defined(SIZE_INT_32)
+# define SIGNIFICAND_EQ_HEX_64(X,HI,LO) \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand == 0x ## LO))
+# define SIGNIFICAND_GT_HEX_64(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand > 0x ## LO)))
+# define SIGNIFICAND_GE_HEX_64(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand >= 0x ## LO)))
+# define SIGNIFICAND_LT_HEX_64(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand < 0x ## LO)))
+# define SIGNIFICAND_LE_HEX_64(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand <= 0x ## LO)))
+#elif defined(SIZE_INT_64)
+# define SIGNIFICAND_EQ_HEX_64(X,HI,LO) ((X)->significand == 0x ## HI ## LO)
+# define SIGNIFICAND_GT_HEX_64(X,HI,LO) ((X)->significand > 0x ## HI ## LO)
+# define SIGNIFICAND_GE_HEX_64(X,HI,LO) ((X)->significand >= 0x ## HI ## LO)
+# define SIGNIFICAND_LT_HEX_64(X,HI,LO) ((X)->significand < 0x ## HI ## LO)
+# define SIGNIFICAND_LE_HEX_64(X,HI,LO) ((X)->significand <= 0x ## HI ## LO)
+#endif
+
+#if defined(SIZE_INT_32)
+# define SIGNIFICAND_EQ_HEX_80(X,HI,LO) \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand == 0x ## LO))
+# define SIGNIFICAND_GT_HEX_80(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand > 0x ## LO)))
+# define SIGNIFICAND_GE_HEX_80(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand >= 0x ## LO)))
+# define SIGNIFICAND_LT_HEX_80(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand < 0x ## LO)))
+# define SIGNIFICAND_LE_HEX_80(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
+ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand <= 0x ## LO)))
+#elif defined(SIZE_INT_64)
+# define SIGNIFICAND_EQ_HEX_80(X,HI,LO) ((X)->significand == 0x ## HI ## LO)
+# define SIGNIFICAND_GT_HEX_80(X,HI,LO) ((X)->significand > 0x ## HI ## LO)
+# define SIGNIFICAND_GE_HEX_80(X,HI,LO) ((X)->significand >= 0x ## HI ## LO)
+# define SIGNIFICAND_LT_HEX_80(X,HI,LO) ((X)->significand < 0x ## HI ## LO)
+# define SIGNIFICAND_LE_HEX_80(X,HI,LO) ((X)->significand <= 0x ## HI ## LO)
+#endif
+
+#define VALUE_EQ_HEX_32(X,EXP,BITS) \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_32(X, BITS)))
+#define VALUE_GT_HEX_32(X,EXP,BITS) (((X)->exponent > (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_32(X, BITS))))
+#define VALUE_GE_HEX_32(X,EXP,BITS) (((X)->exponent > (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_32(X, BITS))))
+#define VALUE_LT_HEX_32(X,EXP,BITS) (((X)->exponent < (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_32(X, BITS))))
+#define VALUE_LE_HEX_32(X,EXP,BITS) (((X)->exponent < (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_32(X, BITS))))
+
+#define VALUE_EQ_HEX_64(X,EXP,HI,LO) \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_64(X, HI, LO)))
+#define VALUE_GT_HEX_64(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_64(X, HI, LO))))
+#define VALUE_GE_HEX_64(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_64(X, HI, LO))))
+#define VALUE_LT_HEX_64(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_64(X, HI, LO))))
+#define VALUE_LE_HEX_64(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_64(X, HI, LO))))
+
+#define VALUE_EQ_HEX_80(X,EXP,HI,LO) \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_80(X, HI, LO)))
+#define VALUE_GT_HEX_80(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_80(X, HI, LO))))
+#define VALUE_GE_HEX_80(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_80(X, HI, LO))))
+#define VALUE_LT_HEX_80(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_80(X, HI, LO))))
+#define VALUE_LE_HEX_80(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
+ (((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_80(X, HI, LO))))
+
+/* macros to compare two floating-point values (32-, 64- or 80-bit layout); only exponent and significand are compared, not the sign */
+
+#define SIGNIFICAND_EQ_32(X,Y) ((X)->significand == (Y)->significand)
+#define SIGNIFICAND_GT_32(X,Y) ((X)->significand > (Y)->significand)
+#define SIGNIFICAND_GE_32(X,Y) ((X)->significand >= (Y)->significand)
+#define SIGNIFICAND_LT_32(X,Y) ((X)->significand < (Y)->significand)
+#define SIGNIFICAND_LE_32(X,Y) ((X)->significand <= (Y)->significand)
+
+#if defined(SIZE_INT_32)
+# define SIGNIFICAND_EQ_64(X,Y) \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand))
+# define SIGNIFICAND_GT_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand > (Y)->lo_significand)))
+# define SIGNIFICAND_GE_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand)))
+# define SIGNIFICAND_LT_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand < (Y)->lo_significand)))
+# define SIGNIFICAND_LE_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand)))
+#elif defined(SIZE_INT_64)
+# define SIGNIFICAND_EQ_64(X,Y) ((X)->significand == (Y)->significand)
+# define SIGNIFICAND_GT_64(X,Y) ((X)->significand > (Y)->significand)
+# define SIGNIFICAND_GE_64(X,Y) ((X)->significand >= (Y)->significand)
+# define SIGNIFICAND_LT_64(X,Y) ((X)->significand < (Y)->significand)
+# define SIGNIFICAND_LE_64(X,Y) ((X)->significand <= (Y)->significand)
+#endif
+
+#if defined(SIZE_INT_32)
+# define SIGNIFICAND_EQ_80(X,Y) \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand))
+# define SIGNIFICAND_GT_80(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand > (Y)->lo_significand)))
+# define SIGNIFICAND_GE_80(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand)))
+# define SIGNIFICAND_LT_80(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand < (Y)->lo_significand)))
+# define SIGNIFICAND_LE_80(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand)))
+#elif defined(SIZE_INT_64)
+# define SIGNIFICAND_EQ_80(X,Y) ((X)->significand == (Y)->significand)
+# define SIGNIFICAND_GT_80(X,Y) ((X)->significand > (Y)->significand)
+# define SIGNIFICAND_GE_80(X,Y) ((X)->significand >= (Y)->significand)
+# define SIGNIFICAND_LT_80(X,Y) ((X)->significand < (Y)->significand)
+# define SIGNIFICAND_LE_80(X,Y) ((X)->significand <= (Y)->significand)
+#endif
+
+#define VALUE_EQ_32(X,Y) \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_32(X, Y)))
+#define VALUE_GT_32(X,Y) (((X)->exponent > (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_32(X, Y))))
+#define VALUE_GE_32(X,Y) (((X)->exponent > (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_32(X, Y))))
+#define VALUE_LT_32(X,Y) (((X)->exponent < (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_32(X, Y))))
+#define VALUE_LE_32(X,Y) (((X)->exponent < (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_32(X, Y))))
+
+#define VALUE_EQ_64(X,Y) \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_64(X, Y)))
+#define VALUE_GT_64(X,Y) (((X)->exponent > (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_64(X, Y))))
+#define VALUE_GE_64(X,Y) (((X)->exponent > (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_64(X, Y))))
+#define VALUE_LT_64(X,Y) (((X)->exponent < (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_64(X, Y))))
+#define VALUE_LE_64(X,Y) (((X)->exponent < (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_64(X, Y))))
+
+#define VALUE_EQ_80(X,Y) \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_80(X, Y)))
+#define VALUE_GT_80(X,Y) (((X)->exponent > (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_80(X, Y))))
+#define VALUE_GE_80(X,Y) (((X)->exponent > (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_80(X, Y))))
+#define VALUE_LT_80(X,Y) (((X)->exponent < (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_80(X, Y))))
+#define VALUE_LE_80(X,Y) (((X)->exponent < (Y)->exponent) || \
+ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_80(X, Y))))
+
+/* add/subtract 1 ulp macros */
+
+#if defined(SIZE_INT_32)
+# define ADD_ULP_80(X) \
+ if ((++(X)->lo_significand == 0) && \
+ (++(X)->hi_significand == (((X)->exponent == 0) ? 0x80000000 : 0))) \
+ { \
+ (X)->hi_significand |= 0x80000000; \
+ ++(X)->exponent; \
+ }
+# define SUB_ULP_80(X) \
+ if (--(X)->lo_significand == 0xFFFFFFFF) { \
+ --(X)->hi_significand; \
+ if (((X)->exponent != 0) && \
+ ((X)->hi_significand == 0x7FFFFFFF) && \
+ (--(X)->exponent != 0)) \
+ { \
+ (X)->hi_significand |= 0x80000000; \
+ } \
+ }
+#elif defined(SIZE_INT_64)
+# define ADD_ULP_80(X) /* add 1 ulp; if the significand wraps to 0 (or reaches the explicit-bit value when exponent is 0), set the explicit bit and bump the exponent */ \
+ if (++(X)->significand == (((X)->exponent == 0) ? 0x8000000000000000 : 0)) { \
+ (X)->significand |= 0x8000000000000000; \
+ ++(X)->exponent; \
+ }
+# define SUB_ULP_80(X) \
+ { \
+ --(X)->significand; \
+ if (((X)->exponent != 0) && \
+ ((X)->significand == 0x7FFFFFFFFFFFFFFF) && \
+ (--(X)->exponent != 0)) \
+ { \
+ (X)->significand |= 0x8000000000000000; \
+ } \
+ }
+#endif
+
+
+
+#if (defined(_WIN32) && !defined(_WIN64))
+
+#define FP80_DECLARE()
+#define _FPC_64 0x0300
+static unsigned short __wControlWord, __wNewControlWord;
+#define FP80_SET() { \
+ __asm { fnstcw word ptr [__wControlWord] } \
+ __wNewControlWord = __wControlWord | _FPC_64; \
+ __asm { fldcw word ptr [__wNewControlWord] } \
+ }
+#define FP80_RESET() { \
+ __asm { fldcw word ptr [__wControlWord] } \
+ }
+#else /* defined(_WIN32) && !defined(_WIN64) */
+
+#define FP80_DECLARE()
+#define FP80_SET()
+#define FP80_RESET()
+
+#endif /* defined(_WIN32) && !defined(_WIN64) */
+
+
+#ifdef _LIBC
+# include <math.h>
+#else
-#if 0
static const unsigned INF[] = {
DOUBLE_HEX(7ff00000, 00000000),
DOUBLE_HEX(fff00000, 00000000)
@@ -255,12 +657,12 @@ static const unsigned INF[] = {
static const double _zeroo = 0.0;
static const double _bigg = 1.0e300;
static const double _ponee = 1.0;
-static const double _nonee = -1.0;
+static const double _nonee = -1.0;
#define INVALID (_zeroo * *((double*)&INF[0]))
-#define PINF *((double*)&INF[0])
-#define NINF -PINF
-#define PINF_DZ (_ponee/_zeroo)
+#define PINF *((double*)&INF[0])
+#define NINF -PINF
+#define PINF_DZ (_ponee/_zeroo)
#define X_TLOSS 1.41484755040568800000e+16
#endif
@@ -278,7 +680,7 @@ struct __exception
char *name;
double arg1, arg2, retval;
};
-# else
+# else
# ifndef _LIBC
struct exception
@@ -300,18 +702,18 @@ struct exceptionl
};
#ifdef _MS_
-#define MATHERR_F _matherrf
-#define MATHERR_D _matherr
+#define MATHERR_F _matherrf
+#define MATHERR_D _matherr
#else
-#define MATHERR_F matherrf
-#define MATHERR_D matherr
+#define MATHERR_F matherrf
+#define MATHERR_D matherr
#endif
# ifdef __cplusplus
-#define EXC_DECL_D __exception
+#define EXC_DECL_D __exception
#else
// exception is a reserved name in C++
-#define EXC_DECL_D exception
+#define EXC_DECL_D exception
#endif
extern int MATHERR_F(struct exceptionf*);
@@ -324,7 +726,7 @@ extern int matherrl(struct exceptionl*);
#define ERRNO_DOMAIN errno = EDOM
-// Add code to support _LIB_VERSION
+// Add code to support _LIB_VERSIONIMF
#ifndef _LIBC
typedef enum
{
@@ -335,29 +737,19 @@ typedef enum
_ISOC_ // ISO C9X
} _LIB_VERSION_TYPE;
-extern _LIB_VERSION_TYPE _LIB_VERSION;
-#endif
-// This is a run-time variable and may effect
-// floating point behavior of the libm functions
-
-#elif defined _LIBC
-
-# if !defined NOT_IN_libc && defined SHARED && defined DO_VERSIONING \
- && !defined HAVE_BROKEN_ALIAS_ATTRIBUTE && !defined NO_HIDDEN
-# define __libm_error_support __GI___libm_error_support
-# endif
-
-#endif /* __ASSEMBLER__ */
-
-/* Support for compatible assembler handling. */
-#if !defined L && defined _LIBC
-#define L(name) .L##name
-#endif
-#ifdef __ELF__
-#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
-#define ASM_TYPE_DIRECTIVE(name,T) .type name,T
+#if !defined( LIBM_BUILD )
+#if defined( _DLL )
+extern _LIB_VERSION_TYPE __declspec(dllimport) _LIB_VERSIONIMF;
+#else
+extern _LIB_VERSION_TYPE _LIB_VERSIONIMF;
+#endif /* _DLL */
#else
-#define ASM_SIZE_DIRECTIVE(name)
-#define ASM_TYPE_DIRECTIVE(name,T)
+extern int (*pmatherrf)(struct exceptionf*);
+extern int (*pmatherr)(struct EXC_DECL_D*);
+extern int (*pmatherrl)(struct exceptionl*);
+#endif /* LIBM_BUILD */
+
+// This is a run-time variable and may affect
+// floating point behavior of the libm functions
#endif
diff --git a/sysdeps/ia64/fpu/s_atan.S b/sysdeps/ia64/fpu/s_atan.S
index c0daabd3d7..720ecad28a 100644
--- a/sysdeps/ia64/fpu/s_atan.S
+++ b/sysdeps/ia64/fpu/s_atan.S
@@ -1,10 +1,10 @@
.file "atan.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,947 +20,734 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 4/13/00: Improved speed
-// 4/19/00: Removed the qualifying predicate from the fmerge.s that
-// takes the absolute value.
-// 6/16/00: Reassigned FP registers to eliminate stalls on loads
-// 8/30/00: Saved 5 cycles in main path by rearranging large argument logic
-// and delaying use of result of fcmp in load by 1 group
+// 02/02/00 Initial version
+// 04/13/00 Improved speed
+// 04/19/00 Removed the qualifying predicate from the fmerge.s that
+// takes the absolute value.
+// 06/16/00 Reassigned FP registers to eliminate stalls on loads
+// 08/30/00 Saved 5 cycles in main path by rearranging large argument logic
+// and delaying use of result of fcmp in load by 1 group
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/20/02 Use atan2 algorithm with x=1 for better accuracy
+// 02/06/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
-// double atan( double x);
+// double atan(double Y)
//
// Overview of operation
//==============================================================
-// atan(x) = sign(X)pi/2 - atan(1/x)
//
-// We have two paths: |x| > 1 and |x| <= 1
+// The atan function returns values in the interval [-pi/2,+pi/2].
//
-// |x| > 1
-// ==========================================
+// The algorithm used is the atan2(Y,X) algorithm where we fix X=1.0.
//
-// c = frcpa(x) which is approximately 1/x
+// There are two basic paths: swap true and swap false.
+// atan2(Y,X) ==> atan2(V/U) where U >= V. If Y > X, we must swap.
//
-// xc = 1- B
-// B = 1-xc
+// p6 swap True |Y| > |X|
+// p7 swap False |Y| <= |X|
//
-// Approximate 1/(1-B)^k by a polynomial in B, poly(B)
-// k is 45.
//
-// poly(B) = 1 + r1 B + r2 B^2 + ...+ r10 B^10
+// Simple trigonometric identities show
+// Region 1
+// |Y|<=1.0, V=Y, U=1.0 atan2(Y,X) = sgnY * (0 + atan(V/U))
//
-// c^k = (1-B)^k/x^k
-// c^k/(1-B)^k = 1/x^k
-// c^k poly(B) = 1/x^k
-
-// poly(x) = series(atan(1/x)) = 1/x - 1/3x^3 + 1/5x^5 - 1/7x^7 .... + 1/45 x^45
-// = 1/x^45 ( x^44 - x^42/3 + x^40/5 - x^38/7 ... +1)
-// = 1/x^45 ( y^22 - y^21/3 + y^20/5 - y^19/7 ... +1)
-//
-// = c^45 poly(B) poly(x)
-// = c^45 r(B) q(y)
-
-// q(y) = q0 + q1 y + q2 y^2 + ... + q22 y^22
-// where q22 is 1.0
-
-// atan(x) = sign(X)pi/2 - c^45 r(B) q(y)
-
-// |x| <= 1
-// ==========================================
-// poly(x) = series(atan(x)) = x - x^3/3 + x^5/5 + .....
-// poly(x) = series(atan(x)) = x + x^3(- 1/3 + x^2/5 + ..... +x^47/47)
-// poly(x) = series(atan(x)) = x + x^3(p0 + x^2/5 + ..... + x^44/47)
-// poly(x) = series(atan(x)) = x + x^3(p0 + y/5 + ..... + y^22/47)
-
-// where p0 is about -1/3.
-
-// atan(x) = poly(x)
-
-#include "libm_support.h"
+// Region 2
+// |Y|>1.0, V=1.0, U=Y atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
+//
+//
+// We compute atan(V/U) from the identity
+// atan(z) + atan([(V/U)-z] / [1+(V/U)z])
+// where z is a limited precision approximation (16 bits) to V/U
+//
+// z is calculated with the assistance of the frcpa instruction.
+//
+// atan(z) is calculated by a polynomial z + z^3 * p(w), w=z^2
+// where p(w) = P0+P1*w+...+P22*w^22
+//
+// Let d = [(V/U)-z] / [1+(V/U)z] = (V-U*z)/(U+V*z)
+//
+// Approximate atan(d) by d + P0*d^3
+// Let F = 1/(U+V*z) * (1-a), where |a|< 2^-8.8.
+// Compute q(a) = 1 + a + ... + a^5.
+// Then F*q(a) approximates the reciprocal to more than 50 bits.
-// Special Values
+// Special values
//==============================================================
// atan(QNAN) = QNAN
// atan(SNAN) = quieted SNAN
-// atan(+-inf) = +- pi/2
+// atan(+-inf) = +- pi/2
// atan(+-0) = +-0
-
-
// Registers used
//==============================================================
-// predicate registers used:
-// p6 -> p11
+// predicate registers used:
+// p6 -> p15
-// floating-point registers used:
-// f32 -> f127
+// floating-point registers used:
+// f8, input
+// f32 -> f116
// general registers used
-// r32 -> r37
+// r14 -> r16
// Assembly macros
//==============================================================
-atan_Pi_by_2 = f32
-atan_S_PI = f33
-atan_ABS_f8 = f34
-
-atan_R0 = f35
-atan_R1 = f36
-atan_R2 = f37
-atan_R3 = f38
-atan_R4 = f39
-atan_R5 = f40
-atan_R6 = f41
-atan_R7 = f42
-atan_R8 = f43
-atan_R9 = f44
-atan_R10 = f45
-
-atan_Q0 = f46
-
-atan_Q1 = f47
-atan_Q2 = f48
-atan_Q3 = f49
-atan_Q4 = f50
-atan_Q5 = f51
-atan_Q6 = f52
-atan_Q7 = f53
-atan_Q8 = f54
-atan_Q9 = f55
-atan_Q10 = f56
-
-atan_Q11 = f57
-atan_Q12 = f58
-atan_Q13 = f59
-atan_Q14 = f60
-atan_Q15 = f61
-atan_Q16 = f62
-atan_Q17 = f63
-atan_Q18 = f64
-atan_Q19 = f65
-atan_Q20 = f66
-atan_Q21 = f67
-atan_Q22 = f68
-
-// P and Q constants are mutually exclusive
-// so they can share macro definitions
-atan_P0 = f46
-
-atan_P1 = f47
-atan_P2 = f48
-atan_P3 = f49
-atan_P4 = f10
-atan_P5 = f11
-atan_P6 = f12
-atan_P7 = f13
-atan_P10 = f103
-
-atan_P11 = f114
-atan_P12 = f58
-atan_P13 = f59
-atan_P14 = f60
-atan_P15 = f61
-atan_P16 = f62
-atan_P17 = f63
-atan_P18 = f64
-atan_P19 = f65
-atan_P20 = f14
-atan_P21 = f99
-atan_P22 = f68
-// end of P constant macros
-
-atan_C = f69
-atan_Y = f70
-atan_B = f71
-atan_Z = f72
-atan_V11 = f73
-atan_V12 = f74
-
-atan_V7 = f75
-atan_V8 = f76
-
-atan_W13 = f77
-atan_W11 = f78
-
-atan_V3 = f79
-atan_V4 = f80
-
-atan_G11 = f81
-atan_G12 = f82
-atan_G7 = f83
-atan_G8 = f84
-
-atan_Z1 = f85
-atan_W7 = f86
-
-atan_G3 = f87
-atan_W8 = f88
-atan_V9 = f89
-atan_V10 = f90
-
-atan_G10 = f91
-atan_W3 = f92
-atan_G4 = f93
-atan_G9 = f94
-
-atan_G6 = f95
-atan_W4 = f96
-atan_Z2 = f97
-atan_V6 = f98
-
-atan_V2 = f99
-atan_W6 = f100
-atan_W10 = f101
-atan_Y3 = f102
-
-atan_G2 = f103
-
-atan_Y8 = f104
-
-atan_G5 = f105
-atan_Z3 = f106
-atan_Z4 = f107
-atan_W2 = f108
-atan_V5 = f109
-
-atan_W5 = f110
-atan_G1 = f111
-atan_Y11 = f112
-
-atan_Z5 = f113
-atan_Z6 = f114
-atan_V1 = f115
-atan_W1 = f116
-
-atan_Z7 = f117
-atan_Q = f118
-atan_Z = f119
-atan_abs_f8 = f120
-
-atan_V13 = f121
-atan_Xcub = f122
-atan_Y12 = f123
-atan_P = f124
-
-atan_NORM_f8 = f125
-
-atan_P8 = f126
-atan_P9 = f127
-
-
-
-
-atan_GR_AD_R = r14
-atan_GR_AD_Q = r15
-atan_GR_AD_P = r16
-atan_GR_10172 = r17
-atan_GR_exp_f8 = r18
-atan_GR_signexp_f8 = r19
-atan_GR_exp_mask = r20
-
-
+EXP_AD_P1 = r14
+EXP_AD_P2 = r15
+rsig_near_one = r16
+
+atan2_Y = f8
+atan2_X = f1
+
+atan2_u1_X = f32
+atan2_u1_Y = f33
+atan2_z2_X = f34
+
+atan2_two = f36
+atan2_B1sq_Y = f37
+atan2_z1_X = f38
+atan2_B1X = f40
+
+atan2_B1Y = f41
+atan2_wp_X = f42
+atan2_B1sq_X = f43
+atan2_z = f44
+atan2_w = f45
+
+atan2_P0 = f46
+atan2_P1 = f47
+atan2_P2 = f48
+atan2_P3 = f49
+atan2_P4 = f50
+
+atan2_P5 = f51
+atan2_P6 = f52
+atan2_P7 = f53
+atan2_P8 = f54
+atan2_P9 = f55
+
+atan2_P10 = f56
+atan2_P11 = f57
+atan2_P12 = f58
+atan2_P13 = f59
+atan2_P14 = f60
+
+atan2_P15 = f61
+atan2_P16 = f62
+atan2_P17 = f63
+atan2_P18 = f64
+atan2_P19 = f65
+
+atan2_P20 = f66
+atan2_P21 = f67
+atan2_P22 = f68
+atan2_pi_by_2 = f69
+atan2_sgn_pi_by_2 = f69
+atan2_V13 = f70
+
+atan2_W11 = f71
+atan2_E = f72
+atan2_wp_Y = f73
+atan2_V11 = f74
+atan2_V12 = f75
+
+atan2_V7 = f76
+atan2_V8 = f77
+atan2_W7 = f78
+atan2_W8 = f79
+atan2_W3 = f80
+
+atan2_W4 = f81
+atan2_V3 = f82
+atan2_V4 = f83
+atan2_F = f84
+atan2_gV = f85
+
+atan2_V10 = f86
+atan2_zcub = f87
+atan2_V6 = f88
+atan2_V9 = f89
+atan2_W10 = f90
+
+atan2_W6 = f91
+atan2_W2 = f92
+atan2_V2 = f93
+atan2_alpha = f94
+atan2_alpha_1 = f95
+
+atan2_gVF = f96
+atan2_V5 = f97
+atan2_W12 = f98
+atan2_W5 = f99
+atan2_alpha_sq = f100
+
+atan2_Cp = f101
+atan2_V1 = f102
+atan2_ysq = f103
+atan2_W1 = f104
+atan2_alpha_cub = f105
+
+atan2_C = f106
+atan2_d = f108
+atan2_A_hi = f109
+atan2_dsq = f110
+
+atan2_pd = f111
+atan2_A_lo = f112
+atan2_A = f113
+atan2_Pp = f114
+atan2_sgnY = f115
+
+atan2_sig_near_one = f116
+atan2_near_one = f116
/////////////////////////////////////////////////////////////
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-double_atan_constants_R:
-ASM_TYPE_DIRECTIVE(double_atan_constants_R,@object)
- data8 0xB36B46B9C5443CED, 0x0000401C //R8
- data8 0x842633E0D126261F, 0x0000401F //R9
- data8 0xBE04FFFFFFFF46E0, 0x00004010 //R4
- data8 0xE8C62000244D66E2, 0x00004013 //R5
- data8 0xF2790C001E3789B3, 0x00004016 //R6
- data8 0xDCD2CCF97D7C764F, 0x00004019 //R7
- data8 0xB40000000000000B, 0x00004004 //R1
- data8 0xB265F3D38F5EE28F, 0x00004021 //R10
- data8 0x8160000000000001, 0x00004009 //R2
- data8 0xFD5BFFFFFFFE55CD, 0x0000400C //R3
- data8 0xC90FDAA22168C235, 0x00003FFF // pi/2
-ASM_SIZE_DIRECTIVE(double_atan_constants_R)
-
-double_atan_constants_Q:
-ASM_TYPE_DIRECTIVE(double_atan_constants_Q,@object)
- data8 0xEBD602FA7761BC33, 0x00003FF9 //Q8
- data8 0x8CB1CABD6A91913C, 0x0000BFFA //Q9
- data8 0x84C665C37D623CD2, 0x00003FF7 //Q4
- data8 0x8DE0D1673DAEA9BC, 0x0000BFF8 //Q5
- data8 0xF658ADBE2C6E6FCC, 0x00003FF8 //Q6
-
- data8 0xB56307BE1DD3FFB6, 0x0000BFF9 //Q7
- data8 0xAAAAAAAAAAAA8000, 0x0000BFFD //Q21
- data8 0x8000000000000000, 0x00003FFF //Q22
- data8 0x924924923A9D710C, 0x0000BFFC //Q19
- data8 0xCCCCCCCCCC9380E7, 0x00003FFC //Q20
-
- data8 0xA644DC250EFA2800, 0x00003FED //Q0
- data8 0x83DEAE24EEBF5E44, 0x0000BFF1 //Q1
- data8 0xC758CCC64793D4EC, 0x00003FF3 //Q2
- data8 0xBFDC0B54E7C89DCE, 0x0000BFF5 //Q3
- data8 0x888855199D1290AF, 0x0000BFFB //Q15
-
- data8 0x9D89D3BE514B0178, 0x00003FFB //Q16
- data8 0xBA2E8B4DEC70282A, 0x0000BFFB //Q17
- data8 0xE38E38DF9E9FC83B, 0x00003FFB //Q18
- data8 0x9F8781CC990029D9, 0x00003FFA //Q10
- data8 0xB0B39472DEBA3C79, 0x0000BFFA //Q11
-
- data8 0xC2AFAEF8C85B0BC6, 0x00003FFA //Q12
- data8 0xD780E539797525DD, 0x0000BFFA //Q13
- data8 0xF0EDC449AC786DF9, 0x00003FFA //Q14
-ASM_SIZE_DIRECTIVE(double_atan_constants_Q)
-
-
-
-double_atan_constants_P:
-ASM_TYPE_DIRECTIVE(double_atan_constants_P,@object)
- data8 0xB1899EC590CDB8DF, 0x0000BFFA //P10
- data8 0xA1E79850A67D59B0, 0x00003FFA //P11
- data8 0x911D8B30C2A96E6D, 0x0000BFF3 //P20
- data8 0xB87233C68A640706, 0x00003FF0 //P21
- data8 0xD78E4B82F3C29D7A, 0x0000BFFA //P8
-
- data8 0xC2EBE37AF932C14F, 0x00003FFA //P9
- data8 0xBA2E8B94AA104DD6, 0x0000BFFB //P4
- data8 0x9D89D7A640B71D38, 0x00003FFB //P5
- data8 0x88887CA2CE9B2A40, 0x0000BFFB //P6
- data8 0xF0F017D57A919C1E, 0x00003FFA //P7
-
- data8 0xD0D635F230C80E06, 0x0000BFF8 //P16
- data8 0xE847BECA7209B479, 0x00003FF7 //P17
- data8 0xD14C6A2AAE0D5B07, 0x0000BFF6 //P18
- data8 0x915F612A5C469117, 0x00003FF5 //P19
- data8 0x921EDE5FD0DBBBE2, 0x0000BFFA //P12
-
- data8 0xFFD303C2C8535445, 0x00003FF9 //P13
- data8 0xD30DF50E295386F7, 0x0000BFF9 //P14
- data8 0x9E81F2B1BBD210A8, 0x00003FF9 //P15
- data8 0xAAAAAAAAAAAAA800, 0x0000BFFD //P0
- data8 0xCCCCCCCCCCC7D476, 0x00003FFC //P1
-
- data8 0x9249249247838066, 0x0000BFFC //P2
- data8 0xE38E38E302290D68, 0x00003FFB //P3
- data8 0xDF7F0A816F7E5025, 0x0000BFEC //P22
-ASM_SIZE_DIRECTIVE(double_atan_constants_P)
-
-
-.align 32
-.global atan#
-
-////////////////////////////////////////////////////////
-
+LOCAL_OBJECT_START(atan2_tb1)
+data8 0xA21922DC45605EA1 , 0x00003FFA // P11
+data8 0xB199DD6D2675C40F , 0x0000BFFA // P10
+data8 0xC2F01E5DDD100DBE , 0x00003FFA // P9
+data8 0xD78F28FC2A592781 , 0x0000BFFA // P8
+data8 0xF0F03ADB3FC930D3 , 0x00003FFA // P7
+data8 0x88887EBB209E3543 , 0x0000BFFB // P6
+data8 0x9D89D7D55C3287A5 , 0x00003FFB // P5
+data8 0xBA2E8B9793955C77 , 0x0000BFFB // P4
+data8 0xE38E38E320A8A098 , 0x00003FFB // P3
+data8 0x9249249247E37913 , 0x0000BFFC // P2
+data8 0xCCCCCCCCCCC906CD , 0x00003FFC // P1
+data8 0xAAAAAAAAAAAAA8A9 , 0x0000BFFD // P0
+data8 0x0000000000000000 , 0x00000000 // pad to avoid bank conflict
+LOCAL_OBJECT_END(atan2_tb1)
+
+LOCAL_OBJECT_START(atan2_tb2)
+data8 0xCE585A259BD8374C , 0x00003FF0 // P21
+data8 0x9F90FB984D8E39D0 , 0x0000BFF3 // P20
+data8 0x9D3436AABE218776 , 0x00003FF5 // P19
+data8 0xDEC343E068A6D2A8 , 0x0000BFF6 // P18
+data8 0xF396268151CFB11C , 0x00003FF7 // P17
+data8 0xD818B4BB43D84BF2 , 0x0000BFF8 // P16
+data8 0xA2270D30A90AA220 , 0x00003FF9 // P15
+data8 0xD5F4F2182E7A8725 , 0x0000BFF9 // P14
+data8 0x80D601879218B53A , 0x00003FFA // P13
+data8 0x9297B23CCFFB291F , 0x0000BFFA // P12
+data8 0xFE7E52D2A89995B3 , 0x0000BFEC // P22
+data8 0xC90FDAA22168C235 , 0x00003FFF // pi/2
+LOCAL_OBJECT_END(atan2_tb2)
-.section .text
-.proc atan#
-.align 32
-atan:
-
-{ .mmf
-(p0) addl atan_GR_AD_P = @ltoff(double_atan_constants_P), gp
-(p0) addl atan_GR_AD_Q = @ltoff(double_atan_constants_Q), gp
-(p0) fmerge.s atan_ABS_f8 = f0,f8
-}
-;;
-{ .mmf
- ld8 atan_GR_AD_P = [atan_GR_AD_P]
- ld8 atan_GR_AD_Q = [atan_GR_AD_Q]
-(p0) frcpa.s1 atan_C,p8 = f1,f8
-}
-;;
+.section .text
+GLOBAL_LIBM_ENTRY(atan)
-{ .mmf
-(p0) addl atan_GR_AD_R = @ltoff(double_atan_constants_R), gp
-(p0) addl atan_GR_exp_mask = 0x1ffff, r0
-(p0) fma.s1 atan_Y = f8,f8,f0
+{ .mfi
+ nop.m 999
+ frcpa.s1 atan2_u1_Y,p7 = f1,atan2_Y
+ nop.i 999
}
+{ .mfi
+ addl EXP_AD_P1 = @ltoff(atan2_tb1), gp
+ fma.s1 atan2_two = f1,f1,f1
+ nop.i 999
;;
-
-// This fnorm takes faults or sets fault flags
-{ .mmf
-(p0) mov atan_GR_10172 = 0x10172
- ld8 atan_GR_AD_R = [atan_GR_AD_R]
-(p0) fnorm atan_NORM_f8 = f8
}
-;;
-
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 0 0 0 1 11
-// c 7
-
-// p9 set if we have a NAN or +-0
-{ .mmf
-(p0) ldfe atan_Q8 = [atan_GR_AD_Q],16
-(p0) ldfe atan_P10 = [atan_GR_AD_P],16
-(p0) fclass.m.unc p9, p0 = f8, 0xc7
+{ .mfi
+ ld8 EXP_AD_P1 = [EXP_AD_P1]
+ frcpa.s1 atan2_u1_X,p6 = f1,atan2_X
+ nop.i 999
}
-;;
-
-
-{ .mmi
-(p0) ldfe atan_Q9 = [atan_GR_AD_Q],16
-(p0) ldfe atan_P11 = [atan_GR_AD_P],16
- nop.i 999
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_ysq = atan2_Y,atan2_Y,f0
+ nop.i 999
}
;;
-
-{ .mmf
-(p0) ldfe atan_Q4 = [atan_GR_AD_Q],16
-(p0) ldfe atan_P20 = [atan_GR_AD_P],16
-(p9) fma.d.s0 f8 = f8,f1,f0
-;;
-}
-
-// Exit if we have a NAN or +-0
-{ .mmb
-(p0) ldfe atan_Q5 = [atan_GR_AD_Q],16
-(p0) ldfe atan_P21 = [atan_GR_AD_P],16
-(p9) br.ret.spnt b0
-;;
+{ .mfi
+ add EXP_AD_P2 = 0xd0,EXP_AD_P1
+ fmerge.s atan2_sgnY = atan2_Y,f1
+ nop.i 999
}
-
-
-// p6 is TRUE if |x| <= 1
-// p7 is TRUE if |x| > 1
-{ .mmf
-(p0) ldfe atan_Q6 = [atan_GR_AD_Q],16
-(p0) ldfe atan_P8 = [atan_GR_AD_P],16
-(p0) fcmp.le.unc p6,p7 = atan_ABS_f8, f1
;;
-}
{ .mfi
-(p0) ldfe atan_Q7 = [atan_GR_AD_Q],16
-(p0) fma.s1 atan_Z = atan_C, atan_C, f0
- nop.i 999
+ ldfe atan2_P11 = [EXP_AD_P1],16
+ fclass.m p10,p0 = atan2_Y, 0xc3 // Test for y=nan
+ nop.i 999
}
{ .mfi
-(p0) ldfe atan_P9 = [atan_GR_AD_P],16
-(p0) fnma.s1 atan_B = atan_C,f8, f1
- nop.i 999 ;;
+ ldfe atan2_P21 = [EXP_AD_P2],16
+ nop.f 999
+ nop.i 999
+;;
}
{ .mfi
-(p0) ldfe atan_Q21 = [atan_GR_AD_Q],16
-(p0) fma.s1 atan_V12 = atan_Y, atan_Y, f0
- nop.i 999
+ ldfe atan2_P10 = [EXP_AD_P1],16
+ fnma.s1 atan2_B1Y = atan2_u1_Y, atan2_Y, atan2_two
+ nop.i 999
}
{ .mfi
-(p0) ldfe atan_P4 = [atan_GR_AD_P],16
-(p0) fma.s1 atan_Xcub = f8, atan_Y , f0
- nop.i 999
-;;
-}
-
-
-{ .mmi
-(p7) ldfe atan_Q22 = [atan_GR_AD_Q],16
-(p6) ldfe atan_P5 = [atan_GR_AD_P],16
-(p6) cmp.eq.unc p8,p0 = r0,r0
-;;
-}
-
-
-{ .mmi
-(p7) ldfe atan_Q19 = [atan_GR_AD_Q],16
-(p6) ldfe atan_P6 = [atan_GR_AD_P],16
-(p7) cmp.eq.unc p9,p0 = r0,r0
-;;
-}
-
-
-{ .mmi
-(p7) ldfe atan_Q20 = [atan_GR_AD_Q],16
-(p6) ldfe atan_P7 = [atan_GR_AD_P],16
- nop.i 999
+ ldfe atan2_P20 = [EXP_AD_P2],16
+ fma.s1 atan2_wp_Y = atan2_u1_Y, atan2_u1_Y, f0
+ nop.i 999
;;
}
{ .mfi
-(p7) ldfe atan_Q0 = [atan_GR_AD_Q],16
-(p6) fma.s1 atan_V13 = atan_Y, atan_P11, atan_P10
- nop.i 999
+ ldfe atan2_P9 = [EXP_AD_P1],16
+ fma.s1 atan2_z1_X = atan2_u1_X, atan2_Y, f0
+ nop.i 999
}
{ .mfi
-(p6) ldfe atan_P16 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_V11 = atan_Y, atan_Q9, atan_Q8
- nop.i 999 ;;
+ ldfe atan2_P19 = [EXP_AD_P2],16
+ fnma.s1 atan2_B1X = atan2_u1_X, atan2_X, atan2_two
+ nop.i 999
}
-
+;;
{ .mfi
-(p7) ldfe atan_Q1 = [atan_GR_AD_Q],16
-(p7) fma.s1 atan_G12 = atan_B, atan_B, f0
- nop.i 999
+ ldfe atan2_P8 = [EXP_AD_P1],16
+ fma.s1 atan2_z2_X = atan2_u1_X, atan2_ysq, f0
+ nop.i 999
}
-{ .mfi
-(p6) ldfe atan_P17 = [atan_GR_AD_P],16
-(p0) fma.s1 atan_V9 = atan_V12, atan_V12, f0
- nop.i 999 ;;
+{ .mfb
+ ldfe atan2_P18 = [EXP_AD_P2],16
+(p10) fma.d.s0 f8 = atan2_Y,atan2_X,f0 // If y=nan, result quietized y
+(p10) br.ret.spnt b0 // Exit if y=nan
}
+;;
-
+// p6 true if swap, means |y| > 1.0 or ysq > 1.0
+// p7 true if no swap, means 1.0 >= |y| or 1.0 >= ysq
{ .mfi
-(p7) ldfe atan_Q2 = [atan_GR_AD_Q],16
-(p6) fma.s1 atan_W11 = atan_Y, atan_P21, atan_P20
- nop.i 999
+ ldfe atan2_P7 = [EXP_AD_P1],16
+ fcmp.ge.s1 p7,p6 = f1, atan2_ysq
+ nop.i 999
}
-{ .mfi
-(p6) ldfe atan_P18 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_V7 = atan_Y, atan_Q5, atan_Q4
- nop.i 999 ;;
+{ .mmf
+ ldfe atan2_P17 = [EXP_AD_P2],16
+ nop.m 999
+ nop.f 999
}
+;;
{ .mfi
-(p7) ldfe atan_Q3 = [atan_GR_AD_Q],16
-(p7) fma.s1 atan_Z1 = atan_Z, atan_Z, f0
- nop.i 999
+ ldfe atan2_P6 = [EXP_AD_P1],16
+ fma.s1 atan2_E = atan2_u1_Y, atan2_B1Y, atan2_Y
+ nop.i 999
}
{ .mfi
-(p6) ldfe atan_P19 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_Y3 = atan_Y , atan_V12, f0
- nop.i 999 ;;
+ ldfe atan2_P16 = [EXP_AD_P2],16
+ fma.s1 atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0
+ nop.i 999
+;;
}
{ .mfi
-(p7) ldfe atan_R8 = [atan_GR_AD_R],16
-(p6) fma.s1 atan_V11 = atan_Y, atan_P9, atan_P8
- nop.i 999
+ ldfe atan2_P5 = [EXP_AD_P1],16
+(p7) fma.s1 atan2_wp_X = atan2_z1_X, atan2_z1_X, f0
+ nop.i 999
}
{ .mfi
-(p6) ldfe atan_P12 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_V8 = atan_Y, atan_Q7, atan_Q6
- nop.i 999 ;;
-}
-
-{ .mmi
-(p7) ldfe atan_R9 = [atan_GR_AD_R],16
-(p6) ldfe atan_P13 = [atan_GR_AD_P],16
- nop.i 999
+ ldfe atan2_P15 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_B1sq_X = atan2_B1X, atan2_B1X, f0
+ nop.i 999
;;
}
{ .mfi
-(p7) ldfe atan_R4 = [atan_GR_AD_R],16
-(p6) fma.s1 atan_V7 = atan_Y, atan_P5, atan_P4
- nop.i 999
+ ldfe atan2_P4 = [EXP_AD_P1],16
+(p6) fma.s1 atan2_z = atan2_u1_Y, atan2_B1Y, f0
+ nop.i 999
}
{ .mfi
-(p6) ldfe atan_P14 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_W13 = atan_Y, atan_Q22, atan_Q21
- nop.i 999 ;;
+ ldfe atan2_P14 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_E = atan2_z2_X, atan2_B1X, atan2_X
+ nop.i 999
+;;
}
{ .mfi
-(p7) ldfe atan_R5 = [atan_GR_AD_R],16
-(p6) fma.s1 atan_Y12 = atan_V9 , atan_V9 , f0
- nop.i 999
+ ldfe atan2_P3 = [EXP_AD_P1],16
+ fcmp.eq.s0 p14,p15=atan2_X,atan2_Y // Dummy for denorm and invalid
+ nop.i 999
}
-{ .mfi
-(p6) ldfe atan_P15 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_Y8 = atan_V9 , atan_V9 , f0
- nop.i 999 ;;
+{ .mmf
+ ldfe atan2_P13 = [EXP_AD_P2],16
+ nop.m 999
+(p7) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0
+;;
}
-
{ .mfi
-(p7) ldfe atan_R6 = [atan_GR_AD_R],16
-(p6) fma.s1 atan_V8 = atan_Y, atan_P7, atan_P6
- nop.i 999
+ ldfe atan2_P2 = [EXP_AD_P1],16
+(p6) fma.s1 atan2_w = atan2_wp_Y, atan2_B1sq_Y,f0
+ nop.i 999
}
-{ .mfi
-(p6) ldfe atan_P0 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_W11 = atan_Y, atan_Q20, atan_Q19
- nop.i 999 ;;
+{ .mlx
+ ldfe atan2_P12 = [EXP_AD_P2],16
+ movl rsig_near_one = 0x8000000000000001 // signif near 1.0
+;;
}
-
{ .mfi
-(p7) ldfe atan_R7 = [atan_GR_AD_R],16
-(p7) fma.s1 atan_Z2 = atan_Z1 , atan_Z1, f0
- nop.i 999
+ ldfe atan2_P1 = [EXP_AD_P1],16
+ fclass.m p9,p0 = atan2_Y, 0x23 // test if y inf
+ nop.i 999
}
{ .mfi
-(p6) ldfe atan_P1 = [atan_GR_AD_P],16
-(p6) fma.s1 atan_V10 = atan_V12, atan_V13, atan_V11
- nop.i 999 ;;
+ ldfe atan2_P22 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_w = atan2_wp_X, atan2_B1sq_X,f0
+ nop.i 999
+;;
}
{ .mfi
-(p7) ldfe atan_Q15 = [atan_GR_AD_Q],16
-(p6) fma.s1 atan_W7 = atan_Y, atan_P17, atan_P16
- nop.i 999
+ ldfe atan2_P0 = [EXP_AD_P1],16
+ frcpa.s1 atan2_F,p0 = f1, atan2_E
+ nop.i 999
}
{ .mfi
-(p6) ldfe atan_P2 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_V3 = atan_Y, atan_Q1 , atan_Q0
- nop.i 999 ;;
+ ldfe atan2_pi_by_2 = [EXP_AD_P2],16
+(p6) fnma.s1 atan2_gV = atan2_Y, atan2_z, atan2_X
+ nop.i 999
+;;
}
{ .mfi
-(p7) ldfe atan_Q16 = [atan_GR_AD_Q],16
-(p7) fma.s1 atan_G9 = atan_G12, atan_G12, f0
- nop.i 999
+ setf.sig atan2_sig_near_one = rsig_near_one
+(p7) fnma.s1 atan2_gV = atan2_X, atan2_z, atan2_Y
+ nop.i 999
}
-{ .mfi
-(p6) ldfe atan_P3 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_V6 = atan_V12, atan_V8, atan_V7
- nop.i 999 ;;
+{ .mfb
+ nop.m 999
+(p9) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_2, f0 // +-pi/2 if y inf
+(p9) br.ret.spnt b0 // exit if y inf, result is +-pi/2
+;;
}
-
{ .mfi
-(p7) ldfe atan_R1 = [atan_GR_AD_R],16
-(p6) fma.s1 atan_W8 = atan_Y, atan_P19, atan_P18
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V13 = atan2_w, atan2_P11, atan2_P10
+ nop.i 999
}
{ .mfi
-(p6) ldfe atan_P22 = [atan_GR_AD_P],16
-(p7) fma.s1 atan_V4 = atan_Y, atan_Q3 , atan_Q2
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W11 = atan2_w, atan2_P21, atan2_P20
+ nop.i 999
+;;
}
-
{ .mfi
- getf.exp atan_GR_signexp_f8 = atan_NORM_f8
-(p7) fma.s1 atan_Y11 = atan_Y3 , atan_Y8 , f0
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V11 = atan2_w, atan2_P9, atan2_P8
+ nop.i 999
}
{ .mfi
-(p7) ldfe atan_Q17 = [atan_GR_AD_Q],16
-(p6) fma.s1 atan_V6 = atan_V12, atan_V8, atan_V7
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_V12 = atan2_w, atan2_w, f0
+ nop.i 999
+;;
}
-
{ .mfi
-(p7) ldfe atan_Q18 = [atan_GR_AD_Q],16
-(p6) fma.s1 atan_W3 = atan_Y, atan_P13, atan_P12
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V8 = atan2_w, atan2_P7 , atan2_P6
+ nop.i 999
}
{ .mfi
-(p7) ldfe atan_R10 = [atan_GR_AD_R],16
-(p7) fma.s1 atan_G11 = atan_B, atan_R9 , atan_R8
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W8 = atan2_w, atan2_P19, atan2_P18
+ nop.i 999
+;;
}
-
{ .mfi
-(p7) ldfe atan_Q10 = [atan_GR_AD_Q],16
-(p7) fma.s1 atan_Z3 = atan_Z1 , atan_Z2 , f0
-(p0) and atan_GR_exp_f8 = atan_GR_signexp_f8,atan_GR_exp_mask
+ nop.m 999
+ fnma.s1 atan2_alpha = atan2_E, atan2_F, f1
+ nop.i 999
}
{ .mfi
-(p7) ldfe atan_R2 = [atan_GR_AD_R],16
-(p7) fma.s1 atan_Z4 = atan_Z2 , atan_Z2 , f0
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 atan2_alpha_1 = atan2_E, atan2_F, atan2_two
+ nop.i 999
+;;
}
{ .mfi
-(p7) ldfe atan_Q11 = [atan_GR_AD_Q],16
-(p6) fma.s1 atan_W4 = atan_Y, atan_P15, atan_P14
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V7 = atan2_w, atan2_P5 , atan2_P4
+ nop.i 999
}
{ .mfi
-(p7) ldfe atan_R3 = [atan_GR_AD_R],16
-(p7) fma.s1 atan_G7 = atan_B, atan_R5 , atan_R4
-(p0) cmp.le.unc p11,p0 = atan_GR_10172,atan_GR_exp_f8
-;;
-}
-
-
-{ .mmf
-(p9) ldfe atan_Q12 = [atan_GR_AD_Q],16
-(p0) ldfe atan_S_PI = [atan_GR_AD_R],16
-(p8) fma.s1 atan_W6 = atan_V12, atan_W8, atan_W7
+ nop.m 999
+ fma.s1 atan2_W7 = atan2_w, atan2_P17, atan2_P16
+ nop.i 999
;;
}
-
-
{ .mfi
-(p9) ldfe atan_Q13 = [atan_GR_AD_Q],16
-(p8) fma.s1 atan_V3 = atan_Y, atan_P1 , atan_P0
-(p11) cmp.ne.and p6,p7 = r0,r0
+ nop.m 999
+ fma.s1 atan2_V4 = atan2_w, atan2_P3 , atan2_P2
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p8) fma.s1 atan_V5 = atan_V9 , atan_V10, atan_V6
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W4 = atan2_w, atan2_P15, atan2_P14
+ nop.i 999
+;;
}
-
-.pred.rel "mutex",p6,p7,p11
{ .mfi
-(p7) ldfe atan_Q14 = [atan_GR_AD_Q],16
-(p6) fma.s1 atan_Y12 = atan_V9 , atan_Y12, f0
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V3 = atan2_w, atan2_P1 , atan2_P0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_G8 = atan_B, atan_R7 , atan_R6
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W3 = atan2_w, atan2_P13, atan2_P12
+ nop.i 999
+;;
}
-
{ .mfi
- nop.m 999
-(p6) fma.s1 atan_V4 = atan_Y, atan_P3 , atan_P2
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V10 = atan2_V12, atan2_V13, atan2_V11
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W7 = atan_Y, atan_Q16, atan_Q15
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_gVF = atan2_gV, atan2_F, f0
+ nop.i 999
+;;
}
-
{ .mfi
- nop.m 999
-(p6) fma.s1 atan_W10 = atan_V12, atan_P22, atan_W11
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_alpha_sq = atan2_alpha, atan2_alpha, f0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_G3 = atan_B, atan_R1 , f1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_Cp = atan2_alpha, atan2_alpha_1, f1
+ nop.i 999
+;;
}
-
{ .mfi
- nop.m 999
-(p6) fma.s1 atan_W2 = atan_V12, atan_W4 , atan_W3
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V9 = atan2_V12, atan2_V12, f0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_V2 = atan_V12, atan_V4 , atan_V3
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W10 = atan2_V12, atan2_P22 , atan2_W11
+ nop.i 999
+;;
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W8 = atan_Y, atan_Q18, atan_Q17
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V6 = atan2_V12, atan2_V8 , atan2_V7
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_G10 = atan_G12, atan_R10, atan_G11
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W6 = atan2_V12, atan2_W8 , atan2_W7
+ nop.i 999
+;;
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_V10 = atan_V12, atan_Q10, atan_V11
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V2 = atan2_V12, atan2_V4 , atan2_V3
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_G6 = atan_G12, atan_G8 , atan_G7
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W2 = atan2_V12, atan2_W4 , atan2_W3
+ nop.i 999
+;;
}
-
{ .mfi
- nop.m 999
-(p6) fma.s1 atan_V2 = atan_V12, atan_V4, atan_V3
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_G4 = atan_B , atan_R3 , atan_R2
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_C = atan2_gVF, atan2_Cp, f0
+ nop.i 999
+;;
}
-
{ .mfi
- nop.m 999
-(p6) fma.s1 atan_W5 = atan_V9 , atan_W10, atan_W6
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W3 = atan_Y , atan_Q12, atan_Q11
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W12 = atan2_V9, atan2_V9, f0
+ nop.i 999
+;;
}
-
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_Z5 = atan_Z3 , atan_Z4 , f0
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V5 = atan2_V9, atan2_V10, atan2_V6
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W10 = atan_V12, atan_W13, atan_W11
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W5 = atan2_V9, atan2_W10, atan2_W6
+ nop.i 999
+;;
}
-
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W4 = atan_Y , atan_Q14, atan_Q13
- nop.i 999
+ nop.m 999
+ fclass.m p8,p0 = atan2_Y, 0x07 // Test for y=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W6 = atan_V12, atan_W8, atan_W7
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_d = atan2_alpha_cub, atan2_C, atan2_C
+ nop.i 999
}
+;;
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_V5 = atan_V9 , atan_V10, atan_V6
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_W12 = atan2_V9, atan2_W12, f0
+ nop.i 999
}
-{ .mfi
- nop.m 999
-(p7) fma.s1 atan_G5 = atan_G9 , atan_G10, atan_G6
- nop.i 999 ;;
-}
-
+;;
{ .mfi
- nop.m 999
-(p6) fma.s1 atan_V1 = atan_V9 , atan_V5 , atan_V2
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_V1 = atan2_V9, atan2_V5, atan2_V2
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_G2 = atan_G12, atan_G4 , atan_G3
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_W1 = atan2_V9, atan2_W5, atan2_W2
+ nop.i 999
+;;
}
-
-{ .mfi
- nop.m 999
-(p6) fma.s1 atan_W1 = atan_V9 , atan_W5 , atan_W2
- nop.i 999
-}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_Z6 = atan_Z4 , atan_C , f0
- nop.i 999 ;;
+ nop.m 999
+(p8) fmerge.s f8 = atan2_sgnY, f0 // +-0 if y=0
+ nop.i 999
}
-
-{ .mfi
- nop.m 999
-(p0) fmerge.s atan_S_PI = f8, atan_S_PI
- nop.i 999 ;;
+{ .mfb
+ nop.m 999
+ fma.s1 atan2_zcub = atan2_z, atan2_w, f0
+(p8) br.ret.spnt b0 // Exit if y=0
+;;
}
-
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W5 = atan_V9 , atan_W10, atan_W6
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_pd = atan2_P0, atan2_d, f0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W2 = atan_V12, atan_W4 , atan_W3
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_dsq = atan2_d, atan2_d, f0
+ nop.i 999
+;;
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_G1 = atan_G9 , atan_G5 , atan_G2
- nop.i 999
+ nop.m 999
+ fmerge.se atan2_near_one = f1, atan2_sig_near_one // Const ~1.0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_V1 = atan_V9 , atan_V5 , atan_V2
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_Pp = atan2_W12, atan2_W1, atan2_V1
+ nop.i 999
+;;
}
-
{ .mfi
- nop.m 999
-(p6) fma.s1 atan_P = atan_Y12, atan_W1 , atan_V1
- nop.i 999
+ nop.m 999
+ fma.s1 atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_Z7 = atan_Z5 , atan_Z6 , f0
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_A_lo = atan2_pd, atan2_dsq, atan2_d
+ nop.i 999
+;;
}
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_W1 = atan_V9 , atan_W5 , atan_W2
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 atan2_A_hi = atan2_zcub, atan2_Pp, atan2_z
+ nop.i 999
+;;
}
{ .mfi
- nop.m 999
-(p11) fma.d.s0 f8 = atan_S_PI,f1,f0
- nop.i 999
+ nop.m 999
+(p6) fma.s1 atan2_A = atan2_A_hi, f1, atan2_A_lo
+ nop.i 999
}
+// For |Y| <= |X| and X > 0, result is A_hi + A_lo
{ .mfi
- nop.m 999
-(p7) fma.s1 atan_Z = atan_G1 , atan_Z7 , f0
- nop.i 999 ;;
-}
-
-
-{ .mfi
- nop.m 999
-(p7) fma.s1 atan_Q = atan_Y11, atan_W1 , atan_V1
- nop.i 999 ;;
+ nop.m 999
+(p7) fma.d.s0 f8 = atan2_A_hi, f1, atan2_A_lo
+ nop.i 999
+;;
}
-
-{ .mfi
- nop.m 999
-(p6) fma.d.s0 f8 = atan_P , atan_Xcub , f8
- nop.i 999
-}
+// For |Y| > |X|, result is +- pi/2 - (A_hi + A_lo)
+// We perturb A by multiplying by 1.0+1ulp as we produce the result
+// in order to get symmetrically rounded results in directed rounding modes.
+// If we don't do this, there are a few cases where the trailing 11 bits of
+// the significand of the result, before converting to double, are zero. These
+// cases do not round symmetrically in round to +infinity or round to -infinity.
{ .mfb
- nop.m 999
-(p7) fnma.d.s0 f8 = atan_Z , atan_Q , atan_S_PI
-(p0) br.ret.sptk b0 ;;
+ nop.m 999
+(p6) fnma.d.s0 f8 = atan2_A, atan2_near_one, atan2_sgn_pi_by_2
+ br.ret.sptk b0
+;;
}
-.endp atan
-ASM_SIZE_DIRECTIVE(atan)
+GLOBAL_LIBM_END(atan)
diff --git a/sysdeps/ia64/fpu/s_atanf.S b/sysdeps/ia64/fpu/s_atanf.S
index b0a68737aa..fb7f4a307e 100644
--- a/sysdeps/ia64/fpu/s_atanf.S
+++ b/sysdeps/ia64/fpu/s_atanf.S
@@ -1,12 +1,10 @@
.file "atanf.s"
-// THIS IS NOT OPTIMIZED AND NOT OFFICIAL
-// Copyright (C) 2000, 2001, Intel Corporation
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -22,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -37,16 +35,18 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
-// ?/??/00 Initial revision
-// 8/17/00 Changed predicate register macro-usage to direct predicate
+// 02/20/00 Initial version
+// 08/17/00 Changed predicate register macro-usage to direct predicate
// names due to an assembler bug.
-
-#include "libm_support.h"
+// 02/06/02 Corrected .section statement
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align;
+// added missing bundling
//
// Assembly macros
@@ -140,16 +140,11 @@ atanf_answer = f8
//atanf_pred_GT1 = p7
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-atanf_coeff_1_table:
-ASM_TYPE_DIRECTIVE(atanf_coeff_1_table,@object)
+LOCAL_OBJECT_START(atanf_coeff_1_table)
data8 0x40c4c241be751ff2 // r4
data8 0x40e9f300c2f3070b // r5
data8 0x409babffef772075 // r3
@@ -164,12 +159,11 @@ data8 0xbfc2473c5145ee38 // p3
data8 0x3fbc4f512b1865f5 // p4
data8 0x3fc9997e7afbff4e // p2
data8 0x3ff921fb54442d18 // pi/2
-ASM_SIZE_DIRECTIVE(atanf_coeff_1_table)
+LOCAL_OBJECT_END(atanf_coeff_1_table)
-atanf_coeff_2_table:
-ASM_TYPE_DIRECTIVE(atanf_coeff_2_table,@object)
+LOCAL_OBJECT_START(atanf_coeff_2_table)
data8 0x4035000000004284 // r1
data8 0x406cdffff336a59b // r2
data8 0x3fbc4f512b1865f5 // p4 = q6
@@ -182,18 +176,12 @@ data8 0xbfa6e10ba401393f // p7
data8 0x3f97105b4160f86b // p8
data8 0xbf7deaadaa336451 // p9
data8 0x3f522e5d33bc9baa // p10
-ASM_SIZE_DIRECTIVE(atanf_coeff_2_table)
-
-
+LOCAL_OBJECT_END(atanf_coeff_2_table)
-.global atanf
-.text
-.proc atanf
-
-.align 32
-atanf:
+.section .text
+GLOBAL_LIBM_ENTRY(atanf)
{ .mfi
alloc r32 = ar.pfs,1,2,0,0
@@ -325,7 +313,7 @@ atanf:
{ .mfb
nop.m 999
fma.s1 atanf_x5 = atanf_t,atanf_xcub,f0
-(p8) br.cond.spnt L(ATANF_X_INF_NAN_ZERO)
+(p8) br.cond.spnt ATANF_X_INF_NAN_ZERO
}
;;
@@ -487,7 +475,7 @@ atanf:
{ .mfi
nop.m 999
- fma atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0
+ fma.s0 atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0
nop.i 999
}
{ .mfi
@@ -530,27 +518,38 @@ atanf:
{ .mfi
nop.m 999
//(atanf_pred_GT1) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
-(p7) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
+(p7) fnma.s.s0 atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
nop.i 999;;
}
{ .mfb
nop.m 999
//(atanf_pred_LE1) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
-(p6) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
+(p6) fma.s.s0 atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
br.ret.sptk b0
}
-L(ATANF_X_INF_NAN_ZERO):
+ATANF_X_INF_NAN_ZERO:
- fclass.m p8,p9 = f8,0x23 // @inf
+{ .mfi
+ nop.m 0
+ fclass.m p8,p9 = f8,0x23 // @inf
+ nop.i 0
+}
;;
+{ .mfi
+ nop.m 0
(p8) fmerge.s f8 = f8, atanf_piby2
+ nop.i 0
+}
;;
- fnorm.s f8 = f8
+{ .mfb
+ nop.m 0
+ fnorm.s.s0 f8 = f8
br.ret.sptk b0
+}
+;;
-.endp atanf
-ASM_SIZE_DIRECTIVE(atanf)
+GLOBAL_LIBM_END(atanf)
diff --git a/sysdeps/ia64/fpu/s_atanl.S b/sysdeps/ia64/fpu/s_atanl.S
index 28d44c1850..bfd9f458f4 100644
--- a/sysdeps/ia64/fpu/s_atanl.S
+++ b/sysdeps/ia64/fpu/s_atanl.S
@@ -1,10 +1,10 @@
.file "atanl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,41 +35,52 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
//
-// *********************************************************************
+//*********************************************************************
//
// History
-// 2/02/00 (hand-optimized)
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 (hand-optimized)
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 03/13/01 Fixed flags when denormal raised on intermediate result
+// 01/08/02 Improved speed.
+// 02/06/02 Corrected .section statement
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
//
-// *********************************************************************
+//*********************************************************************
//
// Function: atanl(x) = inverse tangent(x), for double extended x values
-// Function: atan2l(y,x) = atan(y/x), for double extended x values
+// Function: atan2l(y,x) = atan(y/x), for double extended y, x values
+//
+// API
+//
+// long double atanl (long double x)
+// long double atan2l (long double y, long double x)
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
-// f9-f15
-// f32-f79
+// f9 (Input for atan2l)
+// f10-f15, f32-f83
//
// General Purpose Registers:
-// r32-r48
-// r49,r50,r51,r52 (Arguments to error support for 0,0 case)
+// r32-r51
+// r49-r52 (Arguments to error support for 0,0 case)
//
// Predicate Registers: p6-p15
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
-// Denormal fault raised on denormal inputs
+// Denormal fault raised on denormal inputs
// Underflow exceptions may occur
// Special error handling for the y=0 and x=0 case
// Inexact raised when appropriate by algorithm
@@ -92,7 +103,7 @@
// atan2l(+/-Inf, Inf) = +/-pi/4
// atan2l(+/-Inf, -Inf) = +/-3pi/4
//
-// *********************************************************************
+//*********************************************************************
//
// Mathematical Description
// ---------------------------
@@ -108,16 +119,16 @@
//
//
// (Arg_X, Arg_Y) x
-// \
-// \
-// \
-// \
+// \
+// \
+// \
+// \
// \ angle between is ATANL(Arg_Y,Arg_X)
-// \
+// \
// ------------------> X-axis
// Origin
@@ -232,14 +243,14 @@
// z_hi = 2^k * 1.b_1 b_2 b_3 b_4 1
//
// then
-// / \
+// / \
// | (V/U) - z_hi |
// arctan(V/U) = arctan(z_hi) + acrtan| -------------- |
// | 1 + (V/U)*z_hi |
// \ /
//
-// / \
+// / \
// | V - z_hi*U |
// = arctan(z_hi) + acrtan| -------------- |
@@ -295,7 +306,7 @@
// U := max( |Arg_X|, |Arg_Y| )
// V := min( |Arg_X|, |Arg_Y| )
//
-// execute: frcap E, pred, V, U
+// execute: frcpa E, pred, V, U
// If pred is 0, go to Step 5 for special cases handling.
//
// Step 2. Decide on branch.
@@ -399,7 +410,7 @@
//
// z := V * E ...z approximates V/U to roughly working precision
// zsq := z * z
-// z8 := zsq * zsq; z8 := z8 * z8
+// z4 := zsq * zsq; z8 := z4 * z4
//
// poly1 := P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
// poly2 := zsq*(P_1 + zsq*(P_2 + zsq*P_3))
@@ -438,12 +449,11 @@
//
// Step 5. Special Cases
//
-// If pred is 0 where pred is obtained in
-// frcap E, pred, V, U
+// These are detected early in the function by fclass instructions.
//
-// we are in one of those special cases of 0,+-inf or NaN
+// We are in one of those special cases when X or Y is 0,+-inf or NaN
//
-// If one of U and V is NaN, return U+V (which will generate
+// If one of X and Y is NaN, return X+Y (which will generate
// invalid in case one is a signaling NaN). Otherwise,
// return the Result as described in the table
//
@@ -469,8 +479,6 @@
//
//
-#include "libm_support.h"
-
ArgY_orig = f8
Result = f8
FR_RESULT = f8
@@ -504,6 +512,7 @@ Res_hi = f49
Res_lo = f50
Z = f52
zsq = f53
+z4 = f54
z8 = f54
poly1 = f55
poly2 = f56
@@ -521,8 +530,8 @@ P_5 = f67
P_6 = f68
P_7 = f69
P_8 = f70
-TWO_TO_NEG3 = f71
-U_hold = f72
+U_hold = f71
+TWO_TO_NEG3 = f72
C_hi_hold = f73
E_hold = f74
M = f75
@@ -530,6 +539,11 @@ ArgX_abs = f76
ArgY_abs = f77
Result_lo = f78
A_temp = f79
+FR_temp = f80
+Xsq = f81
+Ysq = f82
+tmp_small = f83
+
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
@@ -545,1415 +559,1399 @@ exp_ArgY = r44
exponent_Q = r45
significand_Q = r46
special = r47
-special1 = r48
+sp_exp_Q = r48
+sp_exp_4sig_Q = r49
+table_base = r50
+int_temp = r51
+
GR_Parameter_X = r49
GR_Parameter_Y = r50
GR_Parameter_RESULT = r51
GR_Parameter_TAG = r52
-int_temp = r52
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-.align 64
-
-Constants_atan:
-ASM_TYPE_DIRECTIVE(Constants_atan,@object)
-data4 0x54442D18, 0x3FF921FB, 0x248D3132, 0x3E000000
-// double pi/2, single lo_pi/2, two**(-3)
-data4 0xAAAAAAA3, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // P_1
-data4 0xCCCC54B2, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // P_2
-data4 0x47E4D0C2, 0x92492492, 0x0000BFFC, 0x00000000 // P_3
-data4 0x58870889, 0xE38E38E0, 0x00003FFB, 0x00000000 // P_4
-data4 0x290149F8, 0xBA2E895B, 0x0000BFFB, 0x00000000 // P_5
-data4 0x250F733D, 0x9D88E6D4, 0x00003FFB, 0x00000000 // P_6
-data4 0xFB8745A0, 0x884E51FF, 0x0000BFFB, 0x00000000 // P_7
-data4 0x394396BD, 0xE1C7412B, 0x00003FFA, 0x00000000 // P_8
-data4 0xAAAAA52F, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // Q_1
-data4 0xC75B60D3, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // Q_2
-data4 0x011F1940, 0x924923AD, 0x0000BFFC, 0x00000000 // Q_3
-data4 0x2A5F89BD, 0xE36F716D, 0x00003FFB, 0x00000000 // Q_4
+GR_temp = r52
+
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(Constants_atan)
+// double pi/2
+data8 0x3FF921FB54442D18
+// single lo_pi/2, two**(-3)
+data4 0x248D3132, 0x3E000000
+data8 0xAAAAAAAAAAAAAAA3, 0xBFFD // P_1
+data8 0xCCCCCCCCCCCC54B2, 0x3FFC // P_2
+data8 0x9249249247E4D0C2, 0xBFFC // P_3
+data8 0xE38E38E058870889, 0x3FFB // P_4
+data8 0xBA2E895B290149F8, 0xBFFB // P_5
+data8 0x9D88E6D4250F733D, 0x3FFB // P_6
+data8 0x884E51FFFB8745A0, 0xBFFB // P_7
+data8 0xE1C7412B394396BD, 0x3FFA // P_8
+data8 0xAAAAAAAAAAAAA52F, 0xBFFD // Q_1
+data8 0xCCCCCCCCC75B60D3, 0x3FFC // Q_2
+data8 0x924923AD011F1940, 0xBFFC // Q_3
+data8 0xE36F716D2A5F89BD, 0x3FFB // Q_4
//
// Entries Tbl_hi (double precision)
// B = 1+Index/16+1/32 Index = 0
// Entries Tbl_lo (single precision)
// B = 1+Index/16+1/32 Index = 0
//
-data4 0xA935BD8E, 0x3FE9A000, 0x23ACA08F, 0x00000000
+data8 0x3FE9A000A935BD8E
+data4 0x23ACA08F, 0x00000000
//
// Entries Tbl_hi (double precision) Index = 0,1,...,15
// B = 2^(-1)*(1+Index/16+1/32)
// Entries Tbl_lo (single precision)
// Index = 0,1,...,15 B = 2^(-1)*(1+Index/16+1/32)
//
-data4 0x7F175A34, 0x3FDE77EB, 0x238729EE, 0x00000000
-data4 0x73C1A40B, 0x3FE0039C, 0x249334DB, 0x00000000
-data4 0x5B5B43DA, 0x3FE0C614, 0x22CBA7D1, 0x00000000
-data4 0x88BE7C13, 0x3FE1835A, 0x246310E7, 0x00000000
-data4 0xE2CC9E6A, 0x3FE23B71, 0x236210E5, 0x00000000
-data4 0x8406CBCA, 0x3FE2EE62, 0x2462EAF5, 0x00000000
-data4 0x1CD41719, 0x3FE39C39, 0x24B73EF3, 0x00000000
-data4 0x5B795B55, 0x3FE44506, 0x24C11260, 0x00000000
-data4 0x5BB6EC04, 0x3FE4E8DE, 0x242519EE, 0x00000000
-data4 0x1F732FBA, 0x3FE587D8, 0x24D4346C, 0x00000000
-data4 0x115D7B8D, 0x3FE6220D, 0x24ED487B, 0x00000000
-data4 0x920B3D98, 0x3FE6B798, 0x2495FF1E, 0x00000000
-data4 0x8FBA8E0F, 0x3FE74897, 0x223D9531, 0x00000000
-data4 0x289FA093, 0x3FE7D528, 0x242B0411, 0x00000000
-data4 0x576CC2C5, 0x3FE85D69, 0x2335B374, 0x00000000
-data4 0xA99CC05D, 0x3FE8E17A, 0x24C27CFB, 0x00000000
+data8 0x3FDE77EB7F175A34
+data4 0x238729EE, 0x00000000
+data8 0x3FE0039C73C1A40B
+data4 0x249334DB, 0x00000000
+data8 0x3FE0C6145B5B43DA
+data4 0x22CBA7D1, 0x00000000
+data8 0x3FE1835A88BE7C13
+data4 0x246310E7, 0x00000000
+data8 0x3FE23B71E2CC9E6A
+data4 0x236210E5, 0x00000000
+data8 0x3FE2EE628406CBCA
+data4 0x2462EAF5, 0x00000000
+data8 0x3FE39C391CD41719
+data4 0x24B73EF3, 0x00000000
+data8 0x3FE445065B795B55
+data4 0x24C11260, 0x00000000
+data8 0x3FE4E8DE5BB6EC04
+data4 0x242519EE, 0x00000000
+data8 0x3FE587D81F732FBA
+data4 0x24D4346C, 0x00000000
+data8 0x3FE6220D115D7B8D
+data4 0x24ED487B, 0x00000000
+data8 0x3FE6B798920B3D98
+data4 0x2495FF1E, 0x00000000
+data8 0x3FE748978FBA8E0F
+data4 0x223D9531, 0x00000000
+data8 0x3FE7D528289FA093
+data4 0x242B0411, 0x00000000
+data8 0x3FE85D69576CC2C5
+data4 0x2335B374, 0x00000000
+data8 0x3FE8E17AA99CC05D
+data4 0x24C27CFB, 0x00000000
//
// Entries Tbl_hi (double precision) Index = 0,1,...,15
// B = 2^(-2)*(1+Index/16+1/32)
// Entries Tbl_lo (single precision)
// Index = 0,1,...,15 B = 2^(-2)*(1+Index/16+1/32)
//
-data4 0x510665B5, 0x3FD025FA, 0x24263482, 0x00000000
-data4 0x362431C9, 0x3FD1151A, 0x242C8DC9, 0x00000000
-data4 0x67E47C95, 0x3FD20255, 0x245CF9BA, 0x00000000
-data4 0x7A823CFE, 0x3FD2ED98, 0x235C892C, 0x00000000
-data4 0x29271134, 0x3FD3D6D1, 0x2389BE52, 0x00000000
-data4 0x586890E6, 0x3FD4BDEE, 0x24436471, 0x00000000
-data4 0x175E0F4E, 0x3FD5A2E0, 0x2389DBD4, 0x00000000
-data4 0x9F5FA6FD, 0x3FD68597, 0x2476D43F, 0x00000000
-data4 0x52817501, 0x3FD76607, 0x24711774, 0x00000000
-data4 0xB8DF95D7, 0x3FD84422, 0x23EBB501, 0x00000000
-data4 0x7CD0C662, 0x3FD91FDE, 0x23883A0C, 0x00000000
-data4 0x66168001, 0x3FD9F930, 0x240DF63F, 0x00000000
-data4 0x5422058B, 0x3FDAD00F, 0x23FE261A, 0x00000000
-data4 0x378624A5, 0x3FDBA473, 0x23A8CD0E, 0x00000000
-data4 0x0AAD71F8, 0x3FDC7655, 0x2422D1D0, 0x00000000
-data4 0xC9EC862B, 0x3FDD45AE, 0x2344A109, 0x00000000
+data8 0x3FD025FA510665B5
+data4 0x24263482, 0x00000000
+data8 0x3FD1151A362431C9
+data4 0x242C8DC9, 0x00000000
+data8 0x3FD2025567E47C95
+data4 0x245CF9BA, 0x00000000
+data8 0x3FD2ED987A823CFE
+data4 0x235C892C, 0x00000000
+data8 0x3FD3D6D129271134
+data4 0x2389BE52, 0x00000000
+data8 0x3FD4BDEE586890E6
+data4 0x24436471, 0x00000000
+data8 0x3FD5A2E0175E0F4E
+data4 0x2389DBD4, 0x00000000
+data8 0x3FD685979F5FA6FD
+data4 0x2476D43F, 0x00000000
+data8 0x3FD7660752817501
+data4 0x24711774, 0x00000000
+data8 0x3FD84422B8DF95D7
+data4 0x23EBB501, 0x00000000
+data8 0x3FD91FDE7CD0C662
+data4 0x23883A0C, 0x00000000
+data8 0x3FD9F93066168001
+data4 0x240DF63F, 0x00000000
+data8 0x3FDAD00F5422058B
+data4 0x23FE261A, 0x00000000
+data8 0x3FDBA473378624A5
+data4 0x23A8CD0E, 0x00000000
+data8 0x3FDC76550AAD71F8
+data4 0x2422D1D0, 0x00000000
+data8 0x3FDD45AEC9EC862B
+data4 0x2344A109, 0x00000000
//
// Entries Tbl_hi (double precision) Index = 0,1,...,15
// B = 2^(-3)*(1+Index/16+1/32)
// Entries Tbl_lo (single precision)
// Index = 0,1,...,15 B = 2^(-3)*(1+Index/16+1/32)
//
-data4 0x84212B3D, 0x3FC068D5, 0x239874B6, 0x00000000
-data4 0x41060850, 0x3FC16465, 0x2335E774, 0x00000000
-data4 0x171A535C, 0x3FC25F6E, 0x233E36BE, 0x00000000
-data4 0xEDEB99A3, 0x3FC359E8, 0x239680A3, 0x00000000
-data4 0xC6092A9E, 0x3FC453CE, 0x230FB29E, 0x00000000
-data4 0xBA11570A, 0x3FC54D18, 0x230C1418, 0x00000000
-data4 0xFFB3AA73, 0x3FC645BF, 0x23F0564A, 0x00000000
-data4 0xE8A7D201, 0x3FC73DBD, 0x23D4A5E1, 0x00000000
-data4 0xE398EBC7, 0x3FC8350B, 0x23D4ADDA, 0x00000000
-data4 0x7D050271, 0x3FC92BA3, 0x23BCB085, 0x00000000
-data4 0x601081A5, 0x3FCA217E, 0x23BC841D, 0x00000000
-data4 0x574D780B, 0x3FCB1696, 0x23CF4A8E, 0x00000000
-data4 0x4D768466, 0x3FCC0AE5, 0x23BECC90, 0x00000000
-data4 0x4E1D5395, 0x3FCCFE65, 0x2323DCD2, 0x00000000
-data4 0x864C9D9D, 0x3FCDF110, 0x23F53F3A, 0x00000000
-data4 0x451D980C, 0x3FCEE2E1, 0x23CCB11F, 0x00000000
-
-data4 0x54442D18, 0x400921FB, 0x33145C07, 0x3CA1A626 // PI two doubles
-data4 0x54442D18, 0x3FF921FB, 0x33145C07, 0x3C91A626 // PI_by_2 two dbles
-data4 0x54442D18, 0x3FE921FB, 0x33145C07, 0x3C81A626 // PI_by_4 two dbles
-data4 0x7F3321D2, 0x4002D97C, 0x4C9E8A0A, 0x3C9A7939 // 3PI_by_4 two dbles
-ASM_SIZE_DIRECTIVE(Constants_atan)
-
-
-.text
-.proc atanl#
-.global atanl#
-.align 64
-
-atanl:
-{ .mfb
- nop.m 999
-(p0) mov ArgX_orig = f1
-(p0) br.cond.sptk atan2l ;;
-}
-.endp atanl
-ASM_SIZE_DIRECTIVE(atanl)
-
-.text
-.proc atan2l#
-.global atan2l#
-#ifdef _LIBC
-.proc __atan2l#
-.global __atan2l#
-.proc __ieee754_atan2l#
-.global __ieee754_atan2l#
-#endif
-.align 64
-
-
-atan2l:
-#ifdef _LIBC
-__atan2l:
-__ieee754_atan2l:
-#endif
-{ .mfi
-alloc r32 = ar.pfs, 0, 17 , 4, 0
-(p0) mov ArgY = ArgY_orig
-}
-{ .mfi
- nop.m 999
-(p0) mov ArgX = ArgX_orig
- nop.i 999
-};;
+data8 0x3FC068D584212B3D
+data4 0x239874B6, 0x00000000
+data8 0x3FC1646541060850
+data4 0x2335E774, 0x00000000
+data8 0x3FC25F6E171A535C
+data4 0x233E36BE, 0x00000000
+data8 0x3FC359E8EDEB99A3
+data4 0x239680A3, 0x00000000
+data8 0x3FC453CEC6092A9E
+data4 0x230FB29E, 0x00000000
+data8 0x3FC54D18BA11570A
+data4 0x230C1418, 0x00000000
+data8 0x3FC645BFFFB3AA73
+data4 0x23F0564A, 0x00000000
+data8 0x3FC73DBDE8A7D201
+data4 0x23D4A5E1, 0x00000000
+data8 0x3FC8350BE398EBC7
+data4 0x23D4ADDA, 0x00000000
+data8 0x3FC92BA37D050271
+data4 0x23BCB085, 0x00000000
+data8 0x3FCA217E601081A5
+data4 0x23BC841D, 0x00000000
+data8 0x3FCB1696574D780B
+data4 0x23CF4A8E, 0x00000000
+data8 0x3FCC0AE54D768466
+data4 0x23BECC90, 0x00000000
+data8 0x3FCCFE654E1D5395
+data4 0x2323DCD2, 0x00000000
+data8 0x3FCDF110864C9D9D
+data4 0x23F53F3A, 0x00000000
+data8 0x3FCEE2E1451D980C
+data4 0x23CCB11F, 0x00000000
+//
+data8 0x400921FB54442D18, 0x3CA1A62633145C07 // PI two doubles
+data8 0x3FF921FB54442D18, 0x3C91A62633145C07 // PI_by_2 two dbles
+data8 0x3FE921FB54442D18, 0x3C81A62633145C07 // PI_by_4 two dbles
+data8 0x4002D97C7F3321D2, 0x3C9A79394C9E8A0A // 3PI_by_4 two dbles
+LOCAL_OBJECT_END(Constants_atan)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(atanl)
+
+// Use common code with atan2l after setting x=1.0
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p7,p0 = ArgY_orig, 0x103
- nop.i 999
+ alloc r32 = ar.pfs, 0, 17, 4, 0
+ fma.s1 Ysq = ArgY_orig, ArgY_orig, f0 // Form y*y
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-//
-// Save original input args and load table ptr.
-//
-(p0) fclass.m.unc p6,p0 = ArgX_orig, 0x103
- nop.i 999
-};;
+ addl table_ptr1 = @ltoff(Constants_atan#), gp // Address of table pointer
+ fma.s1 Xsq = f1, f1, f0 // Form x*x
+ nop.i 999
+}
+;;
+
{ .mfi
-(p0) addl table_ptr1 = @ltoff(Constants_atan#), gp
-(p0) fclass.m.unc p0,p9 = ArgY_orig, 0x1FF
- nop.i 999 ;;
+ ld8 table_ptr1 = [table_ptr1] // Get table pointer
+ fnorm.s1 ArgY = ArgY_orig
+ nop.i 999
}
{ .mfi
- ld8 table_ptr1 = [table_ptr1]
-(p0) fclass.m.unc p0,p8 = ArgX_orig, 0x1FF
- nop.i 999
+ nop.m 999
+ fnorm.s1 ArgX = f1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p13,p0 = ArgY_orig, 0x0C3
- nop.i 999 ;;
+ getf.exp sign_X = f1 // Get signexp of x
+ fmerge.s ArgX_abs = f0, f1 // Form |x|
+ nop.i 999
}
{ .mfi
-(p0) fclass.m.unc p12,p0 = ArgX_orig, 0x0C3
- nop.i 999
+ nop.m 999
+ fnorm.s1 ArgX_orig = f1
+ nop.i 999
}
+;;
+{ .mfi
+ getf.exp sign_Y = ArgY_orig // Get signexp of y
+ fmerge.s ArgY_abs = f0, ArgY_orig // Form |y|
+ mov table_base = table_ptr1 // Save base pointer to tables
+}
+;;
-//
-// Check for NatVals.
-// Check for everything - if false, then must be pseudo-zero
-// or pseudo-nan (IA unsupporteds).
-//
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(ATANL_NATVAL) ;;
+{ .mfi
+ ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi
+ fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero
+ nop.i 999
}
+;;
-{ .mib
- nop.m 999
- nop.i 999
-(p7) br.cond.spnt L(ATANL_NATVAL) ;;
+{ .mfi
+ ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
+ nop.f 999
+ nop.i 999
}
-{ .mib
-(p0) ldfd P_hi = [table_ptr1],8
- nop.i 999
-(p8) br.cond.spnt L(ATANL_UNSUPPORTED) ;;
+{ .mfi
+ nop.m 999
+ fma.s1 M = f1, f1, f0 // Set M = 1.0
+ nop.i 999
}
-{ .mbb
-(p0) add table_ptr2 = 96, table_ptr1
-(p9) br.cond.spnt L(ATANL_UNSUPPORTED)
+;;
+
//
-// Load double precision high-order part of pi
+// Check for everything - if false, then must be pseudo-zero
+// or pseudo-nan (IA unsupporteds).
//
-(p12) br.cond.spnt L(ATANL_NAN) ;;
-}
{ .mfb
- nop.m 999
-(p0) fnorm.s1 ArgX = ArgX
-(p13) br.cond.spnt L(ATANL_NAN) ;;
-}
-//
-// Normalize the input argument.
-// Branch out if NaN inputs
-//
-{ .mmf
-(p0) ldfs P_lo = [table_ptr1], 4
- nop.m 999
-(p0) fnorm.s1 ArgY = ArgY ;;
+ nop.m 999
+ fclass.m p0,p12 = f1, 0x1FF // Test x unsupported
+(p8) br.cond.spnt ATANL_Y_SPECIAL // Branch if y natval, nan, inf, zero
}
-{ .mmf
- nop.m 999
-(p0) ldfs TWO_TO_NEG3 = [table_ptr1], 180
-//
+;;
+
// U = max(ArgX_abs,ArgY_abs)
// V = min(ArgX_abs,ArgY_abs)
-// if PR1, swap = 0
-// if PR2, swap = 1
-//
-(p0) mov M = f1 ;;
-}
{ .mfi
- nop.m 999
-//
-// Get exp and sign of ArgX
-// Get exp and sign of ArgY
-// Load 2**(-3) and increment ptr to Q_4.
-//
-(p0) fmerge.s ArgX_abs = f1, ArgX
- nop.i 999 ;;
+ nop.m 999
+ fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares
+ nop.i 999
}
-//
-// load single precision low-order part of pi = P_lo
-//
+{ .mfb
+ nop.m 999
+ fma.s1 V = ArgX_abs, f1, f0 // Set V assuming |x| < |y|
+ br.cond.sptk ATANL_COMMON // Branch to common code
+}
+;;
+
+GLOBAL_IEEE754_END(atanl)
+GLOBAL_IEEE754_ENTRY(atan2l)
+
{ .mfi
-(p0) getf.exp sign_X = ArgX
-(p0) fmerge.s ArgY_abs = f1, ArgY
- nop.i 999 ;;
+ alloc r32 = ar.pfs, 0, 17, 4, 0
+ fma.s1 Ysq = ArgY_orig, ArgY_orig, f0 // Form y*y
+ nop.i 999
}
-{ .mii
-(p0) getf.exp sign_Y = ArgY
- nop.i 999 ;;
-(p0) shr sign_X = sign_X, 17 ;;
+{ .mfi
+ addl table_ptr1 = @ltoff(Constants_atan#), gp // Address of table pointer
+ fma.s1 Xsq = ArgX_orig, ArgX_orig, f0 // Form x*x
+ nop.i 999
}
-{ .mii
- nop.m 999
-(p0) shr sign_Y = sign_Y, 17 ;;
-(p0) cmp.eq.unc p8, p9 = 0x00000, sign_Y ;;
+;;
+
+{ .mfi
+ ld8 table_ptr1 = [table_ptr1] // Get table pointer
+ fnorm.s1 ArgY = ArgY_orig
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Is ArgX_abs >= ArgY_abs
-// Is sign_Y == 0?
-//
-(p0) fmax.s1 U = ArgX_abs, ArgY_abs
- nop.i 999
+ nop.m 999
+ fnorm.s1 ArgX = ArgX_orig
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// ArgX_abs = |ArgX|
-// ArgY_abs = |ArgY|
-// sign_X is sign bit of ArgX
-// sign_Y is sign bit of ArgY
-//
-(p0) fcmp.ge.s1 p6, p7 = ArgX_abs, ArgY_abs
- nop.i 999 ;;
+ getf.exp sign_X = ArgX_orig // Get signexp of x
+ fmerge.s ArgX_abs = f0, ArgX_orig // Form |x|
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmin.s1 V = ArgX_abs, ArgY_abs
- nop.i 999 ;;
+ getf.exp sign_Y = ArgY_orig // Get signexp of y
+ fmerge.s ArgY_abs = f0, ArgY_orig // Form |y|
+ mov table_base = table_ptr1 // Save base pointer to tables
}
+;;
+
{ .mfi
- nop.m 999
-(p8) fadd.s1 s_Y = f0, f1
-(p6) cmp.eq.unc p10, p11 = 0x00000, sign_X
+ ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi
+ fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero
+ nop.i 999
}
-{ .mii
-(p6) add swap = r0, r0
- nop.i 999 ;;
-(p7) add swap = 1, r0
+;;
+
+{ .mfi
+ ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
+ fclass.m p9,p0 = ArgX_orig, 0x1e7 // Test x natval, nan, inf, zero
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
+ fma.s1 M = f1, f1, f0 // Set M = 1.0
+ nop.i 999
+}
+;;
+
//
-// Let M = 1.0
-// if p8, s_Y = 1.0
-// if p9, s_Y = -1.0
+// Check for everything - if false, then must be pseudo-zero
+// or pseudo-nan (IA unsupporteds).
//
-(p10) fsub.s1 M = M, f1
- nop.i 999 ;;
+{ .mfb
+ nop.m 999
+ fclass.m p0,p12 = ArgX_orig, 0x1FF // Test x unsupported
+(p8) br.cond.spnt ATANL_Y_SPECIAL // Branch if y natval, nan, inf, zero
}
+;;
+
+// U = max(ArgX_abs,ArgY_abs)
+// V = min(ArgX_abs,ArgY_abs)
{ .mfi
- nop.m 999
-(p9) fsub.s1 s_Y = f0, f1
- nop.i 999 ;;
+ nop.m 999
+ fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares
+ nop.i 999
}
+{ .mfb
+ nop.m 999
+ fma.s1 V = ArgX_abs, f1, f0 // Set V assuming |x| < |y|
+(p9) br.cond.spnt ATANL_X_SPECIAL // Branch if x natval, nan, inf, zero
+}
+;;
+
+// Now common code for atanl and atan2l
+ATANL_COMMON:
{ .mfi
- nop.m 999
-(p0) frcpa.s1 E, p6 = V, U
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p0,p13 = ArgY_orig, 0x1FF // Test y unsupported
+ shr sign_X = sign_X, 17 // Get sign bit of x
+}
+{ .mfi
+ nop.m 999
+ fma.s1 U = ArgY_abs, f1, f0 // Set U assuming |x| < |y|
+ adds table_ptr1 = 176, table_ptr1 // Point to Q4
}
-{ .mbb
- nop.m 999
+;;
+
+{ .mfi
+(p6) add swap = r0, r0 // Set swap=0 if |x| >= |y|
+(p6) frcpa.s1 E, p0 = ArgY_abs, ArgX_abs // Compute E if |x| >= |y|
+ shr sign_Y = sign_Y, 17 // Get sign bit of y
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s1 V = ArgY_abs, f1, f0 // Set V if |x| >= |y|
+(p12) br.cond.spnt ATANL_UNSUPPORTED // Branch if x unsupported
+}
+;;
+
+// Set p8 if y >=0
+// Set p9 if y < 0
+// Set p10 if |x| >= |y| and x >=0
+// Set p11 if |x| >= |y| and x < 0
+{ .mfi
+ cmp.eq p8, p9 = 0, sign_Y // Test for y >= 0
+(p7) frcpa.s1 E, p0 = ArgX_abs, ArgY_abs // Compute E if |x| < |y|
+(p7) add swap = 1, r0 // Set swap=1 if |x| < |y|
+}
+{ .mfb
+(p6) cmp.eq.unc p10, p11 = 0, sign_X // If |x| >= |y|, test for x >= 0
+(p6) fma.s1 U = ArgX_abs, f1, f0 // Set U if |x| >= |y|
+(p13) br.cond.spnt ATANL_UNSUPPORTED // Branch if y unsupported
+}
+;;
+
//
-// E = frcpa(V,U)
+// if p8, s_Y = 1.0
+// if p9, s_Y = -1.0
//
-(p6) br.cond.sptk L(ATANL_STEP2)
-(p0) br.cond.spnt L(ATANL_SPECIAL_HANDLING) ;;
+.pred.rel "mutex",p8,p9
+{ .mfi
+ nop.m 999
+(p8) fadd.s1 s_Y = f0, f1 // If y >= 0 set s_Y = 1.0
+ nop.i 999
}
-L(ATANL_STEP2):
{ .mfi
- nop.m 999
-(p0) fmpy.s1 Q = E, V
- nop.i 999
+ nop.m 999
+(p9) fsub.s1 s_Y = f0, f1 // If y < 0 set s_Y = -1.0
+ nop.i 999
}
+;;
+
+.pred.rel "mutex",p10,p11
{ .mfi
- nop.m 999
-(p0) fcmp.eq.s0 p0, p9 = f1, ArgY_orig
- nop.i 999 ;;
+ nop.m 999
+(p10) fsub.s1 M = M, f1 // If |x| >= |y| and x >=0, set M=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Is Q < 2**(-3)?
-//
-(p0) fcmp.eq.s0 p0, p8 = f1, ArgX_orig
- nop.i 999
+ nop.m 999
+(p11) fadd.s1 M = M, f1 // If |x| >= |y| and x < 0, set M=2.0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fadd.s1 M = M, f1
- nop.i 999 ;;
+ nop.m 999
+ fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig // Dummy to set denormal flag
+ nop.i 999
}
-{ .mlx
- nop.m 999
// *************************************************
// ********************* STEP2 *********************
// *************************************************
-(p0) movl special = 0x8400000000000000
-}
-{ .mlx
- nop.m 999
//
-// lookup = b_1 b_2 b_3 B_4
+// Q = E * V
//
-(p0) movl special1 = 0x0000000000000100 ;;
+{ .mfi
+ nop.m 999
+ fmpy.s1 Q = E, V
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Do fnorms to raise any denormal operand
-// exceptions.
-//
-(p0) fmpy.s1 P_hi = M, P_hi
- nop.i 999
+ nop.m 999
+ fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (1) if POLY path
+ nop.i 999
}
+;;
+
+// Create a single precision representation of the signexp of Q with the
+// 4 most significant bits of the significand followed by a 1 and then 18 0's
{ .mfi
- nop.m 999
-(p0) fmpy.s1 P_lo = M, P_lo
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 P_hi = M, P_hi
+ dep.z special = 0x1, 18, 1 // Form 0x0000000000040000
}
{ .mfi
- nop.m 999
-//
-// Q = E * V
-//
-(p0) fcmp.lt.unc.s1 p6, p7 = Q, TWO_TO_NEG3
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 P_lo = M, P_lo
+ add table_ptr2 = 32, table_ptr1
}
-{ .mmb
-(p0) getf.sig significand_Q = Q
-(p0) getf.exp exponent_Q = Q
- nop.b 999 ;;
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 A_temp = Q, f1, f0 // Set A_temp if POLY path
+ nop.i 999
}
-{ .mmi
- nop.m 999 ;;
-(p0) andcm k = 0x0003, exponent_Q
-(p0) extr.u lookup = significand_Q, 59, 4 ;;
+{ .mfi
+ nop.m 999
+ fma.s1 E = E, E_hold, E // E = E + E*E_hold (1) if POLY path
+ nop.i 999
}
-{ .mib
- nop.m 999
-(p0) dep special = lookup, special, 59, 4
+;;
+
//
-// Generate 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+// Is Q < 2**(-3)?
+// swap = xor(swap,sign_X)
//
-(p6) br.cond.spnt L(ATANL_POLY) ;;
-}
{ .mfi
-(p0) cmp.eq.unc p8, p9 = 0x0000, k
-(p0) fmpy.s1 P_hi = s_Y, P_hi
+ nop.m 999
+ fcmp.lt.s1 p9, p0 = Q, TWO_TO_NEG3 // Test Q < 2^-3
+ xor swap = sign_X, swap
+}
+;;
+
+// P_hi = s_Y * P_hi
+{ .mmf
+ getf.exp exponent_Q = Q // Get signexp of Q
+ cmp.eq.unc p7, p6 = 0x00000, swap
+ fmpy.s1 P_hi = s_Y, P_hi
+}
+;;
+
//
-// We waited a few extra cycles so P_lo and P_hi could be calculated.
-// Load the constant 256 for loading up table entries.
+// if (PR_1) sigma = -1.0
+// if (PR_2) sigma = 1.0
+//
+{ .mfi
+ getf.sig significand_Q = Q // Get significand of Q
+(p6) fsub.s1 sigma = f0, f1
+ nop.i 999
+}
+{ .mfb
+(p9) add table_ptr1 = 128, table_base // Point to P8 if POLY path
+(p7) fadd.s1 sigma = f0, f1
+(p9) br.cond.spnt ATANL_POLY // Branch to POLY if 0 < Q < 2^-3
+}
+;;
+
//
// *************************************************
// ******************** STEP3 **********************
// *************************************************
-(p0) add table_ptr2 = 16, table_ptr1
-}
//
-// Let z_hi have exponent and sign of original Q
-// Load the Tbl_hi(0) else, increment pointer.
+// lookup = b_1 b_2 b_3 b_4
//
-{ .mii
-(p0) ldfe Q_4 = [table_ptr1], -16
-(p0) xor swap = sign_X, swap ;;
-(p9) sub k = k, r0, 1
-}
{ .mmi
-(p0) setf.sig z_hi = special
-(p0) ldfe Q_3 = [table_ptr1], -16
-(p9) add table_ptr2 = 16, table_ptr2 ;;
+ nop.m 999
+ nop.m 999
+ andcm k = 0x0003, exponent_Q // k=0,1,2,3 for exp_Q=0,-1,-2,-3
}
+;;
+
//
-// U_hold = U - U_prime_hi
-// k = k * 256 - Result can be 0, 256, or 512.
+// Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0 in single precision
+// representation. Note sign of Q is always 0.
//
-{ .mmb
-(p0) ldfe Q_2 = [table_ptr1], -16
-(p8) ldfd Tbl_hi = [table_ptr2], 8
- nop.b 999 ;;
+{ .mfi
+ cmp.eq p8, p9 = 0x0000, k // Test k=0
+ nop.f 999
+ extr.u lookup = significand_Q, 59, 4 // Extract b_1 b_2 b_3 b_4 for index
}
-//
-// U_prime_lo = U_hold + V * z_hi
-// lookup -> lookup * 16 + k
-//
-{ .mmi
-(p0) ldfe Q_1 = [table_ptr1], -16 ;;
-(p8) ldfs Tbl_lo = [table_ptr2], 8
-//
-// U_prime_hi = U + V * z_hi
-// Load the Tbl_lo(0)
-//
-(p9) pmpy2.r k = k, special1 ;;
+{ .mfi
+ sub sp_exp_Q = 0x7f, k // Form single prec biased exp of Q
+ nop.f 999
+ sub k = k, r0, 1 // Decrement k
}
-{ .mii
- nop.m 999
- nop.i 999
- nop.i 999 ;;
+;;
+
+// Form pointer to B index table
+{ .mfi
+ ldfe Q_4 = [table_ptr1], -16 // Load Q_4
+ nop.f 999
+(p9) shl k = k, 8 // k = 0, 256, or 512
}
-{ .mii
- nop.m 999
- nop.i 999
- nop.i 999 ;;
+{ .mfi
+(p9) shladd table_ptr2 = lookup, 4, table_ptr2
+ nop.f 999
+ shladd sp_exp_4sig_Q = sp_exp_Q, 4, lookup // Shift and add in 4 high bits
}
-{ .mii
- nop.m 999
- nop.i 999
- nop.i 999 ;;
+;;
+
+{ .mmi
+(p8) add table_ptr2 = -16, table_ptr2 // Pointer if original k was 0
+(p9) add table_ptr2 = k, table_ptr2 // Pointer if k was 1, 2, 3
+ dep special = sp_exp_4sig_Q, special, 19, 13 // Form z_hi as single prec
}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-(p9) shladd lookup = lookup, 0x0004, k ;;
+;;
+
+// z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+{ .mmi
+ ldfd Tbl_hi = [table_ptr2], 8 // Load Tbl_hi from index table
+;;
+ setf.s z_hi = special // Form z_hi
+ nop.i 999
}
{ .mmi
-(p9) add table_ptr2 = table_ptr2, lookup ;;
-//
-// V_prime = V - U * z_hi
-//
-(p9) ldfd Tbl_hi = [table_ptr2], 8
- nop.i 999 ;;
+ ldfs Tbl_lo = [table_ptr2], 8 // Load Tbl_lo from index table
+;;
+ ldfe Q_3 = [table_ptr1], -16 // Load Q_3
+ nop.i 999
}
+;;
+
+{ .mmi
+ ldfe Q_2 = [table_ptr1], -16 // Load Q_2
+ nop.m 999
+ nop.i 999
+}
+;;
+
{ .mmf
- nop.m 999
-//
-// C_hi = frcpa(1,U_prime_hi)
-//
-(p9) ldfs Tbl_lo = [table_ptr2], 8
-//
-// z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
-// Point to beginning of Tbl_hi entries - k = 0.
-//
-(p0) fmerge.se z_hi = Q, z_hi ;;
+ ldfe Q_1 = [table_ptr1], -16 // Load Q_1
+ nop.m 999
+ nop.f 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 U_prime_hi = V, z_hi, U
- nop.i 999
+ nop.m 999
+ fma.s1 U_prime_hi = V, z_hi, U // U_prime_hi = U + V * z_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fnma.s1 V_prime = U, z_hi, V
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 V_prime = U, z_hi, V // V_prime = V - U * z_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) mov A_hi = Tbl_hi
- nop.i 999 ;;
+ nop.m 999
+ mov A_hi = Tbl_hi // Start with A_hi = Tbl_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fsub.s1 U_hold = U, U_prime_hi
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 U_hold = U, U_prime_hi // U_hold = U - U_prime_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) frcpa.s1 C_hi, p6 = f1, U_prime_hi
- nop.i 999 ;;
+ nop.m 999
+ frcpa.s1 C_hi, p0 = f1, U_prime_hi // C_hi = frcpa(1,U_prime_hi)
+ nop.i 999
}
+;;
+
{ .mfi
-(p0) cmp.eq.unc p7, p6 = 0x00000, swap
-(p0) fmpy.s1 A_hi = s_Y, A_hi
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 A_hi = s_Y, A_hi // A_hi = s_Y * A_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly = wsq * poly
-//
-(p7) fadd.s1 sigma = f0, f1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 U_prime_lo = z_hi, V, U_hold // U_prime_lo = U_hold + V * z_hi
+ nop.i 999
}
+;;
+
+// C_hi_hold = 1 - C_hi * U_prime_hi (1)
{ .mfi
- nop.m 999
-(p0) fma.s1 U_prime_lo = z_hi, V, U_hold
- nop.i 999
+ nop.m 999
+ fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p6) fsub.s1 sigma = f0, f1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 Res_hi = sigma, A_hi, P_hi // Res_hi = P_hi + sigma * A_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (1)
+ nop.i 999
}
+;;
+
+// C_hi_hold = 1 - C_hi * U_prime_hi (2)
{ .mfi
- nop.m 999
-//
-// A_lo = A_lo + w_hi
-// A_hi = s_Y * A_hi
-//
-(p0) fma.s1 Res_hi = sigma, A_hi, P_hi
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// C_hi_hold = 1 - C_hi * U_prime_hi (1)
-//
-(p0) fma.s1 C_hi = C_hi_hold, C_hi, C_hi
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (2)
+ nop.i 999
}
+;;
+
+// C_hi_hold = 1 - C_hi * U_prime_hi (3)
{ .mfi
- nop.m 999
-//
-// C_hi = C_hi + C_hi * C_hi_hold (1)
-//
-(p0) fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// C_hi_hold = 1 - C_hi * U_prime_hi (2)
-//
-(p0) fma.s1 C_hi = C_hi_hold, C_hi, C_hi
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (3)
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// C_hi = C_hi + C_hi * C_hi_hold (2)
-//
-(p0) fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 w_hi = V_prime, C_hi // w_hi = V_prime * C_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// C_hi_hold = 1 - C_hi * U_prime_hi (3)
-//
-(p0) fma.s1 C_hi = C_hi_hold, C_hi, C_hi
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 wsq = w_hi, w_hi // wsq = w_hi * w_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// C_hi = C_hi + C_hi * C_hi_hold (3)
-//
-(p0) fmpy.s1 w_hi = V_prime, C_hi
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 w_lo = w_hi, U_prime_hi, V_prime // w_lo = V_prime-w_hi*U_prime_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// w_hi = V_prime * C_hi
-//
-(p0) fmpy.s1 wsq = w_hi, w_hi
- nop.i 999
+ nop.m 999
+ fma.s1 poly = wsq, Q_4, Q_3 // poly = Q_3 + wsq * Q_4
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fnma.s1 w_lo = w_hi, U_prime_hi, V_prime
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 w_lo = w_hi, U_prime_lo, w_lo // w_lo = w_lo - w_hi * U_prime_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// wsq = w_hi * w_hi
-// w_lo = = V_prime - w_hi * U_prime_hi
-//
-(p0) fma.s1 poly = wsq, Q_4, Q_3
- nop.i 999
+ nop.m 999
+ fma.s1 poly = wsq, poly, Q_2 // poly = Q_2 + wsq * poly
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fnma.s1 w_lo = w_hi, U_prime_lo, w_lo
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 w_lo = C_hi, w_lo // w_lo = w_lo * C_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly = Q_3 + wsq * Q_4
-// w_lo = = w_lo - w_hi * U_prime_lo
-//
-(p0) fma.s1 poly = wsq, poly, Q_2
- nop.i 999
+ nop.m 999
+ fma.s1 poly = wsq, poly, Q_1 // poly = Q_1 + wsq * poly
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 w_lo = C_hi, w_lo
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 A_lo = Tbl_lo, w_lo // A_lo = Tbl_lo + w_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly = Q_2 + wsq * poly
-// w_lo = = w_lo * C_hi
-//
-(p0) fma.s1 poly = wsq, poly, Q_1
- nop.i 999
+ nop.m 999
+ fmpy.s0 Q_1 = Q_1, Q_1 // Dummy operation to raise inexact
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fadd.s1 A_lo = Tbl_lo, w_lo
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 poly = wsq, poly // poly = wsq * poly
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Result = Res_hi + Res_lo * s_Y (User Supplied Rounding Mode)
-//
-(p0) fmpy.s0 Q_1 = Q_1, Q_1
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 poly = w_hi, poly // poly = w_hi * poly
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly = Q_1 + wsq * poly
-// A_lo = Tbl_lo + w_lo
-// swap = xor(swap,sign_X)
-//
-(p0) fmpy.s1 poly = wsq, poly
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 A_lo = A_lo, poly // A_lo = A_lo + poly
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Is (swap) != 0 ?
-// poly = wsq * poly
-// A_hi = Tbl_hi
-//
-(p0) fmpy.s1 poly = w_hi, poly
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 A_lo = A_lo, w_hi // A_lo = A_lo + w_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (PR_1) sigma = -1.0
-// if (PR_2) sigma = 1.0
-//
-(p0) fadd.s1 A_lo = A_lo, poly
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 Res_lo = sigma, A_lo, P_lo // Res_lo = P_lo + sigma * A_lo
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// P_hi = s_Y * P_hi
-// A_lo = A_lo + poly
+// Result = Res_hi + Res_lo * s_Y (User Supplied Rounding Mode)
//
-(p0) fadd.s1 A_lo = A_lo, w_hi
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fma.s1 Res_lo = sigma, A_lo, P_lo
- nop.i 999 ;;
-}
{ .mfb
- nop.m 999
-//
-// Res_hi = P_hi + sigma * A_hi
-// Res_lo = P_lo + sigma * A_lo
-//
-(p0) fma.s0 Result = Res_lo, s_Y, Res_hi
-//
-// Raise inexact.
-//
-br.ret.sptk b0 ;;
-}
-//
-// poly1 = P_5 + zsq * poly1
-// poly2 = zsq * poly2
-//
-L(ATANL_POLY):
-{ .mmf
-(p0) xor swap = sign_X, swap
- nop.m 999
-(p0) fnma.s1 E_hold = E, U, f1 ;;
+ nop.m 999
+ fma.s0 Result = Res_lo, s_Y, Res_hi
+ br.ret.sptk b0 // Exit table path 2^-3 <= V/U < 1
}
-{ .mfi
- nop.m 999
-(p0) mov A_temp = Q
+;;
+
+
+ATANL_POLY:
+// Here if 0 < V/U < 2^-3
//
-// poly1 = P_4 + zsq * poly1
-// swap = xor(swap,sign_X)
+// ***********************************************
+// ******************** STEP4 ********************
+// ***********************************************
+
//
-// sign_X gr_002
-// swap gr_004
-// poly1 = poly1 <== Done with poly1
-// poly1 = P_4 + zsq * poly1
-// swap = xor(swap,sign_X)
+// Following:
+// Iterate 3 times E = E + E*(1.0 - E*U)
+// Also load P_8, P_7, P_6, P_5, P_4
//
-(p0) cmp.eq.unc p7, p6 = 0x00000, swap
-}
-{ .mfi
- nop.m 999
-(p0) fmpy.s1 P_hi = s_Y, P_hi
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p6) fsub.s1 sigma = f0, f1
- nop.i 999
+ ldfe P_8 = [table_ptr1], -16 // Load P_8
+ fnma.s1 z_lo = A_temp, U, V // z_lo = V - A_temp * U
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fadd.s1 sigma = f0, f1
- nop.i 999 ;;
-}
-
-// ***********************************************
-// ******************** STEP4 ********************
-// ***********************************************
-
-{ .mmi
nop.m 999
-(p0) addl table_ptr1 = @ltoff(Constants_atan#), gp
+ fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (2)
nop.i 999
}
;;
{ .mmi
- ld8 table_ptr1 = [table_ptr1]
- nop.m 999
+ ldfe P_7 = [table_ptr1], -16 // Load P_7
+;;
+ ldfe P_6 = [table_ptr1], -16 // Load P_6
nop.i 999
}
;;
-
{ .mfi
- nop.m 999
-(p0) fma.s1 E = E, E_hold, E
-//
-// Following:
-// Iterate 3 times E = E + E*(1.0 - E*U)
-// Also load P_8, P_7, P_6, P_5, P_4
-// E_hold = 1.0 - E * U (1)
-// A_temp = Q
-//
-(p0) add table_ptr1 = 128, table_ptr1 ;;
-}
-{ .mmf
- nop.m 999
-//
-// E = E + E_hold*E (1)
-// Point to P_8.
-//
-(p0) ldfe P_8 = [table_ptr1], -16
-//
-// poly = z8*poly1 + poly2 (Typo in writeup)
-// Is (swap) != 0 ?
-//
-(p0) fnma.s1 z_lo = A_temp, U, V ;;
+ ldfe P_5 = [table_ptr1], -16 // Load P_5
+ fma.s1 E = E, E_hold, E // E = E + E_hold*E (2)
+ nop.i 999
}
-{ .mmb
- nop.m 999
-//
-// E_hold = 1.0 - E * U (2)
-//
-(p0) ldfe P_7 = [table_ptr1], -16
- nop.b 999 ;;
+;;
+
+{ .mmi
+ ldfe P_4 = [table_ptr1], -16 // Load P_4
+;;
+ ldfe P_3 = [table_ptr1], -16 // Load P_3
+ nop.i 999
}
-{ .mmb
- nop.m 999
-//
-// E = E + E_hold*E (2)
-//
-(p0) ldfe P_6 = [table_ptr1], -16
- nop.b 999 ;;
+;;
+
+{ .mfi
+ ldfe P_2 = [table_ptr1], -16 // Load P_2
+ fnma.s1 E_hold = E, U, f1 // E_hold = 1.0 - E*U (3)
+ nop.i 999
}
-{ .mmb
- nop.m 999
-//
-// E_hold = 1.0 - E * U (3)
-//
-(p0) ldfe P_5 = [table_ptr1], -16
- nop.b 999 ;;
+{ .mlx
+ nop.m 999
+ movl int_temp = 0x24005 // Signexp for small neg number
}
+;;
+
{ .mmf
- nop.m 999
-//
-// E = E + E_hold*E (3)
+ ldfe P_1 = [table_ptr1], -16 // Load P_1
+ setf.exp tmp_small = int_temp // Form small neg number
+ fma.s1 E = E, E_hold, E // E = E + E_hold*E (3)
+}
+;;
+
//
//
// At this point E approximates 1/U to roughly working precision
-// z = V*E approximates V/U
+// Z = V*E approximates V/U
//
-(p0) ldfe P_4 = [table_ptr1], -16
-(p0) fnma.s1 E_hold = E, U, f1 ;;
+{ .mfi
+ nop.m 999
+ fmpy.s1 Z = V, E // Z = V * E
+ nop.i 999
}
-{ .mmb
- nop.m 999
-//
-// Z = V * E
-//
-(p0) ldfe P_3 = [table_ptr1], -16
- nop.b 999 ;;
+{ .mfi
+ nop.m 999
+ fmpy.s1 z_lo = z_lo, E // z_lo = z_lo * E
+ nop.i 999
}
-{ .mmb
- nop.m 999
+;;
+
//
-// zsq = Z * Z
+// Now what we want to do is
+// poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
+// poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
//
-(p0) ldfe P_2 = [table_ptr1], -16
- nop.b 999 ;;
-}
-{ .mmb
- nop.m 999
//
-// z8 = zsq * zsq
+// Fixup added to force inexact later -
+// A_hi = A_temp + z_lo
+// z_lo = (A_temp - A_hi) + z_lo
//
-(p0) ldfe P_1 = [table_ptr1], -16
- nop.b 999 ;;
-}
-{ .mlx
- nop.m 999
-(p0) movl int_temp = 0x24005
-}
{ .mfi
- nop.m 999
-(p0) fma.s1 E = E, E_hold, E
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 zsq = Z, Z // zsq = Z * Z
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fnma.s1 E_hold = E, U, f1
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 A_hi = A_temp, z_lo // A_hi = A_temp + z_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 E = E, E_hold, E
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 poly1 = zsq, P_8, P_7 // poly1 = P_7 + zsq * P_8
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 Z = V, E
- nop.i 999
+ nop.m 999
+ fma.s1 poly2 = zsq, P_3, P_2 // poly2 = P_2 + zsq * P_3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// z_lo = V - A_temp * U
-// if (PR_2) sigma = 1.0
-//
-(p0) fmpy.s1 z_lo = z_lo, E
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 z4 = zsq, zsq // z4 = zsq * zsq
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 zsq = Z, Z
- nop.i 999
+ nop.m 999
+ fsub.s1 A_temp = A_temp, A_hi // A_temp = A_temp - A_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// z_lo = z_lo * E
-// if (PR_1) sigma = -1.0
-//
-(p0) fadd.s1 A_hi = A_temp, z_lo
- nop.i 999 ;;
+ nop.m 999
+ fmerge.s tmp = A_hi, A_hi // Copy tmp = A_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// z8 = z8 * z8
-//
-//
-// Now what we want to do is
-// poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
-// poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
-//
-(p0) fma.s1 poly1 = zsq, P_8, P_7
- nop.i 999
+ nop.m 999
+ fma.s1 poly1 = zsq, poly1, P_6 // poly1 = P_6 + zsq * poly1
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 poly2 = zsq, P_3, P_2
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 poly2 = zsq, poly2, P_1 // poly2 = P_1 + zsq * poly2
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmpy.s1 z8 = zsq, zsq
- nop.i 999
+ nop.m 999
+ fmpy.s1 z8 = z4, z4 // z8 = z4 * z4
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fsub.s1 A_temp = A_temp, A_hi
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 z_lo = A_temp, z_lo // z_lo = (A_temp - A_hi) + z_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// A_lo = Z * poly + z_lo
-//
-(p0) fmerge.s tmp = A_hi, A_hi
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 poly1 = zsq, poly1, P_5 // poly1 = P_5 + zsq * poly1
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// poly1 = P_7 + zsq * P_8
-// poly2 = P_2 + zsq * P_3
-//
-(p0) fma.s1 poly1 = zsq, poly1, P_6
- nop.i 999
+ nop.m 999
+ fmpy.s1 poly2 = poly2, zsq // poly2 = zsq * poly2
+ nop.i 999
}
+;;
+
+// Create small GR double in case need to raise underflow
{ .mfi
- nop.m 999
-(p0) fma.s1 poly2 = zsq, poly2, P_1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 poly1 = zsq, poly1, P_4 // poly1 = P_4 + zsq * poly1
+ dep GR_temp = -1,r0,0,53
}
+;;
+
+// Create small double in case need to raise underflow
{ .mfi
- nop.m 999
-(p0) fmpy.s1 z8 = z8, z8
- nop.i 999
+ setf.d FR_temp = GR_temp
+ fma.s1 poly = z8, poly1, poly2 // poly = poly2 + z8 * poly1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fadd.s1 z_lo = A_temp, z_lo
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 A_lo = Z, poly, z_lo // A_lo = z_lo + Z * poly
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly1 = P_6 + zsq * poly1
-// poly2 = P_2 + zsq * poly2
-//
-(p0) fma.s1 poly1 = zsq, poly1, P_5
- nop.i 999
+ nop.m 999
+ fadd.s1 A_hi = tmp, A_lo // A_hi = tmp + A_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmpy.s1 poly2 = poly2, zsq
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 tmp = tmp, A_hi // tmp = tmp - A_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Result = Res_hi + Res_lo (User Supplied Rounding Mode)
-//
-(p0) fmpy.s1 P_5 = P_5, P_5
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 A_hi = s_Y, A_hi // A_hi = s_Y * A_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 poly1 = zsq, poly1, P_4
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 A_lo = tmp, A_lo // A_lo = tmp + A_lo
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 poly = z8, poly1, poly2
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 Res_hi = sigma, A_hi, P_hi // Res_hi = P_hi + sigma * A_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
+ nop.m 999
+ fsub.s1 tmp = P_hi, Res_hi // tmp = P_hi - Res_hi
+ nop.i 999
+}
+;;
+
//
-// Fixup added to force inexact later -
-// A_hi = A_temp + z_lo
-// z_lo = (A_temp - A_hi) + z_lo
+// Test if A_lo is zero
//
-(p0) fma.s1 A_lo = Z, poly, z_lo
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p0) fadd.s1 A_hi = tmp, A_lo
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p6,p0 = A_lo, 0x007 // Test A_lo = 0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fsub.s1 tmp = tmp, A_hi
- nop.i 999
+ nop.m 999
+(p6) mov A_lo = tmp_small // If A_lo zero, make very small
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmpy.s1 A_hi = s_Y, A_hi
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 tmp = A_hi, sigma, tmp // tmp = sigma * A_hi + tmp
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fadd.s1 A_lo = tmp, A_lo
- nop.i 999
+ nop.m 999
+ fma.s1 sigma = A_lo, sigma, P_lo // sigma = A_lo * sigma + P_lo
+ nop.i 999
}
+;;
+
{ .mfi
-(p0) setf.exp tmp = int_temp
+ nop.m 999
+ fma.s1 Res_lo = s_Y, sigma, tmp // Res_lo = s_Y * sigma + tmp
+ nop.i 999
+}
+;;
+
//
-// P_hi = s_Y * P_hi
-// A_hi = s_Y * A_hi
+// Test if Res_lo is denormal
//
-(p0) fma.s1 Res_hi = sigma, A_hi, P_hi
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6,p0 = A_lo, 0x007
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p6) mov A_lo = tmp
- nop.i 999
+ nop.m 999
+ fclass.m p14, p15 = Res_lo, 0x0b
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// Res_hi = P_hi + sigma * A_hi
+// Compute Result = Res_lo + Res_hi. Use s3 if Res_lo is denormal.
//
-(p0) fsub.s1 tmp = P_hi, Res_hi
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-//
-// tmp = P_hi - Res_hi
-//
-(p0) fma.s1 tmp = A_hi, sigma, tmp
- nop.i 999
+ nop.m 999
+(p14) fadd.s3 Result = Res_lo, Res_hi // Result for Res_lo denormal
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 sigma = A_lo, sigma, P_lo
- nop.i 999 ;;
+ nop.m 999
+(p15) fadd.s0 Result = Res_lo, Res_hi // Result for Res_lo normal
+ nop.i 999
}
+;;
+
+//
+// If Res_lo is denormal test if Result equals zero
+//
{ .mfi
- nop.m 999
-//
-// tmp = sigma * A_hi + tmp
-// sigma = A_lo * sigma + P_lo
-//
-(p0) fma.s1 Res_lo = s_Y, sigma, tmp
- nop.i 999 ;;
+ nop.m 999
+(p14) fclass.m.unc p14, p0 = Result, 0x07
+ nop.i 999
}
-{ .mfb
- nop.m 999
+;;
+
//
-// Res_lo = s_Y * sigma + tmp
+// If Res_lo is denormal and Result equals zero, raise inexact, underflow
+// by squaring small double
//
-(p0) fadd.s0 Result = Res_lo, Res_hi
-br.ret.sptk b0 ;;
+{ .mfb
+ nop.m 999
+(p14) fmpy.d.s0 FR_temp = FR_temp, FR_temp
+ br.ret.sptk b0 // Exit POLY path, 0 < Q < 2^-3
}
-L(ATANL_NATVAL):
-L(ATANL_UNSUPPORTED):
-L(ATANL_NAN):
+;;
+
+
+ATANL_UNSUPPORTED:
{ .mfb
- nop.m 999
-(p0) fmpy.s0 Result = ArgX,ArgY
-(p0) br.ret.sptk b0 ;;
+ nop.m 999
+ fmpy.s0 Result = ArgX,ArgY
+ br.ret.sptk b0
}
-L(ATANL_SPECIAL_HANDLING):
+;;
+
+// Here if y natval, nan, inf, zero
+ATANL_Y_SPECIAL:
+// Here if x natval, nan, inf, zero
+ATANL_X_SPECIAL:
{ .mfi
- nop.m 999
-(p0) fcmp.eq.s0 p0, p6 = f1, ArgY_orig
- nop.i 999
+ nop.m 999
+ fclass.m p13,p12 = ArgY_orig, 0x0c3 // Test y nan
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fcmp.eq.s0 p0, p5 = f1, ArgX_orig
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p15,p14 = ArgY_orig, 0x103 // Test y natval
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p7 = ArgY, 0x007
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl special = 992
+ nop.m 999
+(p12) fclass.m p13,p0 = ArgX_orig, 0x0c3 // Test x nan
+ nop.i 999
}
;;
-
-{ .mmi
+{ .mfi
nop.m 999
-(p0) addl table_ptr1 = @ltoff(Constants_atan#), gp
+(p14) fclass.m p15,p0 = ArgX_orig, 0x103 // Test x natval
nop.i 999
}
;;
-{ .mmi
- ld8 table_ptr1 = [table_ptr1]
+{ .mfb
nop.m 999
- nop.i 999
+(p13) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result nan if x or y nan
+(p13) br.ret.spnt b0 // Exit if x or y nan
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p15) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result natval if x or y natval
+(p15) br.ret.spnt b0 // Exit if x or y natval
}
;;
-{ .mib
-(p0) add table_ptr1 = table_ptr1, special
- nop.i 999
-(p7) br.cond.spnt L(ATANL_ArgY_Not_ZERO) ;;
+// Here if x or y inf or zero
+ATANL_SPECIAL_HANDLING:
+{ .mfi
+ nop.m 999
+ fclass.m p6, p7 = ArgY_orig, 0x007 // Test y zero
+ mov special = 992 // Offset to table
}
+;;
+
+{ .mfb
+ add table_ptr1 = table_base, special // Point to 3pi/4
+ fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig // Dummy to set denormal flag
+(p7) br.cond.spnt ATANL_ArgY_Not_ZERO // Branch if y not zero
+}
+;;
+
+// Here if y zero
{ .mmf
-(p0) ldfd Result = [table_ptr1], 8
- nop.m 999
-(p6) fclass.m.unc p14, p0 = ArgX, 0x035 ;;
+ ldfd Result = [table_ptr1], 8 // Get pi high
+ nop.m 999
+ fclass.m p14, p0 = ArgX, 0x035 // Test for x>=+0
}
+;;
+
{ .mmf
- nop.m 999
-(p0) ldfd Result_lo = [table_ptr1], -8
-(p6) fclass.m.unc p15, p0 = ArgX, 0x036 ;;
+ nop.m 999
+ ldfd Result_lo = [table_ptr1], -8 // Get pi lo
+ fclass.m p15, p0 = ArgX, 0x036 // Test for x<=-0
}
+;;
+
+//
+// Return sign_Y * 0 when ArgX > +0
+//
{ .mfi
- nop.m 999
-(p14) fmerge.s Result = ArgY, f0
- nop.i 999
+ nop.m 999
+(p14) fmerge.s Result = ArgY, f0 // If x>=+0, y=0, hi sgn(y)*0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p6) fclass.m.unc p13, p0 = ArgX, 0x007
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p13, p0 = ArgX, 0x007 // Test for x=0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p14) fmerge.s Result_lo = ArgY, f0
- nop.i 999 ;;
+ nop.m 999
+(p14) fmerge.s Result_lo = ArgY, f0 // If x>=+0, y=0, lo sgn(y)*0
+ nop.i 999
}
+;;
+
{ .mfi
-(p13) mov GR_Parameter_TAG = 36
- nop.f 999
- nop.i 999 ;;
+(p13) mov GR_Parameter_TAG = 36 // Error tag for x=0, y=0
+ nop.f 999
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// Return sign_Y * 0 when ArgX > +0
+// Return sign_Y * pi when ArgX < -0
//
-(p15) fmerge.s Result = ArgY, Result
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p15) fmerge.s Result_lo = ArgY, Result_lo
- nop.i 999 ;;
+ nop.m 999
+(p15) fmerge.s Result = ArgY, Result // If x<0, y=0, hi=sgn(y)*pi
+ nop.i 999
}
-{ .mfb
- nop.m 999
-//
-// Return sign_Y * 0 when ArgX < -0
-//
-(p0) fadd.s0 Result = Result, Result_lo
-(p13) br.cond.spnt __libm_error_region ;;
+;;
+
+{ .mfi
+ nop.m 999
+(p15) fmerge.s Result_lo = ArgY, Result_lo // If x<0, y=0, lo=sgn(y)*pi
+ nop.i 999
}
-{ .mib
- nop.m 999
- nop.i 999
+;;
+
//
-// Call error support funciton for atan(0,0)
+// Call error support function for atan(0,0)
//
-(p0) br.ret.sptk b0 ;;
-}
-L(ATANL_ArgY_Not_ZERO):
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p9, p10 = ArgY, 0x023
- nop.i 999 ;;
+{ .mfb
+ nop.m 999
+ fadd.s0 Result = Result, Result_lo
+(p13) br.cond.spnt __libm_error_region // Branch if atan(0,0)
}
+;;
+
{ .mib
- nop.m 999
- nop.i 999
-(p10) br.cond.spnt L(ATANL_ArgY_Not_INF) ;;
-}
-{ .mfi
- nop.m 999
-(p9) fclass.m.unc p6, p0 = ArgX, 0x017
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p9) fclass.m.unc p7, p0 = ArgX, 0x021
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p9) fclass.m.unc p8, p0 = ArgX, 0x022
- nop.i 999 ;;
-}
-{ .mmi
-(p6) add table_ptr1 = 16, table_ptr1 ;;
-(p0) ldfd Result = [table_ptr1], 8
- nop.i 999 ;;
-}
-{ .mfi
-(p0) ldfd Result_lo = [table_ptr1], -8
- nop.f 999
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p6) fmerge.s Result = ArgY, Result
- nop.i 999 ;;
+ nop.m 999
+ nop.i 999
+ br.ret.sptk b0 // Exit for y=0, x not 0
}
+;;
+
+// Here if y not zero
+ATANL_ArgY_Not_ZERO:
{ .mfi
- nop.m 999
-(p6) fmerge.s Result_lo = ArgY, Result_lo
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p0, p10 = ArgY, 0x023 // Test y inf
+ nop.i 999
}
+;;
+
{ .mfb
- nop.m 999
-(p6) fadd.s0 Result = Result, Result_lo
-(p6) br.ret.sptk b0 ;;
+ nop.m 999
+ fclass.m p6, p0 = ArgX, 0x017 // Test for 0 <= |x| < inf
+(p10) br.cond.spnt ATANL_ArgY_Not_INF // Branch if 0 < |y| < inf
}
+;;
+
+// Here if y=inf
//
-// Load PI/2 and adjust its sign.
// Return +PI/2 when ArgY = +Inf and ArgX = +/-0 or normal
// Return -PI/2 when ArgY = -Inf and ArgX = +/-0 or normal
+// Return +PI/4 when ArgY = +Inf and ArgX = +Inf
+// Return -PI/4 when ArgY = -Inf and ArgX = +Inf
+// Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
+// Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
//
-{ .mmi
-(p7) add table_ptr1 = 32, table_ptr1 ;;
-(p7) ldfd Result = [table_ptr1], 8
- nop.i 999 ;;
-}
{ .mfi
-(p7) ldfd Result_lo = [table_ptr1], -8
- nop.f 999
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p7) fmerge.s Result = ArgY, Result
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p7, p0 = ArgX, 0x021 // Test for x=+inf
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p7) fmerge.s Result_lo = ArgY, Result_lo
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p7) fadd.s0 Result = Result, Result_lo
-(p7) br.ret.sptk b0 ;;
+(p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2, if x finite
+ fclass.m p8, p0 = ArgX, 0x022 // Test for x=-inf
+ nop.i 999
}
-//
-// Load PI/4 and adjust its sign.
-// Return +PI/4 when ArgY = +Inf and ArgX = +Inf
-// Return -PI/4 when ArgY = -Inf and ArgX = +Inf
-//
+;;
+
{ .mmi
-(p8) add table_ptr1 = 48, table_ptr1 ;;
-(p8) ldfd Result = [table_ptr1], 8
- nop.i 999 ;;
+(p7) add table_ptr1 = 32, table_ptr1 // Point to pi/4 if x=+inf
+;;
+(p8) add table_ptr1 = 48, table_ptr1 // Point to 3pi/4 if x=-inf
+
+ nop.i 999
}
-{ .mfi
-(p8) ldfd Result_lo = [table_ptr1], -8
- nop.f 999
- nop.i 999 ;;
+;;
+
+{ .mmi
+ ldfd Result = [table_ptr1], 8 // Load pi/2, pi/4, or 3pi/4 hi
+;;
+ ldfd Result_lo = [table_ptr1], -8 // Load pi/2, pi/4, or 3pi/4 lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p8) fmerge.s Result = ArgY, Result
- nop.i 999 ;;
+ nop.m 999
+ fmerge.s Result = ArgY, Result // Merge sgn(y) in hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p8) fmerge.s Result_lo = ArgY, Result_lo
- nop.i 999 ;;
+ nop.m 999
+ fmerge.s Result_lo = ArgY, Result_lo // Merge sgn(y) in lo
+ nop.i 999
}
+;;
+
{ .mfb
- nop.m 999
-(p8) fadd.s0 Result = Result, Result_lo
-(p8) br.ret.sptk b0 ;;
+ nop.m 999
+ fadd.s0 Result = Result, Result_lo // Compute complete result
+ br.ret.sptk b0 // Exit for y=inf
}
-L(ATANL_ArgY_Not_INF):
-{ .mfi
- nop.m 999
+;;
+
+// Here if y not INF, and x=0 or INF
+ATANL_ArgY_Not_INF:
//
-// Load PI/4 and adjust its sign.
-// Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
-// Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
+// Return +PI/2 when ArgY NOT Inf, ArgY > 0 and ArgX = +/-0
+// Return -PI/2 when ArgY NOT Inf, ArgY < 0 and ArgX = +/-0
+// Return +0 when ArgY NOT Inf, ArgY > 0 and ArgX = +Inf
+// Return -0 when ArgY NOT Inf, ArgY < 0 and ArgX = +Inf
+// Return +PI when ArgY NOT Inf, ArgY > 0 and ArgX = -Inf
+// Return -PI when ArgY NOT Inf, ArgY < 0 and ArgX = -Inf
//
-(p0) fclass.m.unc p6, p0 = ArgX, 0x007
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p7, p0 = ArgX, 0x021
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p8, p0 = ArgX, 0x022
- nop.i 999 ;;
-}
-{ .mmi
-(p6) add table_ptr1 = 16, table_ptr1 ;;
-(p6) ldfd Result = [table_ptr1], 8
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p7, p9 = ArgX, 0x021 // Test for x=+inf
+ nop.i 999
}
+;;
+
{ .mfi
-(p6) ldfd Result_lo = [table_ptr1], -8
- nop.f 999
- nop.i 999 ;;
+ nop.m 999
+ fclass.m p6, p0 = ArgX, 0x007 // Test for x=0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p6) fmerge.s Result = ArgY, Result
- nop.i 999 ;;
+(p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2
+ fclass.m p8, p0 = ArgX, 0x022 // Test for x=-inf
+ nop.i 999
}
+;;
+
+.pred.rel "mutex",p7,p9
{ .mfi
- nop.m 999
-(p6) fmerge.s Result_lo = ArgY, Result_lo
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p6) fadd.s0 Result = Result, Result_lo
-(p6) br.ret.spnt b0 ;;
+(p9) ldfd Result = [table_ptr1], 8 // Load pi or pi/2 hi
+(p7) fmerge.s Result = ArgY, f0 // If y not inf, x=+inf, sgn(y)*0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// return = sign_Y * PI/2 when ArgX = 0
-//
-(p7) fmerge.s Result = ArgY, f0
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p7) fnorm.s0 Result = Result
-(p7) br.ret.spnt b0 ;;
-}
-//
-// return = sign_Y * 0 when ArgX = Inf
-//
-{ .mmi
-(p8) ldfd Result = [table_ptr1], 8 ;;
-(p8) ldfd Result_lo = [table_ptr1], -8
- nop.i 999 ;;
+(p9) ldfd Result_lo = [table_ptr1], -8 // Load pi or pi/2 lo
+(p7) fnorm.s0 Result = Result // If y not inf, x=+inf normalize
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p8) fmerge.s Result = ArgY, Result
- nop.i 999 ;;
+ nop.m 999
+(p9) fmerge.s Result = ArgY, Result // Merge sgn(y) in hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p8) fmerge.s Result_lo = ArgY, Result_lo
- nop.i 999 ;;
+ nop.m 999
+(p9) fmerge.s Result_lo = ArgY, Result_lo // Merge sgn(y) in lo
+ nop.i 999
}
+;;
+
{ .mfb
- nop.m 999
-(p8) fadd.s0 Result = Result, Result_lo
-(p8) br.ret.sptk b0 ;;
+ nop.m 999
+(p9) fadd.s0 Result = Result, Result_lo // Compute complete result
+ br.ret.spnt b0 // Exit for y not inf, x=0,inf
}
-//
-// return = sign_Y * PI when ArgX = -Inf
-//
-.endp atan2l
-ASM_SIZE_DIRECTIVE(atan2l)
-ASM_SIZE_DIRECTIVE(__atan2l)
-ASM_SIZE_DIRECTIVE(__ieee754_atan2l)
-
-.proc __libm_error_region
-__libm_error_region:
+;;
+
+GLOBAL_IEEE754_END(atan2l)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -2001,7 +1999,6 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_cbrt.S b/sysdeps/ia64/fpu/s_cbrt.S
index 1e23b6024d..b7a827d1da 100644
--- a/sysdeps/ia64/fpu/s_cbrt.S
+++ b/sysdeps/ia64/fpu/s_cbrt.S
@@ -1,11 +1,10 @@
-.file "cbrt.asm"
+.file "cbrt.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang
-// of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,27 +20,30 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 5/19/00: New version (modified algorithm)
+// 02/02/00 Initial version
+// 05/19/00 New version (modified algorithm)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/28/03 Updated polynomial coefficients (changed to Remez coefficients),
+// to slightly improve accuracy
//
// API
//==============================================================
@@ -53,637 +55,713 @@
//
// Implementation
//
-// cbrt(a) = cbrt(a y) / cbrt(y)
-// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
+// Let y= frcpa(a), where a is the argument
+//
+// cbrt(a)= cbrt(a*y)/cbrt(y) = cbrt(1 - (1-a*y)) * (1/cbrt(y))
+//
+// For all values of y, the 3 possible significands of 1/cbrt(y)
+// are stored in a table (T0) to 64 bits of accuracy. (There are
+// 3 possible significands because the exponent of y modulo 3
+// can be 0, 1, or 2.)
//
-// where y = frcpa(a).
//
-// * cbrt(1 - (1 - a y)) is approximated by a degree-5 polynomial
-//
-// 1 - (1/3)*r - (1/9)*r^2 - (5/81)*r^3 - (10/243)*r^4 - (22/729)*r^5
-//
-// in r = 1 - a y.
+// * cbrt(1 - (1-a*y)) is approximated by a degree-5 polynomial ~
+//
+// ~ 1 - (1/3)*r - (1/9)*r^2 - (5/81)*r^3 - (10/243)*r^4 - (22/729)*r^5
+//
+// in r = 1-a*y.
//
-// * The values 1/cbrt(y) are stored in a table of constants T0
-// to 64 bits of accuracy
//
// The table values are stored for three exponent values and are
// then multiplied by e/3 where e is the exponent of the input number.
// This computation is carried out in parallel with the polynomial
// evaluation:
//
-// T = 2^(e/3) * T0
+// T= 2^(e/3) * T0
//===============
-// input = x
-// C = frcpa(x)
-// r = 1 - C * x
+// input= x
+// C= frcpa(x)
+// r= 1 - C * x
//
-// Special values
+// Special values
//==============================================================
// Registers used
//==============================================================
-// f6-f15
-// r2, r23-r26, r28-r30
-// p6,p7,p8,p12
+// f6-f15
+// GR_GP, r23-r26, r28-r30
+// p6, p7, p8, p12
+
+ FR_R = f6
+ FR_COEFF1 = f7
+ FR_COEFF2 = f9
+ FR_COEFF3 = f10
+ FR_COEFF4 = f11
+ FR_COEFF5 = f12
+ FR_R2 = f13
+ FR_ARG = f14
+ FR_P23 = f15
+ FR_P25 = f32
+ FR_P15 = f33
+ FR_P1 = f34
+ FR_P45 = f35
+ FR_2EXP = f36
+ FR_TMP63 = f37
+
+ GR_GP = r2
+ GR_ADDR = r2
+ GR_CONST1 = r3
+ GR_I1 = r8
+ GR_EXP = r9
+ GR_ADDR2 = r10
+ GR_IT1 = r11
+ GR_TMP2 = r11
+ GR_EXPON = r15
+ GR_TMP1 = r16
+ GR_TMP6 = r16
+ GR_ITB1 = r17
+ GR_TMP3 = r18
+ GR_TMP4 = r19
+ GR_TMP63 = r19
+ GR_TMP5 = r20
+ GR_EXP_BY_3 = r20
+ GR_CONST4 = r21
+ GR_TMP6 = r22
+ GR_INDEX = r23
+ GR_EBIAS = r24
+ GR_SIGNIF = r25
+ GR_SIGNIF2 = r25
+ GR_TEST = r25
+ GR_ARGEXP = r26
+ GR_CONST2 = r27
+ GR_SIGN = r28
+ GR_REM = r29
+ GR_CONST3 = r30
+ GR_SEXP = r31
+
+
+
-#include "libm_support.h"
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-poly_coeffs:
-ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
-data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // 1/3
-data8 0x3fbc71c71c71c71d, 0x3faf9add3c0ca459
-data8 0x3fa511e8d2b3183b, 0x3f9ee7113506ac13
-ASM_SIZE_DIRECTIVE(poly_coeffs)
-
-T_table:
-ASM_TYPE_DIRECTIVE(T_table,@object)
-
-data8 0x80155c748c374836, 0xa160019ed37fb4ae
-data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
-data8 0xa1960b5966da4608, 0xcb95f333968ad59b
-data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
-data8 0xcbda64292d3ffd97, 0x8096b586974669b1
-data8 0xa202f97995b69c0d, 0xcc1f3184af961596
-data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
-data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
-data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
-data8 0x81149add67c2d208, 0xa2a197e5d10465cb
-data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
-data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
-data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
-data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
-data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
-data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
-data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
-data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
-data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
-data8 0xce6e0be0cd551a61, 0x823880f78e70b805
-data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
-data8 0x826097a62a8e5200, 0xa443df0e53df577a
-data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
-data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
-data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
-data8 0xcf763c47ee869f00, 0x82da06a527b18937
-data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
-data8 0x8302e60b635ab394, 0xa5105d46152c938a
-data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
-data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
-data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
-data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
-data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
-data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
-data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
-data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
-data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
-data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
-data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
-data8 0x844510461ff14209, 0xa6a6444aa0243c0b
-data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
-data8 0xa6dc094d10f25792, 0xd23ad555f773f059
-data8 0x84947e18234f3294, 0xa70a574cc02bba69
-data8 0xd2752c7039a5bf73, 0x84bf92755825045a
-data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
-data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
-data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
-data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
-data8 0x85359d5d91768427, 0xa7d5579ae5164b85
-data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
-data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
-data8 0x858104f0c415f79a, 0xa8345895e5250a5a
-data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
-data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
-data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
-data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
-data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
-data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
-data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
-data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
-data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
-data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
-data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
-data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
-data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
-data8 0xa9ea8686f556f645, 0xd614b539c6194104
-data8 0x870453c845acf90f, 0xaa1c52d17906bb19
-data8 0xd6537310e224283f, 0x872c089a1e90342c
-data8 0xaa4e59b046dab887, 0xd6927ab62244c917
-data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
-data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
-data8 0xaab319102f3f9b33, 0xd71169cea98fdded
-data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
-data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
-data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
-data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
-data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
-data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
-data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
-data8 0xd83e38838648d815, 0x885bc559e5e1c081
-data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
-data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
-data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
-data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
-data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
-data8 0xd92432bd5a173685, 0x88f4356166bd590e
-data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
-data8 0x89173a0acf5ce026, 0xacb93703ff51571e
-data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
-data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
-data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
-data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
-data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
-data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
-data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
-data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
-data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
-data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
-data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
-data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
-data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
-data8 0xae5794122b638df9, 0xdba843ded7151ea1
-data8 0x8a849aba14274764, 0xae858fda8137ae0a
-data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
-data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
-data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
-data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
-data8 0xaf10a899d3235fe7, 0xdc917398f2797814
-data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
-data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
-data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
-data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
-data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
-data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
-data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
-data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
-data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
-data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
-data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
-data8 0xb078f3ab1d701c65, 0xde576480262399bc
-data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
-data8 0xde943789645933c8, 0x8c5dc4c4f7706032
-data8 0xb0d9b624d62ec856, 0xded14d58139a28af
-data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
-data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
-data8 0xb131821882f5540a, 0xdf3feb44d723a713
-data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
-data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
-data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
-data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
-data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
-data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
-data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
-data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
-data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
-data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
-data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
-data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
-data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
-data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
-data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
-data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
-data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
-data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
-data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
-data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
-data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
-data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
-data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
-data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
-data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
-data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
-data8 0xb43da8e9d163e1af, 0xe316d93615862714
-data8 0x8f385c95d696b817, 0xb47233773b84d425
-data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
-data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
-data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
-data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
-data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
-data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
-data8 0xe42eeca17c62886c, 0x8fe117499e356095
-data8 0xb546c9616087ab9c, 0xe464e32943446305
-data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
-data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
-data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
-data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
-data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
-data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
-data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
-data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
-data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
-data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
-data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
-data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
-data8 0x9110021e7b516f0a, 0xb6c47044075b4142
-data8 0xe645bd1544c7ea51, 0x912a708a39be9075
-data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
-data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
-data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
-data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
-data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
-data8 0xe70a9136a7403039, 0x91afbc299ed0295d
-data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
-data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
-data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
-data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
-data8 0x9212b5fcac537c19, 0xb80a6226904045e2
-data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
-data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
-data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
-data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
-data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
-data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
-data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
-data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
-data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
-data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
-data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
-data8 0x931379a403be5c16, 0xb94de2d841a184c2
-data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
-data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
-data8 0x9354c71412c69486, 0xb9a0297f172665e3
-data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
-data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
-data8 0x93968919f6e7975d, 0xb9f3030951267208
-data8 0xea480963fd394197, 0x93bc516fdd4680c9
-data8 0xba229d6a618e7c59, 0xea84034425f27484
-data8 0x93d8c123d9be59b2, 0xba467144459f9855
-data8 0xeab12713138dd1cc, 0x93f546c955e60076
-data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
-data8 0x941b70a65879079f, 0xba9a76056b67ee7a
-data8 0xeb1b0268343b121b, 0x943829f337410591
-data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
-data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
-data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
-data8 0xbb1385a23be24e57, 0xebb389645f222f62
-data8 0x94988aeb23470f86, 0xbb3814975e17c680
-data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
-data8 0xbb5cc031009bf467, 0xec0fcc9321024509
-data8 0x94d2d7a9170d8b42, 0xbb81889680024764
-data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
-data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
-data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
-data8 0xecaad5278824e453, 0x9534cefa625fcb3a
-data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
-data8 0x955265405c491a25, 0xbc223d88cfc88eee
-data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
-data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
-data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
-data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
-data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
-data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
-data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
-data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
-data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
-data8 0xee357ead791fc670, 0x962e350575b409c5
-data8 0xbd372f8598620f19, 0xee658cb3c134a463
-data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
-data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
-data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
-data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
-data8 0xeef6a0da64a014ac, 0x96a8426705198795
-data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
-data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
-data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
-data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
-data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
-data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
-data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
-data8 0x97430782be323831, 0xbe93f5b41d047cf7
-data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
-data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
-data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
-data8 0xf0805c944d827454, 0x97a117ffd0f48e46
-data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
-data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
-data8 0xf0e46442e76f6569, 0x97e0505a8637a036
-data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
-data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
-data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
-data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
-data8 0x98354085054fd204, 0xbfc52428bec6e72f
-data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
-data8 0xbfed838fddab024b, 0xf1d0593311db1757
-data8 0x987571fffb7f94f6, 0xc016050c0420981a
-data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
-data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
-data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
-data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
-data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
-data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
-data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
-data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
-data8 0x9922b8218160967a, 0xc0f054ca33eb3437
-data8 0xf31670135ab9cc0f, 0x99438d686f75779d
-data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
-data8 0x99647eea131fa20b, 0xc1433453de2033ff
-data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
-data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
-data8 0x999ba5f14f8add02, 0xc188b130431d80e6
-data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
-data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
-data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
-data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
-data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
-data8 0x9a16154eb445c873, 0xc222f35a87b415ba
-data8 0xf498c1076015faf8, 0x9a2c822ec198d667
-data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
-data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
-data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
-data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
-data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
-data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
-data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
-data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
-data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
-data8 0xc323938449a2587e, 0xf5dc1501f324a812
-data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
-data8 0xf6006bee86b5589e, 0x9b1b19033be35730
-data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
-data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
-data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
-data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
-data8 0x9b77854e6c661200, 0xc3e0410243b97383
-data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
-data8 0xc3fd890709833d37, 0xf6eeb177472cedae
-data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
-data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
-data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
-data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
-data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
-data8 0xc490f9a94695ba14, 0xf7a874b97927af44
-data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
-data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
-data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
-data8 0x9c568656c0423def, 0xc4f938aec206291a
-data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
-data8 0xc52629e899dfd622, 0xf8646bf0defb759e
-data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
-data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
-data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
-data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
-data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
-data8 0xc5adf561b91e110a, 0xf90f832c2700c160
-data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
-data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
-data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
-data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
-data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
-data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
-data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
-data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
-data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
-data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
-data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
-data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
-data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
-data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
-data8 0xc70fc0117c641630, 0xfacd431644ce0e40
-data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
-data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
-data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
-data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
-data8 0xfb576c5762024805, 0x9e6ed27594550d2e
-data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
-data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
-data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
-data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
-data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
-data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
-data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
-data8 0x9ef976db07288d04, 0xc84b978847a06b87
-data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
-data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
-data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
-data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
-data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
-data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
-data8 0xfd118595143ee273, 0x9f860593d42fd7f3
-data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
-data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
-data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
-data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
-data8 0x9fd383731ca51db9, 0xc95e5112e721582a
-data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
-data8 0xc97f06bb49787677, 0xfdde8a67d2613531
-data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
-data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
-data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
-data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
-data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
-data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
-data8 0xa07d73ba65e680af, 0xca346d07b045a876
-data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
-data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
-data8 0xa0b24fe89e02602f, 0xca77068257be9bab
-data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
-data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
-data8 0xa0e77200215909e6, 0xcab9f8122c99a101
-data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
-data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
-data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
-data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
-data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
-ASM_SIZE_DIRECTIVE(T_table)
-
-
-
-
-
-
-.align 32
-.global cbrt#
+LOCAL_OBJECT_START(poly_coeffs)
+
+ data8 0xaaaaaaaaaaaaaab4, 0x0000bffd // ~ -1/3: significand word, then sign/exponent word 0xbffd (sign bit set)
+ data8 0xbfbc71c71c718e45, 0xbfaf9add3c0bbb43 // ~ -1/9, ~ -5/81 if read as IEEE doubles
+ data8 0xbfa511edb93dc98d, 0xbf9ee71c45f0dfbc // ~ -10/243, ~ -22/729 if read as IEEE doubles
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+// For every entry B in the frcpa table, this table contains
+// the significands of cbrt(1/B), cbrt(2/B), cbrt(4/B).
+// The index to this table is the same as the frcpa index.
+
+LOCAL_OBJECT_START(T_table)
+
+
+ data8 0x80155c748c374836, 0xa160019ed37fb4ae
+ data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
+ data8 0xa1960b5966da4608, 0xcb95f333968ad59b
+ data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
+ data8 0xcbda64292d3ffd97, 0x8096b586974669b1
+ data8 0xa202f97995b69c0d, 0xcc1f3184af961596
+ data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
+ data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
+ data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
+ data8 0x81149add67c2d208, 0xa2a197e5d10465cb
+ data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
+ data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
+ data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
+ data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
+ data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
+ data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
+ data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
+ data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
+ data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
+ data8 0xce6e0be0cd551a61, 0x823880f78e70b805
+ data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
+ data8 0x826097a62a8e5200, 0xa443df0e53df577a
+ data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
+ data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
+ data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
+ data8 0xcf763c47ee869f00, 0x82da06a527b18937
+ data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
+ data8 0x8302e60b635ab394, 0xa5105d46152c938a
+ data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
+ data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
+ data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
+ data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
+ data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
+ data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
+ data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
+ data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
+ data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
+ data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
+ data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
+ data8 0x844510461ff14209, 0xa6a6444aa0243c0b
+ data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
+ data8 0xa6dc094d10f25792, 0xd23ad555f773f059
+ data8 0x84947e18234f3294, 0xa70a574cc02bba69
+ data8 0xd2752c7039a5bf73, 0x84bf92755825045a
+ data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
+ data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
+ data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
+ data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
+ data8 0x85359d5d91768427, 0xa7d5579ae5164b85
+ data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
+ data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
+ data8 0x858104f0c415f79a, 0xa8345895e5250a5a
+ data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
+ data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
+ data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
+ data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
+ data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
+ data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
+ data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
+ data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
+ data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
+ data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
+ data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
+ data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
+ data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
+ data8 0xa9ea8686f556f645, 0xd614b539c6194104
+ data8 0x870453c845acf90f, 0xaa1c52d17906bb19
+ data8 0xd6537310e224283f, 0x872c089a1e90342c
+ data8 0xaa4e59b046dab887, 0xd6927ab62244c917
+ data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
+ data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
+ data8 0xaab319102f3f9b33, 0xd71169cea98fdded
+ data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
+ data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
+ data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
+ data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
+ data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
+ data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
+ data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
+ data8 0xd83e38838648d815, 0x885bc559e5e1c081
+ data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
+ data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
+ data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
+ data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
+ data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
+ data8 0xd92432bd5a173685, 0x88f4356166bd590e
+ data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
+ data8 0x89173a0acf5ce026, 0xacb93703ff51571e
+ data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
+ data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
+ data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
+ data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
+ data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
+ data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
+ data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
+ data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
+ data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
+ data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
+ data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
+ data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
+ data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
+ data8 0xae5794122b638df9, 0xdba843ded7151ea1
+ data8 0x8a849aba14274764, 0xae858fda8137ae0a
+ data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
+ data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
+ data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
+ data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
+ data8 0xaf10a899d3235fe7, 0xdc917398f2797814
+ data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
+ data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
+ data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
+ data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
+ data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
+ data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
+ data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
+ data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
+ data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
+ data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
+ data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
+ data8 0xb078f3ab1d701c65, 0xde576480262399bc
+ data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
+ data8 0xde943789645933c8, 0x8c5dc4c4f7706032
+ data8 0xb0d9b624d62ec856, 0xded14d58139a28af
+ data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
+ data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
+ data8 0xb131821882f5540a, 0xdf3feb44d723a713
+ data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
+ data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
+ data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
+ data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
+ data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
+ data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
+ data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
+ data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
+ data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
+ data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
+ data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
+ data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
+ data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
+ data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
+ data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
+ data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
+ data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
+ data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
+ data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
+ data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
+ data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
+ data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
+ data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
+ data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
+ data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
+ data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
+ data8 0xb43da8e9d163e1af, 0xe316d93615862714
+ data8 0x8f385c95d696b817, 0xb47233773b84d425
+ data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
+ data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
+ data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
+ data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
+ data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
+ data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
+ data8 0xe42eeca17c62886c, 0x8fe117499e356095
+ data8 0xb546c9616087ab9c, 0xe464e32943446305
+ data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
+ data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
+ data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
+ data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
+ data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
+ data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
+ data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
+ data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
+ data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
+ data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
+ data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
+ data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
+ data8 0x9110021e7b516f0a, 0xb6c47044075b4142
+ data8 0xe645bd1544c7ea51, 0x912a708a39be9075
+ data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
+ data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
+ data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
+ data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
+ data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
+ data8 0xe70a9136a7403039, 0x91afbc299ed0295d
+ data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
+ data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
+ data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
+ data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
+ data8 0x9212b5fcac537c19, 0xb80a6226904045e2
+ data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
+ data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
+ data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
+ data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
+ data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
+ data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
+ data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
+ data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
+ data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
+ data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
+ data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
+ data8 0x931379a403be5c16, 0xb94de2d841a184c2
+ data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
+ data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
+ data8 0x9354c71412c69486, 0xb9a0297f172665e3
+ data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
+ data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
+ data8 0x93968919f6e7975d, 0xb9f3030951267208
+ data8 0xea480963fd394197, 0x93bc516fdd4680c9
+ data8 0xba229d6a618e7c59, 0xea84034425f27484
+ data8 0x93d8c123d9be59b2, 0xba467144459f9855
+ data8 0xeab12713138dd1cc, 0x93f546c955e60076
+ data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
+ data8 0x941b70a65879079f, 0xba9a76056b67ee7a
+ data8 0xeb1b0268343b121b, 0x943829f337410591
+ data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
+ data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
+ data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
+ data8 0xbb1385a23be24e57, 0xebb389645f222f62
+ data8 0x94988aeb23470f86, 0xbb3814975e17c680
+ data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
+ data8 0xbb5cc031009bf467, 0xec0fcc9321024509
+ data8 0x94d2d7a9170d8b42, 0xbb81889680024764
+ data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
+ data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
+ data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
+ data8 0xecaad5278824e453, 0x9534cefa625fcb3a
+ data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
+ data8 0x955265405c491a25, 0xbc223d88cfc88eee
+ data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
+ data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
+ data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
+ data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
+ data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
+ data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
+ data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
+ data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
+ data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
+ data8 0xee357ead791fc670, 0x962e350575b409c5
+ data8 0xbd372f8598620f19, 0xee658cb3c134a463
+ data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
+ data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
+ data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
+ data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
+ data8 0xeef6a0da64a014ac, 0x96a8426705198795
+ data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
+ data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
+ data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
+ data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
+ data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
+ data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
+ data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
+ data8 0x97430782be323831, 0xbe93f5b41d047cf7
+ data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
+ data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
+ data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
+ data8 0xf0805c944d827454, 0x97a117ffd0f48e46
+ data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
+ data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
+ data8 0xf0e46442e76f6569, 0x97e0505a8637a036
+ data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
+ data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
+ data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
+ data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
+ data8 0x98354085054fd204, 0xbfc52428bec6e72f
+ data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
+ data8 0xbfed838fddab024b, 0xf1d0593311db1757
+ data8 0x987571fffb7f94f6, 0xc016050c0420981a
+ data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
+ data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
+ data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
+ data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
+ data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
+ data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
+ data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
+ data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
+ data8 0x9922b8218160967a, 0xc0f054ca33eb3437
+ data8 0xf31670135ab9cc0f, 0x99438d686f75779d
+ data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
+ data8 0x99647eea131fa20b, 0xc1433453de2033ff
+ data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
+ data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
+ data8 0x999ba5f14f8add02, 0xc188b130431d80e6
+ data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
+ data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
+ data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
+ data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
+ data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
+ data8 0x9a16154eb445c873, 0xc222f35a87b415ba
+ data8 0xf498c1076015faf8, 0x9a2c822ec198d667
+ data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
+ data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
+ data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
+ data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
+ data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
+ data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
+ data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
+ data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
+ data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
+ data8 0xc323938449a2587e, 0xf5dc1501f324a812
+ data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
+ data8 0xf6006bee86b5589e, 0x9b1b19033be35730
+ data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
+ data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
+ data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
+ data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
+ data8 0x9b77854e6c661200, 0xc3e0410243b97383
+ data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
+ data8 0xc3fd890709833d37, 0xf6eeb177472cedae
+ data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
+ data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
+ data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
+ data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
+ data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
+ data8 0xc490f9a94695ba14, 0xf7a874b97927af44
+ data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
+ data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
+ data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
+ data8 0x9c568656c0423def, 0xc4f938aec206291a
+ data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
+ data8 0xc52629e899dfd622, 0xf8646bf0defb759e
+ data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
+ data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
+ data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
+ data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
+ data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
+ data8 0xc5adf561b91e110a, 0xf90f832c2700c160
+ data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
+ data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
+ data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
+ data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
+ data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
+ data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
+ data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
+ data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
+ data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
+ data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
+ data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
+ data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
+ data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
+ data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
+ data8 0xc70fc0117c641630, 0xfacd431644ce0e40
+ data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
+ data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
+ data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
+ data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
+ data8 0xfb576c5762024805, 0x9e6ed27594550d2e
+ data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
+ data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
+ data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
+ data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
+ data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
+ data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
+ data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
+ data8 0x9ef976db07288d04, 0xc84b978847a06b87
+ data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
+ data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
+ data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
+ data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
+ data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
+ data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
+ data8 0xfd118595143ee273, 0x9f860593d42fd7f3
+ data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
+ data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
+ data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
+ data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
+ data8 0x9fd383731ca51db9, 0xc95e5112e721582a
+ data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
+ data8 0xc97f06bb49787677, 0xfdde8a67d2613531
+ data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
+ data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
+ data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
+ data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
+ data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
+ data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
+ data8 0xa07d73ba65e680af, 0xca346d07b045a876
+ data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
+ data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
+ data8 0xa0b24fe89e02602f, 0xca77068257be9bab
+ data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
+ data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
+ data8 0xa0e77200215909e6, 0xcab9f8122c99a101
+ data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
+ data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
+ data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
+ data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
+ data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
+LOCAL_OBJECT_END(T_table)
+
+
+
+
+
+
.section .text
-.proc cbrt#
-.align 32
-cbrt:
-
-
-{ .mfi
- // get significand
- getf.sig r23=f8
- // will continue only for normal/denormal numbers
- (p0) fclass.nm.unc p12,p0 = f8, 0x1b
- // r2 = pointer to C_1,...,C_5 followed by T_table
- addl r2 = @ltoff(poly_coeffs), gp
+GLOBAL_LIBM_ENTRY(cbrt)
+
+
+{.mfi
+ // get significand
+ getf.sig GR_SIGNIF = f8
+ // normalize a
+ fma.s1 FR_ARG = f8, f1, f0
+ // GR_GP = pointer to C_1,..., C_5 followed by T_table
+ addl GR_GP = @ltoff(poly_coeffs), gp ;;
}
+
{.mfi
- // get exponent
- getf.exp r24=f8
- // normalize a
- fma.s1 f14=f8,f1,f0
- // r29=bias-((2^{12}-1)/3) -63=0xffff-0x555-0x3f=0xfa6b
- mov r29=0xfa6b;;
+ // get exponent
+ getf.exp GR_ARGEXP = f8
+ // will continue only for normal/denormal numbers
+ fclass.m.unc p12, p13 = f8, 0x1e7
+ // GR_CONST4 = bias-((2^{12}-1)/3)-63 = 0xffff-0x555-0x3f = 0xfa6b
+ mov GR_CONST4 = 0xfa6b ;;
}
+
{.mlx
- mov r25=0x20000
- // r28=2^52
- movl r28=0x8000000000000000;;
-}
-{.mfb
- // load start address for C_1,...,C_5 followed by T_table
- ld8 r3=[r2]
- (p12) fma.d.s0 f8=f8,f1,f0
- (p12) br.ret.spnt b0
+ mov GR_CONST2 = 0x20000
+ // GR_CONST3 = 2^63
+ movl GR_CONST3 = 0x8000000000000000 ;;
}
+
+.pred.rel "mutex", p12, p13
{.mfi
- nop.m 0
- // y=frcpa(a)
- frcpa.s0 f8,p6=f1,f8
- // p7=1 if denormal input
- cmp.gtu p7,p0=r28,r23;;
+ // load start address for C_1,..., C_5 followed by T_table
+ ld8 GR_ADDR = [ GR_GP ]
+ // y = frcpa(a)
+ (p13) frcpa.s0 f8, p0 = f1, f8
+ // p7 = 1 if denormal input
+ cmp.gtu p7, p0 = GR_CONST3, GR_SIGNIF
+}
+{.mfb
+ nop.m 0
+ // if argument is 0, +/-Infinity, NaN, or NaTVal, then return
+ (p12) fma.d.s0 f8 = f8, f1, f0
+ (p12) br.ret.spnt b0 ;;
}
+
{.mmi
- // get exponent
- (p7) getf.exp r24=f14
- // get normalized significand
- (p7) getf.sig r23=f14
- // r28=bias-(2^{12}-1)
- mov r28=0xf000;;
+ // get exponent (for denormal input)
+ (p7) getf.exp GR_ARGEXP = FR_ARG
+ // get normalized significand (for denormal input)
+ (p7) getf.sig GR_SIGNIF = FR_ARG
+ // GR_CONST1 = bias-(2^{12}-1)
+ mov GR_CONST1 = 0xf000 ;;
}
+
{.mii
- // get r26=sign
- and r26=r24,r25
- // eliminate leading 1 from r23=1st table index
- shl r23=r23,1
- // eliminate sign from exponent (r25)
- andcm r25=r24,r25;;
+ // get GR_SIGN = sign
+ and GR_SIGN = GR_ARGEXP, GR_CONST2
+ // eliminate leading 1 from GR_I1 = 1st table index
+ shl GR_I1 = GR_SIGNIF, 1
+ // eliminate sign from exponent
+ andcm GR_EXP = GR_ARGEXP, GR_CONST2 ;;
}
+
{.mib
- add r2=32,r3
- // r23=1st table index (y_index,8 bits)
- shr.u r23=r23,56
- nop.b 0
+ add GR_ADDR2 = 32, GR_ADDR
+ // GR_IT1 = 1st table index (y_index, 8 bits)
+ shr.u GR_IT1 = GR_I1, 56
+ nop.b 0
}
{.mib
- // load C_1
- ldfe f7=[r3],16
- // subtract bias from r25=exponent
- sub r25=r25,r28
- nop.b 0;;
+ // load C_1
+ ldfe FR_COEFF1 = [ GR_ADDR ], 16
+ // subtract bias from GR_EXPON = exponent
+ sub GR_EXPON = GR_EXP, GR_CONST1
+ nop.b 0 ;;
}
+
{.mib
- // load C_2, C_3
- ldfpd f9,f10=[r3]
- // 1: exponent*=5; // (2^{16}-1)/3=0x5555
- shladd r24=r25,2,r25
- nop.b 0
+ // load C_2, C_3
+ ldfpd FR_COEFF2, FR_COEFF3 = [ GR_ADDR ]
+ // 1: exponent *= 5; // (2^{16}-1)/3 = 0x5555
+ shladd GR_TMP1 = GR_EXPON, 2, GR_EXPON
+ nop.b 0
}
{.mib
- // load C_4, C_5
- ldfpd f11,f12=[r2],16
- // r23=3*y_index
- shladd r23=r23,1,r23
- nop.b 0;;
+ // load C_4, C_5
+ ldfpd FR_COEFF4, FR_COEFF5 = [ GR_ADDR2 ], 16
+ // GR_TMP2 = 3*y_index
+ shladd GR_TMP2 = GR_IT1, 1, GR_IT1
+ nop.b 0 ;;
}
{.mfi
- // r30=(5*expon)*16+5*expon=(0x55)*expon
- shladd r30=r24,4,r24
- // r=1-a*y
- (p6) fnma.s1 f6=f8,f14,f1
- // adjust T_table pointer by 1st index
- shladd r2=r23,3,r2;;
+ // GR_TMP6 = (5*expon)*16+5*expon = (0x55)*expon
+ shladd GR_TMP6 = GR_TMP1, 4, GR_TMP1
+ // r = 1-a*y
+ fnma.s1 FR_R = f8, FR_ARG, f1
+ // adjust T_table pointer by 1st index
+ shladd GR_ITB1 = GR_TMP2, 3, GR_ADDR2 ;;
}
{.mii
- nop.m 0
- // r24=(0x5500)*expon
- shl r24=r30,8;;
- // r24=(0x5555)*expon
- add r24=r24,r30;;
+ // eliminate leading 1 from significand
+ add GR_SIGNIF2 = GR_SIGNIF, GR_SIGNIF
+ // GR_TMP3 = (0x5500)*expon
+ shl GR_TMP3 = GR_TMP6, 8 ;;
+ // GR_TMP4 = (0x5555)*expon
+ add GR_TMP4 = GR_TMP3, GR_TMP6 ;;
}
+
{.mii
- // r24=(0x5556)*expon // 0x5556=(2^{16}+2)/3
- add r24=r24,r25
- nop.i 0;;
- // r24=floor(expon/3)
- shr r24=r24,16;;
+ // GR_TMP5 = (0x5556)*expon // 0x5556 = (2^{16}+2)/3
+ add GR_TMP5 = GR_TMP4, GR_EXPON
+ nop.i 0 ;;
+ // GR_EXP_BY_3 = floor(expon/3)
+ shr GR_EXP_BY_3 = GR_TMP5, 16 ;;
}
+
{.mfi
- // r28=3*exponent
- shladd r28=r24,1,r24
- // r2=r*r
- (p6) fma.s1 f13=f6,f6,f0
- // bias exponent
- add r24=r29,r24;;
+ // GR_TMP6 = 3*floor(expon/3)
+ shladd GR_TMP6 = GR_EXP_BY_3, 1, GR_EXP_BY_3
+ // r*r
+ fma.s1 FR_R2 = FR_R, FR_R, f0
+ // bias exponent
+ add GR_EBIAS = GR_CONST4, GR_EXP_BY_3 ;;
}
+
{.mfi
- // get remainder of exponent/3 : r25-r28
- sub r25=r25,r28
- // c2+c3*r
- (p6) fma.s1 f9=f10,f6,f9
- // add sign to exponent
- or r24=r24,r26
+ // get remainder of exponent/3
+ sub GR_REM = GR_EXPON, GR_TMP6
+ // c2+c3*r
+ fma.s1 FR_P23 = FR_COEFF3, FR_R, FR_COEFF2
+ nop.i 0
}
{.mfi
- nop.m 0
- // c4+c5*r
- (p6) fma.s1 f11=f12,f6,f11
- nop.i 0;;
+ // add sign to exponent
+ or GR_SEXP = GR_EBIAS, GR_SIGN
+ // c4+c5*r
+ fma.s1 FR_P45 = FR_COEFF5, FR_R, FR_COEFF4
+ mov GR_TMP63 = 63+0xffff ;;
}
+
{.mmi
- // f14=sign*2^{exponent/3}
- (p6) setf.exp f14=r24
- // adjust T_table pointer by 2nd index
- shladd r2=r25,3,r2
- nop.i 0;;
+ // FR_2EXP = sign*2^{exponent/3}
+ setf.exp FR_2EXP = GR_SEXP
+ // adjust T_table pointer by 2nd index
+ shladd GR_INDEX = GR_REM, 3, GR_ITB1
+ // is the argument of the form 2^(3*k) ?
+ // get (significand - leading 1) | (exponent mod 3)
+ or GR_TEST = GR_REM, GR_SIGNIF2 ;;
}
+
{.mmi
- // load T
- (p6) ldf8 f8=[r2]
- nop.m 0
- nop.i 0;;
+ // 2^63
+ setf.exp FR_TMP63 = GR_TMP63
+ // load T
+ ldf8 f8 = [ GR_INDEX ]
+ // is the argument of the form 2^(3*k) ?
+ cmp.eq p14, p0 = GR_TEST, r0 ;;
}
{.mfi
- nop.m 0
- // (c2+c3*r)+r^2*(c4+c5*r)
- (p6) fma.s1 f9=f11,f13,f9
- nop.i 0
+ nop.m 0
+ // (c2+c3*r)+r^2*(c4+c5*r)
+ fma.s1 FR_P25 = FR_P45, FR_R2, FR_P23
+ nop.i 0
}
{.mfi
- nop.m 0
- // c1*r
- (p6) fma.s1 f7=f7,f6,f0
- nop.i 0;;
+ nop.m 0
+ // c1*r
+ fma.s1 FR_P1 = FR_COEFF1, FR_R, f0
+ nop.i 0 ;;
+}
+
+{.mfb
+ nop.m 0
+ (p14) fma.d.s0 f8 = FR_2EXP, FR_TMP63, f0
+ (p14) br.ret.spnt b0 ;;
}
{.mfi
- nop.m 0
- // P=c1*r+r^2*[(c2+c3*r)+r^2*(c4+c5*r)]
- (p6) fma.s1 f9=f9,f13,f7
- nop.i 0
+ nop.m 0
+ // P = c1*r+r^2* [ (c2+c3*r)+r^2*(c4+c5*r) ]
+ fma.s1 FR_P15 = FR_P25, FR_R2, FR_P1
+ nop.i 0
}
{.mfi
- nop.m 0
- // T'=T*(2^exp)
- (p6) fma.s1 f8=f8,f14,f0
- nop.i 0;;
+ nop.m 0
+ // T' = T*(2^exp)
+ fma.s1 f8 = f8, FR_2EXP, f0
+ nop.i 0 ;;
}
+
{.mfb
- nop.m 0
- // result = T'-T'*P
- (p6) fnma.d.s0 f8=f8,f9,f8
- br.ret.sptk b0;;
+ nop.m 0
+ // result = T'+T'*P
+ fma.d.s0 f8 = f8, FR_P15, f8
+ br.ret.sptk b0 ;;
}
-.endp cbrt
-ASM_SIZE_DIRECTIVE(cbrt)
+
+
+GLOBAL_LIBM_END(cbrt)
diff --git a/sysdeps/ia64/fpu/s_cbrtf.S b/sysdeps/ia64/fpu/s_cbrtf.S
index 20167797b8..c8c6500b25 100644
--- a/sysdeps/ia64/fpu/s_cbrtf.S
+++ b/sysdeps/ia64/fpu/s_cbrtf.S
@@ -1,11 +1,10 @@
-.file "cbrtf.asm"
+.file "cbrtf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang
-// of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,27 +20,30 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 5/18/00: New version (modified algorithm)
+// 02/02/00 Initial version
+// 05/18/00 New version (modified algorithm)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/28/03 Rescheduled some instructions for better performance
+// on Itanium 2, and reformatted
//
// API
//==============================================================
@@ -53,616 +55,710 @@
//
// Implementation
//
-// cbrt(a) = cbrt(a y) / cbrt(y)
-// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
+// Let y= frcpa(a), where a is the argument
//
-// where y = frcpa(a).
+// cbrt(a)= cbrt(a*y)/cbrt(y) = cbrt(1 - (1-a*y)) * (1/cbrt(y))
//
-// * cbrt(1 - (1 - a y)) is approximated by a degree-2 polynomial
-//
-// 1 - (1/3)*r - (1/9)*r^2
-//
-// in r = 1 - a y.
+// For all values of y, the 3 possible significands of 1/cbrt(y)
+// are stored in a table (T0) to 64 bits of accuracy. (There are
+// 3 possible significands because the exponent of y modulo 3
+// can be 0, 1, or 2.)
//
-// * The values 1/cbrt(y) are stored in a table of constants T0
-// to 64 bits of accuracy
+//
+// * cbrt(1 - (1-a*y)) is approximated by a degree-2 polynomial
+//
+// 1 - (1/3)*r - (1/9)*r^2
+//
+// in r = 1-a*y.
//
// The table values are stored for three exponent values and are
-// then multiplied by e/3 where e is the exponent of the input number.
+// then multiplied by 2^(e/3) where e is the exponent of the input number.
// This computation is carried out in parallel with the polynomial
// evaluation:
//
-// T = 2^(e/3) * T0
+// T= 2^(e/3) * T0
//===============
-// input = x
-// C = frcpa(x)
-// r = 1 - C * x
+// input= x
+// C= frcpa(x)
+// r= 1 - C * x
//
-// Special values
+// Special values
//==============================================================
// Registers used
//==============================================================
-// f6-f15
-// r2, r23-r26, r28-r30
-// p6,p7,p8,p12
+// p6, p7, p8, p12
+
+ FR_R = f6
+ FR_COEFF1 = f7
+ FR_COEFF2 = f9
+ FR_T0 = f10
+ FR_T1 = f11
+ FR_T2 = f12
+ FR_2M63 = f13
+ FR_ARG = f14
+ FR_Y = f15
+
+ GR_GP = r2
+ GR_ADDR = r2
+ GR_TMP5 = r3
+ GR_CONST = r8
+ GR_TMP63 = r8
+ GR_SIGN = r9
+ GR_CT2 = r10
+ GR_CT3 = r11
+ GR_TMP4 = r14
+ GR_EBIAS3 = r15
+ GR_REM = r16
+ GR_SEXP = r17
+ GR_2P63 = r18
+ GR_SIGNIF = r19
+ GR_I1 = r20
+ GR_EBIAS = r21
+ GR_EXP = r22
+ GR_IT1 = r23
+ GR_E5 = r24
+ GR_IT1_3 = r25
+ GR_TP1 = r26
+ GR_TMP = r27
+ GR_TMP2 = r28
+ GR_TMP3 = r29
+ GR_EXP3 = r30
+ GR_ARGEXP = r31
+
+
-#include "libm_support.h"
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-poly_coeffs:
-ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
-data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // 1/3
-data8 0xe38e38e38e38e38e, 0x00003ffb // 1/9
-ASM_SIZE_DIRECTIVE(poly_coeffs)
-
-
-T_table:
-ASM_TYPE_DIRECTIVE(T_table,@object)
-
-data8 0x80155c748c374836, 0xa160019ed37fb4ae
-data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
-data8 0xa1960b5966da4608, 0xcb95f333968ad59b
-data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
-data8 0xcbda64292d3ffd97, 0x8096b586974669b1
-data8 0xa202f97995b69c0d, 0xcc1f3184af961596
-data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
-data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
-data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
-data8 0x81149add67c2d208, 0xa2a197e5d10465cb
-data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
-data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
-data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
-data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
-data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
-data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
-data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
-data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
-data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
-data8 0xce6e0be0cd551a61, 0x823880f78e70b805
-data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
-data8 0x826097a62a8e5200, 0xa443df0e53df577a
-data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
-data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
-data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
-data8 0xcf763c47ee869f00, 0x82da06a527b18937
-data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
-data8 0x8302e60b635ab394, 0xa5105d46152c938a
-data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
-data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
-data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
-data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
-data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
-data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
-data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
-data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
-data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
-data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
-data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
-data8 0x844510461ff14209, 0xa6a6444aa0243c0b
-data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
-data8 0xa6dc094d10f25792, 0xd23ad555f773f059
-data8 0x84947e18234f3294, 0xa70a574cc02bba69
-data8 0xd2752c7039a5bf73, 0x84bf92755825045a
-data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
-data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
-data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
-data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
-data8 0x85359d5d91768427, 0xa7d5579ae5164b85
-data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
-data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
-data8 0x858104f0c415f79a, 0xa8345895e5250a5a
-data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
-data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
-data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
-data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
-data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
-data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
-data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
-data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
-data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
-data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
-data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
-data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
-data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
-data8 0xa9ea8686f556f645, 0xd614b539c6194104
-data8 0x870453c845acf90f, 0xaa1c52d17906bb19
-data8 0xd6537310e224283f, 0x872c089a1e90342c
-data8 0xaa4e59b046dab887, 0xd6927ab62244c917
-data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
-data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
-data8 0xaab319102f3f9b33, 0xd71169cea98fdded
-data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
-data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
-data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
-data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
-data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
-data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
-data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
-data8 0xd83e38838648d815, 0x885bc559e5e1c081
-data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
-data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
-data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
-data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
-data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
-data8 0xd92432bd5a173685, 0x88f4356166bd590e
-data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
-data8 0x89173a0acf5ce026, 0xacb93703ff51571e
-data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
-data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
-data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
-data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
-data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
-data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
-data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
-data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
-data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
-data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
-data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
-data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
-data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
-data8 0xae5794122b638df9, 0xdba843ded7151ea1
-data8 0x8a849aba14274764, 0xae858fda8137ae0a
-data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
-data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
-data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
-data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
-data8 0xaf10a899d3235fe7, 0xdc917398f2797814
-data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
-data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
-data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
-data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
-data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
-data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
-data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
-data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
-data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
-data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
-data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
-data8 0xb078f3ab1d701c65, 0xde576480262399bc
-data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
-data8 0xde943789645933c8, 0x8c5dc4c4f7706032
-data8 0xb0d9b624d62ec856, 0xded14d58139a28af
-data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
-data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
-data8 0xb131821882f5540a, 0xdf3feb44d723a713
-data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
-data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
-data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
-data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
-data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
-data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
-data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
-data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
-data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
-data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
-data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
-data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
-data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
-data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
-data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
-data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
-data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
-data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
-data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
-data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
-data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
-data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
-data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
-data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
-data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
-data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
-data8 0xb43da8e9d163e1af, 0xe316d93615862714
-data8 0x8f385c95d696b817, 0xb47233773b84d425
-data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
-data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
-data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
-data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
-data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
-data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
-data8 0xe42eeca17c62886c, 0x8fe117499e356095
-data8 0xb546c9616087ab9c, 0xe464e32943446305
-data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
-data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
-data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
-data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
-data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
-data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
-data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
-data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
-data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
-data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
-data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
-data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
-data8 0x9110021e7b516f0a, 0xb6c47044075b4142
-data8 0xe645bd1544c7ea51, 0x912a708a39be9075
-data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
-data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
-data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
-data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
-data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
-data8 0xe70a9136a7403039, 0x91afbc299ed0295d
-data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
-data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
-data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
-data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
-data8 0x9212b5fcac537c19, 0xb80a6226904045e2
-data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
-data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
-data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
-data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
-data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
-data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
-data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
-data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
-data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
-data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
-data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
-data8 0x931379a403be5c16, 0xb94de2d841a184c2
-data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
-data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
-data8 0x9354c71412c69486, 0xb9a0297f172665e3
-data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
-data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
-data8 0x93968919f6e7975d, 0xb9f3030951267208
-data8 0xea480963fd394197, 0x93bc516fdd4680c9
-data8 0xba229d6a618e7c59, 0xea84034425f27484
-data8 0x93d8c123d9be59b2, 0xba467144459f9855
-data8 0xeab12713138dd1cc, 0x93f546c955e60076
-data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
-data8 0x941b70a65879079f, 0xba9a76056b67ee7a
-data8 0xeb1b0268343b121b, 0x943829f337410591
-data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
-data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
-data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
-data8 0xbb1385a23be24e57, 0xebb389645f222f62
-data8 0x94988aeb23470f86, 0xbb3814975e17c680
-data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
-data8 0xbb5cc031009bf467, 0xec0fcc9321024509
-data8 0x94d2d7a9170d8b42, 0xbb81889680024764
-data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
-data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
-data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
-data8 0xecaad5278824e453, 0x9534cefa625fcb3a
-data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
-data8 0x955265405c491a25, 0xbc223d88cfc88eee
-data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
-data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
-data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
-data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
-data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
-data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
-data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
-data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
-data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
-data8 0xee357ead791fc670, 0x962e350575b409c5
-data8 0xbd372f8598620f19, 0xee658cb3c134a463
-data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
-data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
-data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
-data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
-data8 0xeef6a0da64a014ac, 0x96a8426705198795
-data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
-data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
-data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
-data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
-data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
-data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
-data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
-data8 0x97430782be323831, 0xbe93f5b41d047cf7
-data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
-data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
-data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
-data8 0xf0805c944d827454, 0x97a117ffd0f48e46
-data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
-data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
-data8 0xf0e46442e76f6569, 0x97e0505a8637a036
-data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
-data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
-data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
-data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
-data8 0x98354085054fd204, 0xbfc52428bec6e72f
-data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
-data8 0xbfed838fddab024b, 0xf1d0593311db1757
-data8 0x987571fffb7f94f6, 0xc016050c0420981a
-data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
-data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
-data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
-data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
-data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
-data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
-data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
-data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
-data8 0x9922b8218160967a, 0xc0f054ca33eb3437
-data8 0xf31670135ab9cc0f, 0x99438d686f75779d
-data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
-data8 0x99647eea131fa20b, 0xc1433453de2033ff
-data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
-data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
-data8 0x999ba5f14f8add02, 0xc188b130431d80e6
-data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
-data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
-data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
-data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
-data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
-data8 0x9a16154eb445c873, 0xc222f35a87b415ba
-data8 0xf498c1076015faf8, 0x9a2c822ec198d667
-data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
-data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
-data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
-data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
-data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
-data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
-data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
-data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
-data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
-data8 0xc323938449a2587e, 0xf5dc1501f324a812
-data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
-data8 0xf6006bee86b5589e, 0x9b1b19033be35730
-data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
-data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
-data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
-data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
-data8 0x9b77854e6c661200, 0xc3e0410243b97383
-data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
-data8 0xc3fd890709833d37, 0xf6eeb177472cedae
-data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
-data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
-data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
-data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
-data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
-data8 0xc490f9a94695ba14, 0xf7a874b97927af44
-data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
-data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
-data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
-data8 0x9c568656c0423def, 0xc4f938aec206291a
-data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
-data8 0xc52629e899dfd622, 0xf8646bf0defb759e
-data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
-data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
-data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
-data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
-data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
-data8 0xc5adf561b91e110a, 0xf90f832c2700c160
-data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
-data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
-data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
-data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
-data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
-data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
-data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
-data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
-data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
-data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
-data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
-data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
-data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
-data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
-data8 0xc70fc0117c641630, 0xfacd431644ce0e40
-data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
-data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
-data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
-data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
-data8 0xfb576c5762024805, 0x9e6ed27594550d2e
-data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
-data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
-data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
-data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
-data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
-data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
-data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
-data8 0x9ef976db07288d04, 0xc84b978847a06b87
-data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
-data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
-data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
-data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
-data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
-data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
-data8 0xfd118595143ee273, 0x9f860593d42fd7f3
-data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
-data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
-data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
-data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
-data8 0x9fd383731ca51db9, 0xc95e5112e721582a
-data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
-data8 0xc97f06bb49787677, 0xfdde8a67d2613531
-data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
-data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
-data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
-data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
-data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
-data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
-data8 0xa07d73ba65e680af, 0xca346d07b045a876
-data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
-data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
-data8 0xa0b24fe89e02602f, 0xca77068257be9bab
-data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
-data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
-data8 0xa0e77200215909e6, 0xcab9f8122c99a101
-data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
-data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
-data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
-data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
-data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
-ASM_SIZE_DIRECTIVE(T_table)
-
-
-
-
-
-
-.align 32
-.global cbrtf#
+LOCAL_OBJECT_START(poly_coeffs)
+
+ data8 0xaaaab19b7e1f5ef9, 0x00003ffd // ~ 1/3
+ data8 0xe38e5192a5a8e56c, 0x00003ffb // ~ 1/9
+LOCAL_OBJECT_END(poly_coeffs)
+
+// For every entry B in the frcpa table, this table contains
+// the significands of cbrt(1/B), cbrt(2/B), cbrt(4/B).
+// The index to this table is the same as the frcpa index.
+
+LOCAL_OBJECT_START(T_table)
+
+ data8 0x80155c748c374836, 0xa160019ed37fb4ae
+ data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
+ data8 0xa1960b5966da4608, 0xcb95f333968ad59b
+ data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
+ data8 0xcbda64292d3ffd97, 0x8096b586974669b1
+ data8 0xa202f97995b69c0d, 0xcc1f3184af961596
+ data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
+ data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
+ data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
+ data8 0x81149add67c2d208, 0xa2a197e5d10465cb
+ data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
+ data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
+ data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
+ data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
+ data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
+ data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
+ data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
+ data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
+ data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
+ data8 0xce6e0be0cd551a61, 0x823880f78e70b805
+ data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
+ data8 0x826097a62a8e5200, 0xa443df0e53df577a
+ data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
+ data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
+ data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
+ data8 0xcf763c47ee869f00, 0x82da06a527b18937
+ data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
+ data8 0x8302e60b635ab394, 0xa5105d46152c938a
+ data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
+ data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
+ data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
+ data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
+ data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
+ data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
+ data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
+ data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
+ data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
+ data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
+ data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
+ data8 0x844510461ff14209, 0xa6a6444aa0243c0b
+ data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
+ data8 0xa6dc094d10f25792, 0xd23ad555f773f059
+ data8 0x84947e18234f3294, 0xa70a574cc02bba69
+ data8 0xd2752c7039a5bf73, 0x84bf92755825045a
+ data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
+ data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
+ data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
+ data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
+ data8 0x85359d5d91768427, 0xa7d5579ae5164b85
+ data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
+ data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
+ data8 0x858104f0c415f79a, 0xa8345895e5250a5a
+ data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
+ data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
+ data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
+ data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
+ data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
+ data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
+ data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
+ data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
+ data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
+ data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
+ data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
+ data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
+ data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
+ data8 0xa9ea8686f556f645, 0xd614b539c6194104
+ data8 0x870453c845acf90f, 0xaa1c52d17906bb19
+ data8 0xd6537310e224283f, 0x872c089a1e90342c
+ data8 0xaa4e59b046dab887, 0xd6927ab62244c917
+ data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
+ data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
+ data8 0xaab319102f3f9b33, 0xd71169cea98fdded
+ data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
+ data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
+ data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
+ data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
+ data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
+ data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
+ data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
+ data8 0xd83e38838648d815, 0x885bc559e5e1c081
+ data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
+ data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
+ data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
+ data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
+ data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
+ data8 0xd92432bd5a173685, 0x88f4356166bd590e
+ data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
+ data8 0x89173a0acf5ce026, 0xacb93703ff51571e
+ data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
+ data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
+ data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
+ data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
+ data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
+ data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
+ data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
+ data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
+ data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
+ data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
+ data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
+ data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
+ data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
+ data8 0xae5794122b638df9, 0xdba843ded7151ea1
+ data8 0x8a849aba14274764, 0xae858fda8137ae0a
+ data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
+ data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
+ data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
+ data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
+ data8 0xaf10a899d3235fe7, 0xdc917398f2797814
+ data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
+ data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
+ data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
+ data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
+ data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
+ data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
+ data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
+ data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
+ data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
+ data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
+ data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
+ data8 0xb078f3ab1d701c65, 0xde576480262399bc
+ data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
+ data8 0xde943789645933c8, 0x8c5dc4c4f7706032
+ data8 0xb0d9b624d62ec856, 0xded14d58139a28af
+ data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
+ data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
+ data8 0xb131821882f5540a, 0xdf3feb44d723a713
+ data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
+ data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
+ data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
+ data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
+ data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
+ data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
+ data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
+ data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
+ data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
+ data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
+ data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
+ data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
+ data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
+ data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
+ data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
+ data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
+ data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
+ data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
+ data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
+ data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
+ data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
+ data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
+ data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
+ data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
+ data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
+ data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
+ data8 0xb43da8e9d163e1af, 0xe316d93615862714
+ data8 0x8f385c95d696b817, 0xb47233773b84d425
+ data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
+ data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
+ data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
+ data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
+ data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
+ data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
+ data8 0xe42eeca17c62886c, 0x8fe117499e356095
+ data8 0xb546c9616087ab9c, 0xe464e32943446305
+ data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
+ data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
+ data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
+ data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
+ data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
+ data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
+ data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
+ data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
+ data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
+ data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
+ data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
+ data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
+ data8 0x9110021e7b516f0a, 0xb6c47044075b4142
+ data8 0xe645bd1544c7ea51, 0x912a708a39be9075
+ data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
+ data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
+ data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
+ data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
+ data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
+ data8 0xe70a9136a7403039, 0x91afbc299ed0295d
+ data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
+ data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
+ data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
+ data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
+ data8 0x9212b5fcac537c19, 0xb80a6226904045e2
+ data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
+ data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
+ data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
+ data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
+ data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
+ data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
+ data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
+ data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
+ data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
+ data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
+ data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
+ data8 0x931379a403be5c16, 0xb94de2d841a184c2
+ data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
+ data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
+ data8 0x9354c71412c69486, 0xb9a0297f172665e3
+ data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
+ data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
+ data8 0x93968919f6e7975d, 0xb9f3030951267208
+ data8 0xea480963fd394197, 0x93bc516fdd4680c9
+ data8 0xba229d6a618e7c59, 0xea84034425f27484
+ data8 0x93d8c123d9be59b2, 0xba467144459f9855
+ data8 0xeab12713138dd1cc, 0x93f546c955e60076
+ data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
+ data8 0x941b70a65879079f, 0xba9a76056b67ee7a
+ data8 0xeb1b0268343b121b, 0x943829f337410591
+ data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
+ data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
+ data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
+ data8 0xbb1385a23be24e57, 0xebb389645f222f62
+ data8 0x94988aeb23470f86, 0xbb3814975e17c680
+ data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
+ data8 0xbb5cc031009bf467, 0xec0fcc9321024509
+ data8 0x94d2d7a9170d8b42, 0xbb81889680024764
+ data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
+ data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
+ data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
+ data8 0xecaad5278824e453, 0x9534cefa625fcb3a
+ data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
+ data8 0x955265405c491a25, 0xbc223d88cfc88eee
+ data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
+ data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
+ data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
+ data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
+ data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
+ data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
+ data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
+ data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
+ data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
+ data8 0xee357ead791fc670, 0x962e350575b409c5
+ data8 0xbd372f8598620f19, 0xee658cb3c134a463
+ data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
+ data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
+ data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
+ data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
+ data8 0xeef6a0da64a014ac, 0x96a8426705198795
+ data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
+ data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
+ data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
+ data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
+ data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
+ data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
+ data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
+ data8 0x97430782be323831, 0xbe93f5b41d047cf7
+ data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
+ data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
+ data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
+ data8 0xf0805c944d827454, 0x97a117ffd0f48e46
+ data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
+ data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
+ data8 0xf0e46442e76f6569, 0x97e0505a8637a036
+ data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
+ data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
+ data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
+ data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
+ data8 0x98354085054fd204, 0xbfc52428bec6e72f
+ data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
+ data8 0xbfed838fddab024b, 0xf1d0593311db1757
+ data8 0x987571fffb7f94f6, 0xc016050c0420981a
+ data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
+ data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
+ data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
+ data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
+ data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
+ data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
+ data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
+ data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
+ data8 0x9922b8218160967a, 0xc0f054ca33eb3437
+ data8 0xf31670135ab9cc0f, 0x99438d686f75779d
+ data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
+ data8 0x99647eea131fa20b, 0xc1433453de2033ff
+ data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
+ data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
+ data8 0x999ba5f14f8add02, 0xc188b130431d80e6
+ data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
+ data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
+ data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
+ data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
+ data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
+ data8 0x9a16154eb445c873, 0xc222f35a87b415ba
+ data8 0xf498c1076015faf8, 0x9a2c822ec198d667
+ data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
+ data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
+ data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
+ data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
+ data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
+ data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
+ data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
+ data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
+ data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
+ data8 0xc323938449a2587e, 0xf5dc1501f324a812
+ data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
+ data8 0xf6006bee86b5589e, 0x9b1b19033be35730
+ data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
+ data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
+ data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
+ data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
+ data8 0x9b77854e6c661200, 0xc3e0410243b97383
+ data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
+ data8 0xc3fd890709833d37, 0xf6eeb177472cedae
+ data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
+ data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
+ data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
+ data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
+ data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
+ data8 0xc490f9a94695ba14, 0xf7a874b97927af44
+ data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
+ data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
+ data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
+ data8 0x9c568656c0423def, 0xc4f938aec206291a
+ data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
+ data8 0xc52629e899dfd622, 0xf8646bf0defb759e
+ data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
+ data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
+ data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
+ data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
+ data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
+ data8 0xc5adf561b91e110a, 0xf90f832c2700c160
+ data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
+ data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
+ data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
+ data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
+ data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
+ data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
+ data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
+ data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
+ data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
+ data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
+ data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
+ data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
+ data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
+ data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
+ data8 0xc70fc0117c641630, 0xfacd431644ce0e40
+ data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
+ data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
+ data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
+ data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
+ data8 0xfb576c5762024805, 0x9e6ed27594550d2e
+ data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
+ data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
+ data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
+ data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
+ data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
+ data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
+ data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
+ data8 0x9ef976db07288d04, 0xc84b978847a06b87
+ data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
+ data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
+ data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
+ data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
+ data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
+ data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
+ data8 0xfd118595143ee273, 0x9f860593d42fd7f3
+ data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
+ data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
+ data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
+ data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
+ data8 0x9fd383731ca51db9, 0xc95e5112e721582a
+ data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
+ data8 0xc97f06bb49787677, 0xfdde8a67d2613531
+ data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
+ data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
+ data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
+ data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
+ data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
+ data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
+ data8 0xa07d73ba65e680af, 0xca346d07b045a876
+ data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
+ data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
+ data8 0xa0b24fe89e02602f, 0xca77068257be9bab
+ data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
+ data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
+ data8 0xa0e77200215909e6, 0xcab9f8122c99a101
+ data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
+ data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
+ data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
+ data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
+ data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
+LOCAL_OBJECT_END(T_table)
+
+
+
+
+
+
.section .text
-.proc cbrtf#
-.align 32
-cbrtf:
+GLOBAL_LIBM_ENTRY(cbrtf)
-{ .mfi
- getf.sig r28=f8
- // will continue only for normal/denormal numbers
-(p0) fclass.nm.unc p12,p7 = f8, 0x1b
- // r2 = pointer to C_1,C_2 followed by T_table
- addl r2 = @ltoff(poly_coeffs), gp
+{.mfi
+ getf.sig GR_SIGNIF = f8
+ // will continue only for normal/denormal numbers
+ fclass.nm.unc p12, p7 = f8, 0x1b
+ // (GR_GP, used to reach C_1, C_2 and T_table, is set in the next bundle)
+ nop.i 0
}
{.mfi
- // r29=bias-((2^8-1)/3) -63=0xffff-0x55-0x3f=0xff6b
- mov r29=0xff6b
- // normalize a
- fma.s1 f14=f8,f1,f0
- nop.i 0;;
+ addl GR_GP = @ltoff(poly_coeffs), gp
+ // normalize a
+ fma.s1 FR_ARG = f8, f1, f0
+ // GR_CT3 = bias-((2^8-1)/3) -63 = 0xffff-0x55-0x3f = 0xff6b
+ mov GR_CT3 = 0xff6b ;;
}
-{.mib
- nop.m 0
- (p7) cmp.eq p12,p0=r28,r0
- nop.b 0;;
+
+{.mmi
+ // get exponent
+ getf.exp GR_ARGEXP = f8
+ // load start address for C_1, C_2 followed by T_table
+ ld8 GR_ADDR = [ GR_GP ]
+ nop.i 0 ;;
}
-{.mfb
- // load start address for C_1,C_2 followed by T_table
- ld8 r2=[r2]
- (p12) fma.s.s0 f8=f8,f1,f0
- (p12) br.ret.spnt b0;;
+
+{.mlx
+ // check if input significand is 0
+ (p7) cmp.eq p12, p7 = GR_SIGNIF, r0
+ // GR_2P63 = 2^63
+ movl GR_2P63 = 0x8000000000000000 ;;
+}
+
+{.mfi
+ nop.m 0
+ // y = frcpa(a)
+ // p7 = 1 for normal and denormal (but non-zero) arguments
+ (p7) frcpa.s0 FR_Y, p0 = f1, f8
+ // p9 = 1 if denormal input
+ cmp.gtu p9, p0 = GR_2P63, GR_SIGNIF
}
-{.mmf
- // load C_1
- ldfe f7=[r2],16
- nop.m 0
- // y=frcpa(a)
- frcpa.s0 f8,p6=f1,f8;;
+{.mfb
+ // load C_1
+ ldfe FR_COEFF1 = [ GR_ADDR ], 16
+ // if argument is 0, +/-Infinity, or NaN, return
+ (p12) fma.s.s0 f8 = f8, f1, f0
+ (p12) br.ret.spnt b0 ;;
}
+
{.mmi
- // load C_2
- ldfe f9=[r2],16
- // r28=bias-(2^8-1)
- mov r28=0xff00
- nop.i 0;;
+ // get normalized significand (for denormal inputs only)
+ (p9) getf.sig GR_SIGNIF = FR_ARG
+ // load C_2
+ ldfe FR_COEFF2 = [ GR_ADDR ], 16
+ // GR_CT2 = bias-(2^8-1)
+ mov GR_CT2 = 0xff00
}
-{.mmi
- // get normalized significand
- getf.sig r23=f14
- // get exponent
- getf.exp r24=f14
- mov r25=0x20000;;
+
+{.mii
+ // get exponent (for denormal inputs only)
+ (p9) getf.exp GR_ARGEXP = FR_ARG
+ nop.i 0
+ mov GR_CONST = 0x20000 ;;
}
+
+
{.mii
- // get r26=sign
- and r26=r24,r25
- // eliminate leading 1 from r23=1st table index
- shl r23=r23,1
- // eliminate sign from exponent (r25)
- andcm r25=r24,r25;;
+ // get GR_SIGN = sign
+ and GR_SIGN = GR_ARGEXP, GR_CONST
+ // eliminate leading 1 from GR_I1 = 1st table index
+ shl GR_I1 = GR_SIGNIF, 1
+ // eliminate sign from exponent
+ andcm GR_EBIAS = GR_ARGEXP, GR_CONST ;;
}
+
+
{.mfi
- // subtract bias from r25=exponent
- sub r25=r25,r28
- // r=1-a*y
- (p6) fnma.s1 f6=f8,f14,f1
- // r23=1st table index (y_index8 bits)
- shr.u r23=r23,56;;
+ // subtract bias from GR_EXP = exponent
+ sub GR_EXP = GR_EBIAS, GR_CT2
+ // r = 1-a*y
+ fnma.s1 FR_R = FR_Y, FR_ARG, f1
+ // GR_IT1 = 1st table index (y_index, 8 bits)
+ shr.u GR_IT1 = GR_I1, 56 ;;
}
+
+
{.mii
- // 1: exponent*=5; // (2^{16}-1)/3=0x5555
- shladd r24=r25,2,r25
- // r23=3*y_index
- shladd r23=r23,1,r23;;
- // r30=(5*expon)*16+5*expon=(0x55)*expon
- shladd r30=r24,4,r24;;
+ // 1: GR_E5 = 5*exponent; // (2^{16}-1)/3 = 0x5555
+ shladd GR_E5 = GR_EXP, 2, GR_EXP
+ // GR_IT1_3 = 3*y_index
+ shladd GR_IT1_3 = GR_IT1, 1, GR_IT1
+ nop.i 0 ;;
}
+
+
+{.mmi
+ // GR_TMP5 = (5*expon)*16+5*expon = (0x55)*expon
+ shladd GR_TMP5 = GR_E5, 4, GR_E5
+ // adjust T_table pointer by 1st index
+ shladd GR_TP1 = GR_IT1_3, 3, GR_ADDR
+ nop.i 0 ;;
+}
+
+
{.mmi
- // adjust T_table pointer by 1st index
- shladd r2=r23,3,r2;;
- // f10=T[0][y]
- (p6) ldf8 f10=[r2],8
- // r24=(0x5500)*expon
- shl r24=r30,8;;
+ // FR_T0 = T [ 0 ] [ y ]
+ ldf8 FR_T0 = [ GR_TP1 ], 8
+ // GR_TMP63 = biased exponent of 2^{63}
+ mov GR_TMP63 = 0xffff + 63
+ // GR_TMP = (0x5500)*expon
+ shl GR_TMP = GR_TMP5, 8 ;;
}
+
+
{.mfi
- // f11=T[1][y]
- (p6) ldf8 f11=[r2],8
- // P_1=C_1+C_2*r
- (p6) fma.s1 f7=f9,f6,f7
- // r24=(0x5555)*expon
- add r24=r24,r30;;
+ // FR_T1 = T [ 1 ] [ y ]
+ ldf8 FR_T1 = [ GR_TP1 ], 8
+ // P_1 = C_1+C_2*r
+ fma.s1 FR_COEFF1 = FR_COEFF2, FR_R, FR_COEFF1
+ // GR_TMP2 = (0x5555)*expon
+ add GR_TMP2 = GR_TMP, GR_TMP5 ;;
}
+
+
{.mmi
- // r24=(0x5556)*expon // 0x5556=(2^{16}+2)/3
- add r24=r24,r25;;
- // f8=T[2][y]
- (p6) ldf8 f8=[r2]
- // r24=floor(expon/3)
- shr r24=r24,16;;
+ // GR_TMP3 = (0x5556)*expon // 0x5556 = (2^{16}+2)/3
+ add GR_TMP3 = GR_TMP2, GR_EXP ;;
+ // FR_T2 = T [ 2 ] [ y ]
+ ldf8 FR_T2 = [ GR_TP1 ]
+ // GR_EXP3 = floor(expon/3)
+ shr GR_EXP3 = GR_TMP3, 16 ;;
}
+
+
{.mmi
- nop.m 0
- // r28=3*exponent
- shladd r28=r24,1,r24
- // bias exponent
- add r24=r29,r24;;
+ setf.exp FR_2M63 = GR_TMP63
+ // GR_TMP4 = 3*exponent
+ shladd GR_TMP4 = GR_EXP3, 1, GR_EXP3
+ // bias exponent
+ add GR_EBIAS3 = GR_CT3, GR_EXP3 ;;
+}
+
+
+{.mmf
+ // get remainder of exponent/3
+ sub GR_REM = GR_EXP, GR_TMP4
+ // add sign to exponent
+ or GR_SEXP = GR_EBIAS3, GR_SIGN
+ // P_2 = -r*P_1
+ fnma.s1 FR_R = FR_COEFF1, FR_R, f0 ;;
}
+
+
+
{.mmi
- // get remainder of exponent/3
- sub r25=r25,r28
- // add sign to exponent
- or r24=r24,r26
- nop.i 0;;
-}
-{.mfi
- nop.m 0
- // P_2=-r*P_1
- (p6) fnma.s1 f6=f7,f6,f0
- // remainder=0 ?
- (p6) cmp.eq.unc p7,p8=r0,r25;;
+ // FR_ARG = sign*2^{exponent/3}
+ setf.exp FR_ARG = GR_SEXP
+ nop.m 0
+ // remainder = 0 ?
+ // p7=1 if input exponent is 3*j (remainder is 0)
+ cmp.eq.unc p7, p8 = r0, GR_REM ;;
}
+
+
{.mfi
- // f14=sign*2^{exponent/3}
- (p6) setf.exp f14=r24
- nop.f 0
- // remainder = 1 ?
- (p8) cmp.eq.unc p8,p12=1,r25;;
+ // remainder = 1 ?
+ // p8=1 if input exponent is 3*j+1 (remainder is 1)
+ // p12=1 if input exponent is 3*j+2 (remainder is 2)
+ (p8) cmp.eq.unc p8, p12 = 1, GR_REM
+ // p7=1 -> remainder = 0 -> use T = FR_T0
+ (p7) fma.s1 f8 = FR_T0, FR_R, FR_T0
+ // argument is of the form 2^(3*k) ?
+ // ( GR_I1 holds significand bits, without the leading 1)
+ or GR_I1 = GR_I1, GR_REM ;;
}
-.pred.rel "mutex",p7,p8
+
+
+.pred.rel "mutex", p12, p8
{.mfi
- nop.m 0
- // remainder=0 -> use T=f10
- (p7) fma.s1 f8=f10,f6,f10
- nop.i 0
+ nop.m 0
+ // p8=1 -> remainder = 1 -> use FR_T1
+ (p8) fma.s1 f8 = FR_T1, FR_R, FR_T1
+ // argument is of the form 2^(3*k) ?
+ cmp.eq p14, p7 = GR_I1, r0
}
+
+
{.mfi
- nop.m 0
- // remainder =1 -> use f11
- (p8) fma.s1 f8=f11,f6,f11
- nop.i 0;;
+ nop.m 0
+ // p12=1 -> remainder=2 -> result = T+T*P_2
+ (p12) fma.s1 f8 = FR_T2, FR_R, FR_T2
+ nop.i 0 ;;
}
+
+
+.pred.rel "mutex", p14, p7
{.mfi
- nop.m 0
- // result=T+T*P_2
- (p12) fma.s.s0 f8=f8,f6,f8
- nop.i 0;;
+ nop.m 0
+ // if argument is sgn*2^{3*(expon/3)}
+ (p14) fma.s.s0 f8 = FR_2M63, FR_ARG, f0
+ nop.i 0
}
{.mfb
- nop.m 0
- // T*=sgn*2^{expon/3}
- (p6) fma.s.s0 f8=f8,f14,f0
- br.ret.sptk b0;;
+ nop.m 0
+ // T *= sgn*2^{expon/3}
+ (p7) fma.s.s0 f8 = f8, FR_ARG, f0
+ br.ret.sptk b0 ;;
}
-.endp cbrtf
-ASM_SIZE_DIRECTIVE(cbrtf)
+
+
+GLOBAL_LIBM_END(cbrtf)
+
+
+
diff --git a/sysdeps/ia64/fpu/s_cbrtl.S b/sysdeps/ia64/fpu/s_cbrtl.S
index d4bbf8fdbf..3e621e2c12 100644
--- a/sysdeps/ia64/fpu/s_cbrtl.S
+++ b/sysdeps/ia64/fpu/s_cbrtl.S
@@ -1,11 +1,10 @@
-.file "cbrtl.asm"
+.file "cbrtl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang
-// of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,11 +35,13 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 4/28/00: Initial version
+// 04/28/00 Initial version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -95,29 +96,26 @@
// r2-r3, r23-r30
// p6,p7,p12
-#include "libm_support.h"
+
// Data tables
//==============================================================
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
.align 16
-poly_coeffs:
-ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
+LOCAL_OBJECT_START(poly_coeffs)
+
data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1
data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2
data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4
data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6
-ASM_SIZE_DIRECTIVE(poly_coeffs)
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+LOCAL_OBJECT_START(T_table)
-T_table:
-ASM_TYPE_DIRECTIVE(T_table,@object)
data8 0x80155c748c374836, 0x8040404b0879f7f9
data8 0x806b5dce4b405c10, 0x8096b586974669b1
@@ -503,14 +501,15 @@ data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80
data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d
data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358
data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
-ASM_SIZE_DIRECTIVE(T_table)
+LOCAL_OBJECT_END(T_table)
-D_table:
-ASM_TYPE_DIRECTIVE(D_table,@object)
+
+LOCAL_OBJECT_START(D_table)
+
data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854
data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95
@@ -703,25 +702,16 @@ data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20
data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4
data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467
data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d
-ASM_SIZE_DIRECTIVE(D_table)
-
-
+LOCAL_OBJECT_END(D_table)
-
-.align 32
-.global cbrtl#
-
.section .text
-.proc cbrtl#
-.align 32
-cbrtl:
-
+GLOBAL_LIBM_ENTRY(cbrtl)
{ .mfi
getf.sig r3=f8
// will continue only for normal/denormal numbers
-(p0) fclass.nm.unc p12,p7 = f8, 0x1b
+ fclass.nm.unc p12,p7 = f8, 0x1b
// r2 = pointer to C_1...C_6 followed by T_table
addl r2 = @ltoff(poly_coeffs), gp;;
}
@@ -898,5 +888,5 @@ cbrtl:
(p6) fma.s0 f8=f8,f6,f8
br.ret.sptk b0;;
}
-.endp cbrtl
-ASM_SIZE_DIRECTIVE(cbrtl)
+GLOBAL_LIBM_END(cbrtl)
+
diff --git a/sysdeps/ia64/fpu/s_ceil.S b/sysdeps/ia64/fpu/s_ceil.S
index f7e6d2cfa6..d1d2980618 100644
--- a/sysdeps/ia64/fpu/s_ceil.S
+++ b/sysdeps/ia64/fpu/s_ceil.S
@@ -1,10 +1,10 @@
.file "ceil.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,90 +20,67 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-
-#include "libm_support.h"
-
-.align 32
-.global ceil#
-
-.section .text
-.proc ceil#
-.align 32
-
// History
//==============================================================
-// 2/02/00: Initial version
-// 6/13/00: Improved speed
-// 6/27/00: Eliminated incorrect invalid flag setting
+// 02/02/00 Initial version
+// 06/13/00 Improved speed
+// 06/27/00 Eliminated incorrect invalid flag setting
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/28/03 Improved performance
+//==============================================================
// API
//==============================================================
// double ceil(double x)
+//==============================================================
-// general input registers:
-
-ceil_GR_FFFF = r14
-ceil_GR_signexp = r15
-ceil_GR_exponent = r16
-ceil_GR_expmask = r17
-ceil_GR_bigexp = r18
-
-
-// predicate registers used:
+// general input registers:
+// r14 - r19
-// p6 ==> Input is NaN, infinity, zero
-// p7 ==> Input is denormal
-// p8 ==> Input is <0
-// p9 ==> Input is >=0
-// p10 ==> Input is already an integer (bigger than largest integer)
-// p11 ==> Input is not a large integer
-// p12 ==> Input is a smaller integer
-// p13 ==> Input is not an even integer, so inexact must be set
-// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+rSignexpM1 = r19
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
+fAdj = f12
+fPreResult = f13
-CEIL_SIGNED_ZERO = f7
-CEIL_NORM_f8 = f9
-CEIL_FFFF = f10
-CEIL_INEXACT = f11
-CEIL_FLOAT_INT_f8 = f12
-CEIL_INT_f8 = f13
-CEIL_adj = f14
-CEIL_MINUS_ONE = f15
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// double ceil(double x)
-// Return an integer value (represented as a double) that is the smallest
+// Return an integer value (represented as a double) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceil(x)
-// **************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -124,139 +101,124 @@ CEIL_MINUS_ONE = f15
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-ceil:
+.section .text
+GLOBAL_LIBM_ENTRY(ceil)
{ .mfi
- getf.exp ceil_GR_signexp = f8
- fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
- addl ceil_GR_bigexp = 0x10033, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x10033, r0 // Set exponent at which x is an integer
}
{ .mfi
- addl ceil_GR_FFFF = -1,r0
- fcmp.lt.s1 p8,p9 = f8,f0
- mov ceil_GR_expmask = 0x1FFFF ;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
-// p7 ==> denorm
{ .mfi
- setf.sig CEIL_FFFF = ceil_GR_FFFF
- fclass.m p7,p0 = f8, 0x0b
- nop.i 999
+ mov rSignexpM1 = 0x2FFFF // Form signexp of -1
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
+ nop.i 0
}
-{ .mfi
- nop.m 999
- fnorm CEIL_NORM_f8 = f8
- nop.i 999 ;;
+{ .mfb
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt CEIL_UNORM // Branch if x unorm
}
+;;
-// Form 0 with sign of input in case negative zero is needed
-{ .mfi
- nop.m 999
- fmerge.s CEIL_SIGNED_ZERO = f8, f0
- nop.i 999
-}
+CEIL_COMMON:
+// Return here from CEIL_UNORM
{ .mfi
- nop.m 999
- fsub.s1 CEIL_MINUS_ONE = f0, f1
- nop.i 999 ;;
-}
-
-// p6 ==> NAN, INF, ZERO
-{ .mfb
- nop.m 999
- fclass.m p6,p10 = f8, 0xe7
-(p7) br.cond.spnt L(CEIL_DENORM) ;;
+ nop.m 0
+ fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
+ nop.i 0
}
+;;
-L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
-// Set adjustment to add to trunc(x) for result
-// If x>0, adjustment is 1.0
-// If x<=0, adjustment is 0.0
{ .mfi
- and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
-(p9) fadd.s1 CEIL_adj = f1,f0
- nop.i 999
+ nop.m 0
+(p8) fma.s1 fAdj = f0, f0, f0 // If x < 0, adjustment is 0
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fadd.s1 CEIL_adj = f0,f0
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fAdj = f1, f1, f0 // If x > 0, adjustment is +1
+ nop.i 0
}
+;;
{ .mfi
-(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
-(p6) fnorm.d f8 = f8
- nop.i 999 ;;
+ nop.m 0
+ fcvt.xf fPreResult = fXInt // trunc(x)
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
+;;
-{ .mfi
- nop.m 999
-(p10) fnorm.d f8 = CEIL_NORM_f8
- nop.i 999 ;;
+{ .mmi
+ and rExp = rSignexp, rExpMask // Get biased exponent
+;;
+ cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^52?
+(p8) cmp.lt.unc p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
}
+;;
-// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
+// If -1 < x < 0, we turn off p6 and compute result as -0
{ .mfi
- nop.m 999
-(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
- nop.i 999 ;;
+(p10) cmp.ne p6,p0 = r0,r0
+(p10) fmerge.s f8 = fNormX, f0
+ nop.i 0
}
+;;
+.pred.rel "mutex",p6,p7
{ .mfi
-(p14) cmp.ne p11,p0 = r0,r0
-(p14) fnorm.d f8 = CEIL_SIGNED_ZERO
- nop.i 999
+ nop.m 0
+(p6) fma.d.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^52
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.d.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^52
+(p10) cmp.eq p6,p0 = r0,r0 // If -1 < x < 0, turn on p6 again
}
+;;
{ .mfi
- nop.m 999
-(p11) fadd.d f8 = CEIL_FLOAT_INT_f8,CEIL_adj
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
+ nop.i 0
}
+;;
-// Set inexact if result not equal to input
{ .mfi
- nop.m 999
-(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
- nop.i 999
+ nop.m 0
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
}
-// Set result to input if integer
{ .mfb
- nop.m 999
-(p12) fnorm.d f8 = CEIL_NORM_f8
- br.ret.sptk b0 ;;
+ nop.m 0
+(p8) fma.d.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
+ br.ret.sptk b0 // Exit main path, 0 < |x| < 2^52
}
+;;
+
-// Here if input denorm
-L(CEIL_DENORM):
+CEIL_UNORM:
+// Here if x unorm
{ .mfb
- getf.exp ceil_GR_signexp = CEIL_NORM_f8
- fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
- br.cond.sptk L(CEIL_COMMON) ;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk CEIL_COMMON // Return to main path
}
+;;
-.endp ceil
-ASM_SIZE_DIRECTIVE(ceil)
+GLOBAL_LIBM_END(ceil)
diff --git a/sysdeps/ia64/fpu/s_ceilf.S b/sysdeps/ia64/fpu/s_ceilf.S
index d1011052e8..051534a202 100644
--- a/sysdeps/ia64/fpu/s_ceilf.S
+++ b/sysdeps/ia64/fpu/s_ceilf.S
@@ -1,10 +1,10 @@
.file "ceilf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,90 +20,67 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-
-#include "libm_support.h"
-
-.align 32
-.global ceilf#
-
-.section .text
-.proc ceilf#
-.align 32
-
// History
//==============================================================
-// 2/02/00: Initial version
-// 6/13/00: Improved speed
-// 6/27/00: Eliminated incorrect invalid flag setting
+// 02/02/00 Initial version
+// 06/13/00 Improved speed
+// 06/27/00 Eliminated incorrect invalid flag setting
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/28/03 Improved performance
+//==============================================================
// API
//==============================================================
// float ceilf(float x)
+//==============================================================
-// general input registers:
-
-ceil_GR_FFFF = r14
-ceil_GR_signexp = r15
-ceil_GR_exponent = r16
-ceil_GR_expmask = r17
-ceil_GR_bigexp = r18
-
-
-// predicate registers used:
+// general input registers:
+// r14 - r19
-// p6 ==> Input is NaN, infinity, zero
-// p7 ==> Input is denormal
-// p8 ==> Input is <0
-// p9 ==> Input is >=0
-// p10 ==> Input is already an integer (bigger than largest integer)
-// p11 ==> Input is not a large integer
-// p12 ==> Input is a smaller integer
-// p13 ==> Input is not an even integer, so inexact must be set
-// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+rSignexpM1 = r19
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
+fAdj = f12
+fPreResult = f13
-CEIL_SIGNED_ZERO = f7
-CEIL_NORM_f8 = f9
-CEIL_FFFF = f10
-CEIL_INEXACT = f11
-CEIL_FLOAT_INT_f8 = f12
-CEIL_INT_f8 = f13
-CEIL_adj = f14
-CEIL_MINUS_ONE = f15
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// float ceilf(float x)
-// Return an integer value (represented as a float) that is the smallest
+// Return an integer value (represented as a float) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceilf(x)
-// **************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -124,139 +101,124 @@ CEIL_MINUS_ONE = f15
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-ceilf:
+.section .text
+GLOBAL_LIBM_ENTRY(ceilf)
{ .mfi
- getf.exp ceil_GR_signexp = f8
- fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
- addl ceil_GR_bigexp = 0x10016, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x10016, r0 // Set exponent at which x is an integer
}
{ .mfi
- addl ceil_GR_FFFF = -1,r0
- fcmp.lt.s1 p8,p9 = f8,f0
- mov ceil_GR_expmask = 0x1FFFF ;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
-// p7 ==> denorm
{ .mfi
- setf.sig CEIL_FFFF = ceil_GR_FFFF
- fclass.m p7,p0 = f8, 0x0b
- nop.i 999
+ mov rSignexpM1 = 0x2FFFF // Form signexp of -1
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
+ nop.i 0
}
-{ .mfi
- nop.m 999
- fnorm CEIL_NORM_f8 = f8
- nop.i 999 ;;
+{ .mfb
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt CEIL_UNORM // Branch if x unorm
}
+;;
-// Form 0 with sign of input in case negative zero is needed
-{ .mfi
- nop.m 999
- fmerge.s CEIL_SIGNED_ZERO = f8, f0
- nop.i 999
-}
+CEIL_COMMON:
+// Return here from CEIL_UNORM
{ .mfi
- nop.m 999
- fsub.s1 CEIL_MINUS_ONE = f0, f1
- nop.i 999 ;;
-}
-
-// p6 ==> NAN, INF, ZERO
-{ .mfb
- nop.m 999
- fclass.m p6,p10 = f8, 0xe7
-(p7) br.cond.spnt L(CEIL_DENORM) ;;
+ nop.m 0
+ fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
+ nop.i 0
}
+;;
-L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
-// Set adjustment to add to trunc(x) for result
-// If x>0, adjustment is 1.0
-// If x<=0, adjustment is 0.0
{ .mfi
- and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
-(p9) fadd.s1 CEIL_adj = f1,f0
- nop.i 999
+ nop.m 0
+(p8) fma.s1 fAdj = f0, f0, f0 // If x < 0, adjustment is 0
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fadd.s1 CEIL_adj = f0,f0
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fAdj = f1, f1, f0 // If x > 0, adjustment is +1
+ nop.i 0
}
+;;
{ .mfi
-(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
-(p6) fnorm.s f8 = f8
- nop.i 999 ;;
+ nop.m 0
+ fcvt.xf fPreResult = fXInt // trunc(x)
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
+;;
-{ .mfi
- nop.m 999
-(p10) fnorm.s f8 = CEIL_NORM_f8
- nop.i 999 ;;
+{ .mmi
+ and rExp = rSignexp, rExpMask // Get biased exponent
+;;
+ cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^23?
+(p8) cmp.lt.unc p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
}
+;;
-// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
+// If -1 < x < 0, we turn off p6 and compute result as -0
{ .mfi
- nop.m 999
-(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
- nop.i 999 ;;
+(p10) cmp.ne p6,p0 = r0,r0
+(p10) fmerge.s f8 = fNormX, f0
+ nop.i 0
}
+;;
+.pred.rel "mutex",p6,p7
{ .mfi
-(p14) cmp.ne p11,p0 = r0,r0
-(p14) fnorm.s f8 = CEIL_SIGNED_ZERO
- nop.i 999
+ nop.m 0
+(p6) fma.s.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^23
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^23
+(p10) cmp.eq p6,p0 = r0,r0 // If -1 < x < 0, turn on p6 again
}
+;;
{ .mfi
- nop.m 999
-(p11) fadd.s f8 = CEIL_FLOAT_INT_f8,CEIL_adj
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
+ nop.i 0
}
+;;
-// Set inexact if result not equal to input
{ .mfi
- nop.m 999
-(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
- nop.i 999
+ nop.m 0
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
}
-// Set result to input if integer
{ .mfb
- nop.m 999
-(p12) fnorm.s f8 = CEIL_NORM_f8
- br.ret.sptk b0 ;;
+ nop.m 0
+(p8) fma.s.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
+ br.ret.sptk b0 // Exit main path, 0 < |x| < 2^23
}
+;;
+
-// Here if input denorm
-L(CEIL_DENORM):
+CEIL_UNORM:
+// Here if x unorm
{ .mfb
- getf.exp ceil_GR_signexp = CEIL_NORM_f8
- fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
- br.cond.sptk L(CEIL_COMMON) ;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk CEIL_COMMON // Return to main path
}
+;;
-.endp ceilf
-ASM_SIZE_DIRECTIVE(ceilf)
+GLOBAL_LIBM_END(ceilf)
diff --git a/sysdeps/ia64/fpu/s_ceill.S b/sysdeps/ia64/fpu/s_ceill.S
index d3d8719584..71cb01d3fa 100644
--- a/sysdeps/ia64/fpu/s_ceill.S
+++ b/sysdeps/ia64/fpu/s_ceill.S
@@ -1,10 +1,10 @@
.file "ceill.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,90 +20,67 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-
-#include "libm_support.h"
-
-.align 32
-.global ceill#
-
-.section .text
-.proc ceill#
-.align 32
-
// History
//==============================================================
-// 2/02/00: Initial version
-// 6/13/00: Improved speed
-// 6/27/00: Eliminated incorrect invalid flag setting
+// 02/02/00 Initial version
+// 06/13/00 Improved speed
+// 06/27/00 Eliminated incorrect invalid flag setting
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/28/03 Improved performance
+//==============================================================
// API
//==============================================================
-// double ceill(double x)
-
-// general input registers:
-
-ceil_GR_FFFF = r14
-ceil_GR_signexp = r15
-ceil_GR_exponent = r16
-ceil_GR_expmask = r17
-ceil_GR_bigexp = r18
-
+// long double ceill(long double x)
+//==============================================================
-// predicate registers used:
+// general input registers:
+// r14 - r19
-// p6 ==> Input is NaN, infinity, zero
-// p7 ==> Input is denormal
-// p8 ==> Input is <0
-// p9 ==> Input is >=0
-// p10 ==> Input is already an integer (bigger than largest integer)
-// p11 ==> Input is not a large integer
-// p12 ==> Input is a smaller integer
-// p13 ==> Input is not an even integer, so inexact must be set
-// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+rSignexpM1 = r19
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
+fAdj = f12
+fPreResult = f13
-CEIL_SIGNED_ZERO = f7
-CEIL_NORM_f8 = f9
-CEIL_FFFF = f10
-CEIL_INEXACT = f11
-CEIL_FLOAT_INT_f8 = f12
-CEIL_INT_f8 = f13
-CEIL_adj = f14
-CEIL_MINUS_ONE = f15
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// long double ceill(long double x)
-// Return an integer value (represented as a long double) that is the smallest
+// Return an integer value (represented as a long double) that is the smallest
// value not less than x
// This is x rounded toward +infinity to an integral value.
// Inexact is set if x != ceill(x)
-// **************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -124,139 +101,124 @@ CEIL_MINUS_ONE = f15
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-ceill:
+.section .text
+GLOBAL_LIBM_ENTRY(ceill)
{ .mfi
- getf.exp ceil_GR_signexp = f8
- fcvt.fx.trunc.s1 CEIL_INT_f8 = f8
- addl ceil_GR_bigexp = 0x1003e, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x1003e, r0 // Set exponent at which x is an integer
}
{ .mfi
- addl ceil_GR_FFFF = -1,r0
- fcmp.lt.s1 p8,p9 = f8,f0
- mov ceil_GR_expmask = 0x1FFFF ;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
-// p7 ==> denorm
{ .mfi
- setf.sig CEIL_FFFF = ceil_GR_FFFF
- fclass.m p7,p0 = f8, 0x0b
- nop.i 999
+ mov rSignexpM1 = 0x2FFFF // Form signexp of -1
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
+ nop.i 0
}
-{ .mfi
- nop.m 999
- fnorm CEIL_NORM_f8 = f8
- nop.i 999 ;;
+{ .mfb
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt CEIL_UNORM // Branch if x unorm
}
+;;
-// Form 0 with sign of input in case negative zero is needed
-{ .mfi
- nop.m 999
- fmerge.s CEIL_SIGNED_ZERO = f8, f0
- nop.i 999
-}
+CEIL_COMMON:
+// Return here from CEIL_UNORM
{ .mfi
- nop.m 999
- fsub.s1 CEIL_MINUS_ONE = f0, f1
- nop.i 999 ;;
-}
-
-// p6 ==> NAN, INF, ZERO
-{ .mfb
- nop.m 999
- fclass.m p6,p10 = f8, 0xe7
-(p7) br.cond.spnt L(CEIL_DENORM) ;;
+ nop.m 0
+ fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
+ nop.i 0
}
+;;
-L(CEIL_COMMON):
.pred.rel "mutex",p8,p9
-// Set adjustment to add to trunc(x) for result
-// If x>0, adjustment is 1.0
-// If x<=0, adjustment is 0.0
{ .mfi
- and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
-(p9) fadd.s1 CEIL_adj = f1,f0
- nop.i 999
+ nop.m 0
+(p8) fma.s1 fAdj = f0, f0, f0 // If x < 0, adjustment is 0
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fadd.s1 CEIL_adj = f0,f0
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fAdj = f1, f1, f0 // If x > 0, adjustment is +1
+ nop.i 0
}
+;;
{ .mfi
-(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
-(p6) fnorm f8 = f8
- nop.i 999 ;;
+ nop.m 0
+ fcvt.xf fPreResult = fXInt // trunc(x)
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
+;;
-{ .mfi
- nop.m 999
-(p10) fnorm f8 = CEIL_NORM_f8
- nop.i 999 ;;
+{ .mmi
+ and rExp = rSignexp, rExpMask // Get biased exponent
+;;
+ cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^63?
+(p8) cmp.lt.unc p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
}
+;;
-// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
+// If -1 < x < 0, we turn off p6 and compute result as -0
{ .mfi
- nop.m 999
-(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
- nop.i 999 ;;
+(p10) cmp.ne p6,p0 = r0,r0
+(p10) fmerge.s f8 = fNormX, f0
+ nop.i 0
}
+;;
+.pred.rel "mutex",p6,p7
{ .mfi
-(p14) cmp.ne p11,p0 = r0,r0
-(p14) fnorm f8 = CEIL_SIGNED_ZERO
- nop.i 999
+ nop.m 0
+(p6) fma.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^63
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^63
+(p10) cmp.eq p6,p0 = r0,r0 // If -1 < x < 0, turn on p6 again
}
+;;
{ .mfi
- nop.m 999
-(p11) fadd f8 = CEIL_FLOAT_INT_f8,CEIL_adj
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
+ nop.i 0
}
+;;
-// Set inexact if result not equal to input
{ .mfi
- nop.m 999
-(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
- nop.i 999
+ nop.m 0
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
}
-// Set result to input if integer
{ .mfb
- nop.m 999
-(p12) fnorm f8 = CEIL_NORM_f8
- br.ret.sptk b0 ;;
+ nop.m 0
+(p8) fma.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
+ br.ret.sptk b0 // Exit main path, 0 < |x| < 2^63
}
+;;
+
-// Here if input denorm
-L(CEIL_DENORM):
+CEIL_UNORM:
+// Here if x unorm
{ .mfb
- getf.exp ceil_GR_signexp = CEIL_NORM_f8
- fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
- br.cond.sptk L(CEIL_COMMON) ;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk CEIL_COMMON // Return to main path
}
+;;
-.endp ceill
-ASM_SIZE_DIRECTIVE(ceill)
+GLOBAL_LIBM_END(ceill)
diff --git a/sysdeps/ia64/fpu/s_copysign.S b/sysdeps/ia64/fpu/s_copysign.S
index e0d08cb721..0903565ff3 100644
--- a/sysdeps/ia64/fpu/s_copysign.S
+++ b/sysdeps/ia64/fpu/s_copysign.S
@@ -23,12 +23,16 @@ ENTRY (__copysign)
{
fmerge.s fret0 = farg1, farg0
br.ret.sptk.many rp
-}
+}
END (__copysign)
strong_alias (__copysign, __copysignf)
strong_alias (__copysign, __copysignl)
+strong_alias (__copysign, __libm_copysign)
+strong_alias (__copysign, __libm_copysignf)
+strong_alias (__copysign, __libm_copysignl)
+
weak_alias (__copysign, copysign)
weak_alias (__copysignf, copysignf)
weak_alias (__copysignl, copysignl)
diff --git a/sysdeps/ia64/fpu/s_cos.S b/sysdeps/ia64/fpu/s_cos.S
index 6540aec724..84c177abab 100644
--- a/sysdeps/ia64/fpu/s_cos.S
+++ b/sysdeps/ia64/fpu/s_cos.S
@@ -1,10 +1,10 @@
.file "sincos.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,17 +35,22 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial revision
-// 4/02/00 Unwind support added.
-// 6/16/00 Updated tables to enforce symmetry
-// 8/31/00 Saved 2 cycles in main path, and 9 in other paths.
-// 9/20/00 The updated tables regressed to an old version, so reinstated them
+// 02/02/00 Initial version
+// 04/02/00 Unwind support added.
+// 06/16/00 Updated tables to enforce symmetry
+// 08/31/00 Saved 2 cycles in main path, and 9 in other paths.
+// 09/20/00 The updated tables regressed to an old version, so reinstated them
// 10/18/00 Changed one table entry to ensure symmetry
-// 1/03/01 Improved speed, fixed flag settings for small arguments.
+// 01/03/01 Improved speed, fixed flag settings for small arguments.
+// 02/18/02 Large arguments processing routine excluded
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 06/03/02 Ensure inexact flag set for large arg result
+// 09/05/02 Working range widened by strengthened reduction (3 parts of Pi/16)
+// 02/10/03 Reordered header: .section, .global, .proc, .align
// API
//==============================================================
@@ -63,9 +68,13 @@
// nfloat = Round result to integer (round-to-nearest)
//
// r = x - nfloat * pi/2^k
-// Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k) for increased accuracy.
+// Do this as ((x - nfloat * HIGH(pi/2^k)) -
+// nfloat * LOW(pi/2^k)) -
+// nfloat * LOWEST(pi/2^k) for increased accuracy.
// pi/2^k is stored as two numbers that when added make pi/2^k.
// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
+// HIGH and LOW parts are rounded toward zero,
+// and LOWEST is rounded to nearest.
//
// x = (nfloat * pi/2^k) + r
// r is small enough that we can use a polynomial approximation
@@ -121,7 +130,7 @@
//
// as follows
//
-// Sm = Sin(Mpi/2^k) and Cm = Cos(Mpi/2^k)
+// S[m] = Sin(Mpi/2^k) and C[m] = Cos(Mpi/2^k)
// rsq = r*r
//
//
@@ -141,23 +150,22 @@
//
// P = r + rcub * P
//
-// Answer = Sm Cos(r) + Cm P
+// Answer = S[m] Cos(r) + C[m] P
//
// Cos(r) = 1 + rsq Q
// Cos(r) = 1 + r^2 Q
// Cos(r) = 1 + r^2 (q1 + r^2q2 + r^4q3 + r^6q4)
// Cos(r) = 1 + r^2q1 + r^4q2 + r^6q3 + r^8q4 + ...
//
-// Sm Cos(r) = Sm(1 + rsq Q)
-// Sm Cos(r) = Sm + Sm rsq Q
-// Sm Cos(r) = Sm + s_rsq Q
-// Q = Sm + s_rsq Q
+// S[m] Cos(r) = S[m](1 + rsq Q)
+// S[m] Cos(r) = S[m] + S[m] rsq Q
+// S[m] Cos(r) = S[m] + s_rsq Q
+// Q = S[m] + s_rsq Q
//
// Then,
//
-// Answer = Q + Cm P
+// Answer = Q + C[m] P
-#include "libm_support.h"
// Registers used
//==============================================================
@@ -174,99 +182,97 @@
// Assembly macros
//==============================================================
-sind_NORM_f8 = f9
-sind_W = f10
-sind_int_Nfloat = f11
-sind_Nfloat = f12
+sincos_NORM_f8 = f9
+sincos_W = f10
+sincos_int_Nfloat = f11
+sincos_Nfloat = f12
-sind_r = f13
-sind_rsq = f14
-sind_rcub = f15
+sincos_r = f13
+sincos_rsq = f14
+sincos_rcub = f15
+sincos_save_tmp = f15
-sind_Inv_Pi_by_16 = f32
-sind_Pi_by_16_hi = f33
-sind_Pi_by_16_lo = f34
+sincos_Inv_Pi_by_16 = f32
+sincos_Pi_by_16_1 = f33
+sincos_Pi_by_16_2 = f34
-sind_Inv_Pi_by_64 = f35
-sind_Pi_by_64_hi = f36
-sind_Pi_by_64_lo = f37
+sincos_Inv_Pi_by_64 = f35
-sind_Sm = f38
-sind_Cm = f39
+sincos_Pi_by_16_3 = f36
-sind_P1 = f40
-sind_Q1 = f41
-sind_P2 = f42
-sind_Q2 = f43
-sind_P3 = f44
-sind_Q3 = f45
-sind_P4 = f46
-sind_Q4 = f47
+sincos_r_exact = f37
-sind_P_temp1 = f48
-sind_P_temp2 = f49
+sincos_Sm = f38
+sincos_Cm = f39
-sind_Q_temp1 = f50
-sind_Q_temp2 = f51
+sincos_P1 = f40
+sincos_Q1 = f41
+sincos_P2 = f42
+sincos_Q2 = f43
+sincos_P3 = f44
+sincos_Q3 = f45
+sincos_P4 = f46
+sincos_Q4 = f47
-sind_P = f52
-sind_Q = f53
+sincos_P_temp1 = f48
+sincos_P_temp2 = f49
-sind_srsq = f54
+sincos_Q_temp1 = f50
+sincos_Q_temp2 = f51
-sind_SIG_INV_PI_BY_16_2TO61 = f55
-sind_RSHF_2TO61 = f56
-sind_RSHF = f57
-sind_2TOM61 = f58
-sind_NFLOAT = f59
-sind_W_2TO61_RSH = f60
+sincos_P = f52
+sincos_Q = f53
-fp_tmp = f61
+sincos_srsq = f54
+
+sincos_SIG_INV_PI_BY_16_2TO61 = f55
+sincos_RSHF_2TO61 = f56
+sincos_RSHF = f57
+sincos_2TOM61 = f58
+sincos_NFLOAT = f59
+sincos_W_2TO61_RSH = f60
+
+fp_tmp = f61
/////////////////////////////////////////////////////////////
-sind_AD_1 = r33
-sind_AD_2 = r34
-sind_exp_limit = r35
-sind_r_signexp = r36
-sind_AD_beta_table = r37
-sind_r_sincos = r38
+sincos_AD_1 = r33
+sincos_AD_2 = r34
+sincos_exp_limit = r35
+sincos_r_signexp = r36
+sincos_AD_beta_table = r37
+sincos_r_sincos = r38
-sind_r_exp = r39
-sind_r_17_ones = r40
+sincos_r_exp = r39
+sincos_r_17_ones = r40
-sind_GR_sig_inv_pi_by_16 = r14
-sind_GR_rshf_2to61 = r15
-sind_GR_rshf = r16
-sind_GR_exp_2tom61 = r17
-sind_GR_n = r18
-sind_GR_m = r19
-sind_GR_32m = r19
+sincos_GR_sig_inv_pi_by_16 = r14
+sincos_GR_rshf_2to61 = r15
+sincos_GR_rshf = r16
+sincos_GR_exp_2tom61 = r17
+sincos_GR_n = r18
+sincos_GR_m = r19
+sincos_GR_32m = r19
+sincos_GR_all_ones = r19
-gr_tmp = r41
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
+gr_tmp = r41
+GR_SAVE_PFS = r41
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+RODATA
+// Pi/16 parts
.align 16
-double_sind_pi:
-ASM_TYPE_DIRECTIVE(double_sind_pi,@object)
-// data8 0xA2F9836E4E44152A, 0x00004001 // 16/pi (significand loaded w/ setf)
-// c90fdaa22168c234
- data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 hi
-// c4c6628b80dc1cd1 29024e088a
- data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 lo
-ASM_SIZE_DIRECTIVE(double_sind_pi)
-
-double_sind_pq_k4:
-ASM_TYPE_DIRECTIVE(double_sind_pq_k4,@object)
+LOCAL_OBJECT_START(double_sincos_pi)
+ data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 1st part
+ data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 2nd part
+ data8 0xA4093822299F31D0, 0x00003F7A // pi/16 3rd part
+LOCAL_OBJECT_END(double_sincos_pi)
+
+// Coefficients for polynomials
+LOCAL_OBJECT_START(double_sincos_pq_k4)
data8 0x3EC71C963717C63A // P4
data8 0x3EF9FFBA8F191AE6 // Q4
data8 0xBF2A01A00F4E11A8 // P3
@@ -275,125 +281,119 @@ ASM_TYPE_DIRECTIVE(double_sind_pq_k4,@object)
data8 0x3FA555555554DD45 // Q2
data8 0xBFC5555555555555 // P1
data8 0xBFDFFFFFFFFFFFFC // Q1
-ASM_SIZE_DIRECTIVE(double_sind_pq_k4)
+LOCAL_OBJECT_END(double_sincos_pq_k4)
+// Sincos table (S[m], C[m])
+LOCAL_OBJECT_START(double_sin_cos_beta_k4)
-double_sin_cos_beta_k4:
-ASM_TYPE_DIRECTIVE(double_sin_cos_beta_k4,@object)
data8 0x0000000000000000 , 0x00000000 // sin( 0 pi/16) S0
data8 0x8000000000000000 , 0x00003fff // cos( 0 pi/16) C0
-
+//
data8 0xc7c5c1e34d3055b3 , 0x00003ffc // sin( 1 pi/16) S1
data8 0xfb14be7fbae58157 , 0x00003ffe // cos( 1 pi/16) C1
-
+//
data8 0xc3ef1535754b168e , 0x00003ffd // sin( 2 pi/16) S2
data8 0xec835e79946a3146 , 0x00003ffe // cos( 2 pi/16) C2
-
+//
data8 0x8e39d9cd73464364 , 0x00003ffe // sin( 3 pi/16) S3
data8 0xd4db3148750d181a , 0x00003ffe // cos( 3 pi/16) C3
-
+//
data8 0xb504f333f9de6484 , 0x00003ffe // sin( 4 pi/16) S4
data8 0xb504f333f9de6484 , 0x00003ffe // cos( 4 pi/16) C4
-
-
+//
+//
data8 0xd4db3148750d181a , 0x00003ffe // sin( 5 pi/16) C3
data8 0x8e39d9cd73464364 , 0x00003ffe // cos( 5 pi/16) S3
-
+//
data8 0xec835e79946a3146 , 0x00003ffe // sin( 6 pi/16) C2
data8 0xc3ef1535754b168e , 0x00003ffd // cos( 6 pi/16) S2
-
+//
data8 0xfb14be7fbae58157 , 0x00003ffe // sin( 7 pi/16) C1
data8 0xc7c5c1e34d3055b3 , 0x00003ffc // cos( 7 pi/16) S1
-
+//
data8 0x8000000000000000 , 0x00003fff // sin( 8 pi/16) C0
data8 0x0000000000000000 , 0x00000000 // cos( 8 pi/16) S0
-
-
+//
+//
data8 0xfb14be7fbae58157 , 0x00003ffe // sin( 9 pi/16) C1
data8 0xc7c5c1e34d3055b3 , 0x0000bffc // cos( 9 pi/16) -S1
-
+//
data8 0xec835e79946a3146 , 0x00003ffe // sin(10 pi/16) C2
data8 0xc3ef1535754b168e , 0x0000bffd // cos(10 pi/16) -S2
-
+//
data8 0xd4db3148750d181a , 0x00003ffe // sin(11 pi/16) C3
data8 0x8e39d9cd73464364 , 0x0000bffe // cos(11 pi/16) -S3
-
+//
data8 0xb504f333f9de6484 , 0x00003ffe // sin(12 pi/16) S4
data8 0xb504f333f9de6484 , 0x0000bffe // cos(12 pi/16) -S4
-
-
+//
+//
data8 0x8e39d9cd73464364 , 0x00003ffe // sin(13 pi/16) S3
data8 0xd4db3148750d181a , 0x0000bffe // cos(13 pi/16) -C3
-
+//
data8 0xc3ef1535754b168e , 0x00003ffd // sin(14 pi/16) S2
data8 0xec835e79946a3146 , 0x0000bffe // cos(14 pi/16) -C2
-
+//
data8 0xc7c5c1e34d3055b3 , 0x00003ffc // sin(15 pi/16) S1
data8 0xfb14be7fbae58157 , 0x0000bffe // cos(15 pi/16) -C1
-
+//
data8 0x0000000000000000 , 0x00000000 // sin(16 pi/16) S0
data8 0x8000000000000000 , 0x0000bfff // cos(16 pi/16) -C0
-
-
+//
+//
data8 0xc7c5c1e34d3055b3 , 0x0000bffc // sin(17 pi/16) -S1
data8 0xfb14be7fbae58157 , 0x0000bffe // cos(17 pi/16) -C1
-
+//
data8 0xc3ef1535754b168e , 0x0000bffd // sin(18 pi/16) -S2
data8 0xec835e79946a3146 , 0x0000bffe // cos(18 pi/16) -C2
-
+//
data8 0x8e39d9cd73464364 , 0x0000bffe // sin(19 pi/16) -S3
data8 0xd4db3148750d181a , 0x0000bffe // cos(19 pi/16) -C3
-
+//
data8 0xb504f333f9de6484 , 0x0000bffe // sin(20 pi/16) -S4
data8 0xb504f333f9de6484 , 0x0000bffe // cos(20 pi/16) -S4
-
-
+//
+//
data8 0xd4db3148750d181a , 0x0000bffe // sin(21 pi/16) -C3
data8 0x8e39d9cd73464364 , 0x0000bffe // cos(21 pi/16) -S3
-
+//
data8 0xec835e79946a3146 , 0x0000bffe // sin(22 pi/16) -C2
data8 0xc3ef1535754b168e , 0x0000bffd // cos(22 pi/16) -S2
-
+//
data8 0xfb14be7fbae58157 , 0x0000bffe // sin(23 pi/16) -C1
data8 0xc7c5c1e34d3055b3 , 0x0000bffc // cos(23 pi/16) -S1
-
+//
data8 0x8000000000000000 , 0x0000bfff // sin(24 pi/16) -C0
data8 0x0000000000000000 , 0x00000000 // cos(24 pi/16) S0
-
-
+//
+//
data8 0xfb14be7fbae58157 , 0x0000bffe // sin(25 pi/16) -C1
data8 0xc7c5c1e34d3055b3 , 0x00003ffc // cos(25 pi/16) S1
-
+//
data8 0xec835e79946a3146 , 0x0000bffe // sin(26 pi/16) -C2
data8 0xc3ef1535754b168e , 0x00003ffd // cos(26 pi/16) S2
-
+//
data8 0xd4db3148750d181a , 0x0000bffe // sin(27 pi/16) -C3
data8 0x8e39d9cd73464364 , 0x00003ffe // cos(27 pi/16) S3
-
+//
data8 0xb504f333f9de6484 , 0x0000bffe // sin(28 pi/16) -S4
data8 0xb504f333f9de6484 , 0x00003ffe // cos(28 pi/16) S4
-
-
+//
+//
data8 0x8e39d9cd73464364 , 0x0000bffe // sin(29 pi/16) -S3
data8 0xd4db3148750d181a , 0x00003ffe // cos(29 pi/16) C3
-
+//
data8 0xc3ef1535754b168e , 0x0000bffd // sin(30 pi/16) -S2
data8 0xec835e79946a3146 , 0x00003ffe // cos(30 pi/16) C2
-
+//
data8 0xc7c5c1e34d3055b3 , 0x0000bffc // sin(31 pi/16) -S1
data8 0xfb14be7fbae58157 , 0x00003ffe // cos(31 pi/16) C1
-
+//
data8 0x0000000000000000 , 0x00000000 // sin(32 pi/16) S0
data8 0x8000000000000000 , 0x00003fff // cos(32 pi/16) C0
-ASM_SIZE_DIRECTIVE(double_sin_cos_beta_k4)
+LOCAL_OBJECT_END(double_sin_cos_beta_k4)
-.align 32
-.global sin#
-.global cos#
-#ifdef _LIBC
-.global __sin#
-.global __cos#
-#endif
+.section .text
////////////////////////////////////////////////////////
// There are two entry points: sin and cos
@@ -402,85 +402,63 @@ ASM_SIZE_DIRECTIVE(double_sin_cos_beta_k4)
// If from sin, p8 is true
// If from cos, p9 is true
-.section .text
-.proc sin#
-#ifdef _LIBC
-.proc __sin#
-#endif
-.align 32
-
-sin:
-#ifdef _LIBC
-__sin:
-#endif
+GLOBAL_IEEE754_ENTRY(sin)
{ .mlx
- alloc r32=ar.pfs,1,13,0,0
- movl sind_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
+ alloc r32 = ar.pfs, 1, 13, 0, 0
+ movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi
}
{ .mlx
- addl sind_AD_1 = @ltoff(double_sind_pi), gp
- movl sind_GR_rshf_2to61 = 0x47b8000000000000 // 1.1000 2^(63+63-2)
+ addl sincos_AD_1 = @ltoff(double_sincos_pi), gp
+ movl sincos_GR_rshf_2to61 = 0x47b8000000000000 // 1.1 2^(63+63-2)
}
;;
{ .mfi
- ld8 sind_AD_1 = [sind_AD_1]
- fnorm sind_NORM_f8 = f8
- cmp.eq p8,p9 = r0, r0
+ ld8 sincos_AD_1 = [sincos_AD_1]
+ fnorm.s0 sincos_NORM_f8 = f8 // Normalize argument
+ cmp.eq p8,p9 = r0, r0 // set p8 (clear p9) for sin
}
{ .mib
- mov sind_GR_exp_2tom61 = 0xffff-61 // exponent of scaling factor 2^-61
- mov sind_r_sincos = 0x0
- br.cond.sptk L(SIND_SINCOS)
+ mov sincos_GR_exp_2tom61 = 0xffff-61 // exponent of scale 2^-61
+ mov sincos_r_sincos = 0x0 // sincos_r_sincos = 0 for sin
+ br.cond.sptk _SINCOS_COMMON // go to common part
}
;;
-.endp sin
-ASM_SIZE_DIRECTIVE(sin)
-
-
-.section .text
-.proc cos#
-#ifdef _LIBC
-.proc __cos#
-#endif
-.align 32
-cos:
-#ifdef _LIBC
-__cos:
-#endif
+GLOBAL_IEEE754_END(sin)
+GLOBAL_IEEE754_ENTRY(cos)
{ .mlx
- alloc r32=ar.pfs,1,13,0,0
- movl sind_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
+ alloc r32 = ar.pfs, 1, 13, 0, 0
+ movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi
}
{ .mlx
- addl sind_AD_1 = @ltoff(double_sind_pi), gp
- movl sind_GR_rshf_2to61 = 0x47b8000000000000 // 1.1000 2^(63+63-2)
+ addl sincos_AD_1 = @ltoff(double_sincos_pi), gp
+ movl sincos_GR_rshf_2to61 = 0x47b8000000000000 // 1.1 2^(63+63-2)
}
;;
{ .mfi
- ld8 sind_AD_1 = [sind_AD_1]
- fnorm.s1 sind_NORM_f8 = f8
- cmp.eq p9,p8 = r0, r0
+ ld8 sincos_AD_1 = [sincos_AD_1]
+ fnorm.s1 sincos_NORM_f8 = f8 // Normalize argument
+ cmp.eq p9,p8 = r0, r0 // set p9 (clear p8) for cos
}
{ .mib
- mov sind_GR_exp_2tom61 = 0xffff-61 // exponent of scaling factor 2^-61
- mov sind_r_sincos = 0x8
- br.cond.sptk L(SIND_SINCOS)
+ mov sincos_GR_exp_2tom61 = 0xffff-61 // exp of scale 2^-61
+ mov sincos_r_sincos = 0x8 // sincos_r_sincos = 8 for cos
+ nop.b 999
}
;;
-
////////////////////////////////////////////////////////
// All entry points end up here.
-// If from sin, sind_r_sincos is 0 and p8 is true
-// If from cos, sind_r_sincos is 8 = 2^(k-1) and p9 is true
-// We add sind_r_sincos to N
+// If from sin, sincos_r_sincos is 0 and p8 is true
+// If from cos, sincos_r_sincos is 8 = 2^(k-1) and p9 is true
+// We add sincos_r_sincos to N
-L(SIND_SINCOS):
+///////////// Common sin and cos part //////////////////
+_SINCOS_COMMON:
// Form two constants we need
@@ -488,3014 +466,320 @@ L(SIND_SINCOS):
// 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
// fcmp used to set denormal, and invalid on snans
{ .mfi
- setf.sig sind_SIG_INV_PI_BY_16_2TO61 = sind_GR_sig_inv_pi_by_16
- fcmp.eq.s0 p12,p0=f8,f0
- mov sind_r_17_ones = 0x1ffff
+ setf.sig sincos_SIG_INV_PI_BY_16_2TO61 = sincos_GR_sig_inv_pi_by_16
+ fclass.m p6,p0 = f8, 0xe7 // if x = 0,inf,nan
+ mov sincos_exp_limit = 0x1001a
}
{ .mlx
- setf.d sind_RSHF_2TO61 = sind_GR_rshf_2to61
- movl sind_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
-}
+ setf.d sincos_RSHF_2TO61 = sincos_GR_rshf_2to61
+ movl sincos_GR_rshf = 0x43e8000000000000 // 1.1 2^63
+} // Right shift
;;
// Form another constant
// 2^-61 for scaling Nfloat
-// 0x10009 is register_bias + 10.
-// So if f8 > 2^10 = Gamma, go to DBX
-{ .mfi
- setf.exp sind_2TOM61 = sind_GR_exp_2tom61
- fclass.m p13,p0 = f8, 0x23 // Test for x inf
- mov sind_exp_limit = 0x10009
+// 0x1001a is register_bias + 27.
+// So if f8 >= 2^27, go to large argument routines
+{ .mmi
+ getf.exp sincos_r_signexp = f8
+ setf.exp sincos_2TOM61 = sincos_GR_exp_2tom61
+ addl gr_tmp = -1,r0 // Create constant used to set "inexact"
}
;;
// Load the two pieces of pi/16
// Form another constant
// 1.1000...000 * 2^63, the right shift constant
-{ .mmf
- ldfe sind_Pi_by_16_hi = [sind_AD_1],16
- setf.d sind_RSHF = sind_GR_rshf
- fclass.m p14,p0 = f8, 0xc3 // Test for x nan
-}
-;;
-
-{ .mfi
- ldfe sind_Pi_by_16_lo = [sind_AD_1],16
-(p13) frcpa.s0 f8,p12=f0,f0 // force qnan indef for x=inf
- addl gr_tmp = -1,r0
-}
-{ .mfb
- addl sind_AD_beta_table = @ltoff(double_sin_cos_beta_k4), gp
- nop.f 999
-(p13) br.ret.spnt b0 ;; // Exit for x=inf
-}
-
-// Start loading P, Q coefficients
-// SIN(0)
-{ .mfi
- ldfpd sind_P4,sind_Q4 = [sind_AD_1],16
-(p8) fclass.m.unc p6,p0 = f8, 0x07 // Test for sin(0)
- nop.i 999
-}
-{ .mfb
- addl sind_AD_beta_table = @ltoff(double_sin_cos_beta_k4), gp
-(p14) fma.d f8=f8,f1,f0 // qnan for x=nan
-(p14) br.ret.spnt b0 ;; // Exit for x=nan
-}
-
-
-// COS(0)
-{ .mfi
- getf.exp sind_r_signexp = f8
-(p9) fclass.m.unc p7,p0 = f8, 0x07 // Test for sin(0)
- nop.i 999
-}
-{ .mfi
- ld8 sind_AD_beta_table = [sind_AD_beta_table]
- nop.f 999
- nop.i 999 ;;
-}
-
{ .mmb
- ldfpd sind_P3,sind_Q3 = [sind_AD_1],16
- setf.sig fp_tmp = gr_tmp // Create constant such that fmpy sets inexact
-(p6) br.ret.spnt b0 ;;
-}
-
-{ .mfb
- and sind_r_exp = sind_r_17_ones, sind_r_signexp
-(p7) fmerge.s f8 = f1,f1
-(p7) br.ret.spnt b0 ;;
-}
-
-// p10 is true if we must call routines to handle larger arguments
-// p10 is true if f8 exp is > 0x10009
-
-{ .mfi
- ldfpd sind_P2,sind_Q2 = [sind_AD_1],16
- nop.f 999
- cmp.ge p10,p0 = sind_r_exp,sind_exp_limit
+ ldfe sincos_Pi_by_16_1 = [sincos_AD_1],16
+ setf.d sincos_RSHF = sincos_GR_rshf
+(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS
}
;;
-// sind_W = x * sind_Inv_Pi_by_16
-// Multiply x by scaled 16/pi and add large const to shift integer part of W to
-// rightmost bits of significand
-{ .mfi
- ldfpd sind_P1,sind_Q1 = [sind_AD_1]
- fma.s1 sind_W_2TO61_RSH = sind_NORM_f8,sind_SIG_INV_PI_BY_16_2TO61,sind_RSHF_2TO61
- nop.i 999
-}
-{ .mbb
-(p10) cmp.ne.unc p11,p12=sind_r_sincos,r0 // p11 call __libm_cos_double_dbx
- // p12 call __libm_sin_double_dbx
-(p11) br.cond.spnt L(COSD_DBX)
-(p12) br.cond.spnt L(SIND_DBX)
-}
-;;
-
-
-// sind_NFLOAT = Round_Int_Nearest(sind_W)
-// This is done by scaling back by 2^-61 and subtracting the shift constant
-{ .mfi
- nop.m 999
- fms.s1 sind_NFLOAT = sind_W_2TO61_RSH,sind_2TOM61,sind_RSHF
- nop.i 999 ;;
-}
-
-
-// get N = (int)sind_int_Nfloat
-{ .mfi
- getf.sig sind_GR_n = sind_W_2TO61_RSH
- nop.f 999
- nop.i 999 ;;
-}
-
-// Add 2^(k-1) (which is in sind_r_sincos) to N
-// sind_r = -sind_Nfloat * sind_Pi_by_16_hi + x
-// sind_r = sind_r -sind_Nfloat * sind_Pi_by_16_lo
-{ .mfi
- add sind_GR_n = sind_GR_n, sind_r_sincos
- fnma.s1 sind_r = sind_NFLOAT, sind_Pi_by_16_hi, sind_NORM_f8
- nop.i 999 ;;
-}
-
-
-// Get M (least k+1 bits of N)
{ .mmi
- and sind_GR_m = 0x1f,sind_GR_n ;;
- nop.m 999
- shl sind_GR_32m = sind_GR_m,5 ;;
-}
-
-// Add 32*M to address of sin_cos_beta table
-{ .mmi
- add sind_AD_2 = sind_GR_32m, sind_AD_beta_table
- nop.m 999
- nop.i 999 ;;
-}
-
-{ .mfi
- ldfe sind_Sm = [sind_AD_2],16
-(p8) fclass.m.unc p10,p0=f8,0x0b // If sin, note denormal input to set uflow
- nop.i 999 ;;
-}
-
-{ .mfi
- ldfe sind_Cm = [sind_AD_2]
- fnma.s1 sind_r = sind_NFLOAT, sind_Pi_by_16_lo, sind_r
- nop.i 999 ;;
-}
-
-// get rsq
-{ .mfi
- nop.m 999
- fma.s1 sind_rsq = sind_r, sind_r, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fmpy.s0 fp_tmp = fp_tmp,fp_tmp // fmpy forces inexact flag
- nop.i 999 ;;
-}
-
-// form P and Q series
-{ .mfi
- nop.m 999
- fma.s1 sind_P_temp1 = sind_rsq, sind_P4, sind_P3
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
- fma.s1 sind_Q_temp1 = sind_rsq, sind_Q4, sind_Q3
- nop.i 999 ;;
-}
-
-// get rcube and sm*rsq
-{ .mfi
- nop.m 999
- fmpy.s1 sind_srsq = sind_Sm,sind_rsq
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
- fmpy.s1 sind_rcub = sind_r, sind_rsq
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 sind_Q_temp2 = sind_rsq, sind_Q_temp1, sind_Q2
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
- fma.s1 sind_P_temp2 = sind_rsq, sind_P_temp1, sind_P2
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 sind_Q = sind_rsq, sind_Q_temp2, sind_Q1
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
- fma.s1 sind_P = sind_rsq, sind_P_temp2, sind_P1
- nop.i 999 ;;
-}
-
-// Get final P and Q
-{ .mfi
- nop.m 999
- fma.s1 sind_Q = sind_srsq,sind_Q, sind_Sm
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
- fma.s1 sind_P = sind_rcub,sind_P, sind_r
- nop.i 999 ;;
-}
-
-// If sin(denormal), force inexact to be set
-{ .mfi
- nop.m 999
-(p10) fmpy.d.s0 fp_tmp = f8,f8
- nop.i 999 ;;
-}
-
-// Final calculation
-{ .mfb
- nop.m 999
- fma.d f8 = sind_Cm, sind_P, sind_Q
- br.ret.sptk b0 ;;
-}
-.endp cos#
-ASM_SIZE_DIRECTIVE(cos#)
-
-
-
-.proc __libm_callout_1s
-__libm_callout_1s:
-L(SIND_DBX):
-.prologue
-{ .mfi
- nop.m 0
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-;;
-
-{ .mfi
- mov GR_SAVE_GP=gp
- nop.f 0
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-}
-
-.body
-{ .mib
- nop.m 999
- nop.i 999
- br.call.sptk.many b0=__libm_sin_double_dbx# ;;
-}
-;;
-
-
-{ .mfi
- mov gp = GR_SAVE_GP
- nop.f 999
- mov b0 = GR_SAVE_B0
-}
-;;
-
-{ .mib
- nop.m 999
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0 ;;
-}
-.endp __libm_callout_1s
-ASM_SIZE_DIRECTIVE(__libm_callout_1s)
-
-
-.proc __libm_callout_1c
-__libm_callout_1c:
-L(COSD_DBX):
-.prologue
-{ .mfi
- nop.m 0
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-;;
-
-{ .mfi
- mov GR_SAVE_GP=gp
- nop.f 0
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-}
-
-.body
-{ .mib
- nop.m 999
- nop.i 999
- br.call.sptk.many b0=__libm_cos_double_dbx# ;;
-}
-;;
-
+ ldfe sincos_Pi_by_16_2 = [sincos_AD_1],16
+ setf.sig fp_tmp = gr_tmp // constant for inexact set
+ nop.i 999
+};;
{ .mfi
- mov gp = GR_SAVE_GP
- nop.f 999
- mov b0 = GR_SAVE_B0
-}
-;;
-
-{ .mib
- nop.m 999
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0 ;;
-}
-.endp __libm_callout_1c
-ASM_SIZE_DIRECTIVE(__libm_callout_1c)
-
-
-// ====================================================================
-// ====================================================================
-
-// These functions calculate the sin and cos for inputs
-// greater than 2^10
-// __libm_sin_double_dbx# and __libm_cos_double_dbx#
-
-// *********************************************************************
-// *********************************************************************
-//
-// Function: Combined sin(x) and cos(x), where
-//
-// sin(x) = sine(x), for double precision x values
-// cos(x) = cosine(x), for double precision x values
-//
-// *********************************************************************
-//
-// Accuracy: Within .7 ulps for 80-bit floating point values
-// Very accurate for double precision values
-//
-// *********************************************************************
-//
-// Resources Used:
-//
-// Floating-Point Registers: f8 (Input and Return Value)
-// f32-f99
-//
-// General Purpose Registers:
-// r32-r43
-// r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
-//
-// Predicate Registers: p6-p13
-//
-// *********************************************************************
-//
-// IEEE Special Conditions:
-//
-// Denormal fault raised on denormal inputs
-// Overflow exceptions do not occur
-// Underflow exceptions raised when appropriate for sin
-// (No specialized error handling for this routine)
-// Inexact raised when appropriate by algorithm
-//
-// sin(SNaN) = QNaN
-// sin(QNaN) = QNaN
-// sin(inf) = QNaN
-// sin(+/-0) = +/-0
-// cos(inf) = QNaN
-// cos(SNaN) = QNaN
-// cos(QNaN) = QNaN
-// cos(0) = 1
-//
-// *********************************************************************
-//
-// Mathematical Description
-// ========================
-//
-// The computation of FSIN and FCOS is best handled in one piece of
-// code. The main reason is that given any argument Arg, computation
-// of trigonometric functions first calculate N and an approximation
-// to alpha where
-//
-// Arg = N pi/2 + alpha, |alpha| <= pi/4.
-//
-// Since
-//
-// cos( Arg ) = sin( (N+1) pi/2 + alpha ),
-//
-// therefore, the code for computing sine will produce cosine as long
-// as 1 is added to N immediately after the argument reduction
-// process.
-//
-// Let M = N if sine
-// N+1 if cosine.
-//
-// Now, given
-//
-// Arg = M pi/2 + alpha, |alpha| <= pi/4,
-//
-// let I = M mod 4, or I be the two lsb of M when M is represented
-// as 2's complement. I = [i_0 i_1]. Then
-//
-// sin( Arg ) = (-1)^i_0 sin( alpha ) if i_1 = 0,
-// = (-1)^i_0 cos( alpha ) if i_1 = 1.
-//
-// For example:
-// if M = -1, I = 11
-// sin ((-pi/2 + alpha) = (-1) cos (alpha)
-// if M = 0, I = 00
-// sin (alpha) = sin (alpha)
-// if M = 1, I = 01
-// sin (pi/2 + alpha) = cos (alpha)
-// if M = 2, I = 10
-// sin (pi + alpha) = (-1) sin (alpha)
-// if M = 3, I = 11
-// sin ((3/2)pi + alpha) = (-1) cos (alpha)
-//
-// The value of alpha is obtained by argument reduction and
-// represented by two working precision numbers r and c where
-//
-// alpha = r + c accurately.
-//
-// The reduction method is described in a previous write up.
-// The argument reduction scheme identifies 4 cases. For Cases 2
-// and 4, because |alpha| is small, sin(r+c) and cos(r+c) can be
-// computed very easily by 2 or 3 terms of the Taylor series
-// expansion as follows:
-//
-// Case 2:
-// -------
-//
-// sin(r + c) = r + c - r^3/6 accurately
-// cos(r + c) = 1 - 2^(-67) accurately
-//
-// Case 4:
-// -------
-//
-// sin(r + c) = r + c - r^3/6 + r^5/120 accurately
-// cos(r + c) = 1 - r^2/2 + r^4/24 accurately
-//
-// The only cases left are Cases 1 and 3 of the argument reduction
-// procedure. These two cases will be merged since after the
-// argument is reduced in either cases, we have the reduced argument
-// represented as r + c and that the magnitude |r + c| is not small
-// enough to allow the usage of a very short approximation.
-//
-// The required calculation is either
-//
-// sin(r + c) = sin(r) + correction, or
-// cos(r + c) = cos(r) + correction.
-//
-// Specifically,
-//
-// sin(r + c) = sin(r) + c sin'(r) + O(c^2)
-// = sin(r) + c cos (r) + O(c^2)
-// = sin(r) + c(1 - r^2/2) accurately.
-// Similarly,
-//
-// cos(r + c) = cos(r) - c sin(r) + O(c^2)
-// = cos(r) - c(r - r^3/6) accurately.
-//
-// We therefore concentrate on accurately calculating sin(r) and
-// cos(r) for a working-precision number r, |r| <= pi/4 to within
-// 0.1% or so.
-//
-// The greatest challenge of this task is that the second terms of
-// the Taylor series
-//
-// r - r^3/3! + r^r/5! - ...
-//
-// and
-//
-// 1 - r^2/2! + r^4/4! - ...
-//
-// are not very small when |r| is close to pi/4 and the rounding
-// errors will be a concern if simple polynomial accumulation is
-// used. When |r| < 2^-3, however, the second terms will be small
-// enough (6 bits or so of right shift) that a normal Horner
-// recurrence suffices. Hence there are two cases that we consider
-// in the accurate computation of sin(r) and cos(r), |r| <= pi/4.
-//
-// Case small_r: |r| < 2^(-3)
-// --------------------------
-//
-// Since Arg = M pi/4 + r + c accurately, and M mod 4 is [i_0 i_1],
-// we have
-//
-// sin(Arg) = (-1)^i_0 * sin(r + c) if i_1 = 0
-// = (-1)^i_0 * cos(r + c) if i_1 = 1
-//
-// can be accurately approximated by
-//
-// sin(Arg) = (-1)^i_0 * [sin(r) + c] if i_1 = 0
-// = (-1)^i_0 * [cos(r) - c*r] if i_1 = 1
-//
-// because |r| is small and thus the second terms in the correction
-// are unneccessary.
-//
-// Finally, sin(r) and cos(r) are approximated by polynomials of
-// moderate lengths.
-//
-// sin(r) = r + S_1 r^3 + S_2 r^5 + ... + S_5 r^11
-// cos(r) = 1 + C_1 r^2 + C_2 r^4 + ... + C_5 r^10
-//
-// We can make use of predicates to selectively calculate
-// sin(r) or cos(r) based on i_1.
-//
-// Case normal_r: 2^(-3) <= |r| <= pi/4
-// ------------------------------------
-//
-// This case is more likely than the previous one if one considers
-// r to be uniformly distributed in [-pi/4 pi/4]. Again,
-//
-// sin(Arg) = (-1)^i_0 * sin(r + c) if i_1 = 0
-// = (-1)^i_0 * cos(r + c) if i_1 = 1.
-//
-// Because |r| is now larger, we need one extra term in the
-// correction. sin(Arg) can be accurately approximated by
-//
-// sin(Arg) = (-1)^i_0 * [sin(r) + c(1-r^2/2)] if i_1 = 0
-// = (-1)^i_0 * [cos(r) - c*r*(1 - r^2/6)] i_1 = 1.
-//
-// Finally, sin(r) and cos(r) are approximated by polynomials of
-// moderate lengths.
-//
-// sin(r) = r + PP_1_hi r^3 + PP_1_lo r^3 +
-// PP_2 r^5 + ... + PP_8 r^17
-//
-// cos(r) = 1 + QQ_1 r^2 + QQ_2 r^4 + ... + QQ_8 r^16
-//
-// where PP_1_hi is only about 16 bits long and QQ_1 is -1/2.
-// The crux in accurate computation is to calculate
-//
-// r + PP_1_hi r^3 or 1 + QQ_1 r^2
-//
-// accurately as two pieces: U_hi and U_lo. The way to achieve this
-// is to obtain r_hi as a 10 sig. bit number that approximates r to
-// roughly 8 bits or so of accuracy. (One convenient way is
-//
-// r_hi := frcpa( frcpa( r ) ).)
-//
-// This way,
-//
-// r + PP_1_hi r^3 = r + PP_1_hi r_hi^3 +
-// PP_1_hi (r^3 - r_hi^3)
-// = [r + PP_1_hi r_hi^3] +
-// [PP_1_hi (r - r_hi)
-// (r^2 + r_hi r + r_hi^2) ]
-// = U_hi + U_lo
-//
-// Since r_hi is only 10 bit long and PP_1_hi is only 16 bit long,
-// PP_1_hi * r_hi^3 is only at most 46 bit long and thus computed
-// exactly. Furthermore, r and PP_1_hi r_hi^3 are of opposite sign
-// and that there is no more than 8 bit shift off between r and
-// PP_1_hi * r_hi^3. Hence the sum, U_hi, is representable and thus
-// calculated without any error. Finally, the fact that
-//
-// |U_lo| <= 2^(-8) |U_hi|
-//
-// says that U_hi + U_lo is approximating r + PP_1_hi r^3 to roughly
-// 8 extra bits of accuracy.
-//
-// Similarly,
-//
-// 1 + QQ_1 r^2 = [1 + QQ_1 r_hi^2] +
-// [QQ_1 (r - r_hi)(r + r_hi)]
-// = U_hi + U_lo.
-//
-// Summarizing, we calculate r_hi = frcpa( frcpa( r ) ).
-//
-// If i_1 = 0, then
-//
-// U_hi := r + PP_1_hi * r_hi^3
-// U_lo := PP_1_hi * (r - r_hi) * (r^2 + r*r_hi + r_hi^2)
-// poly := PP_1_lo r^3 + PP_2 r^5 + ... + PP_8 r^17
-// correction := c * ( 1 + C_1 r^2 )
-//
-// Else ...i_1 = 1
-//
-// U_hi := 1 + QQ_1 * r_hi * r_hi
-// U_lo := QQ_1 * (r - r_hi) * (r + r_hi)
-// poly := QQ_2 * r^4 + QQ_3 * r^6 + ... + QQ_8 r^16
-// correction := -c * r * (1 + S_1 * r^2)
-//
-// End
-//
-// Finally,
-//
-// V := poly + ( U_lo + correction )
-//
-// / U_hi + V if i_0 = 0
-// result := |
-// \ (-U_hi) - V if i_0 = 1
-//
-// It is important that in the last step, negation of U_hi is
-// performed prior to the subtraction which is to be performed in
-// the user-set rounding mode.
-//
-//
-// Algorithmic Description
-// =======================
-//
-// The argument reduction algorithm is tightly integrated into FSIN
-// and FCOS which share the same code. The following is complete and
-// self-contained. The argument reduction description given
-// previously is repeated below.
-//
-//
-// Step 0. Initialization.
-//
-// If FSIN is invoked, set N_inc := 0; else if FCOS is invoked,
-// set N_inc := 1.
-//
-// Step 1. Check for exceptional and special cases.
-//
-// * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special
-// handling.
-// * If |Arg| < 2^24, go to Step 2 for reduction of moderate
-// arguments. This is the most likely case.
-// * If |Arg| < 2^63, go to Step 8 for pre-reduction of large
-// arguments.
-// * If |Arg| >= 2^63, go to Step 10 for special handling.
-//
-// Step 2. Reduction of moderate arguments.
-//
-// If |Arg| < pi/4 ...quick branch
-// N_fix := N_inc (integer)
-// r := Arg
-// c := 0.0
-// Branch to Step 4, Case_1_complete
-// Else ...cf. argument reduction
-// N := Arg * two_by_PI (fp)
-// N_fix := fcvt.fx( N ) (int)
-// N := fcvt.xf( N_fix )
-// N_fix := N_fix + N_inc
-// s := Arg - N * P_1 (first piece of pi/2)
-// w := -N * P_2 (second piece of pi/2)
-//
-// If |s| >= 2^(-33)
-// go to Step 3, Case_1_reduce
-// Else
-// go to Step 7, Case_2_reduce
-// Endif
-// Endif
-//
-// Step 3. Case_1_reduce.
-//
-// r := s + w
-// c := (s - r) + w ...observe order
-//
-// Step 4. Case_1_complete
-//
-// ...At this point, the reduced argument alpha is
-// ...accurately represented as r + c.
-// If |r| < 2^(-3), go to Step 6, small_r.
-//
-// Step 5. Normal_r.
-//
-// Let [i_0 i_1] by the 2 lsb of N_fix.
-// FR_rsq := r * r
-// r_hi := frcpa( frcpa( r ) )
-// r_lo := r - r_hi
-//
-// If i_1 = 0, then
-// poly := r*FR_rsq*(PP_1_lo + FR_rsq*(PP_2 + ... FR_rsq*PP_8))
-// U_hi := r + PP_1_hi*r_hi*r_hi*r_hi ...any order
-// U_lo := PP_1_hi*r_lo*(r*r + r*r_hi + r_hi*r_hi)
-// correction := c + c*C_1*FR_rsq ...any order
-// Else
-// poly := FR_rsq*FR_rsq*(QQ_2 + FR_rsq*(QQ_3 + ... + FR_rsq*QQ_8))
-// U_hi := 1 + QQ_1 * r_hi * r_hi ...any order
-// U_lo := QQ_1 * r_lo * (r + r_hi)
-// correction := -c*(r + S_1*FR_rsq*r) ...any order
-// Endif
-//
-// V := poly + (U_lo + correction) ...observe order
-//
-// result := (i_0 == 0? 1.0 : -1.0)
-//
-// Last instruction in user-set rounding mode
-//
-// result := (i_0 == 0? result*U_hi + V :
-// result*U_hi - V)
-//
-// Return
-//
-// Step 6. Small_r.
-//
-// ...Use flush to zero mode without causing exception
-// Let [i_0 i_1] be the two lsb of N_fix.
-//
-// FR_rsq := r * r
-//
-// If i_1 = 0 then
-// z := FR_rsq*FR_rsq; z := FR_rsq*z *r
-// poly_lo := S_3 + FR_rsq*(S_4 + FR_rsq*S_5)
-// poly_hi := r*FR_rsq*(S_1 + FR_rsq*S_2)
-// correction := c
-// result := r
-// Else
-// z := FR_rsq*FR_rsq; z := FR_rsq*z
-// poly_lo := C_3 + FR_rsq*(C_4 + FR_rsq*C_5)
-// poly_hi := FR_rsq*(C_1 + FR_rsq*C_2)
-// correction := -c*r
-// result := 1
-// Endif
-//
-// poly := poly_hi + (z * poly_lo + correction)
-//
-// If i_0 = 1, result := -result
-//
-// Last operation. Perform in user-set rounding mode
-//
-// result := (i_0 == 0? result + poly :
-// result - poly )
-// Return
-//
-// Step 7. Case_2_reduce.
-//
-// ...Refer to the write up for argument reduction for
-// ...rationale. The reduction algorithm below is taken from
-// ...argument reduction description and integrated this.
-//
-// w := N*P_3
-// U_1 := N*P_2 + w ...FMA
-// U_2 := (N*P_2 - U_1) + w ...2 FMA
-// ...U_1 + U_2 is N*(P_2+P_3) accurately
-//
-// r := s - U_1
-// c := ( (s - r) - U_1 ) - U_2
-//
-// ...The mathematical sum r + c approximates the reduced
-// ...argument accurately. Note that although compared to
-// ...Case 1, this case requires much more work to reduce
-// ...the argument, the subsequent calculation needed for
-// ...any of the trigonometric functions is very little because
-// ...|alpha| < 1.01*2^(-33) and thus two terms of the
-// ...Taylor series expansion suffices.
-//
-// If i_1 = 0 then
-// poly := c + S_1 * r * r * r ...any order
-// result := r
-// Else
-// poly := -2^(-67)
-// result := 1.0
-// Endif
-//
-// If i_0 = 1, result := -result
-//
-// Last operation. Perform in user-set rounding mode
-//
-// result := (i_0 == 0? result + poly :
-// result - poly )
-//
-// Return
-//
-//
-// Step 8. Pre-reduction of large arguments.
-//
-// ...Again, the following reduction procedure was described
-// ...in the separate write up for argument reduction, which
-// ...is tightly integrated here.
-
-// N_0 := Arg * Inv_P_0
-// N_0_fix := fcvt.fx( N_0 )
-// N_0 := fcvt.xf( N_0_fix)
-
-// Arg' := Arg - N_0 * P_0
-// w := N_0 * d_1
-// N := Arg' * two_by_PI
-// N_fix := fcvt.fx( N )
-// N := fcvt.xf( N_fix )
-// N_fix := N_fix + N_inc
-//
-// s := Arg' - N * P_1
-// w := w - N * P_2
-//
-// If |s| >= 2^(-14)
-// go to Step 3
-// Else
-// go to Step 9
-// Endif
-//
-// Step 9. Case_4_reduce.
-//
-// ...first obtain N_0*d_1 and -N*P_2 accurately
-// U_hi := N_0 * d_1 V_hi := -N*P_2
-// U_lo := N_0 * d_1 - U_hi V_lo := -N*P_2 - V_hi ...FMAs
-//
-// ...compute the contribution from N_0*d_2 and -N*P_3
-// w := -N*P_3
-// w := w + N_0*d_2
-// t := U_lo + V_lo + w ...any order
-//
-// ...at this point, the mathematical value
-// ...s + U_hi + V_hi + t approximates the true reduced argument
-// ...accurately. Just need to compute this accurately.
-//
-// ...Calculate U_hi + V_hi accurately:
-// A := U_hi + V_hi
-// if |U_hi| >= |V_hi| then
-// a := (U_hi - A) + V_hi
-// else
-// a := (V_hi - A) + U_hi
-// endif
-// ...order in computing "a" must be observed. This branch is
-// ...best implemented by predicates.
-// ...A + a is U_hi + V_hi accurately. Moreover, "a" is
-// ...much smaller than A: |a| <= (1/2)ulp(A).
-//
-// ...Just need to calculate s + A + a + t
-// C_hi := s + A t := t + a
-// C_lo := (s - C_hi) + A
-// C_lo := C_lo + t
-//
-// ...Final steps for reduction
-// r := C_hi + C_lo
-// c := (C_hi - r) + C_lo
-//
-// ...At this point, we have r and c
-// ...And all we need is a couple of terms of the corresponding
-// ...Taylor series.
-//
-// If i_1 = 0
-// poly := c + r*FR_rsq*(S_1 + FR_rsq*S_2)
-// result := r
-// Else
-// poly := FR_rsq*(C_1 + FR_rsq*C_2)
-// result := 1
-// Endif
-//
-// If i_0 = 1, result := -result
-//
-// Last operation. Perform in user-set rounding mode
-//
-// result := (i_0 == 0? result + poly :
-// result - poly )
-// Return
-//
-// Large Arguments: For arguments above 2**63, a Payne-Hanek
-// style argument reduction is used and pi_by_2 reduce is called.
-//
-
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-.align 64
-
-FSINCOS_CONSTANTS:
-ASM_TYPE_DIRECTIVE(FSINCOS_CONSTANTS,@object)
-data4 0x4B800000, 0xCB800000, 0x00000000,0x00000000 // two**24, -two**24
-data4 0x4E44152A, 0xA2F9836E, 0x00003FFE,0x00000000 // Inv_pi_by_2
-data4 0xCE81B9F1, 0xC84D32B0, 0x00004016,0x00000000 // P_0
-data4 0x2168C235, 0xC90FDAA2, 0x00003FFF,0x00000000 // P_1
-data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD,0x00000000 // P_2
-data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C,0x00000000 // P_3
-data4 0x5F000000, 0xDF000000, 0x00000000,0x00000000 // two_to_63, -two_to_63
-data4 0x6EC6B45A, 0xA397E504, 0x00003FE7,0x00000000 // Inv_P_0
-data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF,0x00000000 // d_1
-data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C,0x00000000 // d_2
-data4 0x2168C234, 0xC90FDAA2, 0x00003FFE,0x00000000 // pi_by_4
-data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE,0x00000000 // neg_pi_by_4
-data4 0x3E000000, 0xBE000000, 0x00000000,0x00000000 // two**-3, -two**-3
-data4 0x2F000000, 0xAF000000, 0x9E000000,0x00000000 // two**-33, -two**-33, -two**-67
-data4 0xA21C0BC9, 0xCC8ABEBC, 0x00003FCE,0x00000000 // PP_8
-data4 0x720221DA, 0xD7468A05, 0x0000BFD6,0x00000000 // PP_7
-data4 0x640AD517, 0xB092382F, 0x00003FDE,0x00000000 // PP_6
-data4 0xD1EB75A4, 0xD7322B47, 0x0000BFE5,0x00000000 // PP_5
-data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1
-data4 0x00000000, 0xAAAA0000, 0x0000BFFC,0x00000000 // PP_1_hi
-data4 0xBAF69EEA, 0xB8EF1D2A, 0x00003FEC,0x00000000 // PP_4
-data4 0x0D03BB69, 0xD00D00D0, 0x0000BFF2,0x00000000 // PP_3
-data4 0x88888962, 0x88888888, 0x00003FF8,0x00000000 // PP_2
-data4 0xAAAB0000, 0xAAAAAAAA, 0x0000BFEC,0x00000000 // PP_1_lo
-data4 0xC2B0FE52, 0xD56232EF, 0x00003FD2,0x00000000 // QQ_8
-data4 0x2B48DCA6, 0xC9C99ABA, 0x0000BFDA,0x00000000 // QQ_7
-data4 0x9C716658, 0x8F76C650, 0x00003FE2,0x00000000 // QQ_6
-data4 0xFDA8D0FC, 0x93F27DBA, 0x0000BFE9,0x00000000 // QQ_5
-data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1
-data4 0x00000000, 0x80000000, 0x0000BFFE,0x00000000 // QQ_1
-data4 0x0C6E5041, 0xD00D00D0, 0x00003FEF,0x00000000 // QQ_4
-data4 0x0B607F60, 0xB60B60B6, 0x0000BFF5,0x00000000 // QQ_3
-data4 0xAAAAAA9B, 0xAAAAAAAA, 0x00003FFA,0x00000000 // QQ_2
-data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1
-data4 0xAAAA719F, 0xAAAAAAAA, 0x00003FFA,0x00000000 // C_2
-data4 0x0356F994, 0xB60B60B6, 0x0000BFF5,0x00000000 // C_3
-data4 0xB2385EA9, 0xD00CFFD5, 0x00003FEF,0x00000000 // C_4
-data4 0x292A14CD, 0x93E4BD18, 0x0000BFE9,0x00000000 // C_5
-data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1
-data4 0x888868DB, 0x88888888, 0x00003FF8,0x00000000 // S_2
-data4 0x055EFD4B, 0xD00D00D0, 0x0000BFF2,0x00000000 // S_3
-data4 0x839730B9, 0xB8EF1C5D, 0x00003FEC,0x00000000 // S_4
-data4 0xE5B3F492, 0xD71EA3A4, 0x0000BFE5,0x00000000 // S_5
-data4 0x38800000, 0xB8800000, 0x00000000 // two**-14, -two**-14
-ASM_SIZE_DIRECTIVE(FSINCOS_CONSTANTS)
-
-FR_Input_X = f8
-FR_Neg_Two_to_M3 = f32
-FR_Two_to_63 = f32
-FR_Two_to_24 = f33
-FR_Pi_by_4 = f33
-FR_Two_to_M14 = f34
-FR_Two_to_M33 = f35
-FR_Neg_Two_to_24 = f36
-FR_Neg_Pi_by_4 = f36
-FR_Neg_Two_to_M14 = f37
-FR_Neg_Two_to_M33 = f38
-FR_Neg_Two_to_M67 = f39
-FR_Inv_pi_by_2 = f40
-FR_N_float = f41
-FR_N_fix = f42
-FR_P_1 = f43
-FR_P_2 = f44
-FR_P_3 = f45
-FR_s = f46
-FR_w = f47
-FR_c = f48
-FR_r = f49
-FR_Z = f50
-FR_A = f51
-FR_a = f52
-FR_t = f53
-FR_U_1 = f54
-FR_U_2 = f55
-FR_C_1 = f56
-FR_C_2 = f57
-FR_C_3 = f58
-FR_C_4 = f59
-FR_C_5 = f60
-FR_S_1 = f61
-FR_S_2 = f62
-FR_S_3 = f63
-FR_S_4 = f64
-FR_S_5 = f65
-FR_poly_hi = f66
-FR_poly_lo = f67
-FR_r_hi = f68
-FR_r_lo = f69
-FR_rsq = f70
-FR_r_cubed = f71
-FR_C_hi = f72
-FR_N_0 = f73
-FR_d_1 = f74
-FR_V = f75
-FR_V_hi = f75
-FR_V_lo = f76
-FR_U_hi = f77
-FR_U_lo = f78
-FR_U_hiabs = f79
-FR_V_hiabs = f80
-FR_PP_8 = f81
-FR_QQ_8 = f81
-FR_PP_7 = f82
-FR_QQ_7 = f82
-FR_PP_6 = f83
-FR_QQ_6 = f83
-FR_PP_5 = f84
-FR_QQ_5 = f84
-FR_PP_4 = f85
-FR_QQ_4 = f85
-FR_PP_3 = f86
-FR_QQ_3 = f86
-FR_PP_2 = f87
-FR_QQ_2 = f87
-FR_QQ_1 = f88
-FR_N_0_fix = f89
-FR_Inv_P_0 = f90
-FR_corr = f91
-FR_poly = f92
-FR_d_2 = f93
-FR_Two_to_M3 = f94
-FR_Neg_Two_to_63 = f94
-FR_P_0 = f95
-FR_C_lo = f96
-FR_PP_1 = f97
-FR_PP_1_lo = f98
-FR_ArgPrime = f99
-
-GR_Table_Base = r32
-GR_Table_Base1 = r33
-GR_i_0 = r34
-GR_i_1 = r35
-GR_N_Inc = r36
-GR_Sin_or_Cos = r37
-
-GR_SAVE_B0 = r39
-GR_SAVE_GP = r40
-GR_SAVE_PFS = r41
-
-.section .text
-.proc __libm_sin_double_dbx#
-.align 64
-__libm_sin_double_dbx:
-
-{ .mlx
-alloc GR_Table_Base = ar.pfs,0,12,2,0
- movl GR_Sin_or_Cos = 0x0 ;;
-}
-
-{ .mmi
- nop.m 999
- addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.i 999
-}
-;;
-
-
-{ .mib
- nop.m 999
- nop.i 999
- br.cond.sptk L(SINCOS_CONTINUE) ;;
-}
-
-.endp __libm_sin_double_dbx#
-ASM_SIZE_DIRECTIVE(__libm_sin_double_dbx)
-
-.section .text
-.proc __libm_cos_double_dbx#
-__libm_cos_double_dbx:
-
-{ .mlx
-alloc GR_Table_Base= ar.pfs,0,12,2,0
- movl GR_Sin_or_Cos = 0x1 ;;
-}
-
-{ .mmi
- nop.m 999
- addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
- nop.i 999
-}
-;;
+ ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16
+ nop.f 999
+ nop.i 999
+};;
+// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.i 999
-}
-;;
-
-//
-// Load Table Address
-//
-L(SINCOS_CONTINUE):
+ ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16
+ nop.m 999
+ nop.i 999
+};;
+// Select exponent (17 lsb)
{ .mmi
- add GR_Table_Base1 = 96, GR_Table_Base
- ldfs FR_Two_to_24 = [GR_Table_Base], 4
- nop.i 999
+ ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16
+ nop.m 999
+ dep.z sincos_r_exp = sincos_r_signexp, 0, 17
}
;;
-{ .mmi
- nop.m 999
-//
-// Load 2**24, load 2**63.
-//
- ldfs FR_Neg_Two_to_24 = [GR_Table_Base], 12
- mov r41 = ar.pfs ;;
-}
-
-{ .mfi
- ldfs FR_Two_to_63 = [GR_Table_Base1], 4
-//
-// Check for unnormals - unsupported operands. We do not want
-// to generate denormal exception
-// Check for NatVals, QNaNs, SNaNs, +/-Infs
-// Check for EM unsupporteds
-// Check for Zero
-//
- fclass.m.unc p6, p8 = FR_Input_X, 0x1E3
- mov r40 = gp ;;
-}
-
-{ .mfi
- nop.m 999
- fclass.nm.unc p8, p0 = FR_Input_X, 0x1FF
-// GR_Sin_or_Cos denotes
- mov r39 = b0
-}
-
-{ .mfb
- ldfs FR_Neg_Two_to_63 = [GR_Table_Base1], 12
- fclass.m.unc p10, p0 = FR_Input_X, 0x007
-(p6) br.cond.spnt L(SINCOS_SPECIAL) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p8) br.cond.spnt L(SINCOS_SPECIAL) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Branch if +/- NaN, Inf.
-// Load -2**24, load -2**63.
-//
-(p10) br.cond.spnt L(SINCOS_ZERO) ;;
-}
-
-{ .mmb
- ldfe FR_Inv_pi_by_2 = [GR_Table_Base], 16
- ldfe FR_Inv_P_0 = [GR_Table_Base1], 16
- nop.b 999 ;;
-}
-
-{ .mmb
- nop.m 999
- ldfe FR_d_1 = [GR_Table_Base1], 16
- nop.b 999 ;;
-}
-//
-// Raise possible denormal operand flag with useful fcmp
-// Is x <= -2**63
-// Load Inv_P_0 for pre-reduction
-// Load Inv_pi_by_2
-//
-
+// p10 is true if we must call routines to handle larger arguments
+// p10 is true if f8 exp is >= 0x1001a (2^27)
{ .mmb
- ldfe FR_P_0 = [GR_Table_Base], 16
- ldfe FR_d_2 = [GR_Table_Base1], 16
- nop.b 999 ;;
-}
-//
-// Load P_0
-// Load d_1
-// Is x >= 2**63
-// Is x <= -2**24?
-//
-
-{ .mmi
- ldfe FR_P_1 = [GR_Table_Base], 16 ;;
-//
-// Load P_1
-// Load d_2
-// Is x >= 2**24?
-//
- ldfe FR_P_2 = [GR_Table_Base], 16
- nop.i 999 ;;
-}
-
-{ .mmf
- nop.m 999
- ldfe FR_P_3 = [GR_Table_Base], 16
- fcmp.le.unc.s1 p7, p8 = FR_Input_X, FR_Neg_Two_to_24
-}
-
-{ .mfi
- nop.m 999
-//
-// Branch if +/- zero.
-// Decide about the paths to take:
-// If -2**24 < FR_Input_X < 2**24 - CASE 1 OR 2
-// OTHERWISE - CASE 3 OR 4
-//
- fcmp.le.unc.s0 p10, p11 = FR_Input_X, FR_Neg_Two_to_63
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p8) fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24
- nop.i 999
-}
-
-{ .mfi
- ldfe FR_Pi_by_4 = [GR_Table_Base1], 16
-(p11) fcmp.ge.s1 p10, p0 = FR_Input_X, FR_Two_to_63
- nop.i 999 ;;
-}
-
-{ .mmi
- ldfe FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;;
- ldfs FR_Two_to_M3 = [GR_Table_Base1], 4
- nop.i 999 ;;
-}
-
-{ .mib
- ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1], 12
- nop.i 999
-//
-// Load P_2
-// Load P_3
-// Load pi_by_4
-// Load neg_pi_by_4
-// Load 2**(-3)
-// Load -2**(-3).
-//
-(p10) br.cond.spnt L(SINCOS_ARG_TOO_LARGE) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Branch out if x >= 2**63. Use Payne-Hanek Reduction
-//
-(p7) br.cond.spnt L(SINCOS_LARGER_ARG) ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.
-//
- fma.s1 FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
- fcmp.lt.unc.s1 p6, p7 = FR_Input_X, FR_Pi_by_4
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Select the case when |Arg| < pi/4
-// Else Select the case when |Arg| >= pi/4
-//
- fcvt.fx.s1 FR_N_fix = FR_N_float
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// N = Arg * 2/pi
-// Check if Arg < pi/4
-//
-(p6) fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4
- nop.i 999 ;;
-}
-//
-// Case 2: Convert integer N_fix back to normalized floating-point value.
-// Case 1: p8 is only affected when p6 is set
-//
-
-{ .mfi
-(p7) ldfs FR_Two_to_M33 = [GR_Table_Base1], 4
-//
-// Grab the integer part of N and call it N_fix
-//
-(p6) fmerge.se FR_r = FR_Input_X, FR_Input_X
-// If |x| < pi/4, r = x and c = 0
-// If |x| < pi/4, is x < 2**(-3).
-// r = Arg
-// c = 0
-(p6) mov GR_N_Inc = GR_Sin_or_Cos ;;
-}
-
-{ .mmf
- nop.m 999
-(p7) ldfs FR_Neg_Two_to_M33 = [GR_Table_Base1], 4
-(p6) fmerge.se FR_c = f0, f0
-}
-
-{ .mfi
- nop.m 999
-(p6) fcmp.lt.unc.s1 p8, p9 = FR_Input_X, FR_Two_to_M3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// If |x| < pi/4, is -2**(-3)< x < 2**(-3) - set p8.
-// If |x| >= pi/4,
-// Create the right N for |x| < pi/4 and otherwise
-// Case 2: Place integer part of N in GP register
-//
-(p7) fcvt.xf FR_N_float = FR_N_fix
- nop.i 999 ;;
-}
-
-{ .mmf
- nop.m 999
-(p7) getf.sig GR_N_Inc = FR_N_fix
-(p8) fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Load 2**(-33), -2**(-33)
-//
-(p8) br.cond.spnt L(SINCOS_SMALL_R) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.sptk L(SINCOS_NORMAL_R) ;;
-}
-//
-// if |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.
-//
-//
-// In this branch, |x| >= pi/4.
-//
-
-{ .mfi
- ldfs FR_Neg_Two_to_M67 = [GR_Table_Base1], 8
-//
-// Load -2**(-67)
-//
- fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X
-//
-// w = N * P_2
-// s = -N * P_1 + Arg
-//
- add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos
-}
-
-{ .mfi
- nop.m 999
- fma.s1 FR_w = FR_N_float, FR_P_2, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Adjust N_fix by N_inc to determine whether sine or
-// cosine is being calculated
-//
- fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-// Remember x >= pi/4.
-// Is s <= -2**(-33) or s >= 2**(-33) (p6)
-// or -2**(-33) < s < 2**(-33) (p7)
-(p6) fms.s1 FR_r = FR_s, f1, FR_w
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p7) fma.s1 FR_w = FR_N_float, FR_P_3, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p6) fms.s1 FR_c = FR_s, f1, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// For big s: r = s - w: No further reduction is necessary
-// For small s: w = N * P_3 (change sign) More reduction
-//
-(p6) fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p8) fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fms.s1 FR_r = FR_s, f1, FR_U_1
- nop.i 999
-}
-
-{ .mfb
- nop.m 999
-//
-// For big s: Is |r| < 2**(-3)?
-// For big s: c = S - r
-// For small s: U_1 = N * P_2 + w
-//
-// If p8 is set, prepare to branch to Small_R.
-// If p9 is set, prepare to branch to Normal_R.
-// For big s, r is complete here.
-//
-(p6) fms.s1 FR_c = FR_c, f1, FR_w
-//
-// For big s: c = c + w (w has not been negated.)
-// For small s: r = S - U_1
-//
-(p8) br.cond.spnt L(SINCOS_SMALL_R) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.sptk L(SINCOS_NORMAL_R) ;;
-}
-
-{ .mfi
-(p7) add GR_Table_Base1 = 224, GR_Table_Base1
-//
-// Branch to SINCOS_SMALL_R or SINCOS_NORMAL_R
-//
-(p7) fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
-//
-// c = S - U_1
-// r = S_1 * r
-//
-//
-(p7) extr.u GR_i_1 = GR_N_Inc, 0, 1
-}
-
-{ .mmi
- nop.m 999 ;;
-//
-// Get [i_0,i_1] - two lsb of N_fix_gr.
-// Do dummy fmpy so inexact is always set.
-//
-(p7) cmp.eq.unc p9, p10 = 0x0, GR_i_1
-(p7) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
-}
-//
-// For small s: U_2 = N * P_2 - U_1
-// S_1 stored constant - grab the one stored with the
-// coefficients.
-//
-
-{ .mfi
-(p7) ldfe FR_S_1 = [GR_Table_Base1], 16
-//
-// Check if i_1 and i_0 != 0
-//
-(p10) fma.s1 FR_poly = f0, f1, FR_Neg_Two_to_M67
-(p7) cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fms.s1 FR_s = FR_s, f1, FR_r
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// S = S - r
-// U_2 = U_2 + w
-// load S_1
-//
-(p7) fma.s1 FR_rsq = FR_r, FR_r, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fma.s1 FR_U_2 = FR_U_2, f1, FR_w
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p7) fmerge.se FR_Input_X = FR_r, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_Input_X = f0, f1, f1
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// FR_rsq = r * r
-// Save r as the result.
-//
-(p7) fms.s1 FR_c = FR_s, f1, FR_U_1
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if ( i_1 ==0) poly = c + S_1*r*r*r
-// else Result = 1
-//
-(p12) fnma.s1 FR_Input_X = FR_Input_X, f1, f0
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p7) fma.s1 FR_r = FR_S_1, FR_r, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fma.d.s0 FR_S_1 = FR_S_1, FR_S_1, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// If i_1 != 0, poly = -2**(-67)
-//
-(p7) fms.s1 FR_c = FR_c, f1, FR_U_2
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// c = c - U_2
-//
-(p9) fma.s1 FR_poly = FR_r, FR_rsq, FR_c
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// i_0 != 0, so Result = -Result
-//
-(p11) fma.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p12) fms.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
-//
-// if (i_0 == 0), Result = Result + poly
-// else Result = Result - poly
-//
- br.ret.sptk b0 ;;
-}
-L(SINCOS_LARGER_ARG):
-
-{ .mfi
- nop.m 999
- fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
- nop.i 999
-}
-;;
-
-// This path for argument > 2**24
-// Adjust table_ptr1 to beginning of table.
-//
-
-{ .mmi
- nop.m 999
- addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.i 999
-}
-;;
-
-
-//
-// Point to 2**(-14)
-// N_0 = Arg * Inv_P_0
-//
-
-{ .mmi
- add GR_Table_Base = 688, GR_Table_Base ;;
- ldfs FR_Two_to_M14 = [GR_Table_Base], 4
- nop.i 999 ;;
-}
-
-{ .mfi
- ldfs FR_Neg_Two_to_M14 = [GR_Table_Base], 0
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Load values 2**(-14) and -2**(-14)
-//
- fcvt.fx.s1 FR_N_0_fix = FR_N_0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// N_0_fix = integer part of N_0
-//
- fcvt.xf FR_N_0 = FR_N_0_fix
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Make N_0 the integer part
-//
- fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
- fma.s1 FR_w = FR_N_0, FR_d_1, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Arg' = -N_0 * P_0 + Arg
-// w = N_0 * d_1
-//
- fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// N = A' * 2/pi
-//
- fcvt.fx.s1 FR_N_fix = FR_N_float
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// N_fix is the integer part
-//
- fcvt.xf FR_N_float = FR_N_fix
- nop.i 999 ;;
-}
-
-{ .mfi
- getf.sig GR_N_Inc = FR_N_fix
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
- add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// N is the integer part of the reduced-reduced argument.
-// Put the integer in a GP register
-//
- fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
- fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// s = -N*P_1 + Arg'
-// w = -N*P_2 + w
-// N_fix_gr = N_fix_gr + N_inc
-//
- fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// For |s| > 2**(-14) r = S + w (r complete)
-// Else U_hi = N_0 * d_1
-//
-(p9) fma.s1 FR_V_hi = FR_N_float, FR_P_2, f0
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p9) fma.s1 FR_U_hi = FR_N_0, FR_d_1, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Either S <= -2**(-14) or S >= 2**(-14)
-// or -2**(-14) < s < 2**(-14)
-//
-(p8) fma.s1 FR_r = FR_s, f1, FR_w
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p9) fma.s1 FR_w = FR_N_float, FR_P_3, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// We need abs of both U_hi and V_hi - don't
-// worry about switched sign of V_hi.
-//
-(p9) fms.s1 FR_A = FR_U_hi, f1, FR_V_hi
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// Big s: finish up c = (S - r) + w (c complete)
-// Case 4: A = U_hi + V_hi
-// Note: Worry about switched sign of V_hi, so subtract instead of add.
-//
-(p9) fnma.s1 FR_V_lo = FR_N_float, FR_P_2, FR_V_hi
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fms.s1 FR_U_lo = FR_N_0, FR_d_1, FR_U_hi
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fmerge.s FR_V_hiabs = f0, FR_V_hi
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-// For big s: c = S - r
-// For small s do more work: U_lo = N_0 * d_1 - U_hi
-//
-(p9) fmerge.s FR_U_hiabs = f0, FR_U_hi
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// For big s: Is |r| < 2**(-3)
-// For big s: if p12 set, prepare to branch to Small_R.
-// For big s: If p13 set, prepare to branch to Normal_R.
-//
-(p8) fms.s1 FR_c = FR_s, f1, FR_r
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// For small S: V_hi = N * P_2
-// w = N * P_3
-// Note the product does not include the (-) as in the writeup
-// so (-) missing for V_hi and w.
-//
-(p8) fcmp.lt.unc.s1 p12, p13 = FR_r, FR_Two_to_M3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p12) fcmp.gt.s1 p12, p13 = FR_r, FR_Neg_Two_to_M3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p8) fma.s1 FR_c = FR_c, f1, FR_w
- nop.i 999
-}
-
-{ .mfb
- nop.m 999
-(p9) fms.s1 FR_w = FR_N_0, FR_d_2, FR_w
-(p12) br.cond.spnt L(SINCOS_SMALL_R) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p13) br.cond.sptk L(SINCOS_NORMAL_R) ;;
-}
+ ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16
+ cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit
+(p10) br.cond.spnt _SINCOS_LARGE_ARGS // Go to "large args" routine
+};;
+// sincos_W = x * sincos_Inv_Pi_by_16
+// Multiply x by scaled 16/pi and add large const to shift integer part of W to
+// rightmost bits of significand
{ .mfi
- nop.m 999
-//
-// Big s: Vector off when |r| < 2**(-3). Recall that p8 will be true.
-// The remaining stuff is for Case 4.
-// Small s: V_lo = N * P_2 + U_hi (U_hi is in place of V_hi in writeup)
-// Note: the (-) is still missing for V_lo.
-// Small s: w = w + N_0 * d_2
-// Note: the (-) is now incorporated in w.
-//
-(p9) fcmp.ge.unc.s1 p10, p11 = FR_U_hiabs, FR_V_hiabs
- extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
-}
+ ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16
+ fma.s1 sincos_W_2TO61_RSH = sincos_NORM_f8,sincos_SIG_INV_PI_BY_16_2TO61,sincos_RSHF_2TO61
+ nop.i 999
+};;
+// sincos_NFLOAT = Round_Int_Nearest(sincos_W)
+// This is done by scaling back by 2^-61 and subtracting the shift constant
{ .mfi
- nop.m 999
-//
-// C_hi = S + A
-//
-(p9) fma.s1 FR_t = FR_U_lo, f1, FR_V_lo
- extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
-}
+ nop.m 999
+ fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
+ nop.i 999
+};;
-{ .mfi
- nop.m 999
-//
-// t = U_lo + V_lo
-//
-//
-(p10) fms.s1 FR_a = FR_U_hi, f1, FR_A
- nop.i 999 ;;
-}
+// get N = (int)sincos_int_Nfloat
{ .mfi
- nop.m 999
-(p11) fma.s1 FR_a = FR_V_hi, f1, FR_A
- nop.i 999
-}
-;;
-
-{ .mmi
- nop.m 999
- addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.i 999
-}
-;;
-
+ getf.sig sincos_GR_n = sincos_W_2TO61_RSH
+ nop.f 999
+ nop.i 999
+};;
+// Add 2^(k-1) (which is in sincos_r_sincos) to N
+// sincos_r = -sincos_Nfloat * sincos_Pi_by_16_1 + x
{ .mfi
- add GR_Table_Base = 528, GR_Table_Base
-//
-// Is U_hiabs >= V_hiabs?
-//
-(p9) fma.s1 FR_C_hi = FR_s, f1, FR_A
- nop.i 999 ;;
-}
+ add sincos_GR_n = sincos_GR_n, sincos_r_sincos
+ fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
+ nop.i 999
+};;
+// Get M (least k+1 bits of N)
{ .mmi
- ldfe FR_C_1 = [GR_Table_Base], 16 ;;
- ldfe FR_C_2 = [GR_Table_Base], 64
- nop.i 999 ;;
-}
-
-{ .mmf
- nop.m 999
-//
-// c = c + C_lo finished.
-// Load C_2
-//
- ldfe FR_S_1 = [GR_Table_Base], 16
-//
-// C_lo = S - C_hi
-//
- fma.s1 FR_t = FR_t, f1, FR_w ;;
-}
-//
-// r and c have been computed.
-// Make sure ftz mode is set - should be automatic when using wre
-// |r| < 2**(-3)
-// Get [i_0,i_1] - two lsb of N_fix.
-// Load S_1
-//
+ and sincos_GR_m = 0x1f,sincos_GR_n;;
+ nop.m 999
+ shl sincos_GR_32m = sincos_GR_m,5
+};;
+// Add 32*M to address of sin_cos_beta table
{ .mfi
- ldfe FR_S_2 = [GR_Table_Base], 64
-//
-// t = t + w
-//
-(p10) fms.s1 FR_a = FR_a, f1, FR_V_hi
- cmp.eq.unc p9, p10 = 0x0, GR_i_0
-}
+ add sincos_AD_2 = sincos_GR_32m, sincos_AD_1
+(p8) fclass.m.unc p10,p0 = f8,0x0b // For sin denorm. - set uflow
+ nop.i 999
+};;
+// Load Sin and Cos table value using obtained index m (sincos_AD_2)
{ .mfi
- nop.m 999
-//
-// For larger u than v: a = U_hi - A
-// Else a = V_hi - A (do an add to account for missing (-) on V_hi
-//
- fms.s1 FR_C_lo = FR_s, f1, FR_C_hi
- nop.i 999 ;;
-}
+ ldfe sincos_Sm = [sincos_AD_2],16
+(p9) fclass.m.unc p11,p0 = f8,0x0b // For cos denorm - set denorm
+ nop.i 999
+};;
+// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
{ .mfi
- nop.m 999
-(p11) fms.s1 FR_a = FR_U_hi, f1, FR_a
- cmp.eq.unc p11, p12 = 0x0, GR_i_1
-}
+ ldfe sincos_Cm = [sincos_AD_2]
+ fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r
+ nop.i 999
+};;
+// get rsq = r*r
{ .mfi
- nop.m 999
-//
-// If u > v: a = (U_hi - A) + V_hi
-// Else a = (V_hi - A) + U_hi
-// In each case account for negative missing from V_hi.
-//
- fma.s1 FR_C_lo = FR_C_lo, f1, FR_A
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 sincos_rsq = sincos_r, sincos_r, f0 // r^2 = r*r
+ nop.i 999
}
-
{ .mfi
- nop.m 999
-//
-// C_lo = (S - C_hi) + A
-//
- fma.s1 FR_t = FR_t, f1, FR_a
- nop.i 999 ;;
-}
+ nop.m 999
+ fmpy.s0 fp_tmp = fp_tmp,fp_tmp // forces inexact flag
+ nop.i 999
+};;
+// sincos_r_exact = sincos_r -sincos_Nfloat * sincos_Pi_by_16_3
{ .mfi
- nop.m 999
-//
-// t = t + a
-//
- fma.s1 FR_C_lo = FR_C_lo, f1, FR_t
- nop.i 999 ;;
-}
+ nop.m 999
+ fnma.s1 sincos_r_exact = sincos_NFLOAT, sincos_Pi_by_16_3, sincos_r
+ nop.i 999
+};;
+// Polynomials calculation
+// P_1 = P4*r^2 + P3
+// Q_1 = Q4*r^2 + Q3
{ .mfi
- nop.m 999
-//
-// C_lo = C_lo + t
-// Adjust Table_Base to beginning of table
-//
- fma.s1 FR_r = FR_C_hi, f1, FR_C_lo
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 sincos_P_temp1 = sincos_rsq, sincos_P4, sincos_P3
+ nop.i 999
}
-
{ .mfi
- nop.m 999
-//
-// Load S_2
-//
- fma.s1 FR_rsq = FR_r, FR_r, f0
- nop.i 999
-}
+ nop.m 999
+ fma.s1 sincos_Q_temp1 = sincos_rsq, sincos_Q4, sincos_Q3
+ nop.i 999
+};;
+// get rcube = r^3 and S[m]*r^2
{ .mfi
- nop.m 999
-//
-// Table_Base points to C_1
-// r = C_hi + C_lo
-//
- fms.s1 FR_c = FR_C_hi, f1, FR_r
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 sincos_srsq = sincos_Sm,sincos_rsq
+ nop.i 999
}
-
{ .mfi
- nop.m 999
-//
-// if i_1 ==0: poly = S_2 * FR_rsq + S_1
-// else poly = C_2 * FR_rsq + C_1
-//
-(p11) fma.s1 FR_Input_X = f0, f1, FR_r
- nop.i 999 ;;
-}
+ nop.m 999
+ fmpy.s1 sincos_rcub = sincos_r_exact, sincos_rsq
+ nop.i 999
+};;
+// Polynomials calculation
+// Q_2 = Q_1*r^2 + Q2
+// P_2 = P_1*r^2 + P2
{ .mfi
- nop.m 999
-(p12) fma.s1 FR_Input_X = f0, f1, f1
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 sincos_Q_temp2 = sincos_rsq, sincos_Q_temp1, sincos_Q2
+ nop.i 999
}
-
{ .mfi
- nop.m 999
-//
-// Compute r_cube = FR_rsq * r
-//
-(p11) fma.s1 FR_poly = FR_rsq, FR_S_2, FR_S_1
- nop.i 999 ;;
-}
+ nop.m 999
+ fma.s1 sincos_P_temp2 = sincos_rsq, sincos_P_temp1, sincos_P2
+ nop.i 999
+};;
+// Polynomials calculation
+// Q = Q_2*r^2 + Q1
+// P = P_2*r^2 + P1
{ .mfi
- nop.m 999
-(p12) fma.s1 FR_poly = FR_rsq, FR_C_2, FR_C_1
- nop.i 999
+ nop.m 999
+ fma.s1 sincos_Q = sincos_rsq, sincos_Q_temp2, sincos_Q1
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Compute FR_rsq = r * r
-// Is i_1 == 0 ?
-//
- fma.s1 FR_r_cubed = FR_rsq, FR_r, f0
- nop.i 999 ;;
-}
+ nop.m 999
+ fma.s1 sincos_P = sincos_rsq, sincos_P_temp2, sincos_P1
+ nop.i 999
+};;
+// Get final P and Q
+// Q = Q*S[m]*r^2 + S[m]
+// P = P*r^3 + r
{ .mfi
- nop.m 999
-//
-// c = C_hi - r
-// Load C_1
-//
- fma.s1 FR_c = FR_c, f1, FR_C_lo
- nop.i 999
+ nop.m 999
+ fma.s1 sincos_Q = sincos_srsq,sincos_Q, sincos_Sm
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if i_1 ==0: poly = r_cube * poly + c
-// else poly = FR_rsq * poly
-//
-(p10) fms.s1 FR_Input_X = f0, f1, FR_Input_X
- nop.i 999 ;;
-}
+ nop.m 999
+ fma.s1 sincos_P = sincos_rcub,sincos_P, sincos_r_exact
+ nop.i 999
+};;
+// If sin(denormal), force underflow to be set
+.pred.rel "mutex",p10,p11
{ .mfi
- nop.m 999
-//
-// if i_1 ==0: Result = r
-// else Result = 1.0
-//
-(p11) fma.s1 FR_poly = FR_r_cubed, FR_poly, FR_c
- nop.i 999 ;;
+ nop.m 999
+(p10) fmpy.d.s0 fp_tmp = f8,f8 // forces underflow flag
+ nop.i 999 // for denormal sine args
}
-
{ .mfi
- nop.m 999
-(p12) fma.s1 FR_poly = FR_rsq, FR_poly, f0
- nop.i 999 ;;
-}
+ nop.m 999
+(p11) fma.d.s0 fp_tmp = f8,f1, f8 // forces denormal flag
+ nop.i 999 // for denormal cosine args
+};;
-{ .mfi
- nop.m 999
-//
-// if i_0 !=0: Result = -Result
-//
-(p9) fma.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
- nop.i 999 ;;
-}
+// Final calculation
+// result = C[m]*P + Q
{ .mfb
- nop.m 999
-(p10) fms.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
-//
-// if i_0 == 0: Result = Result + poly
-// else Result = Result - poly
-//
- br.ret.sptk b0 ;;
-}
-L(SINCOS_SMALL_R):
-
-{ .mii
- nop.m 999
- extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
-//
-//
-// Compare both i_1 and i_0 with 0.
-// if i_1 == 0, set p9.
-// if i_0 == 0, set p11.
-//
- cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 FR_rsq = FR_r, FR_r, f0
- extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Z = Z * FR_rsq
-//
-(p10) fnma.s1 FR_c = FR_c, FR_r, f0
- cmp.eq.unc p11, p12 = 0x0, GR_i_0
-}
-;;
-
-// ******************************************************************
-// ******************************************************************
-// ******************************************************************
-// r and c have been computed.
-// We know whether this is the sine or cosine routine.
-// Make sure ftz mode is set - should be automatic when using wre
-// |r| < 2**(-3)
-//
-// Set table_ptr1 to beginning of constant table.
-// Get [i_0,i_1] - two lsb of N_fix_gr.
-//
-
-{ .mmi
- nop.m 999
- addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.i 999
-}
-;;
-
-
-//
-// Set table_ptr1 to point to S_5.
-// Set table_ptr1 to point to C_5.
-// Compute FR_rsq = r * r
-//
-
-{ .mfi
-(p9) add GR_Table_Base = 672, GR_Table_Base
-(p10) fmerge.s FR_r = f1, f1
-(p10) add GR_Table_Base = 592, GR_Table_Base ;;
-}
-//
-// Set table_ptr1 to point to S_5.
-// Set table_ptr1 to point to C_5.
-//
-
-{ .mmi
-(p9) ldfe FR_S_5 = [GR_Table_Base], -16 ;;
-//
-// if (i_1 == 0) load S_5
-// if (i_1 != 0) load C_5
-//
-(p9) ldfe FR_S_4 = [GR_Table_Base], -16
- nop.i 999 ;;
-}
-
-{ .mmf
-(p10) ldfe FR_C_5 = [GR_Table_Base], -16
-//
-// Z = FR_rsq * FR_rsq
-//
-(p9) ldfe FR_S_3 = [GR_Table_Base], -16
-//
-// Compute FR_rsq = r * r
-// if (i_1 == 0) load S_4
-// if (i_1 != 0) load C_4
-//
- fma.s1 FR_Z = FR_rsq, FR_rsq, f0 ;;
-}
-//
-// if (i_1 == 0) load S_3
-// if (i_1 != 0) load C_3
-//
-
-{ .mmi
-(p9) ldfe FR_S_2 = [GR_Table_Base], -16 ;;
-//
-// if (i_1 == 0) load S_2
-// if (i_1 != 0) load C_2
-//
-(p9) ldfe FR_S_1 = [GR_Table_Base], -16
- nop.i 999
-}
-
-{ .mmi
-(p10) ldfe FR_C_4 = [GR_Table_Base], -16 ;;
-(p10) ldfe FR_C_3 = [GR_Table_Base], -16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p10) ldfe FR_C_2 = [GR_Table_Base], -16 ;;
-(p10) ldfe FR_C_1 = [GR_Table_Base], -16
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1 != 0):
-// poly_lo = FR_rsq * C_5 + C_4
-// poly_hi = FR_rsq * C_2 + C_1
-//
-(p9) fma.s1 FR_Z = FR_Z, FR_r, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1 == 0) load S_1
-// if (i_1 != 0) load C_1
-//
-(p9) fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// c = -c * r
-// dummy fmpy's to flag inexact.
-//
-(p9) fma.d.s0 FR_S_4 = FR_S_4, FR_S_4, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// poly_lo = FR_rsq * poly_lo + C_3
-// poly_hi = FR_rsq * poly_hi
-//
- fma.s1 FR_Z = FR_Z, FR_rsq, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1 == 0):
-// poly_lo = FR_rsq * S_5 + S_4
-// poly_hi = FR_rsq * S_2 + S_1
-//
-(p10) fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1 == 0):
-// Z = Z * r for only one of the small r cases - not there
-// in original implementation notes.
-//
-(p9) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.d.s0 FR_C_1 = FR_C_1, FR_C_1, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// poly_lo = FR_rsq * poly_lo + S_3
-// poly_hi = FR_rsq * poly_hi
-//
-(p10) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1 == 0): dummy fmpy's to flag inexact
-// r = 1
-//
-(p9) fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// poly_hi = r * poly_hi
-//
- fma.s1 FR_poly = FR_Z, FR_poly_lo, FR_c
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p12) fms.s1 FR_r = f0, f1, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// poly_hi = Z * poly_lo + c
-// if i_0 == 1: r = -r
-//
- fma.s1 FR_poly = FR_poly, f1, FR_poly_hi
- nop.i 999 ;;
-}
+ nop.m 999
+ fma.d.s0 f8 = sincos_Cm, sincos_P, sincos_Q
+ br.ret.sptk b0 // Exit for common path
+};;
+////////// x = 0/Inf/NaN path //////////////////
+_SINCOS_SPECIAL_ARGS:
+.pred.rel "mutex",p8,p9
+// sin(+/-0) = +/-0
+// sin(Inf) = NaN
+// sin(NaN) = NaN
{ .mfi
- nop.m 999
-(p12) fms.d.s0 FR_Input_X = FR_r, f1, FR_poly
- nop.i 999
+ nop.m 999
+(p8) fma.d.s0 f8 = f8, f0, f0 // sin(+/-0,NaN,Inf)
+ nop.i 999
}
-
+// cos(+/-0) = 1.0
+// cos(Inf) = NaN
+// cos(NaN) = NaN
{ .mfb
- nop.m 999
-//
-// poly = poly + poly_hi
-//
-(p11) fma.d.s0 FR_Input_X = FR_r, f1, FR_poly
-//
-// if (i_0 == 0) Result = r + poly
-// if (i_0 != 0) Result = r - poly
-//
- br.ret.sptk b0 ;;
-}
-L(SINCOS_NORMAL_R):
-
-{ .mii
- nop.m 999
- extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
-//
-// Set table_ptr1 and table_ptr2 to base address of
-// constant table.
- cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
-}
-
-{ .mfi
- nop.m 999
- fma.s1 FR_rsq = FR_r, FR_r, f0
- extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
-}
+ nop.m 999
+(p9) fma.d.s0 f8 = f8, f0, f1 // cos(+/-0,NaN,Inf)
+ br.ret.sptk b0 // Exit for x = 0/Inf/NaN path
+};;
+GLOBAL_IEEE754_END(cos)
+//////////// x >= 2^27 - large arguments routine call ////////////
+LOCAL_LIBM_ENTRY(__libm_callout_sincos)
+_SINCOS_LARGE_ARGS:
+.prologue
{ .mfi
- nop.m 999
- frcpa.s1 FR_r_hi, p6 = f1, FR_r
- cmp.eq.unc p11, p12 = 0x0, GR_i_0
-}
-;;
-
-// ******************************************************************
-// ******************************************************************
-// ******************************************************************
-//
-// r and c have been computed.
-// We known whether this is the sine or cosine routine.
-// Make sure ftz mode is set - should be automatic when using wre
-// Get [i_0,i_1] - two lsb of N_fix_gr alone.
-//
-
-{ .mmi
- nop.m 999
- addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
- nop.i 999
+ mov sincos_GR_all_ones = -1 // 0xffffffff
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS = ar.pfs
}
;;
-{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.i 999
-}
-;;
-
-
-{ .mfi
-(p10) add GR_Table_Base = 384, GR_Table_Base
-(p12) fms.s1 FR_Input_X = f0, f1, f1
-(p9) add GR_Table_Base = 224, GR_Table_Base ;;
-}
-
-{ .mmf
- nop.m 999
-(p10) ldfe FR_QQ_8 = [GR_Table_Base], 16
-//
-// if (i_1==0) poly = poly * FR_rsq + PP_1_lo
-// else poly = FR_rsq * poly
-//
-(p11) fma.s1 FR_Input_X = f0, f1, f1 ;;
-}
-
-{ .mmf
-(p10) ldfe FR_QQ_7 = [GR_Table_Base], 16
-//
-// Adjust table pointers based on i_0
-// Compute rsq = r * r
-//
-(p9) ldfe FR_PP_8 = [GR_Table_Base], 16
- fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 ;;
-}
-
-{ .mmf
-(p9) ldfe FR_PP_7 = [GR_Table_Base], 16
-(p10) ldfe FR_QQ_6 = [GR_Table_Base], 16
-//
-// Load PP_8 and QQ_8; PP_7 and QQ_7
-//
- frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi ;;
-}
-//
-// if (i_1==0) poly = PP_7 + FR_rsq * PP_8.
-// else poly = QQ_7 + FR_rsq * QQ_8.
-//
-
-{ .mmb
-(p9) ldfe FR_PP_6 = [GR_Table_Base], 16
-(p10) ldfe FR_QQ_5 = [GR_Table_Base], 16
- nop.b 999 ;;
-}
-
-{ .mmb
-(p9) ldfe FR_PP_5 = [GR_Table_Base], 16
-(p10) ldfe FR_S_1 = [GR_Table_Base], 16
- nop.b 999 ;;
-}
-
-{ .mmb
-(p10) ldfe FR_QQ_1 = [GR_Table_Base], 16
-(p9) ldfe FR_C_1 = [GR_Table_Base], 16
- nop.b 999 ;;
-}
-
-{ .mmi
-(p10) ldfe FR_QQ_4 = [GR_Table_Base], 16 ;;
-(p9) ldfe FR_PP_1 = [GR_Table_Base], 16
- nop.i 999 ;;
-}
-
-{ .mmf
-(p10) ldfe FR_QQ_3 = [GR_Table_Base], 16
-//
-// if (i_1=0) corr = corr + c*c
-// else corr = corr * c
-//
-(p9) ldfe FR_PP_4 = [GR_Table_Base], 16
-(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 ;;
-}
-//
-// if (i_1=0) poly = rsq * poly + PP_5
-// else poly = rsq * poly + QQ_5
-// Load PP_4 or QQ_4
-//
-
-{ .mmf
-(p9) ldfe FR_PP_3 = [GR_Table_Base], 16
-(p10) ldfe FR_QQ_2 = [GR_Table_Base], 16
-//
-// r_hi = frcpa(frcpa(r)).
-// r_cube = r * FR_rsq.
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 ;;
-}
-//
-// Do dummy multiplies so inexact is always set.
-//
-
-{ .mfi
-(p9) ldfe FR_PP_2 = [GR_Table_Base], 16
-//
-// r_lo = r - r_hi
-//
-(p9) fma.s1 FR_U_lo = FR_r_hi, FR_r_hi, f0
- nop.i 999 ;;
-}
-
-{ .mmf
- nop.m 999
-(p9) ldfe FR_PP_1_lo = [GR_Table_Base], 16
-(p10) fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1=0) U_lo = r_hi * r_hi
-// else U_lo = r_hi + r
-//
-(p9) fma.s1 FR_corr = FR_C_1, FR_rsq, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1=0) corr = C_1 * rsq
-// else corr = S_1 * r_cubed + r
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_U_lo = FR_r_hi, f1, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1=0) U_hi = r_hi + U_hi
-// else U_hi = QQ_1 * U_hi + 1
-//
-(p9) fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_U_lo
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// U_hi = r_hi * r_hi
-//
- fms.s1 FR_r_lo = FR_r, f1, FR_r_hi
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Load PP_1, PP_6, PP_5, and C_1
-// Load QQ_1, QQ_6, QQ_5, and S_1
-//
- fma.s1 FR_U_hi = FR_r_hi, FR_r_hi, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p10) fnma.s1 FR_corr = FR_corr, FR_c, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1=0) U_lo = r * r_hi + U_lo
-// else U_lo = r_lo * U_lo
-//
-(p9) fma.s1 FR_corr = FR_corr, FR_c, FR_c
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1 =0) U_hi = r + U_hi
-// if (i_1 =0) U_lo = r_lo * U_lo
-//
-//
-(p9) fma.d.s0 FR_PP_5 = FR_PP_5, FR_PP_4, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1=0) poly = poly * rsq + PP_6
-// else poly = poly * rsq + QQ_6
-//
-(p9) fma.s1 FR_U_hi = FR_r_hi, FR_U_hi, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_U_hi = FR_QQ_1, FR_U_hi, f1
- nop.i 999 ;;
-}
-
{ .mfi
- nop.m 999
-(p10) fma.d.s0 FR_QQ_5 = FR_QQ_5, FR_QQ_5, f0
- nop.i 999 ;;
+ mov GR_SAVE_GP = gp
+ nop.f 999
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0 = b0
}
-{ .mfi
- nop.m 999
-//
-// if (i_1!=0) U_hi = PP_1 * U_hi
-// if (i_1!=0) U_lo = r * r + U_lo
-// Load PP_3 or QQ_3
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p9) fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Load PP_2, QQ_2
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1==0) poly = FR_rsq * poly + PP_3
-// else poly = FR_rsq * poly + QQ_3
-// Load PP_1_lo
-//
-(p9) fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1 =0) poly = poly * rsq + pp_r4
-// else poly = poly * rsq + qq_r4
-//
-(p9) fma.s1 FR_U_hi = FR_r, f1, FR_U_hi
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1==0) U_lo = PP_1_hi * U_lo
-// else U_lo = QQ_1 * U_lo
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_0==0) Result = 1
-// else Result = -1
-//
- fma.s1 FR_V = FR_U_lo, f1, FR_corr
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1==0) poly = FR_rsq * poly + PP_2
-// else poly = FR_rsq * poly + QQ_2
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// V = U_lo + corr
-//
-(p9) fma.s1 FR_poly = FR_r_cubed, FR_poly, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// if (i_1==0) poly = r_cube * poly
-// else poly = FR_rsq * poly
-//
- fma.s1 FR_V = FR_poly, f1, FR_V
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p12) fms.d.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
- nop.i 999
-}
-
-{ .mfb
- nop.m 999
-//
-// V = V + poly
-//
-(p11) fma.d.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
-//
-// if (i_0==0) Result = Result * U_hi + V
-// else Result = Result * U_hi - V
-//
- br.ret.sptk b0 ;;
-}
-
-//
-// If cosine, FR_Input_X = 1
-// If sine, FR_Input_X = +/-Zero (Input FR_Input_X)
-// Results are exact, no exceptions
-//
-L(SINCOS_ZERO):
-
-{ .mmb
- cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos
- nop.m 999
- nop.b 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fmerge.s FR_Input_X = FR_Input_X, FR_Input_X
- nop.i 999
-}
-
-{ .mfb
- nop.m 999
-(p6) fmerge.s FR_Input_X = f1, f1
- br.ret.sptk b0 ;;
-}
-
-L(SINCOS_SPECIAL):
-
-//
-// Path for Arg = +/- QNaN, SNaN, Inf
-// Invalid can be raised. SNaNs
-// become QNaNs
-//
-
-{ .mfb
- nop.m 999
- fmpy.d.s0 FR_Input_X = FR_Input_X, f0
- br.ret.sptk b0 ;;
-}
-.endp __libm_cos_double_dbx#
-ASM_SIZE_DIRECTIVE(__libm_cos_double_dbx#)
-
-
-
-//
-// Call int pi_by_2_reduce(double* x, double *y)
-// for |arguments| >= 2**63
-// Address to save r and c as double
-//
-//
-// psp sp+64
-// sp+48 -> f0 c
-// r45 sp+32 -> f0 r
-// r44 -> sp+16 -> InputX
-// sp sp -> scratch provided to callee
-
+.body
+{ .mbb
+ setf.sig sincos_save_tmp = sincos_GR_all_ones// inexact set
+ nop.b 999
+(p8) br.call.sptk.many b0 = __libm_sin_large# // sin(large_X)
+};;
-.proc __libm_callout_2
-__libm_callout_2:
-L(SINCOS_ARG_TOO_LARGE):
+{ .mbb
+ cmp.ne p9,p0 = sincos_r_sincos, r0 // set p9 if cos
+ nop.b 999
+(p9) br.call.sptk.many b0 = __libm_cos_large# // cos(large_X)
+};;
-.prologue
{ .mfi
- add r45=-32,sp // Parameter: r address
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov gp = GR_SAVE_GP
+ fma.d.s0 f8 = f8, f1, f0 // Round result to double
+ mov b0 = GR_SAVE_B0
}
+// Force inexact set
{ .mfi
-.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
-};;
-{ .mmi
- stfe [r45] = f0,16 // Clear Parameter r on stack
- add r44 = 16,sp // Parameter x address
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
-};;
-.body
-{ .mib
- stfe [r45] = f0,-16 // Clear Parameter c on stack
- nop.i 0
- nop.b 0
-}
-{ .mib
- stfe [r44] = FR_Input_X // Store Parameter x on stack
- nop.i 0
- br.call.sptk b0=__libm_pi_by_2_reduce# ;;
+ nop.m 999
+ fmpy.s0 sincos_save_tmp = sincos_save_tmp, sincos_save_tmp
+ nop.i 999
};;
-
-{ .mii
- ldfe FR_Input_X =[r44],16
-//
-// Get r and c off stack
-//
- adds GR_Table_Base1 = -16, GR_Table_Base1
-//
-// Get r and c off stack
-//
- add GR_N_Inc = GR_Sin_or_Cos,r8 ;;
-}
-{ .mmb
- ldfe FR_r =[r45],16
-//
-// Get X off the stack
-// Readjust Table ptr
-//
- ldfs FR_Two_to_M3 = [GR_Table_Base1],4
- nop.b 999 ;;
-}
-{ .mmb
- ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1],0
- ldfe FR_c =[r45]
- nop.b 999 ;;
-}
-
-{ .mfi
-.restore sp
- add sp = 64,sp // Restore stack pointer
- fcmp.lt.unc.s1 p6, p0 = FR_r, FR_Two_to_M3
- mov b0 = GR_SAVE_B0 // Restore return address
-};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- nop.b 0
+ nop.m 999
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0 // Exit for large arguments routine call
};;
+LOCAL_LIBM_END(__libm_callout_sincos)
-{ .mfi
- nop.m 999
-(p6) fcmp.gt.unc.s1 p6, p0 = FR_r, FR_Neg_Two_to_M3
- nop.i 999 ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(SINCOS_SMALL_R) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
- br.cond.sptk L(SINCOS_NORMAL_R) ;;
-}
-
-.endp __libm_callout_2
-ASM_SIZE_DIRECTIVE(__libm_callout_2)
-
-.type __libm_pi_by_2_reduce#,@function
-.global __libm_pi_by_2_reduce#
-
+.type __libm_sin_large#,@function
+.global __libm_sin_large#
+.type __libm_cos_large#,@function
+.global __libm_cos_large#
-.type __libm_sin_double_dbx#,@function
-.global __libm_sin_double_dbx#
-.type __libm_cos_double_dbx#,@function
-.global __libm_cos_double_dbx#
diff --git a/sysdeps/ia64/fpu/s_cosf.S b/sysdeps/ia64/fpu/s_cosf.S
index 0e47255b3f..89cf82372d 100644
--- a/sysdeps/ia64/fpu/s_cosf.S
+++ b/sysdeps/ia64/fpu/s_cosf.S
@@ -1,12 +1,10 @@
-
.file "sincosf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -22,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -37,663 +35,680 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
-
-
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
// History
//==============================================================
-// 2/02/00 Initial revision
-// 4/02/00 Unwind support added.
-// 5/10/00 Improved speed with new algorithm.
-// 8/08/00 Improved speed by avoiding SIR flush.
-// 8/17/00 Changed predicate register macro-usage to direct predicate
-// names due to an assembler bug.
-// 8/30/00 Put sin_of_r before sin_tbl_S_cos_of_r to gain a cycle
-// 1/02/00 Fixed flag settings, improved speed.
+// 02/02/00 Initial version
+// 04/02/00 Unwind support added.
+// 06/16/00 Updated tables to enforce symmetry
+// 08/31/00 Saved 2 cycles in main path, and 9 in other paths.
+// 09/20/00 The updated tables regressed to an old version, so reinstated them
+// 10/18/00 Changed one table entry to ensure symmetry
+// 01/03/01 Improved speed, fixed flag settings for small arguments.
+// 02/18/02 Large arguments processing routine excluded
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 06/03/02 Ensure inexact flag set for large arg result
+// 09/05/02 Single precision version is made using double precision one as base
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// float sinf( float x);
// float cosf( float x);
//
+// Overview of operation
+//==============================================================
+//
+// Step 1
+// ======
+// Reduce x to region -1/2*pi/2^k ===== 0 ===== +1/2*pi/2^k where k=4
+// divide x by pi/2^k.
+// Multiply by 2^k/pi.
+// nfloat = Round result to integer (round-to-nearest)
+//
+// r = x - nfloat * pi/2^k
+// Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k)
-#include "libm_support.h"
-
-// Assembly macros
+// for increased accuracy.
+// pi/2^k is stored as two numbers that when added make pi/2^k.
+// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
+// HIGH part is rounded toward zero, LOW part to nearest
+//
+// x = (nfloat * pi/2^k) + r
+// r is small enough that we can use a polynomial approximation
+// and is referred to as the reduced argument.
+//
+// Step 3
+// ======
+// Take the unreduced part and remove the multiples of 2pi.
+// So nfloat = nfloat (with lower k+1 bits cleared) + lower k+1 bits
+//
+// nfloat (with lower k+1 bits cleared) is a multiple of 2^(k+1)
+// N * 2^(k+1)
+// nfloat * pi/2^k = N * 2^(k+1) * pi/2^k + (lower k+1 bits) * pi/2^k
+// nfloat * pi/2^k = N * 2 * pi + (lower k+1 bits) * pi/2^k
+// nfloat * pi/2^k = N2pi + M * pi/2^k
+//
+//
+// Sin(x) = Sin((nfloat * pi/2^k) + r)
+// = Sin(nfloat * pi/2^k) * Cos(r) + Cos(nfloat * pi/2^k) * Sin(r)
+//
+// Sin(nfloat * pi/2^k) = Sin(N2pi + Mpi/2^k)
+// = Sin(N2pi)Cos(Mpi/2^k) + Cos(N2pi)Sin(Mpi/2^k)
+// = Sin(Mpi/2^k)
+//
+// Cos(nfloat * pi/2^k) = Cos(N2pi + Mpi/2^k)
+// = Cos(N2pi)Cos(Mpi/2^k) + Sin(N2pi)Sin(Mpi/2^k)
+// = Cos(Mpi/2^k)
+//
+// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
+//
+//
+// Step 4
+// ======
+// 0 <= M < 2^(k+1)
+// There are 2^(k+1) Sin entries in a table.
+// There are 2^(k+1) Cos entries in a table.
+//
+// Get Sin(Mpi/2^k) and Cos(Mpi/2^k) by table lookup.
+//
+//
+// Step 5
+// ======
+// Calculate Cos(r) and Sin(r) by polynomial approximation.
+//
+// Cos(r) = 1 + r^2 q1 + r^4 q2 = Series for Cos
+// Sin(r) = r + r^3 p1 + r^5 p2 = Series for Sin
+//
+// and the coefficients q1, q2 and p1, p2 are stored in a table
+//
+//
+// Calculate
+// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
+//
+// as follows
+//
+// S[m] = Sin(Mpi/2^k) and C[m] = Cos(Mpi/2^k)
+// rsq = r*r
+//
+//
+// P = P1 + r^2*P2
+// Q = Q1 + r^2*Q2
+//
+// rcub = r * rsq
+// Sin(r) = r + rcub * P
+// = r + r^3p1 + r^5p2 = Sin(r)
+//
+// The coefficients are not exactly these values, but almost.
+//
+// p1 = -1/6 = -1/3!
+// p2 = 1/120 = 1/5!
+// p3 = -1/5040 = -1/7!
+// p4 = 1/362880 = 1/9!
+//
+// P = r + r^3 * P
+//
+// Answer = S[m] Cos(r) + C[m] P
+//
+// Cos(r) = 1 + rsq Q
+// Cos(r) = 1 + r^2 Q
+// Cos(r) = 1 + r^2 (q1 + r^2q2)
+// Cos(r) = 1 + r^2q1 + r^4q2
+//
+// S[m] Cos(r) = S[m](1 + rsq Q)
+// S[m] Cos(r) = S[m] + S[m] rsq Q
+// S[m] Cos(r) = S[m] + s_rsq Q
+// Q = S[m] + s_rsq Q
+//
+// Then,
+//
+// Answer = Q + C[m] P
+
+
+// Registers used
//==============================================================
+// general input registers:
+// r14 -> r19
+// r32 -> r45
-// SIN_Sin_Flag = p6
-// SIN_Cos_Flag = p7
-
-// integer registers used
-
- SIN_AD_PQ_1 = r33
- SIN_AD_PQ_2 = r33
- sin_GR_sincos_flag = r34
- sin_GR_Mint = r35
-
- sin_GR_index = r36
- gr_tmp = r37
-
- GR_SAVE_B0 = r37
- GR_SAVE_GP = r38
- GR_SAVE_PFS = r39
-
-
-// floating point registers used
-
- sin_coeff_P1 = f32
- sin_coeff_P2 = f33
- sin_coeff_Q1 = f34
- sin_coeff_Q2 = f35
- sin_coeff_P4 = f36
- sin_coeff_P5 = f37
- sin_coeff_Q3 = f38
- sin_coeff_Q4 = f39
- sin_Mx = f40
- sin_Mfloat = f41
- sin_tbl_S = f42
- sin_tbl_C = f43
- sin_r = f44
- sin_rcube = f45
- sin_tsq = f46
- sin_r7 = f47
- sin_t = f48
- sin_poly_p2 = f49
- sin_poly_p1 = f50
- fp_tmp = f51
- sin_poly_p3 = f52
- sin_poly_p4 = f53
- sin_of_r = f54
- sin_S_t = f55
- sin_poly_q2 = f56
- sin_poly_q1 = f57
- sin_S_tcube = f58
- sin_poly_q3 = f59
- sin_poly_q4 = f60
- sin_tbl_S_tcube = f61
- sin_tbl_S_cos_of_r = f62
-
- sin_coeff_Q5 = f63
- sin_coeff_Q6 = f64
- sin_coeff_P3 = f65
-
- sin_poly_q5 = f66
- sin_poly_q12 = f67
- sin_poly_q3456 = f68
- fp_tmp2 = f69
- SIN_NORM_f8 = f70
-
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// predicate registers used:
+// p6 -> p14
-.align 16
+// floating-point registers used
+// f9 -> f15
+// f32 -> f61
-sin_coeff_1_table:
-ASM_TYPE_DIRECTIVE(sin_coeff_1_table,@object)
-data8 0xBF56C16C16BF6462 // q3
-data8 0x3EFA01A0128B9EBC // q4
-data8 0xBE927E42FDF33FFE // q5
-data8 0x3E21DA5C72A446F3 // q6
-data8 0x3EC71DD1D5E421A4 // p4
-data8 0xBE5AC5C9D0ACF95A // p5
-data8 0xBFC55555555554CA // p1
-data8 0x3F811111110F2395 // p2
-data8 0xBFE0000000000000 // q1
-data8 0x3FA55555555554EF // q2
-data8 0xBF2A01A011232913 // p3
-data8 0x0000000000000000 // pad
-
-
-/////////////////////////////////////////
-
-data8 0xBFE1A54991426566 //sin(-32)
-data8 0x3FEAB1F5305DE8E5 //cos(-32)
-data8 0x3FD9DBC0B640FC81 //sin(-31)
-data8 0x3FED4591C3E12A20 //cos(-31)
-data8 0x3FEF9DF47F1C903D //sin(-30)
-data8 0x3FC3BE82F2505A52 //cos(-30)
-data8 0x3FE53C7D20A6C9E7 //sin(-29)
-data8 0xBFE7F01658314E47 //cos(-29)
-data8 0xBFD156853B4514D6 //sin(-28)
-data8 0xBFEECDAAD1582500 //cos(-28)
-data8 0xBFEE9AA1B0E5BA30 //sin(-27)
-data8 0xBFD2B266F959DED5 //cos(-27)
-data8 0xBFE866E0FAC32583 //sin(-26)
-data8 0x3FE4B3902691A9ED //cos(-26)
-data8 0x3FC0F0E6F31E809D //sin(-25)
-data8 0x3FEFB7EEF59504FF //cos(-25)
-data8 0x3FECFA7F7919140F //sin(-24)
-data8 0x3FDB25BFB50A609A //cos(-24)
-data8 0x3FEB143CD0247D02 //sin(-23)
-data8 0xBFE10CF7D591F272 //cos(-23)
-data8 0x3F8220A29F6EB9F4 //sin(-22)
-data8 0xBFEFFFADD8D4ACDA //cos(-22)
-data8 0xBFEAC5E20BB0D7ED //sin(-21)
-data8 0xBFE186FF83773759 //cos(-21)
-data8 0xBFED36D8F55D3CE0 //sin(-20)
-data8 0x3FDA1E043964A83F //cos(-20)
-data8 0xBFC32F2D28F584CF //sin(-19)
-data8 0x3FEFA377DE108258 //cos(-19)
-data8 0x3FE8081668131E26 //sin(-18)
-data8 0x3FE52150815D2470 //cos(-18)
-data8 0x3FEEC3C4AC42882B //sin(-17)
-data8 0xBFD19C46B07F58E7 //cos(-17)
-data8 0x3FD26D02085F20F8 //sin(-16)
-data8 0xBFEEA5257E962F74 //cos(-16)
-data8 0xBFE4CF2871CEC2E8 //sin(-15)
-data8 0xBFE84F5D069CA4F3 //cos(-15)
-data8 0xBFEFB30E327C5E45 //sin(-14)
-data8 0x3FC1809AEC2CA0ED //cos(-14)
-data8 0xBFDAE4044881C506 //sin(-13)
-data8 0x3FED09CDD5260CB7 //cos(-13)
-data8 0x3FE12B9AF7D765A5 //sin(-12)
-data8 0x3FEB00DA046B65E3 //cos(-12)
-data8 0x3FEFFFEB762E93EB //sin(-11)
-data8 0x3F7220AE41EE2FDF //cos(-11)
-data8 0x3FE1689EF5F34F52 //sin(-10)
-data8 0xBFEAD9AC890C6B1F //cos(-10)
-data8 0xBFDA6026360C2F91 //sin( -9)
-data8 0xBFED27FAA6A6196B //cos( -9)
-data8 0xBFEFA8D2A028CF7B //sin( -8)
-data8 0xBFC29FBEBF632F94 //cos( -8)
-data8 0xBFE50608C26D0A08 //sin( -7)
-data8 0x3FE81FF79ED92017 //cos( -7)
-data8 0x3FD1E1F18AB0A2C0 //sin( -6)
-data8 0x3FEEB9B7097822F5 //cos( -6)
-data8 0x3FEEAF81F5E09933 //sin( -5)
-data8 0x3FD22785706B4AD9 //cos( -5)
-data8 0x3FE837B9DDDC1EAE //sin( -4)
-data8 0xBFE4EAA606DB24C1 //cos( -4)
-data8 0xBFC210386DB6D55B //sin( -3)
-data8 0xBFEFAE04BE85E5D2 //cos( -3)
-data8 0xBFED18F6EAD1B446 //sin( -2)
-data8 0xBFDAA22657537205 //cos( -2)
-data8 0xBFEAED548F090CEE //sin( -1)
-data8 0x3FE14A280FB5068C //cos( -1)
-data8 0x0000000000000000 //sin( 0)
-data8 0x3FF0000000000000 //cos( 0)
-data8 0x3FEAED548F090CEE //sin( 1)
-data8 0x3FE14A280FB5068C //cos( 1)
-data8 0x3FED18F6EAD1B446 //sin( 2)
-data8 0xBFDAA22657537205 //cos( 2)
-data8 0x3FC210386DB6D55B //sin( 3)
-data8 0xBFEFAE04BE85E5D2 //cos( 3)
-data8 0xBFE837B9DDDC1EAE //sin( 4)
-data8 0xBFE4EAA606DB24C1 //cos( 4)
-data8 0xBFEEAF81F5E09933 //sin( 5)
-data8 0x3FD22785706B4AD9 //cos( 5)
-data8 0xBFD1E1F18AB0A2C0 //sin( 6)
-data8 0x3FEEB9B7097822F5 //cos( 6)
-data8 0x3FE50608C26D0A08 //sin( 7)
-data8 0x3FE81FF79ED92017 //cos( 7)
-data8 0x3FEFA8D2A028CF7B //sin( 8)
-data8 0xBFC29FBEBF632F94 //cos( 8)
-data8 0x3FDA6026360C2F91 //sin( 9)
-data8 0xBFED27FAA6A6196B //cos( 9)
-data8 0xBFE1689EF5F34F52 //sin( 10)
-data8 0xBFEAD9AC890C6B1F //cos( 10)
-data8 0xBFEFFFEB762E93EB //sin( 11)
-data8 0x3F7220AE41EE2FDF //cos( 11)
-data8 0xBFE12B9AF7D765A5 //sin( 12)
-data8 0x3FEB00DA046B65E3 //cos( 12)
-data8 0x3FDAE4044881C506 //sin( 13)
-data8 0x3FED09CDD5260CB7 //cos( 13)
-data8 0x3FEFB30E327C5E45 //sin( 14)
-data8 0x3FC1809AEC2CA0ED //cos( 14)
-data8 0x3FE4CF2871CEC2E8 //sin( 15)
-data8 0xBFE84F5D069CA4F3 //cos( 15)
-data8 0xBFD26D02085F20F8 //sin( 16)
-data8 0xBFEEA5257E962F74 //cos( 16)
-data8 0xBFEEC3C4AC42882B //sin( 17)
-data8 0xBFD19C46B07F58E7 //cos( 17)
-data8 0xBFE8081668131E26 //sin( 18)
-data8 0x3FE52150815D2470 //cos( 18)
-data8 0x3FC32F2D28F584CF //sin( 19)
-data8 0x3FEFA377DE108258 //cos( 19)
-data8 0x3FED36D8F55D3CE0 //sin( 20)
-data8 0x3FDA1E043964A83F //cos( 20)
-data8 0x3FEAC5E20BB0D7ED //sin( 21)
-data8 0xBFE186FF83773759 //cos( 21)
-data8 0xBF8220A29F6EB9F4 //sin( 22)
-data8 0xBFEFFFADD8D4ACDA //cos( 22)
-data8 0xBFEB143CD0247D02 //sin( 23)
-data8 0xBFE10CF7D591F272 //cos( 23)
-data8 0xBFECFA7F7919140F //sin( 24)
-data8 0x3FDB25BFB50A609A //cos( 24)
-data8 0xBFC0F0E6F31E809D //sin( 25)
-data8 0x3FEFB7EEF59504FF //cos( 25)
-data8 0x3FE866E0FAC32583 //sin( 26)
-data8 0x3FE4B3902691A9ED //cos( 26)
-data8 0x3FEE9AA1B0E5BA30 //sin( 27)
-data8 0xBFD2B266F959DED5 //cos( 27)
-data8 0x3FD156853B4514D6 //sin( 28)
-data8 0xBFEECDAAD1582500 //cos( 28)
-data8 0xBFE53C7D20A6C9E7 //sin( 29)
-data8 0xBFE7F01658314E47 //cos( 29)
-data8 0xBFEF9DF47F1C903D //sin( 30)
-data8 0x3FC3BE82F2505A52 //cos( 30)
-data8 0xBFD9DBC0B640FC81 //sin( 31)
-data8 0x3FED4591C3E12A20 //cos( 31)
-data8 0x3FE1A54991426566 //sin( 32)
-data8 0x3FEAB1F5305DE8E5 //cos( 32)
-ASM_SIZE_DIRECTIVE(sin_coeff_1_table)
-
-//////////////////////////////////////////
-
-
-.global sinf
-.global cosf
-#ifdef _LIBC
-.global __sinf
-.global __cosf
-#endif
-
-.text
-.proc cosf
-#ifdef _LIBC
-.proc __cosf
-#endif
-.align 32
-
-
-cosf:
-#ifdef _LIBC
-__cosf:
-#endif
-{ .mfi
- alloc r32 = ar.pfs,1,7,0,0
- fcvt.fx.s1 sin_Mx = f8
- cmp.ne p6,p7 = r0,r0 // p7 set if cos
-}
-{ .mfi
- addl SIN_AD_PQ_1 = @ltoff(sin_coeff_1_table),gp
- fnorm.s0 SIN_NORM_f8 = f8 // Sets denormal or invalid
- mov sin_GR_sincos_flag = 0x0
-}
-;;
+// Assembly macros
+//==============================================================
+sincosf_NORM_f8 = f9
+sincosf_W = f10
+sincosf_int_Nfloat = f11
+sincosf_Nfloat = f12
-{ .mfi
- ld8 SIN_AD_PQ_1 = [SIN_AD_PQ_1]
- fclass.m.unc p9,p0 = f8, 0x07
- cmp.ne p8,p0 = r0,r0
-}
-{ .mfb
- nop.m 999
- nop.f 999
- br.sptk L(SINCOSF_COMMON)
-}
-;;
+sincosf_r = f13
+sincosf_rsq = f14
+sincosf_rcub = f15
+sincosf_save_tmp = f15
-.endp cosf
-ASM_SIZE_DIRECTIVE(cosf)
+sincosf_Inv_Pi_by_16 = f32
+sincosf_Pi_by_16_1 = f33
+sincosf_Pi_by_16_2 = f34
+sincosf_Inv_Pi_by_64 = f35
-.text
-.proc sinf
-#ifdef _LIBC
-.proc __sinf
-#endif
-.align 32
+sincosf_Pi_by_16_3 = f36
-sinf:
-#ifdef _LIBC
-__sinf:
-#endif
-{ .mfi
- alloc r32 = ar.pfs,1,7,0,0
- fcvt.fx.s1 sin_Mx = f8
- cmp.eq p6,p7 = r0,r0 // p6 set if sin
-}
-{ .mfi
- addl SIN_AD_PQ_1 = @ltoff(sin_coeff_1_table),gp
- fnorm.s0 SIN_NORM_f8 = f8 // Sets denormal or invalid
- mov sin_GR_sincos_flag = 0x1
-}
-;;
+sincosf_r_exact = f37
-{ .mfi
- ld8 SIN_AD_PQ_1 = [SIN_AD_PQ_1]
- fclass.m.unc p8,p0 = f8, 0x07
- cmp.ne p9,p0 = r0,r0
-}
-{ .mfb
- nop.m 999
- nop.f 999
- br.sptk L(SINCOSF_COMMON)
-}
-;;
+sincosf_Sm = f38
+sincosf_Cm = f39
+sincosf_P1 = f40
+sincosf_Q1 = f41
+sincosf_P2 = f42
+sincosf_Q2 = f43
+sincosf_P3 = f44
+sincosf_Q3 = f45
+sincosf_P4 = f46
+sincosf_Q4 = f47
-L(SINCOSF_COMMON):
+sincosf_P_temp1 = f48
+sincosf_P_temp2 = f49
-// Here with p6 if sin, p7 if cos, p8 if sin(0), p9 if cos(0)
+sincosf_Q_temp1 = f50
+sincosf_Q_temp2 = f51
+sincosf_P = f52
+sincosf_Q = f53
-{ .mmf
- ldfpd sin_coeff_Q3, sin_coeff_Q4 = [SIN_AD_PQ_1], 16
- nop.m 999
- fclass.m.unc p11,p0 = f8, 0x23 // Test for x=inf
-}
-;;
+sincosf_srsq = f54
-{ .mfb
- ldfpd sin_coeff_Q5, sin_coeff_Q6 = [SIN_AD_PQ_1], 16
- fclass.m.unc p10,p0 = f8, 0xc3 // Test for x=nan
-(p8) br.ret.spnt b0 // Exit for sin(0)
-}
-{ .mfb
- nop.m 999
-(p9) fma.s f8 = f1,f1,f0
-(p9) br.ret.spnt b0 // Exit for cos(0)
-}
-;;
+sincosf_SIG_INV_PI_BY_16_2TO61 = f55
+sincosf_RSHF_2TO61 = f56
+sincosf_RSHF = f57
+sincosf_2TOM61 = f58
+sincosf_NFLOAT = f59
+sincosf_W_2TO61_RSH = f60
-{ .mmf
- ldfpd sin_coeff_P4, sin_coeff_P5 = [SIN_AD_PQ_1], 16
- addl gr_tmp = -1,r0
- fcvt.xf sin_Mfloat = sin_Mx
-}
-;;
+fp_tmp = f61
-{ .mfi
- getf.sig sin_GR_Mint = sin_Mx
-(p11) frcpa.s0 f8,p13 = f0,f0 // qnan indef if x=inf
- nop.i 999
-}
-{ .mfb
- ldfpd sin_coeff_P1, sin_coeff_P2 = [SIN_AD_PQ_1], 16
- nop.f 999
-(p11) br.ret.spnt b0 // Exit for x=inf
-}
-;;
+/////////////////////////////////////////////////////////////
-{ .mfi
- ldfpd sin_coeff_Q1, sin_coeff_Q2 = [SIN_AD_PQ_1], 16
- nop.f 999
- cmp.ge p8,p9 = -33,sin_GR_Mint
-}
-{ .mfb
- add sin_GR_index = 32,sin_GR_Mint
-(p10) fma.s f8 = f8,f1,f0 // Force qnan if x=nan
-(p10) br.ret.spnt b0 // Exit for x=nan
-}
-;;
+sincosf_AD_1 = r33
+sincosf_AD_2 = r34
+sincosf_exp_limit = r35
+sincosf_r_signexp = r36
+sincosf_AD_beta_table = r37
+sincosf_r_sincos = r38
-{ .mmi
- ldfd sin_coeff_P3 = [SIN_AD_PQ_1], 16
-(p9) cmp.le p8,p0 = 33, sin_GR_Mint
- shl sin_GR_index = sin_GR_index,4
-}
-;;
+sincosf_r_exp = r39
+sincosf_r_17_ones = r40
+sincosf_GR_sig_inv_pi_by_16 = r14
+sincosf_GR_rshf_2to61 = r15
+sincosf_GR_rshf = r16
+sincosf_GR_exp_2tom61 = r17
+sincosf_GR_n = r18
+sincosf_GR_m = r19
+sincosf_GR_32m = r19
+sincosf_GR_all_ones = r19
-{ .mfi
- setf.sig fp_tmp = gr_tmp // Create constant such that fmpy sets inexact
- fnma.s1 sin_r = f1,sin_Mfloat,SIN_NORM_f8
-(p8) cmp.eq.unc p11,p12=sin_GR_sincos_flag,r0 // p11 if must call dbl cos
- // p12 if must call dbl sin
-}
-{ .mbb
- add SIN_AD_PQ_2 = sin_GR_index,SIN_AD_PQ_1
-(p11) br.cond.spnt COS_DOUBLE
-(p12) br.cond.spnt SIN_DOUBLE
-}
-;;
+gr_tmp = r41
+GR_SAVE_PFS = r41
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
-.pred.rel "mutex",p6,p7 //SIN_Sin_Flag, SIN_Cos_Flag
-{ .mmi
-(p6) ldfpd sin_tbl_S,sin_tbl_C = [SIN_AD_PQ_2]
-(p7) ldfpd sin_tbl_C,sin_tbl_S = [SIN_AD_PQ_2]
- nop.i 999
-}
-;;
+RODATA
+.align 16
-{ .mfi
- nop.m 999
-(p6) fclass.m.unc p8,p0 = f8, 0x0b // If sin, note denormal input to set uflow
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 sin_t = sin_r,sin_r,f0
- nop.i 999
-}
-;;
+// Pi/16 parts
+LOCAL_OBJECT_START(double_sincosf_pi)
+ data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 1st part
+ data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 2nd part
+LOCAL_OBJECT_END(double_sincosf_pi)
+
+// Coefficients for polynomials
+LOCAL_OBJECT_START(double_sincosf_pq_k4)
+ data8 0x3F810FABB668E9A2 // P2
+ data8 0x3FA552E3D6DE75C9 // Q2
+ data8 0xBFC555554447BC7F // P1
+ data8 0xBFDFFFFFC447610A // Q1
+LOCAL_OBJECT_END(double_sincosf_pq_k4)
+
+// Sincos table (S[m], C[m])
+LOCAL_OBJECT_START(double_sin_cos_beta_k4)
+ data8 0x0000000000000000 // sin ( 0 Pi / 16 )
+ data8 0x3FF0000000000000 // cos ( 0 Pi / 16 )
+//
+ data8 0x3FC8F8B83C69A60B // sin ( 1 Pi / 16 )
+ data8 0x3FEF6297CFF75CB0 // cos ( 1 Pi / 16 )
+//
+ data8 0x3FD87DE2A6AEA963 // sin ( 2 Pi / 16 )
+ data8 0x3FED906BCF328D46 // cos ( 2 Pi / 16 )
+//
+ data8 0x3FE1C73B39AE68C8 // sin ( 3 Pi / 16 )
+ data8 0x3FEA9B66290EA1A3 // cos ( 3 Pi / 16 )
+//
+ data8 0x3FE6A09E667F3BCD // sin ( 4 Pi / 16 )
+ data8 0x3FE6A09E667F3BCD // cos ( 4 Pi / 16 )
+//
+ data8 0x3FEA9B66290EA1A3 // sin ( 5 Pi / 16 )
+ data8 0x3FE1C73B39AE68C8 // cos ( 5 Pi / 16 )
+//
+ data8 0x3FED906BCF328D46 // sin ( 6 Pi / 16 )
+ data8 0x3FD87DE2A6AEA963 // cos ( 6 Pi / 16 )
+//
+ data8 0x3FEF6297CFF75CB0 // sin ( 7 Pi / 16 )
+ data8 0x3FC8F8B83C69A60B // cos ( 7 Pi / 16 )
+//
+ data8 0x3FF0000000000000 // sin ( 8 Pi / 16 )
+ data8 0x0000000000000000 // cos ( 8 Pi / 16 )
+//
+ data8 0x3FEF6297CFF75CB0 // sin ( 9 Pi / 16 )
+ data8 0xBFC8F8B83C69A60B // cos ( 9 Pi / 16 )
+//
+ data8 0x3FED906BCF328D46 // sin ( 10 Pi / 16 )
+ data8 0xBFD87DE2A6AEA963 // cos ( 10 Pi / 16 )
+//
+ data8 0x3FEA9B66290EA1A3 // sin ( 11 Pi / 16 )
+ data8 0xBFE1C73B39AE68C8 // cos ( 11 Pi / 16 )
+//
+ data8 0x3FE6A09E667F3BCD // sin ( 12 Pi / 16 )
+ data8 0xBFE6A09E667F3BCD // cos ( 12 Pi / 16 )
+//
+ data8 0x3FE1C73B39AE68C8 // sin ( 13 Pi / 16 )
+ data8 0xBFEA9B66290EA1A3 // cos ( 13 Pi / 16 )
+//
+ data8 0x3FD87DE2A6AEA963 // sin ( 14 Pi / 16 )
+ data8 0xBFED906BCF328D46 // cos ( 14 Pi / 16 )
+//
+ data8 0x3FC8F8B83C69A60B // sin ( 15 Pi / 16 )
+ data8 0xBFEF6297CFF75CB0 // cos ( 15 Pi / 16 )
+//
+ data8 0x0000000000000000 // sin ( 16 Pi / 16 )
+ data8 0xBFF0000000000000 // cos ( 16 Pi / 16 )
+//
+ data8 0xBFC8F8B83C69A60B // sin ( 17 Pi / 16 )
+ data8 0xBFEF6297CFF75CB0 // cos ( 17 Pi / 16 )
+//
+ data8 0xBFD87DE2A6AEA963 // sin ( 18 Pi / 16 )
+ data8 0xBFED906BCF328D46 // cos ( 18 Pi / 16 )
+//
+ data8 0xBFE1C73B39AE68C8 // sin ( 19 Pi / 16 )
+ data8 0xBFEA9B66290EA1A3 // cos ( 19 Pi / 16 )
+//
+ data8 0xBFE6A09E667F3BCD // sin ( 20 Pi / 16 )
+ data8 0xBFE6A09E667F3BCD // cos ( 20 Pi / 16 )
+//
+ data8 0xBFEA9B66290EA1A3 // sin ( 21 Pi / 16 )
+ data8 0xBFE1C73B39AE68C8 // cos ( 21 Pi / 16 )
+//
+ data8 0xBFED906BCF328D46 // sin ( 22 Pi / 16 )
+ data8 0xBFD87DE2A6AEA963 // cos ( 22 Pi / 16 )
+//
+ data8 0xBFEF6297CFF75CB0 // sin ( 23 Pi / 16 )
+ data8 0xBFC8F8B83C69A60B // cos ( 23 Pi / 16 )
+//
+ data8 0xBFF0000000000000 // sin ( 24 Pi / 16 )
+ data8 0x0000000000000000 // cos ( 24 Pi / 16 )
+//
+ data8 0xBFEF6297CFF75CB0 // sin ( 25 Pi / 16 )
+ data8 0x3FC8F8B83C69A60B // cos ( 25 Pi / 16 )
+//
+ data8 0xBFED906BCF328D46 // sin ( 26 Pi / 16 )
+ data8 0x3FD87DE2A6AEA963 // cos ( 26 Pi / 16 )
+//
+ data8 0xBFEA9B66290EA1A3 // sin ( 27 Pi / 16 )
+ data8 0x3FE1C73B39AE68C8 // cos ( 27 Pi / 16 )
+//
+ data8 0xBFE6A09E667F3BCD // sin ( 28 Pi / 16 )
+ data8 0x3FE6A09E667F3BCD // cos ( 28 Pi / 16 )
+//
+ data8 0xBFE1C73B39AE68C8 // sin ( 29 Pi / 16 )
+ data8 0x3FEA9B66290EA1A3 // cos ( 29 Pi / 16 )
+//
+ data8 0xBFD87DE2A6AEA963 // sin ( 30 Pi / 16 )
+ data8 0x3FED906BCF328D46 // cos ( 30 Pi / 16 )
+//
+ data8 0xBFC8F8B83C69A60B // sin ( 31 Pi / 16 )
+ data8 0x3FEF6297CFF75CB0 // cos ( 31 Pi / 16 )
+//
+ data8 0x0000000000000000 // sin ( 32 Pi / 16 )
+ data8 0x3FF0000000000000 // cos ( 32 Pi / 16 )
+LOCAL_OBJECT_END(double_sin_cos_beta_k4)
-{ .mfi
- nop.m 999
- fma.s1 sin_rcube = sin_t,sin_r,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 sin_tsq = sin_t,sin_t,f0
- nop.i 999
-}
-;;
+.section .text
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_q3 = sin_t,sin_coeff_Q4,sin_coeff_Q3
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_q5 = sin_t,sin_coeff_Q6,sin_coeff_Q5
- nop.i 999
-}
-;;
+////////////////////////////////////////////////////////
+// There are two entry points: sin and cos
+// If from sin, p8 is true
+// If from cos, p9 is true
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_p1 = sin_t,sin_coeff_P5,sin_coeff_P4
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_p2 = sin_t,sin_coeff_P2,sin_coeff_P1
- nop.i 999
-}
-;;
+GLOBAL_IEEE754_ENTRY(sinf)
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_q1 = sin_t,sin_coeff_Q2,sin_coeff_Q1
- nop.i 999
+{ .mlx
+ alloc r32 = ar.pfs,1,13,0,0
+ movl sincosf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
}
-{ .mfi
- nop.m 999
- fma.s1 sin_S_t = sin_t,sin_tbl_S,f0
- nop.i 999
-}
-;;
+{ .mlx
+ addl sincosf_AD_1 = @ltoff(double_sincosf_pi), gp
+ movl sincosf_GR_rshf_2to61 = 0x47b8000000000000 // 1.1 2^(63+63-2)
+};;
-{ .mfi
- nop.m 999
-(p8) fmpy.s.s0 fp_tmp2 = f8,f8 // Dummy mult to set underflow if sin(denormal)
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 sin_r7 = sin_rcube,sin_tsq,f0
- nop.i 999
+{ .mfi
+ ld8 sincosf_AD_1 = [sincosf_AD_1]
+ fnorm.s1 sincosf_NORM_f8 = f8 // Normalize argument
+ cmp.eq p8,p9 = r0, r0 // set p8 (clear p9) for sin
}
-;;
+{ .mib
+ mov sincosf_GR_exp_2tom61 = 0xffff-61 // exponent of scale 2^-61
+ mov sincosf_r_sincos = 0x0 // 0 for sin
+ br.cond.sptk _SINCOSF_COMMON // go to common part
+};;
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_q3456 = sin_tsq,sin_poly_q5,sin_poly_q3
- nop.i 999
-}
-;;
+GLOBAL_IEEE754_END(sinf)
+GLOBAL_IEEE754_ENTRY(cosf)
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_p3 = sin_t,sin_poly_p1,sin_coeff_P3
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_p4 = sin_rcube,sin_poly_p2,sin_r
- nop.i 999
+{ .mlx
+ alloc r32 = ar.pfs,1,13,0,0
+ movl sincosf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
}
-;;
+{ .mlx
+ addl sincosf_AD_1 = @ltoff(double_sincosf_pi), gp
+ movl sincosf_GR_rshf_2to61 = 0x47b8000000000000 // 1.1 2^(63+63-2)
+};;
-{ .mfi
- nop.m 999
- fma.s1 sin_tbl_S_tcube = sin_S_t,sin_tsq,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 sin_poly_q12 = sin_S_t,sin_poly_q1,sin_tbl_S
- nop.i 999
+{ .mfi
+ ld8 sincosf_AD_1 = [sincosf_AD_1]
+ fnorm.s1 sincosf_NORM_f8 = f8 // Normalize argument
+ cmp.eq p9,p8 = r0, r0 // set p9 (clear p8) for cos
}
-;;
+{ .mib
+ mov sincosf_GR_exp_2tom61 = 0xffff-61 // exponent of scale 2^-61
+ mov sincosf_r_sincos = 0x8 // 8 for cos
+ nop.b 999
+};;
+
+////////////////////////////////////////////////////////
+// All entry points end up here.
+// If from sin, sincosf_r_sincos is 0 and p8 is true
+// If from cos, sincosf_r_sincos is 8 = 2^(k-1) and p9 is true
+// We add sincosf_r_sincos to N
+
+///////////// Common sin and cos part //////////////////
+_SINCOSF_COMMON:
+
+// Form two constants we need
+// 16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
+// 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
+// fcmp used to set denormal, and invalid on snans
+{ .mfi
+ setf.sig sincosf_SIG_INV_PI_BY_16_2TO61 = sincosf_GR_sig_inv_pi_by_16
+ fclass.m p6,p0 = f8, 0xe7 // if x=0,inf,nan
+ mov sincosf_exp_limit = 0x10017
+}
+{ .mlx
+ setf.d sincosf_RSHF_2TO61 = sincosf_GR_rshf_2to61
+ movl sincosf_GR_rshf = 0x43e8000000000000 // 1.1000 2^63
+};; // Right shift
+
+// Form another constant
+// 2^-61 for scaling Nfloat
+// 0x10017 is register_bias + 24.
+// So if |f8| >= 2^24, go to large argument routines
+{ .mmi
+ getf.exp sincosf_r_signexp = f8
+ setf.exp sincosf_2TOM61 = sincosf_GR_exp_2tom61
+ addl gr_tmp = -1,r0 // Create constant used to force "inexact"
+};;
+
+// Load the two pieces of pi/16
+// Form another constant
+// 1.1000...000 * 2^63, the right shift constant
+{ .mmb
+ ldfe sincosf_Pi_by_16_1 = [sincosf_AD_1],16
+ setf.d sincosf_RSHF = sincosf_GR_rshf
+(p6) br.cond.spnt _SINCOSF_SPECIAL_ARGS
+};;
-{ .mfi
- nop.m 999
- fma.d.s1 sin_of_r = sin_r7,sin_poly_p3,sin_poly_p4
- nop.i 999
-}
-;;
+// Getting argument's exp for "large arguments" filtering
+{ .mmi
+ ldfe sincosf_Pi_by_16_2 = [sincosf_AD_1],16
+ setf.sig fp_tmp = gr_tmp // constant for inexact set
+ nop.i 999
+};;
-{ .mfi
- nop.m 999
- fma.d.s1 sin_tbl_S_cos_of_r = sin_tbl_S_tcube,sin_poly_q3456,sin_poly_q12
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
- nop.i 999
-}
-;;
+// Polynomial coefficients (Q2, Q1, P2, P1) loading
+{ .mmi
+ ldfpd sincosf_P2,sincosf_Q2 = [sincosf_AD_1],16
+ nop.m 999
+ nop.i 999
+};;
+// Select exponent (17 lsb)
+{ .mmi
+ ldfpd sincosf_P1,sincosf_Q1 = [sincosf_AD_1],16
+ nop.m 999
+ dep.z sincosf_r_exp = sincosf_r_signexp, 0, 17
+};;
-.pred.rel "mutex",p6,p7 //SIN_Sin_Flag, SIN_Cos_Flag
-{ .mfi
- nop.m 999
-//(SIN_Sin_Flag) fma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
-(p6) fma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
- nop.i 999
-}
-{ .mfb
- nop.m 999
-//(SIN_Cos_Flag) fnma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
-(p7) fnma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
- br.ret.sptk b0
-}
+// p10 is true if we must call routines to handle larger arguments
+// p10 is true if f8 exp is >= 0x10017 (2^24)
+{ .mfb
+ cmp.ge p10,p0 = sincosf_r_exp,sincosf_exp_limit
+ nop.f 999
+(p10) br.cond.spnt _SINCOSF_LARGE_ARGS // Go to "large args" routine
+};;
+
+// sincosf_W = x * sincosf_Inv_Pi_by_16
+// Multiply x by scaled 16/pi and add large const to shift integer part of W to
+// rightmost bits of significand
+{ .mfi
+ nop.m 999
+ fma.s1 sincosf_W_2TO61_RSH = sincosf_NORM_f8, sincosf_SIG_INV_PI_BY_16_2TO61, sincosf_RSHF_2TO61
+ nop.i 999
+};;
-.endp sinf
-ASM_SIZE_DIRECTIVE(sinf)
+// sincosf_NFLOAT = Round_Int_Nearest(sincosf_W)
+// This is done by scaling back by 2^-61 and subtracting the shift constant
+{ .mfi
+ nop.m 999
+ fms.s1 sincosf_NFLOAT = sincosf_W_2TO61_RSH,sincosf_2TOM61,sincosf_RSHF
+ nop.i 999
+};;
+// get N = (int)sincosf_NFLOAT from the significand of sincosf_W_2TO61_RSH
+{ .mfi
+ getf.sig sincosf_GR_n = sincosf_W_2TO61_RSH // integer N value
+ nop.f 999
+ nop.i 999
+};;
-.proc SIN_DOUBLE
-SIN_DOUBLE:
-.prologue
+// Add 2^(k-1) (which is in sincosf_r_sincos=8) to N
+// sincosf_r = -sincosf_Nfloat * sincosf_Pi_by_16_1 + x
{ .mfi
- nop.m 0
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-;;
+ add sincosf_GR_n = sincosf_GR_n, sincosf_r_sincos
+ fnma.s1 sincosf_r = sincosf_NFLOAT, sincosf_Pi_by_16_1, sincosf_NORM_f8
+ nop.i 999
+};;
+// Get M (least k+1 bits of N)
+{ .mmi
+ and sincosf_GR_m = 0x1f,sincosf_GR_n // Put mask 0x1F -
+ nop.m 999 // - select k+1 bits
+ nop.i 999
+};;
+
+// Add 16*M to address of sin_cos_beta table
{ .mfi
- mov GR_SAVE_GP=gp
- nop.f 0
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-}
+ shladd sincosf_AD_2 = sincosf_GR_32m, 4, sincosf_AD_1
+(p8) fclass.m.unc p10,p0 = f8,0x0b // If sin denormal input -
+ nop.i 999
+};;
-.body
-{ .mmb
- nop.m 999
- nop.m 999
- br.call.sptk.many b0=sin
+// Load Sin and Cos table value using obtained index m (sincosf_AD_2)
+{ .mfi
+ ldfd sincosf_Sm = [sincosf_AD_2],8 // Sin value S[m]
+(p9) fclass.m.unc p11,p0 = f8,0x0b // If cos denormal input -
+ nop.i 999 // - set denormal
+};;
+
+// sincosf_r = sincosf_r -sincosf_Nfloat * sincosf_Pi_by_16_2
+{ .mfi
+ ldfd sincosf_Cm = [sincosf_AD_2] // Cos table value C[m]
+ fnma.s1 sincosf_r_exact = sincosf_NFLOAT, sincosf_Pi_by_16_2, sincosf_r
+ nop.i 999
}
-;;
+// get rsq = r*r
+{ .mfi
+ nop.m 999
+ fma.s1 sincosf_rsq = sincosf_r, sincosf_r, f0 // r^2 = r*r
+ nop.i 999
+};;
{ .mfi
- mov gp = GR_SAVE_GP
- nop.f 999
- mov b0 = GR_SAVE_B0
+ nop.m 999
+ fmpy.s0 fp_tmp = fp_tmp, fp_tmp // forces inexact flag
+ nop.i 999
+};;
+
+// Polynomials calculation
+// Q = Q2*r^2 + Q1
+// P = P2*r^2 + P1
+{ .mfi
+ nop.m 999
+ fma.s1 sincosf_Q = sincosf_rsq, sincosf_Q2, sincosf_Q1
+ nop.i 999
}
-;;
+{ .mfi
+ nop.m 999
+ fma.s1 sincosf_P = sincosf_rsq, sincosf_P2, sincosf_P1
+ nop.i 999
+};;
+// get rcube and S[m]*r^2
{ .mfi
- nop.m 999
- fma.s f8 = f8,f1,f0
-(p0) mov ar.pfs = GR_SAVE_PFS
+ nop.m 999
+ fmpy.s1 sincosf_srsq = sincosf_Sm,sincosf_rsq // r^2*S[m]
+ nop.i 999
}
-{ .mib
- nop.m 999
- nop.i 999
-(p0) br.ret.sptk b0
+{ .mfi
+ nop.m 999
+ fmpy.s1 sincosf_rcub = sincosf_r_exact, sincosf_rsq
+ nop.i 999
+};;
+
+// Get final P and Q
+// Q = Q*S[m]*r^2 + S[m]
+// P = P*r^3 + r
+{ .mfi
+ nop.m 999
+ fma.s1 sincosf_Q = sincosf_srsq,sincosf_Q, sincosf_Sm
+ nop.i 999
}
-;;
+{ .mfi
+ nop.m 999
+ fma.s1 sincosf_P = sincosf_rcub,sincosf_P,sincosf_r_exact
+ nop.i 999
+};;
-.endp SIN_DOUBLE
-ASM_SIZE_DIRECTIVE(SIN_DOUBLE)
+// If sinf(denormal) - force underflow to be set
+.pred.rel "mutex",p10,p11
+{ .mfi
+ nop.m 999
+(p10) fmpy.s.s0 fp_tmp = f8,f8 // forces underflow flag
+ nop.i 999 // for denormal sine args
+}
+// If cosf(denormal) - force denormal to be set
+{ .mfi
+ nop.m 999
+(p11) fma.s.s0 fp_tmp = f8, f1, f8 // forces denormal flag
+ nop.i 999 // for denormal cosine args
+};;
-.proc COS_DOUBLE
-COS_DOUBLE:
+// Final calculation
+// result = C[m]*P + Q
+{ .mfb
+ nop.m 999
+ fma.s.s0 f8 = sincosf_Cm, sincosf_P, sincosf_Q
+ br.ret.sptk b0 // Exit for common path
+};;
+
+////////// x = 0/Inf/NaN path //////////////////
+_SINCOSF_SPECIAL_ARGS:
+.pred.rel "mutex",p8,p9
+// sinf(+/-0) = +/-0
+// sinf(Inf) = NaN
+// sinf(NaN) = NaN
+{ .mfi
+ nop.m 999
+(p8) fma.s.s0 f8 = f8, f0, f0 // sinf(+/-0,NaN,Inf)
+ nop.i 999
+}
+// cosf(+/-0) = 1.0
+// cosf(Inf) = NaN
+// cosf(NaN) = NaN
+{ .mfb
+ nop.m 999
+(p9) fma.s.s0 f8 = f8, f0, f1 // cosf(+/-0,NaN,Inf)
+ br.ret.sptk b0 // Exit for x = 0/Inf/NaN path
+};;
+
+GLOBAL_IEEE754_END(cosf)
+//////////// |x| >= 2^24 - large arguments routine call ////////////
+LOCAL_LIBM_ENTRY(__libm_callout_sincosf)
+_SINCOSF_LARGE_ARGS:
.prologue
{ .mfi
- nop.m 0
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov sincosf_GR_all_ones = -1 // 0xffffffffffffffff
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS = ar.pfs
}
;;
{ .mfi
- mov GR_SAVE_GP=gp
- nop.f 0
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_GP = gp
+ nop.f 999
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0 = b0
}
-
.body
-{ .mmb
- nop.m 999
- nop.m 999
- br.call.sptk.many b0=cos
-}
-;;
-{ .mfi
- mov gp = GR_SAVE_GP
- nop.f 999
- mov b0 = GR_SAVE_B0
-}
-;;
+{ .mbb
+ setf.sig sincosf_save_tmp = sincosf_GR_all_ones // inexact set
+ nop.b 999
+(p8) br.call.sptk.many b0 = __libm_sin_large# // sinf(large_X)
+};;
+
+{ .mbb
+ cmp.ne p9,p0 = sincosf_r_sincos, r0 // set p9 if cos
+ nop.b 999
+(p9) br.call.sptk.many b0 = __libm_cos_large# // cosf(large_X)
+};;
{ .mfi
- nop.m 999
- fma.s f8 = f8,f1,f0
-(p0) mov ar.pfs = GR_SAVE_PFS
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p0) br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ fma.s.s0 f8 = f8, f1, f0 // Round result to single
+ mov b0 = GR_SAVE_B0
}
-;;
-
-.endp COS_DOUBLE
-ASM_SIZE_DIRECTIVE(COS_DOUBLE)
+{ .mfi // force inexact set
+ nop.m 999
+ fmpy.s0 sincosf_save_tmp = sincosf_save_tmp, sincosf_save_tmp
+ nop.i 999
+};;
+{ .mib
+ nop.m 999
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0 // Exit for large arguments routine call
+};;
+LOCAL_LIBM_END(__libm_callout_sincosf)
+.type __libm_sin_large#, @function
+.global __libm_sin_large#
+.type __libm_cos_large#, @function
+.global __libm_cos_large#
-.type sin,@function
-.global sin
-.type cos,@function
-.global cos
diff --git a/sysdeps/ia64/fpu/s_cosl.S b/sysdeps/ia64/fpu/s_cosl.S
index 2755580c0d..374e822256 100644
--- a/sysdeps/ia64/fpu/s_cosl.S
+++ b/sysdeps/ia64/fpu/s_cosl.S
@@ -1,10 +1,10 @@
.file "sincosl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,76 +20,81 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// *********************************************************************
+//*********************************************************************
//
-// History:
-// 2/02/2000 (hand-optimized)
-// 4/04/00 Unwind support added
+// History:
+// 02/02/00 (hand-optimized)
+// 04/04/00 Unwind support added
+// 07/30/01 Improved speed on all paths
+// 08/20/01 Fixed bundling typo
+// 05/13/02 Changed interface to __libm_pi_by_2_reduce
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
//
-// *********************************************************************
+//*********************************************************************
//
// Function: Combined sinl(x) and cosl(x), where
//
// sinl(x) = sine(x), for double-extended precision x values
// cosl(x) = cosine(x), for double-extended precision x values
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
-// Floating-Point Registers: f8 (Input and Return Value)
+// Floating-Point Registers: f8 (Input and Return Value)
// f32-f99
//
// General Purpose Registers:
-// r32-r43
+// r32-r43
// r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
//
// Predicate Registers: p6-p13
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
// Denormal fault raised on denormal inputs
// Overflow exceptions do not occur
-// Underflow exceptions raised when appropriate for sin
+// Underflow exceptions raised when appropriate for sin
// (No specialized error handling for this routine)
// Inexact raised when appropriate by algorithm
//
// sinl(SNaN) = QNaN
// sinl(QNaN) = QNaN
-// sinl(inf) = QNaN
+// sinl(inf) = QNaN
// sinl(+/-0) = +/-0
-// cosl(inf) = QNaN
+// cosl(inf) = QNaN
// cosl(SNaN) = QNaN
// cosl(QNaN) = QNaN
// cosl(0) = 1
-//
-// *********************************************************************
+//
+//*********************************************************************
//
// Mathematical Description
// ========================
//
-// The computation of FSIN and FCOS is best handled in one piece of
-// code. The main reason is that given any argument Arg, computation
-// of trigonometric functions first calculate N and an approximation
+// The computation of FSIN and FCOS is best handled in one piece of
+// code. The main reason is that given any argument Arg, computation
+// of trigonometric functions first calculate N and an approximation
// to alpha where
//
// Arg = N pi/2 + alpha, |alpha| <= pi/4.
@@ -98,62 +103,62 @@
//
// cosl( Arg ) = sinl( (N+1) pi/2 + alpha ),
//
-// therefore, the code for computing sine will produce cosine as long
-// as 1 is added to N immediately after the argument reduction
+// therefore, the code for computing sine will produce cosine as long
+// as 1 is added to N immediately after the argument reduction
// process.
//
// Let M = N if sine
-// N+1 if cosine.
+// N+1 if cosine.
//
// Now, given
//
// Arg = M pi/2 + alpha, |alpha| <= pi/4,
//
-// let I = M mod 4, or I be the two lsb of M when M is represented
+// let I = M mod 4, or I be the two lsb of M when M is represented
// as 2's complement. I = [i_0 i_1]. Then
//
-// sinl( Arg ) = (-1)^i_0 sinl( alpha ) if i_1 = 0,
+// sinl( Arg ) = (-1)^i_0 sinl( alpha ) if i_1 = 0,
// = (-1)^i_0 cosl( alpha ) if i_1 = 1.
//
// For example:
-// if M = -1, I = 11
+// if M = -1, I = 11
// sin ((-pi/2 + alpha) = (-1) cos (alpha)
-// if M = 0, I = 00
+// if M = 0, I = 00
// sin (alpha) = sin (alpha)
-// if M = 1, I = 01
+// if M = 1, I = 01
// sin (pi/2 + alpha) = cos (alpha)
-// if M = 2, I = 10
+// if M = 2, I = 10
// sin (pi + alpha) = (-1) sin (alpha)
-// if M = 3, I = 11
+// if M = 3, I = 11
// sin ((3/2)pi + alpha) = (-1) cos (alpha)
//
-// The value of alpha is obtained by argument reduction and
+// The value of alpha is obtained by argument reduction and
// represented by two working precision numbers r and c where
//
// alpha = r + c accurately.
//
// The reduction method is described in a previous write up.
-// The argument reduction scheme identifies 4 cases. For Cases 2
-// and 4, because |alpha| is small, sinl(r+c) and cosl(r+c) can be
-// computed very easily by 2 or 3 terms of the Taylor series
+// The argument reduction scheme identifies 4 cases. For Cases 2
+// and 4, because |alpha| is small, sinl(r+c) and cosl(r+c) can be
+// computed very easily by 2 or 3 terms of the Taylor series
// expansion as follows:
//
// Case 2:
// -------
//
-// sinl(r + c) = r + c - r^3/6 accurately
-// cosl(r + c) = 1 - 2^(-67) accurately
+// sinl(r + c) = r + c - r^3/6 accurately
+// cosl(r + c) = 1 - 2^(-67) accurately
//
// Case 4:
// -------
//
-// sinl(r + c) = r + c - r^3/6 + r^5/120 accurately
-// cosl(r + c) = 1 - r^2/2 + r^4/24 accurately
+// sinl(r + c) = r + c - r^3/6 + r^5/120 accurately
+// cosl(r + c) = 1 - r^2/2 + r^4/24 accurately
//
-// The only cases left are Cases 1 and 3 of the argument reduction
-// procedure. These two cases will be merged since after the
-// argument is reduced in either cases, we have the reduced argument
-// represented as r + c and that the magnitude |r + c| is not small
+// The only cases left are Cases 1 and 3 of the argument reduction
+// procedure. These two cases will be merged since after the
+// argument is reduced in either cases, we have the reduced argument
+// represented as r + c and that the magnitude |r + c| is not small
// enough to allow the usage of a very short approximation.
//
// The required calculation is either
@@ -163,32 +168,32 @@
//
// Specifically,
//
-// sinl(r + c) = sinl(r) + c sin'(r) + O(c^2)
-// = sinl(r) + c cos (r) + O(c^2)
-// = sinl(r) + c(1 - r^2/2) accurately.
+// sinl(r + c) = sinl(r) + c sin'(r) + O(c^2)
+// = sinl(r) + c cos (r) + O(c^2)
+// = sinl(r) + c(1 - r^2/2) accurately.
// Similarly,
//
-// cosl(r + c) = cosl(r) - c sinl(r) + O(c^2)
-// = cosl(r) - c(r - r^3/6) accurately.
+// cosl(r + c) = cosl(r) - c sinl(r) + O(c^2)
+// = cosl(r) - c(r - r^3/6) accurately.
//
-// We therefore concentrate on accurately calculating sinl(r) and
+// We therefore concentrate on accurately calculating sinl(r) and
// cosl(r) for a working-precision number r, |r| <= pi/4 to within
// 0.1% or so.
//
-// The greatest challenge of this task is that the second terms of
+// The greatest challenge of this task is that the second terms of
// the Taylor series
-//
-// r - r^3/3! + r^r/5! - ...
+//
+// r - r^3/3! + r^5/5! - ...
//
// and
//
-// 1 - r^2/2! + r^4/4! - ...
+// 1 - r^2/2! + r^4/4! - ...
//
-// are not very small when |r| is close to pi/4 and the rounding
-// errors will be a concern if simple polynomial accumulation is
-// used. When |r| < 2^-3, however, the second terms will be small
-// enough (6 bits or so of right shift) that a normal Horner
-// recurrence suffices. Hence there are two cases that we consider
+// are not very small when |r| is close to pi/4 and the rounding
+// errors will be a concern if simple polynomial accumulation is
+// used. When |r| < 2^-3, however, the second terms will be small
+// enough (6 bits or so of right shift) that a normal Horner
+// recurrence suffices. Hence there are two cases that we consider
// in the accurate computation of sinl(r) and cosl(r), |r| <= pi/4.
//
// Case small_r: |r| < 2^(-3)
@@ -197,88 +202,88 @@
// Since Arg = M pi/4 + r + c accurately, and M mod 4 is [i_0 i_1],
// we have
//
-// sinl(Arg) = (-1)^i_0 * sinl(r + c) if i_1 = 0
-// = (-1)^i_0 * cosl(r + c) if i_1 = 1
+// sinl(Arg) = (-1)^i_0 * sinl(r + c) if i_1 = 0
+// = (-1)^i_0 * cosl(r + c) if i_1 = 1
//
// can be accurately approximated by
//
-// sinl(Arg) = (-1)^i_0 * [sinl(r) + c] if i_1 = 0
+// sinl(Arg) = (-1)^i_0 * [sinl(r) + c] if i_1 = 0
// = (-1)^i_0 * [cosl(r) - c*r] if i_1 = 1
//
-// because |r| is small and thus the second terms in the correction
+// because |r| is small and thus the second terms in the correction
// are unneccessary.
//
-// Finally, sinl(r) and cosl(r) are approximated by polynomials of
+// Finally, sinl(r) and cosl(r) are approximated by polynomials of
// moderate lengths.
//
// sinl(r) = r + S_1 r^3 + S_2 r^5 + ... + S_5 r^11
// cosl(r) = 1 + C_1 r^2 + C_2 r^4 + ... + C_5 r^10
//
-// We can make use of predicates to selectively calculate
-// sinl(r) or cosl(r) based on i_1.
+// We can make use of predicates to selectively calculate
+// sinl(r) or cosl(r) based on i_1.
//
// Case normal_r: 2^(-3) <= |r| <= pi/4
// ------------------------------------
//
// This case is more likely than the previous one if one considers
// r to be uniformly distributed in [-pi/4 pi/4]. Again,
-//
-// sinl(Arg) = (-1)^i_0 * sinl(r + c) if i_1 = 0
-// = (-1)^i_0 * cosl(r + c) if i_1 = 1.
//
-// Because |r| is now larger, we need one extra term in the
+// sinl(Arg) = (-1)^i_0 * sinl(r + c) if i_1 = 0
+// = (-1)^i_0 * cosl(r + c) if i_1 = 1.
+//
+// Because |r| is now larger, we need one extra term in the
// correction. sinl(Arg) can be accurately approximated by
//
// sinl(Arg) = (-1)^i_0 * [sinl(r) + c(1-r^2/2)] if i_1 = 0
// = (-1)^i_0 * [cosl(r) - c*r*(1 - r^2/6)] i_1 = 1.
//
-// Finally, sinl(r) and cosl(r) are approximated by polynomials of
+// Finally, sinl(r) and cosl(r) are approximated by polynomials of
// moderate lengths.
//
-// sinl(r) = r + PP_1_hi r^3 + PP_1_lo r^3 +
-// PP_2 r^5 + ... + PP_8 r^17
+// sinl(r) = r + PP_1_hi r^3 + PP_1_lo r^3 +
+// PP_2 r^5 + ... + PP_8 r^17
//
-// cosl(r) = 1 + QQ_1 r^2 + QQ_2 r^4 + ... + QQ_8 r^16
+// cosl(r) = 1 + QQ_1 r^2 + QQ_2 r^4 + ... + QQ_8 r^16
//
-// where PP_1_hi is only about 16 bits long and QQ_1 is -1/2.
-// The crux in accurate computation is to calculate
+// where PP_1_hi is only about 16 bits long and QQ_1 is -1/2.
+// The crux in accurate computation is to calculate
//
// r + PP_1_hi r^3 or 1 + QQ_1 r^2
//
-// accurately as two pieces: U_hi and U_lo. The way to achieve this
-// is to obtain r_hi as a 10 sig. bit number that approximates r to
+// accurately as two pieces: U_hi and U_lo. The way to achieve this
+// is to obtain r_hi as a 10 sig. bit number that approximates r to
// roughly 8 bits or so of accuracy. (One convenient way is
//
// r_hi := frcpa( frcpa( r ) ).)
//
// This way,
//
-// r + PP_1_hi r^3 = r + PP_1_hi r_hi^3 +
-// PP_1_hi (r^3 - r_hi^3)
-// = [r + PP_1_hi r_hi^3] +
-// [PP_1_hi (r - r_hi)
-// (r^2 + r_hi r + r_hi^2) ]
-// = U_hi + U_lo
+// r + PP_1_hi r^3 = r + PP_1_hi r_hi^3 +
+// PP_1_hi (r^3 - r_hi^3)
+// = [r + PP_1_hi r_hi^3] +
+// [PP_1_hi (r - r_hi)
+// (r^2 + r_hi r + r_hi^2) ]
+// = U_hi + U_lo
//
// Since r_hi is only 10 bit long and PP_1_hi is only 16 bit long,
-// PP_1_hi * r_hi^3 is only at most 46 bit long and thus computed
-// exactly. Furthermore, r and PP_1_hi r_hi^3 are of opposite sign
-// and that there is no more than 8 bit shift off between r and
-// PP_1_hi * r_hi^3. Hence the sum, U_hi, is representable and thus
-// calculated without any error. Finally, the fact that
+// PP_1_hi * r_hi^3 is only at most 46 bit long and thus computed
+// exactly. Furthermore, r and PP_1_hi r_hi^3 are of opposite sign
+// and that there is no more than 8 bit shift off between r and
+// PP_1_hi * r_hi^3. Hence the sum, U_hi, is representable and thus
+// calculated without any error. Finally, the fact that
//
-// |U_lo| <= 2^(-8) |U_hi|
+// |U_lo| <= 2^(-8) |U_hi|
//
-// says that U_hi + U_lo is approximating r + PP_1_hi r^3 to roughly
+// says that U_hi + U_lo is approximating r + PP_1_hi r^3 to roughly
// 8 extra bits of accuracy.
//
// Similarly,
//
-// 1 + QQ_1 r^2 = [1 + QQ_1 r_hi^2] +
-// [QQ_1 (r - r_hi)(r + r_hi)]
-// = U_hi + U_lo.
-//
-// Summarizing, we calculate r_hi = frcpa( frcpa( r ) ).
+// 1 + QQ_1 r^2 = [1 + QQ_1 r_hi^2] +
+// [QQ_1 (r - r_hi)(r + r_hi)]
+// = U_hi + U_lo.
+//
+// Summarizing, we calculate r_hi = frcpa( frcpa( r ) ).
//
// If i_1 = 0, then
//
@@ -297,35 +302,35 @@
// End
//
// Finally,
-//
-// V := poly + ( U_lo + correction )
+//
+// V := poly + ( U_lo + correction )
//
// / U_hi + V if i_0 = 0
-// result := |
+// result := |
// \ (-U_hi) - V if i_0 = 1
//
-// It is important that in the last step, negation of U_hi is
-// performed prior to the subtraction which is to be performed in
-// the user-set rounding mode.
+// It is important that in the last step, negation of U_hi is
+// performed prior to the subtraction which is to be performed in
+// the user-set rounding mode.
//
//
// Algorithmic Description
// =======================
//
-// The argument reduction algorithm is tightly integrated into FSIN
-// and FCOS which share the same code. The following is complete and
-// self-contained. The argument reduction description given
+// The argument reduction algorithm is tightly integrated into FSIN
+// and FCOS which share the same code. The following is complete and
+// self-contained. The argument reduction description given
// previously is repeated below.
//
//
-// Step 0. Initialization.
+// Step 0. Initialization.
//
// If FSIN is invoked, set N_inc := 0; else if FCOS is invoked,
// set N_inc := 1.
//
// Step 1. Check for exceptional and special cases.
//
-// * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special
+// * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special
// handling.
// * If |Arg| < 2^24, go to Step 2 for reduction of moderate
// arguments. This is the most likely case.
@@ -335,18 +340,18 @@
//
// Step 2. Reduction of moderate arguments.
//
-// If |Arg| < pi/4 ...quick branch
-// N_fix := N_inc (integer)
+// If |Arg| < pi/4 ...quick branch
+// N_fix := N_inc (integer)
// r := Arg
// c := 0.0
// Branch to Step 4, Case_1_complete
-// Else ...cf. argument reduction
-// N := Arg * two_by_PI (fp)
-// N_fix := fcvt.fx( N ) (int)
+// Else ...cf. argument reduction
+// N := Arg * two_by_PI (fp)
+// N_fix := fcvt.fx( N ) (int)
// N := fcvt.xf( N_fix )
// N_fix := N_fix + N_inc
-// s := Arg - N * P_1 (first piece of pi/2)
-// w := -N * P_2 (second piece of pi/2)
+// s := Arg - N * P_1 (first piece of pi/2)
+// w := -N * P_2 (second piece of pi/2)
//
// If |s| >= 2^(-33)
// go to Step 3, Case_1_reduce
@@ -358,8 +363,8 @@
// Step 3. Case_1_reduce.
//
// r := s + w
-// c := (s - r) + w ...observe order
-//
+// c := (s - r) + w ...observe order
+//
// Step 4. Case_1_complete
//
// ...At this point, the reduced argument alpha is
@@ -375,17 +380,17 @@
//
// If i_1 = 0, then
// poly := r*FR_rsq*(PP_1_lo + FR_rsq*(PP_2 + ... FR_rsq*PP_8))
-// U_hi := r + PP_1_hi*r_hi*r_hi*r_hi ...any order
+// U_hi := r + PP_1_hi*r_hi*r_hi*r_hi ...any order
// U_lo := PP_1_hi*r_lo*(r*r + r*r_hi + r_hi*r_hi)
-// correction := c + c*C_1*FR_rsq ...any order
+// correction := c + c*C_1*FR_rsq ...any order
// Else
// poly := FR_rsq*FR_rsq*(QQ_2 + FR_rsq*(QQ_3 + ... + FR_rsq*QQ_8))
-// U_hi := 1 + QQ_1 * r_hi * r_hi ...any order
+// U_hi := 1 + QQ_1 * r_hi * r_hi ...any order
// U_lo := QQ_1 * r_lo * (r + r_hi)
-// correction := -c*(r + S_1*FR_rsq*r) ...any order
+// correction := -c*(r + S_1*FR_rsq*r) ...any order
// Endif
//
-// V := poly + (U_lo + correction) ...observe order
+// V := poly + (U_lo + correction) ...observe order
//
// result := (i_0 == 0? 1.0 : -1.0)
//
@@ -397,7 +402,7 @@
// Return
//
// Step 6. Small_r.
-//
+//
// ...Use flush to zero mode without causing exception
// Let [i_0 i_1] be the two lsb of N_fix.
//
@@ -412,7 +417,7 @@
// Else
// z := FR_rsq*FR_rsq; z := FR_rsq*z
// poly_lo := C_3 + FR_rsq*(C_4 + FR_rsq*C_5)
-// poly_hi := FR_rsq*(C_1 + FR_rsq*C_2)
+// poly_hi := FR_rsq*(C_1 + FR_rsq*C_2)
// correction := -c*r
// result := 1
// Endif
@@ -429,15 +434,15 @@
//
// Step 7. Case_2_reduce.
//
-// ...Refer to the write up for argument reduction for
+// ...Refer to the write up for argument reduction for
// ...rationale. The reduction algorithm below is taken from
// ...argument reduction description and integrated this.
//
// w := N*P_3
-// U_1 := N*P_2 + w ...FMA
-// U_2 := (N*P_2 - U_1) + w ...2 FMA
+// U_1 := N*P_2 + w ...FMA
+// U_2 := (N*P_2 - U_1) + w ...2 FMA
// ...U_1 + U_2 is N*(P_2+P_3) accurately
-//
+//
// r := s - U_1
// c := ( (s - r) - U_1 ) - U_2
//
@@ -446,29 +451,29 @@
// ...Case 1, this case requires much more work to reduce
// ...the argument, the subsequent calculation needed for
// ...any of the trigonometric function is very little because
-// ...|alpha| < 1.01*2^(-33) and thus two terms of the
+// ...|alpha| < 1.01*2^(-33) and thus two terms of the
// ...Taylor series expansion suffices.
//
// If i_1 = 0 then
-// poly := c + S_1 * r * r * r ...any order
+// poly := c + S_1 * r * r * r ...any order
// result := r
// Else
// poly := -2^(-67)
// result := 1.0
// Endif
-//
+//
// If i_0 = 1, result := -result
//
// Last operation. Perform in user-set rounding mode
//
// result := (i_0 == 0? result + poly :
// result - poly )
-//
+//
// Return
//
-//
+//
// Step 8. Pre-reduction of large arguments.
-//
+//
// ...Again, the following reduction procedure was described
// ...in the separate write up for argument reduction, which
// ...is tightly integrated here.
@@ -476,13 +481,13 @@
// N_0 := Arg * Inv_P_0
// N_0_fix := fcvt.fx( N_0 )
// N_0 := fcvt.xf( N_0_fix)
-
+
// Arg' := Arg - N_0 * P_0
// w := N_0 * d_1
// N := Arg' * two_by_PI
// N_fix := fcvt.fx( N )
// N := fcvt.xf( N_fix )
-// N_fix := N_fix + N_inc
+// N_fix := N_fix + N_inc
//
// s := Arg' - N * P_1
// w := w - N * P_2
@@ -494,15 +499,15 @@
// Endif
//
// Step 9. Case_4_reduce.
-//
+//
// ...first obtain N_0*d_1 and -N*P_2 accurately
-// U_hi := N_0 * d_1 V_hi := -N*P_2
-// U_lo := N_0 * d_1 - U_hi V_lo := -N*P_2 - U_hi ...FMAs
+// U_hi := N_0 * d_1 V_hi := -N*P_2
+// U_lo := N_0 * d_1 - U_hi V_lo := -N*P_2 - U_hi ...FMAs
//
// ...compute the contribution from N_0*d_1 and -N*P_3
// w := -N*P_3
// w := w + N_0*d_2
-// t := U_lo + V_lo + w ...any order
+// t := U_lo + V_lo + w ...any order
//
// ...at this point, the mathematical value
// ...s + U_hi + V_hi + t approximates the true reduced argument
@@ -517,12 +522,12 @@
// endif
// ...order in computing "a" must be observed. This branch is
// ...best implemented by predicates.
-// ...A + a is U_hi + V_hi accurately. Moreover, "a" is
+// ...A + a is U_hi + V_hi accurately. Moreover, "a" is
// ...much smaller than A: |a| <= (1/2)ulp(A).
//
// ...Just need to calculate s + A + a + t
-// C_hi := s + A t := t + a
-// C_lo := (s - C_hi) + A
+// C_hi := s + A t := t + a
+// C_lo := (s - C_hi) + A
// C_lo := C_lo + t
//
// ...Final steps for reduction
@@ -548,156 +553,191 @@
// result := (i_0 == 0? result + poly :
// result - poly )
// Return
-//
+//
// Large Arguments: For arguments above 2**63, a Payne-Hanek
// style argument reduction is used and pi_by_2 reduce is called.
//
-#include "libm_support.h"
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-.align 64
-
-FSINCOSL_CONSTANTS:
-ASM_TYPE_DIRECTIVE(FSINCOSL_CONSTANTS,@object)
-data4 0x4B800000, 0xCB800000, 0x00000000,0x00000000 // two**24, -two**24
-data4 0x4E44152A, 0xA2F9836E, 0x00003FFE,0x00000000 // Inv_pi_by_2
-data4 0xCE81B9F1, 0xC84D32B0, 0x00004016,0x00000000 // P_0
-data4 0x2168C235, 0xC90FDAA2, 0x00003FFF,0x00000000 // P_1
-data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD,0x00000000 // P_2
-data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C,0x00000000 // P_3
-data4 0x5F000000, 0xDF000000, 0x00000000,0x00000000 // two_to_63, -two_to_63
-data4 0x6EC6B45A, 0xA397E504, 0x00003FE7,0x00000000 // Inv_P_0
-data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF,0x00000000 // d_1
-data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C,0x00000000 // d_2
-data4 0x2168C234, 0xC90FDAA2, 0x00003FFE,0x00000000 // pi_by_4
-data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE,0x00000000 // neg_pi_by_4
-data4 0x3E000000, 0xBE000000, 0x00000000,0x00000000 // two**-3, -two**-3
-data4 0x2F000000, 0xAF000000, 0x9E000000,0x00000000 // two**-33, -two**-33, -two**-67
-data4 0xA21C0BC9, 0xCC8ABEBC, 0x00003FCE,0x00000000 // PP_8
-data4 0x720221DA, 0xD7468A05, 0x0000BFD6,0x00000000 // PP_7
-data4 0x640AD517, 0xB092382F, 0x00003FDE,0x00000000 // PP_6
-data4 0xD1EB75A4, 0xD7322B47, 0x0000BFE5,0x00000000 // PP_5
-data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1
-data4 0x00000000, 0xAAAA0000, 0x0000BFFC,0x00000000 // PP_1_hi
-data4 0xBAF69EEA, 0xB8EF1D2A, 0x00003FEC,0x00000000 // PP_4
-data4 0x0D03BB69, 0xD00D00D0, 0x0000BFF2,0x00000000 // PP_3
-data4 0x88888962, 0x88888888, 0x00003FF8,0x00000000 // PP_2
-data4 0xAAAB0000, 0xAAAAAAAA, 0x0000BFEC,0x00000000 // PP_1_lo
-data4 0xC2B0FE52, 0xD56232EF, 0x00003FD2,0x00000000 // QQ_8
-data4 0x2B48DCA6, 0xC9C99ABA, 0x0000BFDA,0x00000000 // QQ_7
-data4 0x9C716658, 0x8F76C650, 0x00003FE2,0x00000000 // QQ_6
-data4 0xFDA8D0FC, 0x93F27DBA, 0x0000BFE9,0x00000000 // QQ_5
-data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1
-data4 0x00000000, 0x80000000, 0x0000BFFE,0x00000000 // QQ_1
-data4 0x0C6E5041, 0xD00D00D0, 0x00003FEF,0x00000000 // QQ_4
-data4 0x0B607F60, 0xB60B60B6, 0x0000BFF5,0x00000000 // QQ_3
-data4 0xAAAAAA9B, 0xAAAAAAAA, 0x00003FFA,0x00000000 // QQ_2
-data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1
-data4 0xAAAA719F, 0xAAAAAAAA, 0x00003FFA,0x00000000 // C_2
-data4 0x0356F994, 0xB60B60B6, 0x0000BFF5,0x00000000 // C_3
-data4 0xB2385EA9, 0xD00CFFD5, 0x00003FEF,0x00000000 // C_4
-data4 0x292A14CD, 0x93E4BD18, 0x0000BFE9,0x00000000 // C_5
-data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1
-data4 0x888868DB, 0x88888888, 0x00003FF8,0x00000000 // S_2
-data4 0x055EFD4B, 0xD00D00D0, 0x0000BFF2,0x00000000 // S_3
-data4 0x839730B9, 0xB8EF1C5D, 0x00003FEC,0x00000000 // S_4
-data4 0xE5B3F492, 0xD71EA3A4, 0x0000BFE5,0x00000000 // S_5
-data4 0x38800000, 0xB8800000, 0x00000000 // two**-14, -two**-14
-ASM_SIZE_DIRECTIVE(FSINCOSL_CONSTANTS)
-
-FR_Input_X = f8
-FR_Neg_Two_to_M3 = f32
-FR_Two_to_63 = f32
-FR_Two_to_24 = f33
-FR_Pi_by_4 = f33
-FR_Two_to_M14 = f34
-FR_Two_to_M33 = f35
-FR_Neg_Two_to_24 = f36
-FR_Neg_Pi_by_4 = f36
-FR_Neg_Two_to_M14 = f37
-FR_Neg_Two_to_M33 = f38
-FR_Neg_Two_to_M67 = f39
-FR_Inv_pi_by_2 = f40
-FR_N_float = f41
-FR_N_fix = f42
-FR_P_1 = f43
-FR_P_2 = f44
-FR_P_3 = f45
-FR_s = f46
-FR_w = f47
-FR_c = f48
-FR_r = f49
-FR_Z = f50
-FR_A = f51
-FR_a = f52
-FR_t = f53
-FR_U_1 = f54
-FR_U_2 = f55
-FR_C_1 = f56
-FR_C_2 = f57
-FR_C_3 = f58
-FR_C_4 = f59
-FR_C_5 = f60
-FR_S_1 = f61
-FR_S_2 = f62
-FR_S_3 = f63
-FR_S_4 = f64
-FR_S_5 = f65
-FR_poly_hi = f66
-FR_poly_lo = f67
-FR_r_hi = f68
-FR_r_lo = f69
-FR_rsq = f70
-FR_r_cubed = f71
-FR_C_hi = f72
-FR_N_0 = f73
-FR_d_1 = f74
-FR_V = f75
-FR_V_hi = f75
-FR_V_lo = f76
-FR_U_hi = f77
-FR_U_lo = f78
-FR_U_hiabs = f79
-FR_V_hiabs = f80
-FR_PP_8 = f81
-FR_QQ_8 = f81
-FR_PP_7 = f82
-FR_QQ_7 = f82
-FR_PP_6 = f83
-FR_QQ_6 = f83
-FR_PP_5 = f84
-FR_QQ_5 = f84
-FR_PP_4 = f85
-FR_QQ_4 = f85
-FR_PP_3 = f86
-FR_QQ_3 = f86
-FR_PP_2 = f87
-FR_QQ_2 = f87
-FR_QQ_1 = f88
-FR_N_0_fix = f89
-FR_Inv_P_0 = f90
-FR_corr = f91
-FR_poly = f92
-FR_d_2 = f93
-FR_Two_to_M3 = f94
-FR_Neg_Two_to_63 = f94
-FR_P_0 = f95
-FR_C_lo = f96
-FR_PP_1 = f97
-FR_PP_1_lo = f98
-FR_ArgPrime = f99
-
-GR_Table_Base = r32
-GR_Table_Base1 = r33
-GR_i_0 = r34
-GR_i_1 = r35
-GR_N_Inc = r36
-GR_Sin_or_Cos = r37
+
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(FSINCOSL_CONSTANTS)
+
+sincosl_table_p:
+data8 0xA2F9836E4E44152A, 0x00003FFE // Inv_pi_by_2
+data8 0xC84D32B0CE81B9F1, 0x00004016 // P_0
+data8 0xC90FDAA22168C235, 0x00003FFF // P_1
+data8 0xECE675D1FC8F8CBB, 0x0000BFBD // P_2
+data8 0xB7ED8FBBACC19C60, 0x0000BF7C // P_3
+data8 0x8D848E89DBD171A1, 0x0000BFBF // d_1
+data8 0xD5394C3618A66F8E, 0x0000BF7C // d_2
+LOCAL_OBJECT_END(FSINCOSL_CONSTANTS)
+
+LOCAL_OBJECT_START(sincosl_table_d)
+data8 0xC90FDAA22168C234, 0x00003FFE // pi_by_4
+data8 0xA397E5046EC6B45A, 0x00003FE7 // Inv_P_0
+data4 0x3E000000, 0xBE000000 // 2^-3 and -2^-3
+data4 0x2F000000, 0xAF000000 // 2^-33 and -2^-33
+data4 0x9E000000, 0x00000000 // -2^-67
+data4 0x00000000, 0x00000000 // pad
+LOCAL_OBJECT_END(sincosl_table_d)
+
+LOCAL_OBJECT_START(sincosl_table_pp)
+data8 0xCC8ABEBCA21C0BC9, 0x00003FCE // PP_8
+data8 0xD7468A05720221DA, 0x0000BFD6 // PP_7
+data8 0xB092382F640AD517, 0x00003FDE // PP_6
+data8 0xD7322B47D1EB75A4, 0x0000BFE5 // PP_5
+data8 0xFFFFFFFFFFFFFFFE, 0x0000BFFD // C_1
+data8 0xAAAA000000000000, 0x0000BFFC // PP_1_hi
+data8 0xB8EF1D2ABAF69EEA, 0x00003FEC // PP_4
+data8 0xD00D00D00D03BB69, 0x0000BFF2 // PP_3
+data8 0x8888888888888962, 0x00003FF8 // PP_2
+data8 0xAAAAAAAAAAAB0000, 0x0000BFEC // PP_1_lo
+LOCAL_OBJECT_END(sincosl_table_pp)
+
+LOCAL_OBJECT_START(sincosl_table_qq)
+data8 0xD56232EFC2B0FE52, 0x00003FD2 // QQ_8
+data8 0xC9C99ABA2B48DCA6, 0x0000BFDA // QQ_7
+data8 0x8F76C6509C716658, 0x00003FE2 // QQ_6
+data8 0x93F27DBAFDA8D0FC, 0x0000BFE9 // QQ_5
+data8 0xAAAAAAAAAAAAAAAA, 0x0000BFFC // S_1
+data8 0x8000000000000000, 0x0000BFFE // QQ_1
+data8 0xD00D00D00C6E5041, 0x00003FEF // QQ_4
+data8 0xB60B60B60B607F60, 0x0000BFF5 // QQ_3
+data8 0xAAAAAAAAAAAAAA9B, 0x00003FFA // QQ_2
+LOCAL_OBJECT_END(sincosl_table_qq)
+
+LOCAL_OBJECT_START(sincosl_table_c)
+data8 0xFFFFFFFFFFFFFFFE, 0x0000BFFD // C_1
+data8 0xAAAAAAAAAAAA719F, 0x00003FFA // C_2
+data8 0xB60B60B60356F994, 0x0000BFF5 // C_3
+data8 0xD00CFFD5B2385EA9, 0x00003FEF // C_4
+data8 0x93E4BD18292A14CD, 0x0000BFE9 // C_5
+LOCAL_OBJECT_END(sincosl_table_c)
+
+LOCAL_OBJECT_START(sincosl_table_s)
+data8 0xAAAAAAAAAAAAAAAA, 0x0000BFFC // S_1
+data8 0x88888888888868DB, 0x00003FF8 // S_2
+data8 0xD00D00D0055EFD4B, 0x0000BFF2 // S_3
+data8 0xB8EF1C5D839730B9, 0x00003FEC // S_4
+data8 0xD71EA3A4E5B3F492, 0x0000BFE5 // S_5
+data4 0x38800000, 0xB8800000 // two**-14 and -two**-14
+LOCAL_OBJECT_END(sincosl_table_s)
+
+FR_Input_X = f8
+FR_Result = f8
+
+FR_r = f8
+FR_c = f9
+
+FR_norm_x = f9
+FR_inv_pi_2to63 = f10
+FR_rshf_2to64 = f11
+FR_2tom64 = f12
+FR_rshf = f13
+FR_N_float_signif = f14
+FR_abs_x = f15
+FR_Pi_by_4 = f34
+FR_Two_to_M14 = f35
+FR_Neg_Two_to_M14 = f36
+FR_Two_to_M33 = f37
+FR_Neg_Two_to_M33 = f38
+FR_Neg_Two_to_M67 = f39
+FR_Inv_pi_by_2 = f40
+FR_N_float = f41
+FR_N_fix = f42
+FR_P_1 = f43
+FR_P_2 = f44
+FR_P_3 = f45
+FR_s = f46
+FR_w = f47
+FR_d_2 = f48
+FR_tmp_result = f49
+FR_Z = f50
+FR_A = f51
+FR_a = f52
+FR_t = f53
+FR_U_1 = f54
+FR_U_2 = f55
+FR_C_1 = f56
+FR_C_2 = f57
+FR_C_3 = f58
+FR_C_4 = f59
+FR_C_5 = f60
+FR_S_1 = f61
+FR_S_2 = f62
+FR_S_3 = f63
+FR_S_4 = f64
+FR_S_5 = f65
+FR_poly_hi = f66
+FR_poly_lo = f67
+FR_r_hi = f68
+FR_r_lo = f69
+FR_rsq = f70
+FR_r_cubed = f71
+FR_C_hi = f72
+FR_N_0 = f73
+FR_d_1 = f74
+FR_V = f75
+FR_V_hi = f75
+FR_V_lo = f76
+FR_U_hi = f77
+FR_U_lo = f78
+FR_U_hiabs = f79
+FR_V_hiabs = f80
+FR_PP_8 = f81
+FR_QQ_8 = f101
+FR_PP_7 = f82
+FR_QQ_7 = f102
+FR_PP_6 = f83
+FR_QQ_6 = f103
+FR_PP_5 = f84
+FR_QQ_5 = f104
+FR_PP_4 = f85
+FR_QQ_4 = f105
+FR_PP_3 = f86
+FR_QQ_3 = f106
+FR_PP_2 = f87
+FR_QQ_2 = f107
+FR_QQ_1 = f108
+FR_r_hi_sq = f88
+FR_N_0_fix = f89
+FR_Inv_P_0 = f90
+FR_corr = f91
+FR_poly = f92
+FR_Neg_Two_to_M3 = f93
+FR_Two_to_M3 = f94
+FR_P_0 = f95
+FR_C_lo = f96
+FR_PP_1 = f97
+FR_PP_1_lo = f98
+FR_ArgPrime = f99
+FR_inexact = f100
+
+GR_sig_inv_pi = r14
+GR_rshf_2to64 = r15
+GR_exp_2tom64 = r16
+GR_rshf = r17
+GR_ad_p = r18
+GR_ad_d = r19
+GR_ad_pp = r20
+GR_ad_qq = r21
+GR_ad_c = r22
+GR_ad_s = r23
+GR_ad_ce = r24
+GR_ad_se = r25
+GR_ad_m14 = r26
+GR_ad_s1 = r27
+GR_exp_m2_to_m3= r36
+GR_N_Inc = r37
+GR_Sin_or_Cos = r38
+GR_signexp_x = r40
+GR_exp_x = r40
+GR_exp_mask = r41
+GR_exp_2_to_63 = r42
+GR_exp_2_to_m3 = r43
+GR_exp_2_to_24 = r44
// Added for unwind support
@@ -706,386 +746,376 @@ GR_SAVE_GP = r40
GR_SAVE_PFS = r41
-.global sinl#
-.global cosl#
-#ifdef _LIBC
-.global __sinl#
-.global __cosl#
-#endif
-
.section .text
-.proc sinl#
-#ifdef _LIBC
-.proc __sinl#
-#endif
-.align 64
-sinl:
-#ifdef _LIBC
-__sinl:
-#endif
+
+GLOBAL_IEEE754_ENTRY(sinl)
{ .mlx
-alloc GR_Table_Base = ar.pfs,0,12,2,0
-(p0) movl GR_Sin_or_Cos = 0x0 ;;
+ alloc r32 = ar.pfs,0,12,2,0
+ movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
-
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
- nop.i 999
+{ .mlx
+ mov GR_Sin_or_Cos = 0x0
+ movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
}
;;
-{ .mmb
- ld8 GR_Table_Base = [GR_Table_Base]
+{ .mfi
+ addl GR_ad_p = @ltoff(FSINCOSL_CONSTANTS#), gp
+ fclass.m p6, p0 = FR_Input_X, 0x1E3 // Test x natval, nan, inf
+ mov GR_exp_2_to_m3 = 0xffff - 3 // Exponent of 2^-3
+}
+{ .mfb
nop.m 999
-(p0) br.cond.sptk L(SINCOSL_CONTINUE) ;;
+ fnorm.s1 FR_norm_x = FR_Input_X // Normalize x
+ br.cond.sptk SINCOSL_CONTINUE
}
;;
-
-.endp sinl#
-ASM_SIZE_DIRECTIVE(sinl#)
-
-.section .text
-.proc cosl#
-cosl:
-#ifdef _LIBC
-.proc __cosl#
-__cosl:
-#endif
+GLOBAL_IEEE754_END(sinl)
+GLOBAL_IEEE754_ENTRY(cosl)
+{ .mlx
+ alloc r32 = ar.pfs,0,12,2,0
+ movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
+}
{ .mlx
-alloc GR_Table_Base= ar.pfs,0,12,2,0
-(p0) movl GR_Sin_or_Cos = 0x1 ;;
+ mov GR_Sin_or_Cos = 0x1
+ movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
}
;;
-{ .mmi
+{ .mfi
+ addl GR_ad_p = @ltoff(FSINCOSL_CONSTANTS#), gp
+ fclass.m p6, p0 = FR_Input_X, 0x1E3 // Test x natval, nan, inf
+ mov GR_exp_2_to_m3 = 0xffff - 3 // Exponent of 2^-3
+}
+{ .mfi
nop.m 999
-(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
+ fnorm.s1 FR_norm_x = FR_Input_X // Normalize x
nop.i 999
}
;;
-{ .mmb
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.b 999
+SINCOSL_CONTINUE:
+{ .mfi
+ setf.sig FR_inv_pi_2to63 = GR_sig_inv_pi // Form 2/pi * 2^64 (significand taken as an integer)
+ nop.f 999
+ mov GR_exp_2tom64 = 0xffff - 64 // Scaling constant to compute N
+}
+{ .mlx
+ setf.d FR_rshf_2to64 = GR_rshf_2to64 // Form const 1.1000 * 2^(63+64)
+ movl GR_rshf = 0x43e8000000000000 // Form const 1.1000 * 2^63
}
;;
+{ .mfi
+ ld8 GR_ad_p = [GR_ad_p] // Point to Inv_pi_by_2
+ fclass.m p7, p0 = FR_Input_X, 0x0b // Test x denormal
+ nop.i 999
+}
+;;
-
-//
-// Load Table Address
-//
-
-L(SINCOSL_CONTINUE):
-{ .mmi
-(p0) add GR_Table_Base1 = 96, GR_Table_Base
-(p0) ldfs FR_Two_to_24 = [GR_Table_Base], 4
-// GR_Sin_or_Cos denotes
-(p0) mov r39 = b0 ;;
+{ .mfi
+ getf.exp GR_signexp_x = FR_Input_X // Get sign and exponent of x
+ fclass.m p10, p0 = FR_Input_X, 0x007 // Test x zero
+ nop.i 999
}
-{ .mmi
- nop.m 0
-//
-// Load 2**24, load 2**63.
-//
-(p0) ldfs FR_Neg_Two_to_24 = [GR_Table_Base], 12
- nop.i 0
+{ .mib
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ nop.i 999
+(p6) br.cond.spnt SINCOSL_SPECIAL // Branch if x natval, nan, inf
}
+;;
+
{ .mfi
-(p0) ldfs FR_Two_to_63 = [GR_Table_Base1], 4
-//
-// Check for unnormals - unsupported operands. We do not want
-// to generate denormal exception
-// Check for NatVals, QNaNs, SNaNs, +/-Infs
-// Check for EM unsupporteds
-// Check for Zero
-//
-(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
- nop.i 0
-};;
-{ .mmf
- nop.m 999
-(p0) ldfs FR_Neg_Two_to_63 = [GR_Table_Base1], 12
-(p0) fclass.nm.unc p8, p0 = FR_Input_X, 0x1FF
-}
-{ .mfb
- nop.m 999
-(p0) fclass.m.unc p10, p0 = FR_Input_X, 0x007
-(p6) br.cond.spnt L(SINCOSL_SPECIAL) ;;
+ setf.exp FR_2tom64 = GR_exp_2tom64 // Form 2^-64 for scaling N_float
+ nop.f 0
+ add GR_ad_d = 0x70, GR_ad_p // Point to constant table d
}
{ .mib
- nop.m 999
- nop.i 999
-(p8) br.cond.spnt L(SINCOSL_SPECIAL) ;;
+ setf.d FR_rshf = GR_rshf // Form right shift const 1.1000 * 2^63
+ mov GR_exp_m2_to_m3 = 0x2fffc // Form -(2^-3)
+(p7) br.cond.spnt SINCOSL_DENORMAL // Branch if x denormal
}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Branch if +/- NaN, Inf.
-// Load -2**24, load -2**63.
-//
-(p10) br.cond.spnt L(SINCOSL_ZERO) ;;
+;;
+
+SINCOSL_COMMON:
+{ .mfi
+ and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x
+ fclass.nm p8, p0 = FR_Input_X, 0x1FF // Test x unsupported type
+ mov GR_exp_2_to_63 = 0xffff + 63 // Exponent of 2^63
}
-{ .mmb
-(p0) ldfe FR_Inv_pi_by_2 = [GR_Table_Base], 16
-(p0) ldfe FR_Inv_P_0 = [GR_Table_Base1], 16
- nop.b 999 ;;
+{ .mib
+ add GR_ad_pp = 0x40, GR_ad_d // Point to constant table pp
+ mov GR_exp_2_to_24 = 0xffff + 24 // Exponent of 2^24
+(p10) br.cond.spnt SINCOSL_ZERO // Branch if x zero
}
-{ .mmb
-(p0) ldfe FR_d_1 = [GR_Table_Base1], 16
-//
-// Raise possible denormal operand flag with useful fcmp
-// Is x <= -2**63
-// Load Inv_P_0 for pre-reduction
-// Load Inv_pi_by_2
-//
-(p0) ldfe FR_P_0 = [GR_Table_Base], 16
- nop.b 999 ;;
+;;
+
+{ .mfi
+ ldfe FR_Inv_pi_by_2 = [GR_ad_p], 16 // Load 2/pi
+ fcmp.eq.s0 p15, p0 = FR_Input_X, f0 // Dummy to set denormal
+ add GR_ad_qq = 0xa0, GR_ad_pp // Point to constant table qq
}
-{ .mmb
-(p0) ldfe FR_d_2 = [GR_Table_Base1], 16
-//
-// Load P_0
-// Load d_1
-// Is x >= 2**63
-// Is x <= -2**24?
-//
-(p0) ldfe FR_P_1 = [GR_Table_Base], 16
- nop.b 999 ;;
+{ .mfi
+ ldfe FR_Pi_by_4 = [GR_ad_d], 16 // Load pi/4 for range test
+ nop.f 999
+ cmp.ge p10,p0 = GR_exp_x, GR_exp_2_to_63 // Is |x| >= 2^63
}
-//
-// Load P_1
-// Load d_2
-// Is x >= 2**24?
-//
+;;
+
{ .mfi
-(p0) ldfe FR_P_2 = [GR_Table_Base], 16
-(p0) fcmp.le.unc.s1 p7, p8 = FR_Input_X, FR_Neg_Two_to_24
- nop.i 999 ;;
+ ldfe FR_P_0 = [GR_ad_p], 16 // Load P_0 for pi/4 <= |x| < 2^63
+ fmerge.s FR_abs_x = f1, FR_norm_x // |x|
+ add GR_ad_c = 0x90, GR_ad_qq // Point to constant table c
}
-{ .mbb
-(p0) ldfe FR_P_3 = [GR_Table_Base], 16
- nop.b 999
- nop.b 999 ;;
+{ .mfi
+ ldfe FR_Inv_P_0 = [GR_ad_d], 16 // Load 1/P_0 for pi/4 <= |x| < 2^63
+ nop.f 999
+ cmp.ge p7,p0 = GR_exp_x, GR_exp_2_to_24 // Is |x| >= 2^24
}
+;;
+
{ .mfi
- nop.m 999
-(p8) fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24
- nop.i 999
+ ldfe FR_P_1 = [GR_ad_p], 16 // Load P_1 for pi/4 <= |x| < 2^63
+ nop.f 999
+ add GR_ad_s = 0x50, GR_ad_c // Point to constant table s
}
{ .mfi
-(p0) ldfe FR_Pi_by_4 = [GR_Table_Base1], 16
-//
-// Branch if +/- zero.
-// Decide about the paths to take:
-// If -2**24 < FR_Input_X < 2**24 - CASE 1 OR 2
-// OTHERWISE - CASE 3 OR 4
-//
-(p0) fcmp.le.unc.s0 p10, p11 = FR_Input_X, FR_Neg_Two_to_63
- nop.i 999 ;;
+ ldfe FR_PP_8 = [GR_ad_pp], 16 // Load PP_8 for 2^-3 < |r| < pi/4
+ nop.f 999
+ nop.i 999
}
-{ .mmi
-(p0) ldfe FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;;
-(p0) ldfs FR_Two_to_M3 = [GR_Table_Base1], 4
- nop.i 999
+;;
+
+{ .mfi
+ ldfe FR_P_2 = [GR_ad_p], 16 // Load P_2 for pi/4 <= |x| < 2^63
+ nop.f 999
+ add GR_ad_ce = 0x40, GR_ad_c // Point to end of constant table c
}
{ .mfi
- nop.m 999
-(p11) fcmp.ge.s1 p10, p0 = FR_Input_X, FR_Two_to_63
- nop.i 999 ;;
+ ldfe FR_QQ_8 = [GR_ad_qq], 16 // Load QQ_8 for 2^-3 < |r| < pi/4
+ nop.f 999
+ nop.i 999
}
-{ .mib
-(p0) ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1], 12
- nop.i 999
-//
-// Load P_2
-// Load P_3
-// Load pi_by_4
-// Load neg_pi_by_4
-// Load 2**(-3)
-// Load -2**(-3).
-//
-(p10) br.cond.spnt L(SINCOSL_ARG_TOO_LARGE) ;;
+;;
+
+{ .mfi
+ ldfe FR_QQ_7 = [GR_ad_qq], 16 // Load QQ_7 for 2^-3 < |r| < pi/4
+ fma.s1 FR_N_float_signif = FR_Input_X, FR_inv_pi_2to63, FR_rshf_2to64
+ add GR_ad_se = 0x40, GR_ad_s // Point to end of constant table s
}
{ .mib
- nop.m 999
- nop.i 999
-//
-// Branch out if x >= 2**63. Use Payne-Hanek Reduction
-//
-(p7) br.cond.spnt L(SINCOSL_LARGER_ARG) ;;
+ ldfe FR_PP_7 = [GR_ad_pp], 16 // Load PP_7 for 2^-3 < |r| < pi/4
+ mov GR_ad_s1 = GR_ad_s // Save pointer to S_1
+(p10) br.cond.spnt SINCOSL_ARG_TOO_LARGE // Branch if |x| >= 2^63
+ // Use Payne-Hanek Reduction
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.
-//
-(p0) fma.s1 FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0
- nop.i 999 ;;
+ ldfe FR_P_3 = [GR_ad_p], 16 // Load P_3 for pi/4 <= |x| < 2^63
+ fmerge.se FR_r = FR_norm_x, FR_norm_x // r = x, in case |x| < pi/4
+ add GR_ad_m14 = 0x50, GR_ad_s // Point to constant table m14
}
-{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p6, p7 = FR_Input_X, FR_Pi_by_4
- nop.i 999 ;;
+{ .mfb
+ ldfps FR_Two_to_M3, FR_Neg_Two_to_M3 = [GR_ad_d], 8
+ fma.s1 FR_rsq = FR_norm_x, FR_norm_x, f0 // rsq = x*x, in case |x| < pi/4
+(p7) br.cond.spnt SINCOSL_LARGER_ARG // Branch if 2^24 <= |x| < 2^63
+ // Use pre-reduction
+}
+;;
+
+{ .mmf
+ ldfe FR_PP_6 = [GR_ad_pp], 16 // Load PP_6 for normal path
+ ldfe FR_QQ_6 = [GR_ad_qq], 16 // Load QQ_6 for normal path
+ fmerge.se FR_c = f0, f0 // c = 0 in case |x| < pi/4
}
+;;
+
+{ .mmf
+ ldfe FR_PP_5 = [GR_ad_pp], 16 // Load PP_5 for normal path
+ ldfe FR_QQ_5 = [GR_ad_qq], 16 // Load QQ_5 for normal path
+ nop.f 999
+}
+;;
+
+// Here if 0 < |x| < 2^24
{ .mfi
- nop.m 999
-//
-// Select the case when |Arg| < pi/4
-// Else Select the case when |Arg| >= pi/4
-//
-(p0) fcvt.fx.s1 FR_N_fix = FR_N_float
- nop.i 999 ;;
+ ldfe FR_S_5 = [GR_ad_se], -16 // Load S_5 if i_1=0
+ fcmp.lt.s1 p6, p7 = FR_abs_x, FR_Pi_by_4 // Test |x| < pi/4
+ nop.i 999
}
{ .mfi
- nop.m 999
+ ldfe FR_C_5 = [GR_ad_ce], -16 // Load C_5 if i_1=1
+ fms.s1 FR_N_float = FR_N_float_signif, FR_2tom64, FR_rshf
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ldfe FR_S_4 = [GR_ad_se], -16 // Load S_4 if i_1=0
+ ldfe FR_C_4 = [GR_ad_ce], -16 // Load C_4 if i_1=1
+ nop.i 999
+}
+;;
+
//
// N = Arg * 2/pi
// Check if Arg < pi/4
//
-(p6) fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4
- nop.i 999 ;;
-}
//
// Case 2: Convert integer N_fix back to normalized floating-point value.
// Case 1: p8 is only affected when p6 is set
//
-{ .mfi
-(p7) ldfs FR_Two_to_M33 = [GR_Table_Base1], 4
//
// Grab the integer part of N and call it N_fix
//
-(p6) fmerge.se FR_r = FR_Input_X, FR_Input_X
-// If |x| < pi/4, r = x and c = 0
+{ .mfi
+(p7) ldfps FR_Two_to_M33, FR_Neg_Two_to_M33 = [GR_ad_d], 8
+(p6) fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 // r^3 if |x| < pi/4
+(p6) mov GR_N_Inc = GR_Sin_or_Cos // N_Inc if |x| < pi/4
+}
+;;
+
+// If |x| < pi/4, r = x and c = 0
// lf |x| < pi/4, is x < 2**(-3).
-// r = Arg
+// r = Arg
// c = 0
-(p6) mov GR_N_Inc = GR_Sin_or_Cos ;;
-}
-{ .mmf
- nop.m 999
-(p7) ldfs FR_Neg_Two_to_M33 = [GR_Table_Base1], 4
-(p6) fmerge.se FR_c = f0, f0
-}
-{ .mfi
- nop.m 999
-(p6) fcmp.lt.unc.s1 p8, p9 = FR_Input_X, FR_Two_to_M3
- nop.i 999 ;;
+{ .mmi
+(p7) getf.sig GR_N_Inc = FR_N_float_signif
+(p6) cmp.lt.unc p8,p0 = GR_exp_x, GR_exp_2_to_m3 // Is |x| < 2^-3
+(p6) tbit.z p9,p10 = GR_N_Inc, 0 // p9 if i_1=0, N mod 4 = 0,1
+ // p10 if i_1=1, N mod 4 = 2,3
}
-{ .mfi
- nop.m 999
+;;
+
//
// lf |x| < pi/4, is -2**(-3)< x < 2**(-3) - set p8.
-// If |x| >= pi/4,
-// Create the right N for |x| < pi/4 and otherwise
+// If |x| >= pi/4,
+// Create the right N for |x| < pi/4 and otherwise
// Case 2: Place integer part of N in GP register
//
-(p7) fcvt.xf FR_N_float = FR_N_fix
- nop.i 999 ;;
-}
-{ .mmf
- nop.m 999
-(p7) getf.sig GR_N_Inc = FR_N_fix
-(p8) fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Load 2**(-33), -2**(-33)
-//
-(p8) br.cond.spnt L(SINCOSL_SMALL_R) ;;
+
+
+{ .mbb
+ nop.m 999
+(p8) br.cond.spnt SINCOSL_SMALL_R_0 // Branch if 0 < |x| < 2^-3
+(p6) br.cond.spnt SINCOSL_NORMAL_R_0 // Branch if 2^-3 <= |x| < pi/4
}
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.sptk L(SINCOSL_NORMAL_R) ;;
+;;
+
+// Here if pi/4 <= |x| < 2^24
+{ .mfi
+ ldfs FR_Neg_Two_to_M67 = [GR_ad_d], 8 // Load -2^-67
+ fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X // s = -N * P_1 + Arg
+ add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos // Adjust N_Inc for sin/cos
}
-//
-// if |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.
-//
-//
-// In this branch, |x| >= pi/4.
-//
{ .mfi
-(p0) ldfs FR_Neg_Two_to_M67 = [GR_Table_Base1], 8
-//
-// Load -2**(-67)
-//
-(p0) fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X
-//
-// w = N * P_2
-// s = -N * P_1 + Arg
-//
-(p0) add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos
+ nop.m 999
+ fma.s1 FR_w = FR_N_float, FR_P_2, f0 // w = N * P_2
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_w = FR_N_float, FR_P_2, f0
- nop.i 999 ;;
+ nop.m 999
+ fms.s1 FR_r = FR_s, f1, FR_w // r = s - w, assume |s| >= 2^-33
+ tbit.z p9,p10 = GR_N_Inc, 0 // p9 if i_1=0, N mod 4 = 0,1
+ // p10 if i_1=1, N mod 4 = 2,3
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Adjust N_fix by N_inc to determine whether sine or
-// cosine is being calculated
-//
-(p0) fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33
- nop.i 999 ;;
+ nop.m 999
+ fcmp.lt.s1 p7, p6 = FR_s, FR_Two_to_M33
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p7) fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33
- nop.i 999 ;;
+ nop.m 999
+(p7) fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33 // p6 if |s| >= 2^-33, else p7
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-// Remember x >= pi/4.
-// Is s <= -2**(-33) or s >= 2**(-33) (p6)
-// or -2**(-33) < s < 2**(-33) (p7)
-(p6) fms.s1 FR_r = FR_s, f1, FR_w
- nop.i 999
+ nop.m 999
+ fms.s1 FR_c = FR_s, f1, FR_r // c = s - r, for |s| >= 2^-33
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 FR_w = FR_N_float, FR_P_3, f0
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_rsq = FR_r, FR_r, f0 // rsq = r * r, for |s| >= 2^-33
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p7) fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w
- nop.i 999
+ nop.m 999
+(p7) fma.s1 FR_w = FR_N_float, FR_P_3, f0
+ nop.i 999
}
+;;
+
+{ .mmf
+(p9) ldfe FR_C_1 = [GR_ad_pp], 16 // Load C_1 if i_1=0
+(p10) ldfe FR_S_1 = [GR_ad_qq], 16 // Load S_1 if i_1=1
+ frcpa.s1 FR_r_hi, p15 = f1, FR_r // r_hi = frcpa(r)
+}
+;;
+
{ .mfi
- nop.m 999
-(p6) fms.s1 FR_c = FR_s, f1, FR_r
- nop.i 999 ;;
+ nop.m 999
+(p6) fcmp.lt.unc.s1 p8, p13 = FR_r, FR_Two_to_M3 // If big s, test r with 2^-3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// For big s: r = s - w: No futher reduction is necessary
+ nop.m 999
+(p7) fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w
+ nop.i 999
+}
+;;
+
+//
+// For big s: r = s - w: No further reduction is necessary
// For small s: w = N * P_3 (change sign) More reduction
//
-(p6) fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3
- nop.i 999 ;;
+{ .mfi
+ nop.m 999
+(p8) fcmp.gt.s1 p8, p13 = FR_r, FR_Neg_Two_to_M3 // If big s, p8 if |r| < 2^-3
+ nop.i 999 ;;
}
+
{ .mfi
- nop.m 999
-(p8) fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 // poly = rsq*PP_8+PP_7 if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 // poly = rsq*QQ_8+QQ_7 if i_1=1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
(p7) fms.s1 FR_r = FR_s, f1, FR_U_1
- nop.i 999
+ nop.i 999
}
-{ .mfb
- nop.m 999
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 // rcubed = r * rsq
+ nop.i 999
+}
+;;
+
+{ .mfi
//
// For big s: Is |r| < 2**(-3)?
// For big s: c = S - r
@@ -1095,355 +1125,356 @@ L(SINCOSL_CONTINUE):
// If p9 is set, prepare to branch to Normal_R.
// For big s, r is complete here.
//
-(p6) fms.s1 FR_c = FR_c, f1, FR_w
-//
+//
// For big s: c = c + w (w has not been negated.)
// For small s: r = S - U_1
//
-(p8) br.cond.spnt L(SINCOSL_SMALL_R) ;;
+ nop.m 999
+(p6) fms.s1 FR_c = FR_c, f1, FR_w
+ nop.i 999
}
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.sptk L(SINCOSL_NORMAL_R) ;;
+{ .mbb
+ nop.m 999
+(p8) br.cond.spnt SINCOSL_SMALL_R_1 // Branch if |s|>=2^-33, |r| < 2^-3,
+ // and pi/4 <= |x| < 2^24
+(p13) br.cond.sptk SINCOSL_NORMAL_R_1 // Branch if |s|>=2^-33, |r| >= 2^-3,
+ // and pi/4 <= |x| < 2^24
}
-{ .mfi
-(p7) add GR_Table_Base1 = 224, GR_Table_Base1
+;;
+
+SINCOSL_S_TINY:
+//
+// Here if |s| < 2^-33, and pi/4 <= |x| < 2^24
//
-// Branch to SINCOSL_SMALL_R or SINCOSL_NORMAL_R
+{ .mfi
+ fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
//
-(p7) fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
-//
// c = S - U_1
// r = S_1 * r
//
//
-(p7) extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
}
+;;
+
{ .mmi
- nop.m 999
+ nop.m 999
//
// Get [i_0,i_1] - two lsb of N_fix_gr.
// Do dummy fmpy so inexact is always set.
//
-(p7) cmp.eq.unc p9, p10 = 0x0, GR_i_1
-(p7) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+ tbit.z p9,p10 = GR_N_Inc, 0 // p9 if i_1=0, N mod 4 = 0,1
+ // p10 if i_1=1, N mod 4 = 2,3
}
-//
+;;
+
+//
// For small s: U_2 = N * P_2 - U_1
// S_1 stored constant - grab the one stored with the
// coefficients.
-//
+//
{ .mfi
-(p7) ldfe FR_S_1 = [GR_Table_Base1], 16
+ ldfe FR_S_1 = [GR_ad_s1], 16
//
// Check if i_1 and i_0 != 0
//
-(p10) fma.s1 FR_poly = f0, f1, FR_Neg_Two_to_M67
-(p7) cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;
+(p10) fma.s1 FR_poly = f0, f1, FR_Neg_Two_to_M67
+ tbit.z p11,p12 = GR_N_Inc, 1 // p11 if i_0=0, N mod 4 = 0,2
+ // p12 if i_0=1, N mod 4 = 1,3
}
+;;
+
{ .mfi
- nop.m 999
-(p7) fms.s1 FR_s = FR_s, f1, FR_r
- nop.i 999
+ nop.m 999
+ fms.s1 FR_s = FR_s, f1, FR_r
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
+ nop.m 999
+//
// S = S - r
// U_2 = U_2 + w
// load S_1
//
-(p7) fma.s1 FR_rsq = FR_r, FR_r, f0
- nop.i 999 ;;
+ fma.s1 FR_rsq = FR_r, FR_r, f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p7) fma.s1 FR_U_2 = FR_U_2, f1, FR_w
- nop.i 999
+ nop.m 999
+ fma.s1 FR_U_2 = FR_U_2, f1, FR_w
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fmerge.se FR_Input_X = FR_r, FR_r
- nop.i 999 ;;
+ nop.m 999
+ fmerge.se FR_tmp_result = FR_r, FR_r
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_Input_X = f0, f1, f1
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_tmp_result = f0, f1, f1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
+ nop.m 999
+//
// FR_rsq = r * r
// Save r as the result.
//
-(p7) fms.s1 FR_c = FR_s, f1, FR_U_1
- nop.i 999 ;;
+ fms.s1 FR_c = FR_s, f1, FR_U_1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
+ nop.m 999
+//
// if ( i_1 ==0) poly = c + S_1*r*r*r
// else Result = 1
//
-(p12) fnma.s1 FR_Input_X = FR_Input_X, f1, f0
- nop.i 999
+(p12) fnma.s1 FR_tmp_result = FR_tmp_result, f1, f0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fma.s1 FR_r = FR_S_1, FR_r, f0
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_r = FR_S_1, FR_r, f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p7) fma.s0 FR_S_1 = FR_S_1, FR_S_1, f0
- nop.i 999 ;;
+ nop.m 999
+ fma.s0 FR_S_1 = FR_S_1, FR_S_1, f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// If i_1 != 0, poly = 2**(-67)
//
-(p7) fms.s1 FR_c = FR_c, f1, FR_U_2
- nop.i 999 ;;
+ fms.s1 FR_c = FR_c, f1, FR_U_2
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-//
+ nop.m 999
+//
// c = c - U_2
-//
+//
(p9) fma.s1 FR_poly = FR_r, FR_rsq, FR_c
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// i_0 != 0, so Result = -Result
//
-(p11) fma.s0 FR_Input_X = FR_Input_X, f1, FR_poly
- nop.i 999 ;;
+(p11) fma.s0 FR_Result = FR_tmp_result, f1, FR_poly
+ nop.i 999 ;;
}
{ .mfb
- nop.m 999
-(p12) fms.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+ nop.m 999
+(p12) fms.s0 FR_Result = FR_tmp_result, f1, FR_poly
//
// if (i_0 == 0), Result = Result + poly
// else Result = Result - poly
//
-(p0) br.ret.sptk b0 ;;
-}
-L(SINCOSL_LARGER_ARG):
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
- nop.i 999
+ br.ret.sptk b0 // Exit if |s| < 2^-33, and pi/4 <= |x| < 2^24
}
;;
-// This path for argument > 2*24
-// Adjust table_ptr1 to beginning of table.
+SINCOSL_LARGER_ARG:
//
-
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.i 999
+// Here if 2^24 <= |x| < 2^63
+//
+{ .mfi
+ ldfe FR_d_1 = [GR_ad_p], 16 // Load d_1 for |x| >= 2^24 path
+ fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
+ nop.i 999
}
;;
-
-//
-// Point to 2*-14
+//
// N_0 = Arg * Inv_P_0
//
+// Load values 2**(-14) and -2**(-14)
{ .mmi
-(p0) add GR_Table_Base = 688, GR_Table_Base ;;
-(p0) ldfs FR_Two_to_M14 = [GR_Table_Base], 4
- nop.i 999 ;;
+ ldfps FR_Two_to_M14, FR_Neg_Two_to_M14 = [GR_ad_m14]
+ nop.i 999 ;;
}
{ .mfi
-(p0) ldfs FR_Neg_Two_to_M14 = [GR_Table_Base], 0
- nop.f 999
- nop.i 999 ;;
+ ldfe FR_d_2 = [GR_ad_p], 16 // Load d_2 for |x| >= 2^24 path
+ nop.f 999
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// Load values 2**(-14) and -2**(-14)
//
-(p0) fcvt.fx.s1 FR_N_0_fix = FR_N_0
- nop.i 999 ;;
+ fcvt.fx.s1 FR_N_0_fix = FR_N_0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N_0_fix = integer part of N_0
//
-(p0) fcvt.xf FR_N_0 = FR_N_0_fix
- nop.i 999 ;;
+ fcvt.xf FR_N_0 = FR_N_0_fix
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// Make N_0 the integer part
//
-(p0) fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X
- nop.i 999
+ fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_w = FR_N_0, FR_d_1, f0
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_w = FR_N_0, FR_d_1, f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// Arg' = -N_0 * P_0 + Arg
// w = N_0 * d_1
//
-(p0) fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0
- nop.i 999 ;;
+ fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N = A' * 2/pi
+// N = A' * 2/pi
//
-(p0) fcvt.fx.s1 FR_N_fix = FR_N_float
- nop.i 999 ;;
+ fcvt.fx.s1 FR_N_fix = FR_N_float
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N_fix is the integer part
+// N_fix is the integer part
//
-(p0) fcvt.xf FR_N_float = FR_N_fix
- nop.i 999 ;;
+ fcvt.xf FR_N_float = FR_N_fix
+ nop.i 999 ;;
}
{ .mfi
-(p0) getf.sig GR_N_Inc = FR_N_fix
- nop.f 999
- nop.i 999 ;;
+ getf.sig GR_N_Inc = FR_N_fix
+ nop.f 999
+ nop.i 999 ;;
}
{ .mii
- nop.m 999
- nop.i 999 ;;
-(p0) add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;
+ nop.m 999
+ nop.i 999 ;;
+ add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N is the integer part of the reduced-reduced argument.
// Put the integer in a GP register
//
-(p0) fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime
- nop.i 999
+ fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// s = -N*P_1 + Arg'
// w = -N*P_2 + w
// N_fix_gr = N_fix_gr + N_inc
//
-(p0) fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
- nop.i 999 ;;
+ fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p9) fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14
- nop.i 999 ;;
+ nop.m 999
+(p9) fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14 // p9 if |s| < 2^-14
+ nop.i 999 ;;
}
+
{ .mfi
- nop.m 999
+ nop.m 999
//
// For |s| > 2**(-14) r = S + w (r complete)
// Else U_hi = N_0 * d_1
//
(p9) fma.s1 FR_V_hi = FR_N_float, FR_P_2, f0
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p9) fma.s1 FR_U_hi = FR_N_0, FR_d_1, f0
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// Either S <= -2**(-14) or S >= 2**(-14)
// or -2**(-14) < s < 2**(-14)
//
(p8) fma.s1 FR_r = FR_s, f1, FR_w
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p9) fma.s1 FR_w = FR_N_float, FR_P_3, f0
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// We need abs of both U_hi and V_hi - don't
// worry about switched sign of V_hi.
//
(p9) fms.s1 FR_A = FR_U_hi, f1, FR_V_hi
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// Big s: finish up c = (S - r) + w (c complete)
+// Big s: finish up c = (S - r) + w (c complete)
// Case 4: A = U_hi + V_hi
// Note: Worry about switched sign of V_hi, so subtract instead of add.
//
(p9) fnma.s1 FR_V_lo = FR_N_float, FR_P_2, FR_V_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mmf
- nop.m 999
- nop.m 999
+ nop.m 999
+ nop.m 999
(p9) fms.s1 FR_U_lo = FR_N_0, FR_d_1, FR_U_hi
}
{ .mfi
- nop.m 999
+ nop.m 999
(p9) fmerge.s FR_V_hiabs = f0, FR_V_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
+//{ .mfb
+//(p9) fmerge.s f8= FR_V_lo,FR_V_lo
+//(p9) br.ret.sptk b0
+//}
+//;;
{ .mfi
- nop.m 999
+ nop.m 999
// For big s: c = S - r
// For small s do more work: U_lo = N_0 * d_1 - U_hi
//
(p9) fmerge.s FR_U_hiabs = f0, FR_U_hi
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// For big s: Is |r| < 2**(-3)
+// For big s: Is |r| < 2**(-3)
// For big s: if p12 set, prepare to branch to Small_R.
// For big s: If p13 set, prepare to branch to Normal_R.
//
-(p8) fms.s1 FR_c = FR_s, f1, FR_r
- nop.i 999 ;;
+(p8) fms.s1 FR_c = FR_s, f1, FR_r
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// For small S: V_hi = N * P_2
// w = N * P_3
@@ -1451,104 +1482,99 @@ L(SINCOSL_LARGER_ARG):
// so (-) missing for V_hi and w.
//
(p8) fcmp.lt.unc.s1 p12, p13 = FR_r, FR_Two_to_M3
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fcmp.gt.s1 p12, p13 = FR_r, FR_Neg_Two_to_M3
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p8) fma.s1 FR_c = FR_c, f1, FR_w
- nop.i 999
+ nop.i 999
}
{ .mfb
- nop.m 999
+ nop.m 999
(p9) fms.s1 FR_w = FR_N_0, FR_d_2, FR_w
-(p12) br.cond.spnt L(SINCOSL_SMALL_R) ;;
+(p12) br.cond.spnt SINCOSL_SMALL_R // Branch if |r| < 2^-3
+ // and 2^24 <= |x| < 2^63
}
+;;
+
{ .mib
- nop.m 999
- nop.i 999
-(p13) br.cond.sptk L(SINCOSL_NORMAL_R) ;;
+ nop.m 999
+ nop.i 999
+(p13) br.cond.sptk SINCOSL_NORMAL_R // Branch if |r| >= 2^-3
+ // and 2^24 <= |x| < 2^63
}
+;;
+
+SINCOSL_LARGER_S_TINY:
+//
+// Here if |s| < 2^-14, and 2^24 <= |x| < 2^63
+//
{ .mfi
- nop.m 999
-//
-// Big s: Vector off when |r| < 2**(-3). Recall that p8 will be true.
+ nop.m 999
+//
+// Big s: Vector off when |r| < 2**(-3). Recall that p8 will be true.
// The remaining stuff is for Case 4.
// Small s: V_lo = N * P_2 + U_hi (U_hi is in place of V_hi in writeup)
// Note: the (-) is still missing for V_lo.
// Small s: w = w + N_0 * d_2
// Note: the (-) is now incorporated in w.
//
-(p9) fcmp.ge.unc.s1 p10, p11 = FR_U_hiabs, FR_V_hiabs
-(p0) extr.u GR_i_1 = GR_N_Inc, 0, 1
+ fcmp.ge.unc.s1 p7, p8 = FR_U_hiabs, FR_V_hiabs
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// C_hi = S + A
//
-(p9) fma.s1 FR_t = FR_U_lo, f1, FR_V_lo
-(p0) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+ fma.s1 FR_t = FR_U_lo, f1, FR_V_lo
}
+;;
+
{ .mfi
- nop.m 999
+ nop.m 999
//
-// t = U_lo + V_lo
+// t = U_lo + V_lo
//
//
-(p10) fms.s1 FR_a = FR_U_hi, f1, FR_A
- nop.i 999 ;;
+(p7) fms.s1 FR_a = FR_U_hi, f1, FR_A
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p11) fma.s1 FR_a = FR_V_hi, f1, FR_A
- nop.i 999
-}
-;;
-
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
- nop.i 999
+ nop.m 999
+(p8) fma.s1 FR_a = FR_V_hi, f1, FR_A
+ nop.i 999
}
;;
-
{ .mfi
-(p0) add GR_Table_Base = 528, GR_Table_Base
//
// Is U_hiabs >= V_hiabs?
//
-(p9) fma.s1 FR_C_hi = FR_s, f1, FR_A
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_C_hi = FR_s, f1, FR_A
+ nop.i 999 ;;
}
{ .mmi
-(p0) ldfe FR_C_1 = [GR_Table_Base], 16 ;;
-(p0) ldfe FR_C_2 = [GR_Table_Base], 64
- nop.i 999 ;;
+ ldfe FR_C_1 = [GR_ad_c], 16 ;;
+ ldfe FR_C_2 = [GR_ad_c], 64
+ nop.i 999 ;;
}
//
// c = c + C_lo finished.
// Load C_2
//
{ .mfi
-(p0) ldfe FR_S_1 = [GR_Table_Base], 16
+ ldfe FR_S_1 = [GR_ad_s], 16
//
-// C_lo = S - C_hi
+// C_lo = S - C_hi
//
-(p0) fma.s1 FR_t = FR_t, f1, FR_w
- nop.i 999 ;;
+ fma.s1 FR_t = FR_t, f1, FR_w
+ nop.i 999 ;;
}
//
// r and c have been computed.
@@ -1558,855 +1584,695 @@ L(SINCOSL_LARGER_ARG):
// Load S_1
//
{ .mfi
-(p0) ldfe FR_S_2 = [GR_Table_Base], 64
+ ldfe FR_S_2 = [GR_ad_s], 64
//
-// t = t + w
+// t = t + w
//
-(p10) fms.s1 FR_a = FR_a, f1, FR_V_hi
-(p0) cmp.eq.unc p9, p10 = 0x0, GR_i_0 ;;
+(p7) fms.s1 FR_a = FR_a, f1, FR_V_hi
+ tbit.z p9,p10 = GR_N_Inc, 0 // p9 if i_1=0, N mod 4 = 0,2
+ // p10 if i_1=1, N mod 4 = 1,3
}
+;;
{ .mfi
- nop.m 999
+ nop.m 999
//
// For larger u than v: a = U_hi - A
// Else a = V_hi - A (do an add to account for missing (-) on V_hi
//
-(p0) fms.s1 FR_C_lo = FR_s, f1, FR_C_hi
- nop.i 999 ;;
+ fms.s1 FR_C_lo = FR_s, f1, FR_C_hi
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p11) fms.s1 FR_a = FR_U_hi, f1, FR_a
-(p0) cmp.eq.unc p11, p12 = 0x0, GR_i_1 ;;
+ nop.m 999
+(p8) fms.s1 FR_a = FR_U_hi, f1, FR_a
+ tbit.z p11,p12 = GR_N_Inc, 1 // p11 if i_0=0, N mod 4 = 0,1
+ // p12 if i_0=1, N mod 4 = 2,3
}
+;;
+
{ .mfi
- nop.m 999
+ nop.m 999
//
// If u > v: a = (U_hi - A) + V_hi
// Else a = (V_hi - A) + U_hi
// In each case account for negative missing from V_hi.
//
-(p0) fma.s1 FR_C_lo = FR_C_lo, f1, FR_A
- nop.i 999 ;;
+ fma.s1 FR_C_lo = FR_C_lo, f1, FR_A
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// C_lo = (S - C_hi) + A
+// C_lo = (S - C_hi) + A
//
-(p0) fma.s1 FR_t = FR_t, f1, FR_a
- nop.i 999 ;;
+ fma.s1 FR_t = FR_t, f1, FR_a
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// t = t + a
+// t = t + a
//
-(p0) fma.s1 FR_C_lo = FR_C_lo, f1, FR_t
- nop.i 999 ;;
+ fma.s1 FR_C_lo = FR_C_lo, f1, FR_t
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// C_lo = C_lo + t
-// Adjust Table_Base to beginning of table
//
-(p0) fma.s1 FR_r = FR_C_hi, f1, FR_C_lo
- nop.i 999 ;;
+ fma.s1 FR_r = FR_C_hi, f1, FR_C_lo
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// Load S_2
//
-(p0) fma.s1 FR_rsq = FR_r, FR_r, f0
- nop.i 999
+ fma.s1 FR_rsq = FR_r, FR_r, f0
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// Table_Base points to C_1
// r = C_hi + C_lo
//
-(p0) fms.s1 FR_c = FR_C_hi, f1, FR_r
- nop.i 999 ;;
+ fms.s1 FR_c = FR_C_hi, f1, FR_r
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// if i_1 ==0: poly = S_2 * FR_rsq + S_1
// else poly = C_2 * FR_rsq + C_1
//
-(p11) fma.s1 FR_Input_X = f0, f1, FR_r
- nop.i 999 ;;
+(p9) fma.s1 FR_tmp_result = f0, f1, FR_r
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p12) fma.s1 FR_Input_X = f0, f1, f1
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_tmp_result = f0, f1, f1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// Compute r_cube = FR_rsq * r
+// Compute r_cube = FR_rsq * r
//
-(p11) fma.s1 FR_poly = FR_rsq, FR_S_2, FR_S_1
- nop.i 999 ;;
+(p9) fma.s1 FR_poly = FR_rsq, FR_S_2, FR_S_1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p12) fma.s1 FR_poly = FR_rsq, FR_C_2, FR_C_1
- nop.i 999
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_C_2, FR_C_1
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// Compute FR_rsq = r * r
// Is i_1 == 0 ?
//
-(p0) fma.s1 FR_r_cubed = FR_rsq, FR_r, f0
- nop.i 999 ;;
+ fma.s1 FR_r_cubed = FR_rsq, FR_r, f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// c = C_hi - r
// Load C_1
//
-(p0) fma.s1 FR_c = FR_c, f1, FR_C_lo
- nop.i 999
+ fma.s1 FR_c = FR_c, f1, FR_C_lo
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// if i_1 ==0: poly = r_cube * poly + c
// else poly = FR_rsq * poly
//
-(p10) fms.s1 FR_Input_X = f0, f1, FR_Input_X
- nop.i 999 ;;
+(p12) fms.s1 FR_tmp_result = f0, f1, FR_tmp_result
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// if i_1 ==0: Result = r
// else Result = 1.0
//
-(p11) fma.s1 FR_poly = FR_r_cubed, FR_poly, FR_c
- nop.i 999 ;;
+(p9) fma.s1 FR_poly = FR_r_cubed, FR_poly, FR_c
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p12) fma.s1 FR_poly = FR_rsq, FR_poly, f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// if i_0 !=0: Result = -Result
+// if i_0 !=0: Result = -Result
//
-(p9) fma.s0 FR_Input_X = FR_Input_X, f1, FR_poly
- nop.i 999 ;;
+(p11) fma.s0 FR_Result = FR_tmp_result, f1, FR_poly
+ nop.i 999 ;;
}
{ .mfb
- nop.m 999
-(p10) fms.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+ nop.m 999
+(p12) fms.s0 FR_Result = FR_tmp_result, f1, FR_poly
//
// if i_0 == 0: Result = Result + poly
// else Result = Result - poly
//
-(p0) br.ret.sptk b0 ;;
+ br.ret.sptk b0 // Exit for |s| < 2^-14, and 2^24 <= |x| < 2^63
}
-L(SINCOSL_SMALL_R):
-{ .mii
- nop.m 999
-(p0) extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
+;;
+
+
+SINCOSL_SMALL_R:
//
+// Here if |r| < 2^-3
+//
+// Enter with r, c, and N_Inc computed
//
// Compare both i_1 and i_0 with 0.
// if i_1 == 0, set p9.
// if i_0 == 0, set p11.
//
-(p0) cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_rsq = FR_r, FR_r, f0
-(p0) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
-}
+
{ .mfi
- nop.m 999
-//
-// Z = Z * FR_rsq
-//
-(p10) fnma.s1 FR_c = FR_c, FR_r, f0
-(p0) cmp.eq.unc p11, p12 = 0x0, GR_i_0
+ nop.m 999
+ fma.s1 FR_rsq = FR_r, FR_r, f0 // rsq = r * r
+ tbit.z p9,p10 = GR_N_Inc, 0 // p9 if i_1=0, N mod 4 = 0,2
+ // p10 if i_1=1, N mod 4 = 1,3
}
;;
-// ******************************************************************
-// ******************************************************************
-// ******************************************************************
-// r and c have been computed.
-// We know whether this is the sine or cosine routine.
-// Make sure ftz mode is set - should be automatic when using wre
-// |r| < 2**(-3)
-//
-// Set table_ptr1 to beginning of constant table.
-// Get [i_0,i_1] - two lsb of N_fix_gr.
-//
-
{ .mmi
- nop.m 999
-(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
+(p9) ldfe FR_S_5 = [GR_ad_se], -16 // Load S_5 if i_1=0
+(p10) ldfe FR_C_5 = [GR_ad_ce], -16 // Load C_5 if i_1=1
nop.i 999
}
;;
{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
+(p9) ldfe FR_S_4 = [GR_ad_se], -16 // Load S_4 if i_1=0
+(p10) ldfe FR_C_4 = [GR_ad_ce], -16 // Load C_4 if i_1=1
nop.i 999
}
;;
-
-//
-// Set table_ptr1 to point to S_5.
-// Set table_ptr1 to point to C_5.
-// Compute FR_rsq = r * r
-//
-{ .mfi
-(p9) add GR_Table_Base = 672, GR_Table_Base
-(p10) fmerge.s FR_r = f1, f1
-(p10) add GR_Table_Base = 592, GR_Table_Base ;;
+SINCOSL_SMALL_R_0:
+// Entry point for 2^-3 < |x| < pi/4
+.pred.rel "mutex",p9,p10
+SINCOSL_SMALL_R_1:
+// Entry point for pi/4 < |x| < 2^24 and |r| < 2^-3
+.pred.rel "mutex",p9,p10
+{ .mfi
+(p9) ldfe FR_S_3 = [GR_ad_se], -16 // Load S_3 if i_1=0
+ fma.s1 FR_Z = FR_rsq, FR_rsq, f0 // Z = rsq * rsq
+ nop.i 999
}
-//
-// Set table_ptr1 to point to S_5.
-// Set table_ptr1 to point to C_5.
-//
-{ .mmi
-(p9) ldfe FR_S_5 = [GR_Table_Base], -16 ;;
-//
-// if (i_1 == 0) load S_5
-// if (i_1 != 0) load C_5
-//
-(p9) ldfe FR_S_4 = [GR_Table_Base], -16
- nop.i 999 ;;
+{ .mfi
+(p10) ldfe FR_C_3 = [GR_ad_ce], -16 // Load C_3 if i_1=1
+(p10) fnma.s1 FR_c = FR_c, FR_r, f0 // c = -c * r if i_1=1
+ nop.i 999
}
+;;
+
{ .mmf
-(p10) ldfe FR_C_5 = [GR_Table_Base], -16
-//
-// Z = FR_rsq * FR_rsq
-//
-(p9) ldfe FR_S_3 = [GR_Table_Base], -16
-//
-// Compute FR_rsq = r * r
-// if (i_1 == 0) load S_4
-// if (i_1 != 0) load C_4
-//
-(p0) fma.s1 FR_Z = FR_rsq, FR_rsq, f0 ;;
-}
-//
-// if (i_1 == 0) load S_3
-// if (i_1 != 0) load C_3
-//
-{ .mmi
-(p9) ldfe FR_S_2 = [GR_Table_Base], -16 ;;
-//
-// if (i_1 == 0) load S_2
-// if (i_1 != 0) load C_2
-//
-(p9) ldfe FR_S_1 = [GR_Table_Base], -16
- nop.i 999
-}
-{ .mmi
-(p10) ldfe FR_C_4 = [GR_Table_Base], -16 ;;
-(p10) ldfe FR_C_3 = [GR_Table_Base], -16
- nop.i 999 ;;
+(p9) ldfe FR_S_2 = [GR_ad_se], -16 // Load S_2 if i_1=0
+(p10) ldfe FR_C_2 = [GR_ad_ce], -16 // Load C_2 if i_1=1
+(p10) fmerge.s FR_r = f1, f1
}
+;;
+
{ .mmi
-(p10) ldfe FR_C_2 = [GR_Table_Base], -16 ;;
-(p10) ldfe FR_C_1 = [GR_Table_Base], -16
- nop.i 999
-}
-{ .mfi
- nop.m 999
-//
-// if (i_1 != 0):
-// poly_lo = FR_rsq * C_5 + C_4
-// poly_hi = FR_rsq * C_2 + C_1
-//
-(p9) fma.s1 FR_Z = FR_Z, FR_r, f0
- nop.i 999 ;;
+(p9) ldfe FR_S_1 = [GR_ad_se], -16 // Load S_1 if i_1=0
+(p10) ldfe FR_C_1 = [GR_ad_ce], -16 // Load C_1 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1 == 0) load S_1
-// if (i_1 != 0) load C_1
-//
-(p9) fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_Z = FR_Z, FR_r, f0 // Z = Z * r if i_1=0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// c = -c * r
-// dummy fmpy's to flag inexact.
-//
-(p9) fma.s0 FR_S_4 = FR_S_4, FR_S_4, f0
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4 // poly_lo=rsq*S_5+S_4 if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// poly_lo = FR_rsq * poly_lo + C_3
-// poly_hi = FR_rsq * poly_hi
-//
-(p0) fma.s1 FR_Z = FR_Z, FR_rsq, f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4 // poly_lo=rsq*C_5+C_4 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p9) fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1 // poly_hi=rsq*S_2+S_1 if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if (i_1 == 0):
-// poly_lo = FR_rsq * S_5 + S_4
-// poly_hi = FR_rsq * S_2 + S_1
-//
-(p10) fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1 // poly_hi=rsq*C_2+C_1 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1 == 0):
-// Z = Z * r for only one of the small r cases - not there
-// in original implementation notes.
-//
-(p9) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_Z = FR_Z, FR_rsq, f0 // Z = Z * rsq
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3 // p_lo=p_lo*rsq+S_3, i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p10) fma.s0 FR_C_1 = FR_C_1, FR_C_1, f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3 // p_lo=p_lo*rsq+C_3, i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p9) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
- nop.i 999
+ nop.m 999
+(p9) fma.s0 FR_inexact = FR_S_4, FR_S_4, f0 // Dummy op to set inexact
+ tbit.z p11,p12 = GR_N_Inc, 1 // p11 if i_0=0, N mod 4 = 0,1
+ // p12 if i_0=1, N mod 4 = 2,3
}
{ .mfi
- nop.m 999
-//
-// poly_lo = FR_rsq * poly_lo + S_3
-// poly_hi = FR_rsq * poly_hi
-//
-(p10) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s0 FR_inexact = FR_C_1, FR_C_1, f0 // Dummy op to set inexact
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0 // p_hi=p_hi*rsq if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if (i_1 == 0): dummy fmpy's to flag inexact
-// r = 1
-//
-(p9) fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0
- nop.i 999
+ nop.m 999
+(p10) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0 // p_hi=p_hi*rsq if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly_hi = r * poly_hi
-//
-(p0) fma.s1 FR_poly = FR_Z, FR_poly_lo, FR_c
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly = FR_Z, FR_poly_lo, FR_c // poly=Z*poly_lo+c
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p12) fms.s1 FR_r = f0, f1, FR_r
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0 // p_hi=r*p_hi if i_1=0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly_hi = Z * poly_lo + c
-// if i_0 == 1: r = -r
-//
-(p0) fma.s1 FR_poly = FR_poly, f1, FR_poly_hi
- nop.i 999 ;;
+ nop.m 999
+(p12) fms.s1 FR_r = f0, f1, FR_r // r = -r if i_0=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p12) fms.s0 FR_Input_X = FR_r, f1, FR_poly
- nop.i 999
+ nop.m 999
+ fma.s1 FR_poly = FR_poly, f1, FR_poly_hi // poly=poly+poly_hi
+ nop.i 999
}
-{ .mfb
- nop.m 999
-//
-// poly = poly + poly_hi
-//
-(p11) fma.s0 FR_Input_X = FR_r, f1, FR_poly
+;;
+
//
// if (i_0 == 0) Result = r + poly
// if (i_0 != 0) Result = r - poly
//
-(p0) br.ret.sptk b0 ;;
-}
-L(SINCOSL_NORMAL_R):
-{ .mii
- nop.m 999
-(p0) extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
-//
-// Set table_ptr1 and table_ptr2 to base address of
-// constant table.
-(p0) cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
-}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_rsq = FR_r, FR_r, f0
-(p0) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+ nop.m 999
+(p11) fma.s0 FR_Result = FR_r, f1, FR_poly
+ nop.i 999
}
-{ .mfi
- nop.m 999
-(p0) frcpa.s1 FR_r_hi, p6 = f1, FR_r
-(p0) cmp.eq.unc p11, p12 = 0x0, GR_i_0
+{ .mfb
+ nop.m 999
+(p12) fms.s0 FR_Result = FR_r, f1, FR_poly
+ br.ret.sptk b0 // Exit for |r| < 2^-3
}
;;
-// ******************************************************************
-// ******************************************************************
-// ******************************************************************
+
+SINCOSL_NORMAL_R:
//
-// r and c have been computed.
-// We known whether this is the sine or cosine routine.
-// Make sure ftz mode is set - should be automatic when using wre
-// Get [i_0,i_1] - two lsb of N_fix_gr alone.
+// Here if 2^-3 <= |r| < pi/4
+// THIS IS THE MAIN PATH
//
-
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
+// Enter with r, c, and N_Inc having been computed
+//
+{ .mfi
+ ldfe FR_PP_6 = [GR_ad_pp], 16 // Load PP_6
+ fma.s1 FR_rsq = FR_r, FR_r, f0 // rsq = r * r
+ tbit.z p9,p10 = GR_N_Inc, 0 // p9 if i_1=0, N mod 4 = 0,2
+ // p10 if i_1=1, N mod 4 = 1,3
+}
+{ .mfi
+ ldfe FR_QQ_6 = [GR_ad_qq], 16 // Load QQ_6
+ nop.f 999
nop.i 999
}
;;
{ .mmi
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.m 999
+(p9) ldfe FR_PP_5 = [GR_ad_pp], 16 // Load PP_5 if i_1=0
+(p10) ldfe FR_QQ_5 = [GR_ad_qq], 16 // Load QQ_5 if i_1=1
nop.i 999
}
;;
+SINCOSL_NORMAL_R_0:
+// Entry for 2^-3 < |x| < pi/4
+.pred.rel "mutex",p9,p10
+{ .mmf
+(p9) ldfe FR_C_1 = [GR_ad_pp], 16 // Load C_1 if i_1=0
+(p10) ldfe FR_S_1 = [GR_ad_qq], 16 // Load S_1 if i_1=1
+ frcpa.s1 FR_r_hi, p6 = f1, FR_r // r_hi = frcpa(r)
+}
+;;
{ .mfi
-(p10) add GR_Table_Base = 384, GR_Table_Base
-(p12) fms.s1 FR_Input_X = f0, f1, f1
-(p9) add GR_Table_Base = 224, GR_Table_Base ;;
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 // poly = rsq*PP_8+PP_7 if i_1=0
+ nop.i 999
}
{ .mfi
-(p10) ldfe FR_QQ_8 = [GR_Table_Base], 16
-//
-// if (i_1==0) poly = poly * FR_rsq + PP_1_lo
-// else poly = FR_rsq * poly
-//
-(p11) fma.s1 FR_Input_X = f0, f1, f1
- nop.i 999 ;;
-}
-{ .mmb
-(p10) ldfe FR_QQ_7 = [GR_Table_Base], 16
-//
-// Adjust table pointers based on i_0
-// Compute rsq = r * r
-//
-(p9) ldfe FR_PP_8 = [GR_Table_Base], 16
- nop.b 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 // poly = rsq*QQ_8+QQ_7 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_r_cubed = FR_r, FR_rsq, f0
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 // rcubed = r * rsq
+ nop.i 999
}
+;;
+
+
+SINCOSL_NORMAL_R_1:
+// Entry for pi/4 <= |x| < 2^24
+.pred.rel "mutex",p9,p10
{ .mmf
-(p9) ldfe FR_PP_7 = [GR_Table_Base], 16
-(p10) ldfe FR_QQ_6 = [GR_Table_Base], 16
-//
-// Load PP_8 and QQ_8; PP_7 and QQ_7
-//
-(p0) frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi ;;
-}
-//
-// if (i_1==0) poly = PP_7 + FR_rsq * PP_8.
-// else poly = QQ_7 + FR_rsq * QQ_8.
-//
-{ .mmb
-(p9) ldfe FR_PP_6 = [GR_Table_Base], 16
-(p10) ldfe FR_QQ_5 = [GR_Table_Base], 16
- nop.b 999 ;;
-}
-{ .mmb
-(p9) ldfe FR_PP_5 = [GR_Table_Base], 16
-(p10) ldfe FR_S_1 = [GR_Table_Base], 16
- nop.b 999 ;;
-}
-{ .mmb
-(p10) ldfe FR_QQ_1 = [GR_Table_Base], 16
-(p9) ldfe FR_C_1 = [GR_Table_Base], 16
- nop.b 999 ;;
-}
-{ .mmb
-(p10) ldfe FR_QQ_4 = [GR_Table_Base], 16
-(p9) ldfe FR_PP_1 = [GR_Table_Base], 16
- nop.b 999 ;;
-}
-{ .mmb
-(p10) ldfe FR_QQ_3 = [GR_Table_Base], 16
-//
-// if (i_1=0) corr = corr + c*c
-// else corr = corr * c
-//
-(p9) ldfe FR_PP_4 = [GR_Table_Base], 16
- nop.b 999 ;;
-}
-{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7
- nop.i 999 ;;
-}
-//
-// if (i_1=0) poly = rsq * poly + PP_5
-// else poly = rsq * poly + QQ_5
-// Load PP_4 or QQ_4
-//
-{ .mmi
-(p9) ldfe FR_PP_3 = [GR_Table_Base], 16 ;;
-(p10) ldfe FR_QQ_2 = [GR_Table_Base], 16
- nop.i 999
+(p9) ldfe FR_PP_1 = [GR_ad_pp], 16 // Load PP_1_hi if i_1=0
+(p10) ldfe FR_QQ_1 = [GR_ad_qq], 16 // Load QQ_1 if i_1=1
+ frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi // r_hi = frcpa(frcpa(r))
}
+;;
+
{ .mfi
- nop.m 999
-//
-// r_hi = frcpa(frcpa(r)).
-// r_cube = r * FR_rsq.
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7
- nop.i 999 ;;
+(p9) ldfe FR_PP_4 = [GR_ad_pp], 16 // Load PP_4 if i_1=0
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6 // poly = rsq*poly+PP_6 if i_1=0
+ nop.i 999
}
-//
-// Do dummy multiplies so inexact is always set.
-//
{ .mfi
-(p9) ldfe FR_PP_2 = [GR_Table_Base], 16
-//
-// r_lo = r - r_hi
-//
-(p9) fma.s1 FR_U_lo = FR_r_hi, FR_r_hi, f0
- nop.i 999 ;;
-}
-{ .mbb
-(p9) ldfe FR_PP_1_lo = [GR_Table_Base], 16
- nop.b 999
- nop.b 999 ;;
+(p10) ldfe FR_QQ_4 = [GR_ad_qq], 16 // Load QQ_4 if i_1=1
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6 // poly = rsq*poly+QQ_6 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_corr = FR_C_1, FR_rsq, f0 // corr = C_1 * rsq if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r // corr = S_1 * r^3 + r if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1=0) U_lo = r_hi * r_hi
-// else U_lo = r_hi + r
-//
-(p9) fma.s1 FR_corr = FR_C_1, FR_rsq, f0
- nop.i 999 ;;
+(p9) ldfe FR_PP_3 = [GR_ad_pp], 16 // Load PP_3 if i_1=0
+ fma.s1 FR_r_hi_sq = FR_r_hi, FR_r_hi, f0 // r_hi_sq = r_hi * r_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if (i_1=0) corr = C_1 * rsq
-// else corr = S_1 * r_cubed + r
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6
- nop.i 999 ;;
+(p10) ldfe FR_QQ_3 = [GR_ad_qq], 16 // Load QQ_3 if i_1=1
+ fms.s1 FR_r_lo = FR_r, f1, FR_r_hi // r_lo = r - r_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_U_lo = FR_r_hi, f1, FR_r
- nop.i 999
+(p9) ldfe FR_PP_2 = [GR_ad_pp], 16 // Load PP_2 if i_1=0
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5 // poly = rsq*poly+PP_5 if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if (i_1=0) U_hi = r_hi + U_hi
-// else U_hi = QQ_1 * U_hi + 1
-//
-(p9) fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_U_lo
- nop.i 999 ;;
+(p10) ldfe FR_QQ_2 = [GR_ad_qq], 16 // Load QQ_2 if i_1=1
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5 // poly = rsq*poly+QQ_5 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// U_hi = r_hi * r_hi
-//
-(p0) fms.s1 FR_r_lo = FR_r, f1, FR_r_hi
- nop.i 999
+(p9) ldfe FR_PP_1_lo = [GR_ad_pp], 16 // Load PP_1_lo if i_1=0
+(p9) fma.s1 FR_corr = FR_corr, FR_c, FR_c // corr = corr * c + c if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Load PP_1, PP_6, PP_5, and C_1
-// Load QQ_1, QQ_6, QQ_5, and S_1
-//
-(p0) fma.s1 FR_U_hi = FR_r_hi, FR_r_hi, f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fnma.s1 FR_corr = FR_corr, FR_c, f0 // corr = -corr * c if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_r_hi_sq // U_lo = r*r_hi+r_hi_sq, i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p10) fnma.s1 FR_corr = FR_corr, FR_c, f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_r_hi, f1, FR_r // U_lo = r_hi + r if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1=0) U_lo = r * r_hi + U_lo
-// else U_lo = r_lo * U_lo
-//
-(p9) fma.s1 FR_corr = FR_corr, FR_c, FR_c
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_U_hi = FR_r_hi, FR_r_hi_sq, f0 // U_hi = r_hi*r_hi_sq if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5
- nop.i 999
+ nop.m 999
+(p10) fma.s1 FR_U_hi = FR_QQ_1, FR_r_hi_sq, f1 // U_hi = QQ_1*r_hi_sq+1, i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1 =0) U_hi = r + U_hi
-// if (i_1 =0) U_lo = r_lo * U_lo
-//
-//
-(p9) fma.s0 FR_PP_5 = FR_PP_5, FR_PP_4, f0
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4 // poly = poly*rsq+PP_4 if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p9) fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4 // poly = poly*rsq+QQ_4 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo // U_lo = r * r + U_lo if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if (i_1=0) poly = poly * rsq + PP_6
-// else poly = poly * rsq + QQ_6
-//
-(p9) fma.s1 FR_U_hi = FR_r_hi, FR_U_hi, f0
- nop.i 999
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0 // U_lo = r_lo * U_lo if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0 // U_hi = PP_1 * U_hi if i_1=0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_U_hi = FR_QQ_1, FR_U_hi, f1
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3 // poly = poly*rsq+PP_3 if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p10) fma.s0 FR_QQ_5 = FR_QQ_5, FR_QQ_5, f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3 // poly = poly*rsq+QQ_3 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1!=0) U_hi = PP_1 * U_hi
-// if (i_1!=0) U_lo = r * r + U_lo
-// Load PP_3 or QQ_3
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0 // U_lo = r_lo * U_lo if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p9) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0 // U_lo = QQ_1 * U_lo if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_U_hi = FR_r, f1, FR_U_hi // U_hi = r + U_hi if i_1=0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p9) fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2 // poly = poly*rsq+PP_2 if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2 // poly = poly*rsq+QQ_2 if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Load PP_2, QQ_2
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0 // U_lo = PP_1 * U_lo if i_1=0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1==0) poly = FR_rsq * poly + PP_3
-// else poly = FR_rsq * poly + QQ_3
-// Load PP_1_lo
-//
-(p9) fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo // poly =poly*rsq+PP1lo i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if (i_1 =0) poly = poly * rsq + pp_r4
-// else poly = poly * rsq + qq_r4
-//
-(p9) fma.s1 FR_U_hi = FR_r, f1, FR_U_hi
- nop.i 999
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0 // poly = poly*rsq if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_V = FR_U_lo, f1, FR_corr // V = U_lo + corr
+ tbit.z p11,p12 = GR_N_Inc, 1 // p11 if i_0=0, N mod 4 = 0,1
+ // p12 if i_0=1, N mod 4 = 2,3
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1==0) U_lo = PP_1_hi * U_lo
-// else U_lo = QQ_1 * U_lo
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s0 FR_inexact = FR_PP_5, FR_PP_4, f0 // Dummy op to set inexact
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if (i_0==0) Result = 1
-// else Result = -1
-//
-(p0) fma.s1 FR_V = FR_U_lo, f1, FR_corr
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s0 FR_inexact = FR_QQ_5, FR_QQ_5, f0 // Dummy op to set inexact
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_r_cubed, FR_poly, f0 // poly = poly*r^3 if i_1=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// if (i_1==0) poly = FR_rsq * poly + PP_2
-// else poly = FR_rsq * poly + QQ_2
-//
-(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo
- nop.i 999 ;;
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0 // poly = poly*rsq if i_1=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
- nop.i 999 ;;
+ nop.m 999
+(p11) fma.s1 FR_tmp_result = f0, f1, f1// tmp_result=+1.0 if i_0=0
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// V = U_lo + corr
-//
-(p9) fma.s1 FR_poly = FR_r_cubed, FR_poly, f0
- nop.i 999 ;;
+ nop.m 999
+(p12) fms.s1 FR_tmp_result = f0, f1, f1// tmp_result=-1.0 if i_0=1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// if (i_1==0) poly = r_cube * poly
-// else poly = FR_rsq * poly
-//
-(p0) fma.s1 FR_V = FR_poly, f1, FR_V
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_V = FR_poly, f1, FR_V // V = poly + V
+ nop.i 999
}
+;;
+
+// If i_0 = 0 Result = U_hi + V
+// If i_0 = 1 Result = -U_hi - V
{ .mfi
- nop.m 999
-(p12) fms.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
- nop.i 999
+ nop.m 999
+(p11) fma.s0 FR_Result = FR_tmp_result, FR_U_hi, FR_V
+ nop.i 999
}
{ .mfb
- nop.m 999
-//
-// V = V + poly
-//
-(p11) fma.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
-//
-// if (i_0==0) Result = Result * U_hi + V
-// else Result = Result * U_hi - V
-//
-(p0) br.ret.sptk b0
-};;
-
-//
-// If cosine, FR_Input_X = 1
-// If sine, FR_Input_X = +/-Zero (Input FR_Input_X)
-// Results are exact, no exceptions
-//
+ nop.m 999
+(p12) fms.s0 FR_Result = FR_tmp_result, FR_U_hi, FR_V
+ br.ret.sptk b0 // Exit for 2^-3 <= |r| < pi/4
+}
+;;
-L(SINCOSL_ZERO):
-{ .mbb
-(p0) cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos
- nop.b 999
- nop.b 999 ;;
+SINCOSL_ZERO:
+// Here if x = 0
+{ .mfi
+ cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos
+ nop.f 999
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p7) fmerge.s FR_Input_X = FR_Input_X, FR_Input_X
- nop.i 999
+ nop.m 999
+(p7) fmerge.s FR_Result = FR_Input_X, FR_Input_X // If sin, result = input
+ nop.i 999
}
{ .mfb
- nop.m 999
-(p6) fmerge.s FR_Input_X = f1, f1
-(p0) br.ret.sptk b0 ;;
+ nop.m 999
+(p6) fma.s0 FR_Result = f1, f1, f0 // If cos, result=1.0
+ br.ret.sptk b0 // Exit for x=0
}
-L(SINCOSL_SPECIAL):
+;;
+
+
+SINCOSL_DENORMAL:
+{ .mmb
+ getf.exp GR_signexp_x = FR_norm_x // Get sign and exponent of x
+ nop.m 999
+ br.cond.sptk SINCOSL_COMMON // Return to common code
+}
+;;
+
+SINCOSL_SPECIAL:
{ .mfb
nop.m 999
//
@@ -2414,106 +2280,82 @@ L(SINCOSL_SPECIAL):
// Invalid can be raised. SNaNs
// become QNaNs
//
-(p0) fmpy.s0 FR_Input_X = FR_Input_X, f0
-(p0) br.ret.sptk b0 ;;
+ fmpy.s0 FR_Result = FR_Input_X, f0
+ br.ret.sptk b0 ;;
}
-.endp cosl#
-ASM_SIZE_DIRECTIVE(cosl#)
-// Call int pi_by_2_reduce(double* x, double *y)
-// for |arguments| >= 2**63
-// Address to save r and c as double
-//
-// sp+32 -> f0
-// r45 sp+16 -> f0
-// r44 -> sp -> InputX
-//
+GLOBAL_IEEE754_END(cosl)
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
+//
+// Special Code to handle very large argument case.
+// Call int __libm_pi_by_2_reduce(x,r,c) for |arguments| >= 2**63
+// The interface is custom:
+// On input:
+// (Arg or x) is in f8
+// On output:
+// r is in f8
+// c is in f9
+// N is in r8
+// Be sure to allocate at least 2 GP registers as output registers for
+// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as
+// scratch registers within the __libm_pi_by_2_reduce routine (for speed).
+//
+// We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We
+// use this to eliminate save/restore of key fp registers in this calling
+// function.
+//
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
-.proc __libm_callout
-__libm_callout:
-L(SINCOSL_ARG_TOO_LARGE):
+LOCAL_LIBM_ENTRY(__libm_callout)
+SINCOSL_ARG_TOO_LARGE:
.prologue
{ .mfi
- add r45=-32,sp // Parameter: r address
nop.f 0
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
-}
-{ .mfi
-.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
};;
+
{ .mmi
- stfe [r45] = f0,16 // Clear Parameter r on stack
- add r44 = 16,sp // Parameter x address
+ setf.exp FR_Two_to_M3 = GR_exp_2_to_m3 // Form 2^-3
+ mov GR_SAVE_GP=gp // Save gp
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
+//
+// Call argument reduction with x in f8
+// Returns with N in r8, r in f8, c in f9
+// Assumes f71-127 are preserved across the call
+//
{ .mib
- stfe [r45] = f0,-16 // Clear Parameter c on stack
- nop.i 0
- nop.b 0
-}
-{ .mib
- stfe [r44] = FR_Input_X // Store Parameter x on stack
+ setf.exp FR_Neg_Two_to_M3 = GR_exp_m2_to_m3 // Form -(2^-3)
nop.i 0
-(p0) br.call.sptk b0=__libm_pi_by_2_reduce# ;;
+ br.call.sptk b0=__libm_pi_by_2_reduce#
};;
-{ .mii
-(p0) ldfe FR_Input_X =[r44],16
-//
-// Get r and c off stack
-//
-(p0) adds GR_Table_Base1 = -16, GR_Table_Base1
-//
-// Get r and c off stack
-//
-(p0) add GR_N_Inc = GR_Sin_or_Cos,r8 ;;
-}
-{ .mmb
-(p0) ldfe FR_r =[r45],16
-//
-// Get X off the stack
-// Readjust Table ptr
-//
-(p0) ldfs FR_Two_to_M3 = [GR_Table_Base1],4
- nop.b 999 ;;
-}
-{ .mmb
-(p0) ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1],0
-(p0) ldfe FR_c =[r45]
- nop.b 999 ;;
-}
+
{ .mfi
-.restore sp
- add sp = 64,sp // Restore stack pointer
-(p0) fcmp.lt.unc.s1 p6, p0 = FR_r, FR_Two_to_M3
+ add GR_N_Inc = GR_Sin_or_Cos,r8
+ fcmp.lt.unc.s1 p6, p0 = FR_r, FR_Two_to_M3
mov b0 = GR_SAVE_B0 // Restore return address
};;
-{ .mib
+
+{ .mfi
mov gp = GR_SAVE_GP // Restore gp
+(p6) fcmp.gt.unc.s1 p6, p0 = FR_r, FR_Neg_Two_to_M3
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- nop.b 0
};;
-{ .mfi
- nop.m 999
-(p6) fcmp.gt.unc.s1 p6, p0 = FR_r, FR_Neg_Two_to_M3
- nop.i 999 ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(SINCOSL_SMALL_R) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p0) br.cond.sptk L(SINCOSL_NORMAL_R) ;;
-}
-.endp __libm_callout
-ASM_SIZE_DIRECTIVE(__libm_callout)
+
+{ .mbb
+ nop.m 999
+(p6) br.cond.spnt SINCOSL_SMALL_R // Branch if |r|< 2^-3 for |x| >= 2^63
+ br.cond.sptk SINCOSL_NORMAL_R // Branch if |r|>=2^-3 for |x| >= 2^63
+};;
+
+.endp
.type __libm_pi_by_2_reduce#,@function
.global __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/s_expm1.S b/sysdeps/ia64/fpu/s_expm1.S
index 19a237990c..41b9954ee8 100644
--- a/sysdeps/ia64/fpu/s_expm1.S
+++ b/sysdeps/ia64/fpu/s_expm1.S
@@ -1,10 +1,10 @@
.file "exp_m1.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1694 +20,819 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
-// HISTORY
-// 2/02/00 Initial Version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial Version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 07/07/01 Improved speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/20/02 Improved speed, algorithm based on exp
+
+// API
+//==============================================================
+// double expm1(double)
+
+// Overview of operation
+//==============================================================
+// 1. Inputs of Nan, Inf, Zero, NatVal handled with special paths
+//
+// 2. |x| < 2^-60
+// Result = x, computed by x + x*x to handle appropriate flags and rounding
//
-// *********************************************************************
-//
-// Function: Combined exp(x) and expm1(x), where
-// x
-// exp(x) = e , for double precision x values
-// x
-// expm1(x) = e - 1 for double precision x values
-//
-// *********************************************************************
-//
-// Accuracy: Within .7 ulps for 80-bit floating point values
-// Very accurate for double precision values
-//
-// *********************************************************************
-//
-// Resources Used:
-//
-// Floating-Point Registers: f8 (Input and Return Value)
-// f9,f32-f61, f99-f102
-//
-// General Purpose Registers:
-// r32-r61
-// r62-r65 (Used to pass arguments to error handling routine)
-//
-// Predicate Registers: p6-p15
-//
-// *********************************************************************
-//
-// IEEE Special Conditions:
-//
-// Denormal fault raised on denormal inputs
-// Overflow exceptions raised when appropriate for exp and expm1
-// Underflow exceptions raised when appropriate for exp and expm1
-// (Error Handling Routine called for overflow and Underflow)
-// Inexact raised when appropriate by algorithm
-//
-// exp(inf) = inf
-// exp(-inf) = +0
-// exp(SNaN) = QNaN
-// exp(QNaN) = QNaN
-// exp(0) = 1
-// exp(EM_special Values) = QNaN
-// exp(inf) = inf
-// expm1(-inf) = -1
-// expm1(SNaN) = QNaN
-// expm1(QNaN) = QNaN
-// expm1(0) = 0
-// expm1(EM_special Values) = QNaN
-//
-// *********************************************************************
-//
-// Implementation and Algorithm Notes:
-//
-// ker_exp_64( in_FR : X,
-// in_GR : Flag,
-// in_GR : Expo_Range
-// out_FR : Y_hi,
-// out_FR : Y_lo,
-// out_FR : scale,
-// out_PR : Safe )
-//
-// On input, X is in register format and
-// Flag = 0 for exp,
-// Flag = 1 for expm1,
-//
-// On output, provided X and X_cor are real numbers, then
-//
-// scale*(Y_hi + Y_lo) approximates exp(X) if Flag is 0
-// scale*(Y_hi + Y_lo) approximates exp(X)-1 if Flag is 1
-//
-// The accuracy is sufficient for a highly accurate 64 sig.
-// bit implementation. Safe is set if there is no danger of
-// overflow/underflow when the result is composed from scale,
-// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set.
-// Otherwise, one must prepare to handle the possible exception
-// appropriately. Note that SAFE not set (false) does not mean
-// that overflow/underflow will occur; only the setting of SAFE
-// guarantees the opposite.
-//
-// **** High Level Overview ****
-//
-// The method consists of three cases.
-//
-// If |X| < Tiny use case exp_tiny;
-// else if |X| < 2^(-6) use case exp_small;
-// else use case exp_regular;
-//
-// Case exp_tiny:
-//
-// 1 + X can be used to approximate exp(X) or exp(X+X_cor);
-// X + X^2/2 can be used to approximate exp(X) - 1
-//
-// Case exp_small:
-//
-// Here, exp(X), exp(X+X_cor), and exp(X) - 1 can all be
-// appproximated by a relatively simple polynomial.
-//
-// This polynomial resembles the truncated Taylor series
-//
-// exp(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
-//
-// Case exp_regular:
-//
-// Here we use a table lookup method. The basic idea is that in
-// order to compute exp(X), we accurately decompose X into
-//
-// X = N * log(2)/(2^12) + r, |r| <= log(2)/2^13.
-//
-// Hence
-//
-// exp(X) = 2^( N / 2^12 ) * exp(r).
-//
-// The value 2^( N / 2^12 ) is obtained by simple combinations
-// of values calculated beforehand and stored in table; exp(r)
-// is approximated by a short polynomial because |r| is small.
-//
-// We elaborate this method in 4 steps.
-//
-// Step 1: Reduction
-//
-// The value 2^12/log(2) is stored as a double-extended number
-// L_Inv.
-//
-// N := round_to_nearest_integer( X * L_Inv )
-//
-// The value log(2)/2^12 is stored as two numbers L_hi and L_lo so
-// that r can be computed accurately via
-//
-// r := (X - N*L_hi) - N*L_lo
-//
-// We pick L_hi such that N*L_hi is representable in 64 sig. bits
-// and thus the FMA X - N*L_hi is error free. So r is the
-// 1 rounding error from an exact reduction with respect to
-//
-// L_hi + L_lo.
-//
-// In particular, L_hi has 30 significant bit and can be stored
-// as a double-precision number; L_lo has 64 significant bits and
-// stored as a double-extended number.
-//
-// In the case Flag = 2, we further modify r by
-//
-// r := r + X_cor.
-//
-// Step 2: Approximation
-//
-// exp(r) - 1 is approximated by a short polynomial of the form
-//
-// r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
-//
-// Step 3: Composition from Table Values
-//
-// The value 2^( N / 2^12 ) can be composed from a couple of tables
-// of precalculated values. First, express N as three integers
-// K, M_1, and M_2 as
-//
-// N = K * 2^12 + M_1 * 2^6 + M_2
-//
-// Where 0 <= M_1, M_2 < 2^6; and K can be positive or negative.
-// When N is represented in 2's complement, M_2 is simply the 6
-// lsb's, M_1 is the next 6, and K is simply N shifted right
-// arithmetically (sign extended) by 12 bits.
-//
-// Now, 2^( N / 2^12 ) is simply
-//
-// 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 )
-//
-// Clearly, 2^K needs no tabulation. The other two values are less
-// trivial because if we store each accurately to more than working
-// precision, than its product is too expensive to calculate. We
-// use the following method.
-//
-// Define two mathematical values, delta_1 and delta_2, implicitly
-// such that
-//
-// T_1 = exp( [M_1 log(2)/2^6] - delta_1 )
-// T_2 = exp( [M_2 log(2)/2^12] - delta_2 )
-//
-// are representable as 24 significant bits. To illustrate the idea,
-// we show how we define delta_1:
-//
-// T_1 := round_to_24_bits( exp( M_1 log(2)/2^6 ) )
-// delta_1 = (M_1 log(2)/2^6) - log( T_1 )
-//
-// The last equality means mathematical equality. We then tabulate
-//
-// W_1 := exp(delta_1) - 1
-// W_2 := exp(delta_2) - 1
-//
-// Both in double precision.
-//
-// From the tabulated values T_1, T_2, W_1, W_2, we compose the values
-// T and W via
+// 3. 2^-60 <= |x| < 2^-2
+// Result determined by 13th order Taylor series polynomial
+// expm1(x) = x + Q2*x^2 + ... + Q13*x^13
//
-// T := T_1 * T_2 ...exactly
-// W := W_1 + (1 + W_1)*W_2
+// 4. x < -48.0
+// Here we know result is essentially -1 + eps, where eps only affects
+// rounded result. Set I.
//
-// W approximates exp( delta ) - 1 where delta = delta_1 + delta_2.
-// The mathematical product of T and (W+1) is an accurate representation
-// of 2^(M_1/2^6) * 2^(M_2/2^12).
+// 5. x >= 709.7827
+// Result overflows. Set I, O, and call error support
//
-// Step 4. Reconstruction
-//
-// Finally, we can reconstruct exp(X), exp(X) - 1.
-// Because
-//
-// X = K * log(2) + (M_1*log(2)/2^6 - delta_1)
-// + (M_2*log(2)/2^12 - delta_2)
-// + delta_1 + delta_2 + r ...accurately
-// We have
-//
-// exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] )
-// ~=~ 2^K * ( T + T*[exp(delta + r) - 1] )
-// ~=~ 2^K * ( T + T*[(exp(delta)-1)
-// + exp(delta)*(exp(r)-1)] )
-// ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
-// ~=~ 2^K * ( Y_hi + Y_lo )
-//
-// where Y_hi = T and Y_lo = T*(W + (1+W)*poly(r))
-//
-// For exp(X)-1, we have
-//
-// exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
-// ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
-//
-// and we combine Y_hi + Y_lo - 2^(-N) into the form of two
-// numbers Y_hi + Y_lo carefully.
-//
-// **** Algorithm Details ****
-//
-// A careful algorithm must be used to realize the mathematical ideas
-// accurately. We describe each of the three cases. We assume SAFE
-// is preset to be TRUE.
-//
-// Case exp_tiny:
-//
-// The important points are to ensure an accurate result under
-// different rounding directions and a correct setting of the SAFE
-// flag.
-//
-// If Flag is 1, then
-// SAFE := False ...possibility of underflow
-// Scale := 1.0
-// Y_hi := X
-// Y_lo := 2^(-17000)
-// Else
-// Scale := 1.0
-// Y_hi := 1.0
-// Y_lo := X ...for different rounding modes
-// Endif
-//
-// Case exp_small:
-//
-// Here we compute a simple polynomial. To exploit parallelism, we split
-// the polynomial into several portions.
-//
-// Let r = X
-//
-// If Flag is not 1 ...i.e. exp( argument )
-//
-// rsq := r * r;
-// r4 := rsq*rsq
-// poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
-// poly_hi := r + rsq*(P_1 + r*P_2)
-// Y_lo := poly_hi + r4 * poly_lo
-// set lsb(Y_lo) to 1
-// Y_hi := 1.0
-// Scale := 1.0
-//
-// Else ...i.e. exp( argument ) - 1
-//
-// rsq := r * r
-// r4 := rsq * rsq
-// r6 := rsq * r4
-// poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
-// poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
-// Y_lo := rsq*poly_hi + poly_lo
-// set lsb(Y_lo) to 1
-// Y_hi := X
-// Scale := 1.0
-//
-// Endif
-//
-// Case exp_regular:
-//
-// The previous description contain enough information except the
-// computation of poly and the final Y_hi and Y_lo in the case for
-// exp(X)-1.
-//
-// The computation of poly for Step 2:
-//
-// rsq := r*r
-// poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
-//
-// For the case exp(X) - 1, we need to incorporate 2^(-K) into
-// Y_hi and Y_lo at the end of Step 4.
-//
-// If K > 10 then
-// Y_lo := Y_lo - 2^(-K)
-// Else
-// If K < -10 then
-// Y_lo := Y_hi + Y_lo
-// Y_hi := -2^(-K)
-// Else
-// Y_hi := Y_hi - 2^(-K)
-// End If
-// End If
-//
-
-#include "libm_support.h"
-
-GR_SAVE_PFS = r59
-GR_SAVE_B0 = r60
-GR_SAVE_GP = r61
-
-GR_Parameter_X = r62
-GR_Parameter_Y = r63
-GR_Parameter_RESULT = r64
-
-FR_X = f9
-FR_Y = f1
-FR_RESULT = f99
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
-.align 64
-Constants_exp_64_Arg:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
-data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000
-data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
-data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
-// /* Inv_L, L_hi, L_lo */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
-
-.align 64
-Constants_exp_64_Exponents:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
-data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
-data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
-data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
-
-.align 64
-Constants_exp_64_A:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
-data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
-data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
-data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
-
-.align 64
-Constants_exp_64_P:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
-data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
-data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
-data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
-data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
-data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
-data4 0x000004C7,0x80000000,0x00003FFE,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
-
-.align 64
-Constants_exp_64_Q:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
-data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000
-data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000
-data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000
-data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000
-data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000
-data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000
-data4 0x00000000,0x80000000,0x00003FFE,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
-
-.align 64
-Constants_exp_64_T1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
-data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
-data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
-data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
-data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
-data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
-data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
-data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
-data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
-data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
-data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
-data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
-data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
-data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
-data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
-data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
-data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
-
-.align 64
-Constants_exp_64_T2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
-data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
-data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
-data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
-data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
-data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
-data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
-data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
-data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
-data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
-data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
-data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
-data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
-data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
-data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
-data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
-data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
-
-.align 64
-Constants_exp_64_W1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
-data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
-data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
-data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
-data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
-data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
-data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
-data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
-data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
-data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
-data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
-data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
-data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
-data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
-data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
-data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
-data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
-data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
-data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
-data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
-data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
-data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
-data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
-data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
-data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
-data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
-data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
-data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
-data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
-data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
-data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
-data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
-data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
-
-.align 64
-Constants_exp_64_W2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
-data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
-data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
-data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
-data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
-data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
-data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
-data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
-data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
-data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
-data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
-data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
-data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
-data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
-data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
-data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
-data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
-data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
-data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
-data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
-data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
-data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
-data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
-data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
-data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
-data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
-data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
-data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
-data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
-data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
-data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
-data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
-data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
+// 6. 2^-2 <= x < 709.7827 or -48.0 <= x < -2^-2
+// This is the main path. The algorithm is described below:
-.section .text
-.proc expm1#
-.global expm1#
-.align 64
-
-expm1:
-#ifdef _LIBC
-.global __expm1#
-__expm1:
-#endif
-
-
-{ .mii
- alloc r32 = ar.pfs,0,30,4,0
-(p0) add r33 = 1, r0
-(p0) cmp.eq.unc p7, p0 = r0, r0
-}
-;;
-
-
-//
-// Set p7 true for expm1
-// Set Flag = r33 = 1 for expm1
-// These are really no longer necesary, but are a remnant
-// when this file had multiple entry points.
-// They should be carefully removed
+// Take the input x. w is "how many log2/128 in x?"
+// w = x * 128/log2
+// n = int(w)
+// x = n log2/128 + r + delta
+
+// n = 128M + index_1 + 2^4 index_2
+// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
+
+// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
+// Construct 2^M
+// Get 2^(index_1/128) from table_1;
+// Get 2^(index_2/8) from table_2;
+// Calculate exp(r) by a 5th order polynomial
+// r = x - n (log2/128)_high
+// delta = - n (log2/128)_low
+// Calculate exp(delta) as 1 + delta
+
+
+// Special values
+//==============================================================
+// expm1(+0) = +0.0
+// expm1(-0) = -0.0
+
+// expm1(+qnan) = +qnan
+// expm1(-qnan) = -qnan
+// expm1(+snan) = +qnan
+// expm1(-snan) = -qnan
+
+// expm1(-inf) = -1.0
+// expm1(+inf) = +inf
+
+// Overflow and Underflow
+//=======================
+// expm1(x) = largest double normal when
+// x = 709.7827 = 40862e42fefa39ef
+//
+// Underflow is handled as described in case 2 above.
+
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f9 -> f15, f32 -> f75
+
+// General registers used:
+// r14 -> r40
+
+// Predicate registers used:
+// p6 -> p15
+
+// Assembly macros
+//==============================================================
+
+rRshf = r14
+rAD_TB1 = r15
+rAD_T1 = r15
+rAD_TB2 = r16
+rAD_T2 = r16
+rAD_Ln2_lo = r17
+rAD_P = r17
+
+rN = r18
+rIndex_1 = r19
+rIndex_2_16 = r20
+
+rM = r21
+rBiased_M = r21
+rIndex_1_16 = r22
+rSignexp_x = r23
+rExp_x = r24
+rSig_inv_ln2 = r25
+
+rAD_Q1 = r26
+rAD_Q2 = r27
+rTmp = r27
+rExp_bias = r28
+rExp_mask = r29
+rRshf_2to56 = r30
+
+rGt_ln = r31
+rExp_2tom56 = r31
+
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+fRSHF_2TO56 = f6
+fINV_LN2_2TO63 = f7
+fW_2TO56_RSH = f9
+f2TOM56 = f11
+fP5 = f12
+fP54 = f50
+fP5432 = f50
+fP4 = f13
+fP3 = f14
+fP32 = f14
+fP2 = f15
+
+fLn2_by_128_hi = f33
+fLn2_by_128_lo = f34
+
+fRSHF = f35
+fNfloat = f36
+fW = f37
+fR = f38
+fF = f39
+
+fRsq = f40
+fRcube = f41
+
+f2M = f42
+fS1 = f43
+fT1 = f44
+
+fMIN_DBL_OFLOW_ARG = f45
+fMAX_DBL_MINUS_1_ARG = f46
+fMAX_DBL_NORM_ARG = f47
+fP_lo = f51
+fP_hi = f52
+fP = f53
+fS = f54
+
+fNormX = f56
+
+fWre_urm_f8 = f57
+
+fGt_pln = f58
+fTmp = f58
+
+fS2 = f59
+fT2 = f60
+fSm1 = f61
+
+fXsq = f62
+fX6 = f63
+fX4 = f63
+fQ7 = f64
+fQ76 = f64
+fQ7654 = f64
+fQ765432 = f64
+fQ6 = f65
+fQ5 = f66
+fQ54 = f66
+fQ4 = f67
+fQ3 = f68
+fQ32 = f68
+fQ2 = f69
+fQD = f70
+fQDC = f70
+fQDCBA = f70
+fQDCBA98 = f70
+fQDCBA98765432 = f70
+fQC = f71
+fQB = f72
+fQBA = f72
+fQA = f73
+fQ9 = f74
+fQ98 = f74
+fQ8 = f75
+
+// Data tables
+//==============================================================
+
+RODATA
+.align 16
+
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 128/ln(2) is needed for the computation of w. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*128/ln2 into the rightmost bits of the significand.
+// The result of this fma is fW_2TO56_RSH.
+// 2. fRSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
+// the integer part of w, n, as a floating-point number.
+// The result of this fms is fNfloat.
+
+
+LOCAL_OBJECT_START(exp_Table_1)
+data8 0x40862e42fefa39f0 // smallest dbl overflow arg
+data8 0xc048000000000000 // approx largest arg for minus one result
+data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result
+data8 0x0 // pad
+data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
+data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
+//
+// Table 1 is 2^(index_1/128) where
+// index_1 goes from 0 to 15
+//
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+LOCAL_OBJECT_END(exp_Table_1)
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7
+LOCAL_OBJECT_START(exp_Table_2)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+LOCAL_OBJECT_END(exp_Table_2)
+
+
+LOCAL_OBJECT_START(exp_p_table)
+data8 0x3f8111116da21757 //P5
+data8 0x3fa55555d787761c //P4
+data8 0x3fc5555555555414 //P3
+data8 0x3fdffffffffffd6a //P2
+LOCAL_OBJECT_END(exp_p_table)
+
+LOCAL_OBJECT_START(exp_Q1_table)
+data8 0x3de6124613a86d09 // QD = 1/13!
+data8 0x3e21eed8eff8d898 // QC = 1/12!
+data8 0x3ec71de3a556c734 // Q9 = 1/9!
+data8 0x3efa01a01a01a01a // Q8 = 1/8!
+data8 0x8888888888888889,0x3ff8 // Q5 = 1/5!
+data8 0xaaaaaaaaaaaaaaab,0x3ffc // Q3 = 1/3!
+data8 0x0,0x0 // Pad to avoid bank conflicts
+LOCAL_OBJECT_END(exp_Q1_table)
+
+LOCAL_OBJECT_START(exp_Q2_table)
+data8 0x3e5ae64567f544e4 // QB = 1/11!
+data8 0x3e927e4fb7789f5c // QA = 1/10!
+data8 0x3f2a01a01a01a01a // Q7 = 1/7!
+data8 0x3f56c16c16c16c17 // Q6 = 1/6!
+data8 0xaaaaaaaaaaaaaaab,0x3ffa // Q4 = 1/4!
+data8 0x8000000000000000,0x3ffe // Q2 = 1/2!
+LOCAL_OBJECT_END(exp_Q2_table)
+.section .text
+GLOBAL_IEEE754_ENTRY(expm1)
-{ .mfi
-(p0) add r32 = 1,r0
-(p0) fnorm.s1 f9 = f8
- nop.i 999
+{ .mlx
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // signif of 1/ln2
}
-
-
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p8 = f8, 0x1E7
- nop.i 999
+{ .mlx
+ addl rAD_TB1 = @ltoff(exp_Table_1), gp
+ movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56)
}
+;;
+// We do this fnorm right at the beginning to normalize
+// any input unnormals so that SWA is not taken.
{ .mfi
- nop.m 999
-(p0) fclass.nm.unc p9, p0 = f8, 0x1FF
- nop.i 999
+ ld8 rAD_TB1 = [rAD_TB1]
+ fclass.m p6,p0 = f8,0x0b // Test for x=unorm
+ mov rExp_mask = 0x1ffff
}
-
{ .mfi
- nop.m 999
-(p0) mov f36 = f1
- nop.i 999 ;;
-}
-
-//
-// Identify NatVals, NaNs, Infs, and Zeros.
-// Identify EM unsupporteds.
-// Save special input registers
-//
-// Create FR_X_cor = 0.0
-// GR_Flag = 0
-// GR_Expo_Range = 1
-// FR_Scale = 1.0
-//
-
-{ .mfb
- nop.m 999
-(p0) mov f32 = f0
-(p6) br.cond.spnt EXP_64_SPECIAL ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt EXP_64_UNSUPPORTED ;;
-}
-
-//
-// Branch out for special input values
-//
-
-{ .mfi
-(p0) cmp.ne.unc p12, p13 = 0x01, r33
-(p0) fcmp.lt.unc.s0 p9,p0 = f8, f0
-(p0) cmp.eq.unc p15, p0 = r0, r0
-}
-
-//
-// Raise possible denormal operand exception
-// Normalize x
-//
-// This function computes exp( x + x_cor)
-// Input FR 1: FR_X
-// Input FR 2: FR_X_cor
-// Input GR 1: GR_Flag
-// Input GR 2: GR_Expo_Range
-// Output FR 3: FR_Y_hi
-// Output FR 4: FR_Y_lo
-// Output FR 5: FR_Scale
-// Output PR 1: PR_Safe
-
-//
-// Prepare to load constants
-// Set Safe = True
-//
-
-{ .mmi
-(p0) addl r34 = @ltoff(Constants_exp_64_Arg#), gp
-(p0) addl r40 = @ltoff(Constants_exp_64_W1#), gp
-(p0) addl r41 = @ltoff(Constants_exp_64_W2#), gp
-}
-;;
-
-{ .mmi
- ld8 r34 = [r34]
- ld8 r40 = [r40]
-(p0) addl r50 = @ltoff(Constants_exp_64_T1#), gp
-}
-;;
-
-
-{ .mmi
- ld8 r41 = [r41]
-(p0) ldfe f37 = [r34],16
-(p0) addl r51 = @ltoff(Constants_exp_64_T2#), gp
-}
-;;
-
-//
-// N = fcvt.fx(float_N)
-// Set p14 if -6 > expo_X
-//
-
-
-//
-// Bias = 0x0FFFF
-// expo_X = expo_X and Mask
-//
-
-//
-// Load L_lo
-// Set p10 if 14 < expo_X
-//
-
-{ .mmi
- ld8 r50 = [r50]
-(p0) ldfe f40 = [r34],16
- nop.i 999
+ mov rExp_bias = 0xffff
+ fnorm.s1 fNormX = f8
+ mov rExp_2tom56 = 0xffff-56
}
;;
-{ .mlx
- nop.m 999
-(p0) movl r58 = 0x0FFFF
-}
-;;
-
-//
-// Load W2_ptr
-// Branch to SMALL is expo_X < -6
-//
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
-//
-// float_N = X * L_Inv
-// expo_X = exponent of X
-// Mask = 0x1FFFF
-//
-
-{ .mmi
- ld8 r51 = [r51]
-(p0) ldfe f41 = [r34],16
+{ .mfi
+ setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
+ fclass.m p8,p0 = f8,0x07 // Test for x=0
+ nop.i 0
}
-;;
-
{ .mlx
-(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp
-(p0) movl r39 = 0x1FFFF
-}
-;;
-
-{ .mmi
- ld8 r34 = [r34]
-(p0) getf.exp r37 = f9
- nop.i 999
+ setf.d fRSHF_2TO56 = rRshf_2to56 // Form 1.100 * 2^(63+56)
+ movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for rshift
}
;;
-{ .mii
- nop.m 999
- nop.i 999
-(p0) and r37 = r37, r39 ;;
-}
-
-{ .mmi
-(p0) sub r37 = r37, r58 ;;
-(p0) cmp.gt.unc p14, p0 = -6, r37
-(p0) cmp.lt.unc p10, p0 = 14, r37 ;;
-}
-
{ .mfi
- nop.m 999
-//
-// Load L_inv
-// Set p12 true for Flag = 0 (exp)
-// Set p13 true for Flag = 1 (expm1)
-//
-(p0) fmpy.s1 f38 = f9, f37
- nop.i 999 ;;
+ setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
+ fclass.m p9,p0 = f8,0x22 // Test for x=-inf
+ add rAD_TB2 = 0x140, rAD_TB1 // Point to Table 2
}
-
-{ .mfb
- nop.m 999
-//
-// Load L_hi
-// expo_X = expo_X - Bias
-// get W1_ptr
-//
-(p0) fcvt.fx.s1 f39 = f38
-(p14) br.cond.spnt EXP_SMALL ;;
-}
-
{ .mib
- nop.m 999
- nop.i 999
-(p10) br.cond.spnt EXP_HUGE ;;
-}
-
-{ .mmi
-(p0) shladd r34 = r32,4,r34
-(p0) addl r35 = @ltoff(Constants_exp_64_A#), gp
- nop.i 999
+ add rAD_Q1 = 0x1e0, rAD_TB1 // Point to Q table for small path
+ add rAD_Ln2_lo = 0x30, rAD_TB1 // Point to ln2_by_128_lo
+(p6) br.cond.spnt EXPM1_UNORM // Branch if x unorm
}
;;
-{ .mmi
- ld8 r35 = [r35]
- nop.m 999
- nop.i 999
-}
-;;
-
-//
-// Load T_1,T_2
-//
-
-{ .mmb
-(p0) ldfe f51 = [r35],16
-(p0) ld8 r45 = [r34],8
- nop.b 999 ;;
-}
-//
-// Set Safe = True if k >= big_expo_neg
-// Set Safe = False if k < big_expo_neg
-//
-
-{ .mmb
-(p0) ldfe f49 = [r35],16
-(p0) ld8 r48 = [r34],0
- nop.b 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Branch to HUGE is expo_X > 14
-//
-(p0) fcvt.xf f38 = f39
- nop.i 999 ;;
-}
-
+EXPM1_COMMON:
{ .mfi
-(p0) getf.sig r52 = f39
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) extr.u r43 = r52, 6, 6 ;;
-//
-// r = r - float_N * L_lo
-// K = extr(N_fix,12,52)
-//
-(p0) shladd r40 = r43,3,r40 ;;
-}
-
-{ .mfi
-(p0) shladd r50 = r43,2,r50
-(p0) fnma.s1 f42 = f40, f38, f9
-//
-// float_N = float(N)
-// N_fix = signficand N
-//
-(p0) extr.u r42 = r52, 0, 6
-}
-
-{ .mmi
-(p0) ldfd f43 = [r40],0 ;;
-(p0) shladd r41 = r42,3,r41
-(p0) shladd r51 = r42,2,r51
-}
-//
-// W_1_p1 = 1 + W_1
-//
-
-{ .mmi
-(p0) ldfs f44 = [r50],0 ;;
-(p0) ldfd f45 = [r41],0
-//
-// M_2 = extr(N_fix,0,6)
-// M_1 = extr(N_fix,6,6)
-// r = X - float_N * L_hi
-//
-(p0) extr r44 = r52, 12, 52
-}
-
-{ .mmi
-(p0) ldfs f46 = [r51],0 ;;
-(p0) sub r46 = r58, r44
-(p0) cmp.gt.unc p8, p15 = r44, r45
-}
-//
-// W = W_1 + W_1_p1*W_2
-// Load A_2
-// Bias_m_K = Bias - K
-//
-
-{ .mii
-(p0) ldfe f40 = [r35],16
-//
-// load A_1
-// poly = A_2 + r*A_3
-// rsq = r * r
-// neg_2_mK = exponent of Bias_m_k
-//
-(p0) add r47 = r58, r44 ;;
-//
-// Set Safe = True if k <= big_expo_pos
-// Set Safe = False if k > big_expo_pos
-// Load A_3
-//
-(p15) cmp.lt p8,p15 = r44,r48 ;;
-}
-
-{ .mmf
-(p0) setf.exp f61 = r46
-//
-// Bias_p + K = Bias + K
-// T = T_1 * T_2
-//
-(p0) setf.exp f36 = r47
-(p0) fnma.s1 f42 = f41, f38, f42 ;;
+ ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_MINUS_1_ARG = [rAD_TB1],16
+ fclass.m p10,p0 = f8,0x1e1 // Test for x=+inf, NaN, NaT
+ add rAD_Q2 = 0x50, rAD_Q1 // Point to Q table for small path
}
-
-{ .mfi
- nop.m 999
-//
-// Load W_1,W_2
-// Load big_exp_pos, load big_exp_neg
-//
-(p0) fadd.s1 f47 = f43, f1
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p8) br.ret.spnt b0 // Exit for x=0, return x
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 f52 = f42, f51, f49
- nop.i 999
+ ldfd fMAX_DBL_NORM_ARG = [rAD_TB1],16
+ nop.f 0
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
}
-
-{ .mfi
- nop.m 999
-(p0) fmpy.s1 f48 = f42, f42
- nop.i 999 ;;
+{ .mfb
+ setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63
+(p9) fms.d.s0 f8 = f0,f0,f1 // quick exit for x=-inf
+(p9) br.ret.spnt b0
}
+;;
{ .mfi
- nop.m 999
-(p0) fmpy.s1 f53 = f44, f46
- nop.i 999 ;;
+ ldfpd fQD, fQC = [rAD_Q1], 16 // Load coeff for small path
+ nop.f 0
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 f54 = f45, f47, f43
- nop.i 999
+{ .mfb
+ ldfpd fQB, fQA = [rAD_Q2], 16 // Load coeff for small path
+(p10) fma.d.s0 f8 = f8, f1, f0 // For x=+inf, NaN, NaT
+(p10) br.ret.spnt b0 // Exit for x=+inf, NaN, NaT
}
+;;
{ .mfi
- nop.m 999
-(p0) fneg f61 = f61
- nop.i 999 ;;
+ ldfpd fQ9, fQ8 = [rAD_Q1], 16 // Load coeff for small path
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+ cmp.gt p7, p8 = -2, rExp_x // Test |x| < 2^(-2)
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 f52 = f42, f52, f40
- nop.i 999 ;;
+ ldfpd fQ7, fQ6 = [rAD_Q2], 16 // Load coeff for small path
+ nop.f 0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fadd.s1 f55 = f54, f1
- nop.i 999
+ ldfe fQ5 = [rAD_Q1], 16 // Load coeff for small path
+ nop.f 0
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-//
-// W + Wp1 * poly
-//
-(p0) mov f34 = f53
- nop.i 999 ;;
+{ .mib
+ ldfe fQ4 = [rAD_Q2], 16 // Load coeff for small path
+(p7) cmp.gt.unc p6, p7 = -60, rExp_x // Test |x| < 2^(-60)
+(p7) br.cond.spnt EXPM1_SMALL // Branch if 2^-60 <= |x| < 2^-2
}
+;;
-{ .mfi
- nop.m 999
-//
-// A_1 + r * poly
-// Scale = setf_exp(Bias_p_k)
-//
-(p0) fma.s1 f52 = f48, f52, f42
- nop.i 999 ;;
-}
+// W = X * Inv_log2_by_128
+// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
+// We actually add 1.10...0*2^(63+56) to X * Inv_log2 * 2^63 (W * 2^56) to do the same thing.
{ .mfi
- nop.m 999
-//
-// poly = r + rsq(A_1 + r*poly)
-// Wp1 = 1 + W
-// neg_2_mK = -neg_2_mK
-//
-(p0) fma.s1 f35 = f55, f52, f54
- nop.i 999 ;;
+ ldfe fLn2_by_128_hi = [rAD_TB1],32
+ fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fmpy.s1 f35 = f35, f53
-//
-// Y_hi = T
-// Y_lo = T * (W + Wp1*poly)
-//
-(p12) br.cond.sptk EXP_MAIN ;;
+ ldfe fLn2_by_128_lo = [rAD_Ln2_lo]
+(p6) fma.d.s0 f8 = f8, f8, f8 // If x < 2^-60, result=x+x*x
+(p6) br.ret.spnt b0 // Exit if x < 2^-60
}
-//
-// Branch if exp(x)
-// Continue for exp(x-1)
-//
+;;
-{ .mii
-(p0) cmp.lt.unc p12, p13 = 10, r44
- nop.i 999 ;;
-//
-// Set p12 if 10 < K, Else p13
-//
-(p13) cmp.gt.unc p13, p14 = -10, r44 ;;
-}
+// Divide arguments into the following categories:
+// Certain minus one p11 - -inf < x <= MAX_DBL_MINUS_1_ARG
+// Possible Overflow p14 - MAX_DBL_NORM_ARG < x < MIN_DBL_OFLOW_ARG
+// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= x < +inf
//
-// K > 10: Y_lo = Y_lo + neg_2_mK
-// K <=10: Set p13 if -10 > K, Else set p14
+// If the input is really a double arg, then there will never be "Possible
+// Overflow" arguments.
//
-{ .mfi
-(p13) cmp.eq p15, p0 = r0, r0
-(p14) fadd.s1 f34 = f61, f34
- nop.i 999 ;;
-}
+// After that last load, rAD_TB1 points to the beginning of table 1
{ .mfi
- nop.m 999
-(p12) fadd.s1 f35 = f35, f61
- nop.i 999 ;;
+ nop.m 0
+ fcmp.ge.s1 p15,p14 = fNormX,fMIN_DBL_OFLOW_ARG
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p13) fadd.s1 f35 = f35, f34
- nop.i 999
+ add rAD_P = 0x80, rAD_TB2
+ fcmp.le.s1 p11,p0 = fNormX,fMAX_DBL_MINUS_1_ARG
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
-//
-// K <= 10 and K < -10, Set Safe = True
-// K <= 10 and K < 10, Y_lo = Y_hi + Y_lo
-// K <= 10 and K > =-10, Y_hi = Y_hi + neg_2_mk
-//
-(p13) mov f34 = f61
-(p0) br.cond.sptk EXP_MAIN ;;
-}
-EXP_SMALL:
-
-{ .mmi
-(p12) addl r35 = @ltoff(Constants_exp_64_P#), gp
-(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp
- nop.i 999
+ ldfpd fP5, fP4 = [rAD_P] ,16
+(p14) fcmp.gt.unc.s1 p14,p0 = fNormX,fMAX_DBL_NORM_ARG
+(p15) br.cond.spnt EXPM1_CERTAIN_OVERFLOW
}
;;
-{ .mmi
-(p12) ld8 r35 = [r35]
- ld8 r34 = [r34]
- nop.i 999
-}
-;;
+// Nfloat = round_int(W)
+// The significand of fW_2TO56_RSH contains the rounded integer part of W,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into rN.
+// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
+// Thus, fNfloat contains the floating point version of N
-{ .mmi
-(p13) addl r35 = @ltoff(Constants_exp_64_Q#), gp
- nop.m 999
- nop.i 999
+{ .mfb
+ ldfpd fP3, fP2 = [rAD_P]
+ fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
+(p11) br.cond.spnt EXPM1_CERTAIN_MINUS_ONE
}
;;
-
-//
-// Return
-// K <= 10 and K < 10, Y_hi = neg_2_mk
-//
-// /*******************************************************/
-// /*********** Branch EXP_SMALL *************************/
-// /*******************************************************/
-
{ .mfi
-(p13) ld8 r35 = [r35]
-(p0) mov f42 = f9
-(p0) add r34 = 0x48,r34
+ getf.sig rN = fW_2TO56_RSH
+ nop.f 0
+ nop.i 0
}
;;
-//
-// Flag = 0
-// r4 = rsq * rsq
-//
+// rIndex_1 has index_1
+// rIndex_2_16 has index_2 * 16
+// rBiased_M has M + exponent bias
+// rIndex_1_16 has index_1 * 16
+// r = x - Nfloat * ln2_by_128_hi
+// f = 1 - Nfloat * ln2_by_128_lo
{ .mfi
-(p0) ld8 r49 =[r34],0
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// Flag = 1
-//
-(p0) cmp.lt.unc p14, p0 = r37, r49 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// r = X
-//
-(p0) fmpy.s1 f48 = f42, f42
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-//
-// rsq = r * r
-//
-(p0) fmpy.s1 f50 = f48, f48
-//
-// Is input very small?
-//
-(p14) br.cond.spnt EXP_VERY_SMALL ;;
-}
-//
-// Flag_not1: Y_hi = 1.0
-// Flag is 1: r6 = rsq * r4
-//
-
-{ .mfi
-(p12) ldfe f52 = [r35],16
-(p12) mov f34 = f1
-(p0) add r53 = 0x1,r0 ;;
-}
-
-{ .mfi
-(p13) ldfe f51 = [r35],16
-//
-// Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
-//
-(p13) mov f34 = f9
- nop.i 999 ;;
-}
-
-{ .mmf
-(p12) ldfe f53 = [r35],16
-//
-// For Flag_not_1, Y_hi = X
-// Scale = 1
-// Create 0x000...01
-//
-(p0) setf.sig f37 = r53
-(p0) mov f36 = f1 ;;
-}
-
-{ .mmi
-(p13) ldfe f52 = [r35],16 ;;
-(p12) ldfe f54 = [r35],16
- nop.i 999 ;;
+ and rIndex_1 = 0x0f, rN
+ fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX
+ shr rM = rN, 0x7
}
-
{ .mfi
-(p13) ldfe f53 = [r35],16
-(p13) fmpy.s1 f58 = f48, f50
- nop.i 999 ;;
+ and rIndex_2_16 = 0x70, rN
+ fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1
+ nop.i 0
}
-//
-// Flag_not1: poly_lo = P_5 + r*P_6
-// Flag_1: poly_lo = Q_6 + r*Q_7
-//
+;;
-{ .mmi
-(p13) ldfe f54 = [r35],16 ;;
-(p12) ldfe f55 = [r35],16
- nop.i 999 ;;
-}
+// rAD_T1 has address of T1
+// rAD_T2 has address of T2
{ .mmi
-(p12) ldfe f56 = [r35],16 ;;
-(p13) ldfe f55 = [r35],16
- nop.i 999 ;;
+ add rBiased_M = rExp_bias, rM
+ add rAD_T2 = rAD_TB2, rIndex_2_16
+ shladd rAD_T1 = rIndex_1, 4, rAD_TB1
}
+;;
+// Create Scale = 2^M
+// Load T1 and T2
{ .mmi
-(p12) ldfe f57 = [r35],0 ;;
-(p13) ldfe f56 = [r35],16
- nop.i 999 ;;
-}
-
-{ .mfi
-(p13) ldfe f57 = [r35],0
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// For Flag_not_1, load p5,p6,p1,p2
-// Else load p5,p6,p1,p2
-//
-(p12) fma.s1 f60 = f52, f42, f53
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p13) fma.s1 f60 = f51, f42, f52
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p12) fma.s1 f60 = f60, f42, f54
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p12) fma.s1 f59 = f56, f42, f57
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p13) fma.s1 f60 = f42, f60, f53
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p12) fma.s1 f59 = f59, f48, f42
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7)
-// Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
-// Flag_not1: poly_hi = (P_1 + r*P_2)
-//
-(p13) fmpy.s1 f60 = f60, f58
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p12) fma.s1 f60 = f60, f42, f55
- nop.i 999 ;;
+ setf.exp f2M = rBiased_M
+ ldfe fT2 = [rAD_T2]
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Flag_1: poly_lo = r6 *(Q_5 + ....)
-// Flag_not1: poly_hi = r + rsq *(P_1 + r*P_2)
-//
-(p12) fma.s1 f35 = f60, f50, f59
- nop.i 999
+ ldfe fT1 = [rAD_T1]
+ fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p13) fma.s1 f59 = f54, f42, f55
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fP54 = fR, fP5, fP4
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-//
-// Flag_not1: Y_lo = rsq* poly_hi + poly_lo
-// Flag_1: poly_lo = rsq* poly_hi + poly_lo
-//
-(p13) fma.s1 f59 = f59, f42, f56
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Flag_not_1: (P_1 + r*P_2)
-//
-(p13) fma.s1 f59 = f59, f42, f57
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fP32 = fR, fP3, fP2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2)
-//
-(p13) fma.s1 f35 = f59, f48, f60
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fRsq = fR, fR, f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Create 0.000...01
-//
-(p0) for f37 = f35, f37
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-//
-// Set lsb of Y_lo to 1
-//
-(p0) fmerge.se f35 = f35,f37
-(p0) br.cond.sptk EXP_MAIN ;;
-}
-EXP_VERY_SMALL:
-
-{ .mmi
- nop.m 999
-(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
- nop.i 999;;
+ nop.m 0
+ fma.s1 fP5432 = fRsq, fP54, fP32
+ nop.i 0
}
+;;
{ .mfi
-(p13) ld8 r34 = [r34];
-(p12) mov f35 = f9
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p12) mov f34 = f1
-(p12) br.cond.sptk EXP_MAIN ;;
-}
-
-{ .mlx
-(p13) add r34 = 8,r34
-(p13) movl r39 = 0x0FFFE ;;
+ nop.m 0
+ fma.s1 fS2 = fF,fT2,f0
+ nop.i 0
}
-//
-// Load big_exp_neg
-// Create 1/2's exponent
-//
-
-{ .mii
-(p13) setf.exp f56 = r39
-(p13) shladd r34 = r32,4,r34 ;;
- nop.i 999
-}
-//
-// Negative exponents are stored after positive
-//
-
{ .mfi
-(p13) ld8 r45 = [r34],0
-//
-// Y_hi = x
-// Scale = 1
-//
-(p13) fmpy.s1 f35 = f9, f9
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fS1 = f2M,fT1,f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Reset Safe if necessary
-// Create 1/2
-//
-(p13) mov f34 = f9
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fP = fRsq, fP5432, fR
+ nop.i 0
}
+;;
{ .mfi
-(p13) cmp.lt.unc p0, p15 = r37, r45
-(p13) mov f36 = f1
- nop.i 999 ;;
+ nop.m 0
+ fms.s1 fSm1 = fS1,fS2,f1 // S - 1.0
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-//
-// Y_lo = x * x
-//
-(p13) fmpy.s1 f35 = f35, f56
-//
-// Y_lo = x*x/2
-//
-(p13) br.cond.sptk EXP_MAIN ;;
-}
-EXP_HUGE:
-
-{ .mfi
- nop.m 999
-(p0) fcmp.gt.unc.s1 p14, p0 = f9, f0
- nop.i 999
-}
-
-{ .mlx
- nop.m 999
-(p0) movl r39 = 0x15DC0 ;;
-}
-
-{ .mfi
-(p14) setf.exp f34 = r39
-(p14) mov f35 = f1
-(p14) cmp.eq p0, p15 = r0, r0 ;;
+ nop.m 0
+ fma.s1 fS = fS1,fS2,f0
+(p14) br.cond.spnt EXPM1_POSSIBLE_OVERFLOW
}
+;;
{ .mfb
- nop.m 999
-(p14) mov f36 = f34
-//
-// If x > 0, Set Safe = False
-// If x > 0, Y_hi = 2**(24,000)
-// If x > 0, Y_lo = 1.0
-// If x > 0, Scale = 2**(24,000)
-//
-(p14) br.cond.sptk EXP_MAIN ;;
-}
-
-{ .mlx
- nop.m 999
-(p12) movl r39 = 0xA240
-}
-
-{ .mlx
- nop.m 999
-(p12) movl r38 = 0xA1DC ;;
-}
-
-{ .mmb
-(p13) cmp.eq p15, p14 = r0, r0
-(p12) setf.exp f34 = r39
- nop.b 999 ;;
-}
-
-{ .mlx
-(p12) setf.exp f35 = r38
-(p13) movl r39 = 0xFF9C
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fSm1
+ br.ret.sptk b0 // Normal path exit
}
+;;
-{ .mfi
- nop.m 999
-(p13) fsub.s1 f34 = f0, f1
- nop.i 999 ;;
+// Here if 2^-60 <= |x| <2^-2
+// Compute 13th order polynomial
+EXPM1_SMALL:
+{ .mmf
+ ldfe fQ3 = [rAD_Q1], 16
+ ldfe fQ2 = [rAD_Q2], 16
+ fma.s1 fX4 = fXsq, fXsq, f0
}
+;;
{ .mfi
- nop.m 999
-(p12) mov f36 = f34
-(p12) cmp.eq p0, p15 = r0, r0 ;;
+ nop.m 0
+ fma.s1 fQDC = fQD, fNormX, fQC
+ nop.i 0
}
-
{ .mfi
-(p13) setf.exp f35 = r39
-(p13) mov f36 = f1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fQBA = fQB, fNormX, fQA
+ nop.i 0
}
-EXP_MAIN:
+;;
{ .mfi
-(p0) cmp.ne.unc p12, p0 = 0x01, r33
-(p0) fmpy.s1 f101 = f36, f35
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fQ98 = fQ9, fNormX, fQ8
+ nop.i 0
}
-
-{ .mfb
- nop.m 999
-(p0) fma.d.s0 f99 = f34, f36, f101
-(p15) br.cond.sptk EXP_64_RETURN;;
-}
-
{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x01
- nop.i 999
-}
-
-{ .mlx
- nop.m 999
-(p0) movl r50 = 0x000000000103FF ;;
-}
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + RZ + TD (Underflows)
-//
-//
-// If (Safe) is true, then
-// Compute result using user supplied status field.
-// No overflow or underflow here, but perhaps inexact.
-// Return
-// Else
-// Determine if overflow or underflow was raised.
-// Fetch +/- overflow threshold for IEEE single, double,
-// double extended
-//
-
-{ .mfi
-(p0) setf.exp f60 = r50
-(p0) fma.d.s3 f102 = f34, f36, f101
- nop.i 999
+ nop.m 0
+ fma.s1 fQ76= fQ7, fNormX, fQ6
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x40
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fQ54 = fQ5, fNormX, fQ4
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// For Safe, no need to check for over/under.
-// For expm1, handle errors like exp.
-//
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
+ nop.m 0
+ fma.s1 fX6 = fX4, fXsq, f0
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.d.s2 f100 = f34, f36, f101
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fQ32= fQ3, fNormX, fQ2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x40
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fQDCBA = fQDC, fXsq, fQBA
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p12, p0 = f102, 0x00F
- nop.i 999
+ nop.m 0
+ fma.s1 fQ7654 = fQ76, fXsq, fQ54
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = f102, 0x00F
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fQDCBA98 = fQDCBA, fXsq, fQ98
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p7) fcmp.ge.unc.s1 p10, p0 = f100, f60
- nop.i 999
+ nop.m 0
+ fma.s1 fQ765432 = fQ7654, fXsq, fQ32
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Create largest double exponent + 1.
-// Create smallest double exponent - 1.
-//
-(p0) fcmp.ge.unc.s1 p8, p0 = f100, f60
- nop.i 999 ;;
-}
-//
-// fcmp: resultS2 >= + overflow threshold -> set (a) if true
-// fcmp: resultS2 <= - overflow threshold -> set (b) if true
-// fclass: resultS3 is denorm/unorm/0 -> set (d) if true
-//
-
-{ .mib
-(p10) mov r65 = 41
- nop.i 999
-(p10) br.cond.sptk __libm_error_region ;;
-}
-
-{ .mib
-(p8) mov r65 = 14
- nop.i 999
-(p8) br.cond.sptk __libm_error_region ;;
+ nop.m 0
+ fma.s1 fQDCBA98765432 = fQDCBA98, fX6, fQ765432
+ nop.i 0
}
-//
-// Report that exp overflowed
-//
+;;
-{ .mib
-(p12) mov r65 = 42
- nop.i 999
-(p12) br.cond.sptk __libm_error_region ;;
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fQDCBA98765432, fXsq, fNormX
+ br.ret.sptk b0 // Exit small branch
}
+;;
-{ .mib
-(p11) mov r65 = 15
- nop.i 999
-(p11) br.cond.sptk __libm_error_region ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Report that exp underflowed
-//
-(p0) br.cond.sptk EXP_64_RETURN;;
-}
-EXP_64_SPECIAL:
+EXPM1_POSSIBLE_OVERFLOW:
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p0 = f8, 0x0c3
- nop.i 999
-}
+// Here if fMAX_DBL_NORM_ARG < x < fMIN_DBL_OFLOW_ARG
+// This cannot happen if input is a double, only if input has higher precision.
+// Overflow is a possibility, not a certainty.
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p13, p8 = f8, 0x007
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fclass.m.unc p14, p0 = f8, 0x007
- nop.i 999
-}
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest double, then we have
+// overflow
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p12, p9 = f8, 0x021
- nop.i 999 ;;
+ mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = f8, 0x022
- nop.i 999
+ setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp
+ fma.d.s2 fWre_urm_f8 = fS, fP, fSm1 // Result with wre set
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p10, p0 = f8, 0x022
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Identify +/- 0, Inf, or -Inf
-// Generate the right kind of NaN.
-//
-(p13) fadd.d.s0 f99 = f0, f1
- nop.i 999 ;;
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
}
+;;
-{ .mfi
- nop.m 999
-(p14) mov f99 = f8
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt EXPM1_CERTAIN_OVERFLOW // Branch if overflow
}
+;;
{ .mfb
- nop.m 999
-(p6) fadd.d.s0 f99 = f8, f1
-//
-// exp(+/-0) = 1
-// expm1(+/-0) = +/-0
-// No exceptions raised
-//
-(p6) br.cond.sptk EXP_64_RETURN;;
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fSm1
+ br.ret.sptk b0 // Exit if really no overflow
}
+;;
-{ .mib
- nop.m 999
- nop.i 999
-(p14) br.cond.sptk EXP_64_RETURN;;
+EXPM1_CERTAIN_OVERFLOW:
+{ .mmi
+ sub rTmp = rExp_mask, r0, 1
+;;
+ setf.exp fTmp = rTmp
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p11) mov f99 = f0
- nop.i 999 ;;
+ alloc r32=ar.pfs,1,4,4,0
+ fmerge.s FR_X = f8,f8
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p10) fsub.d.s1 f99 = f0, f1
-//
-// exp(-Inf) = 0
-// expm1(-Inf) = -1
-// No exceptions raised.
-//
-(p10) br.cond.sptk EXP_64_RETURN;;
+ mov GR_Parameter_TAG = 41
+ fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
}
+;;
+// Here if x unorm
+EXPM1_UNORM:
{ .mfb
- nop.m 999
-(p12) fmpy.d.s1 f99 = f8, f1
-//
-// exp(+Inf) = Inf
-// No exceptions raised.
-//
-(p0) br.cond.sptk EXP_64_RETURN;;
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk EXPM1_COMMON
}
+;;
-
-EXP_64_UNSUPPORTED:
-
-{ .mfb
- nop.m 999
-(p0) fmpy.d.s0 f99 = f8, f0
- nop.b 0;;
+// here if result will be -1 and inexact, x <= -48.0
+EXPM1_CERTAIN_MINUS_ONE:
+{ .mmi
+ mov rTmp = 1
+;;
+ setf.exp fTmp = rTmp
+ nop.i 0
}
+;;
-EXP_64_RETURN:
{ .mfb
- nop.m 999
-(p0) mov f8 = f99
-(p0) br.ret.sptk b0
+ nop.m 0
+ fms.d.s0 FR_RESULT = fTmp, fTmp, f1 // Set I, rounded -1+eps result
+ br.ret.sptk b0
}
-.endp expm1
-ASM_SIZE_DIRECTIVE(expm1)
+;;
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(expm1)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
@@ -1716,38 +841,32 @@ __libm_error_region:
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-// (2)
{ .mmi
stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
-
.body
-// (3)
{ .mib
- stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-
-// (4)
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
@@ -1760,9 +879,6 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_expm1f.S b/sysdeps/ia64/fpu/s_expm1f.S
index cc2c537ba2..0c5f2e67a8 100644
--- a/sysdeps/ia64/fpu/s_expm1f.S
+++ b/sysdeps/ia64/fpu/s_expm1f.S
@@ -1,10 +1,10 @@
-.file "exp_m1f.s"
+.file "expf_m1.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2002, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1735 +20,649 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
-// HISTORY
-// 2/02/00 Initial Version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
+// History
+//*********************************************************************
+// 02/02/00 Initial Version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 07/07/01 Improved speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/20/02 Improved speed, algorithm based on expf
//
-// *********************************************************************
-//
-// Function: Combined expf(x) and expm1f(x), where
-// x
-// expf(x) = e , for single precision x values
-// x
-// expm1f(x) = e - 1 for single precision x values
-//
-// *********************************************************************
-//
-// Accuracy: Within .7 ulps for 80-bit floating point values
-// Very accurate for single precision values
-//
-// *********************************************************************
-//
-// Resources Used:
-//
-// Floating-Point Registers: f8 (Input and Return Value)
-// f9,f32-f61, f99-f102
-//
-// General Purpose Registers:
-// r32-r61
-// r62-r65 (Used to pass arguments to error handling routine)
-//
-// Predicate Registers: p6-p15
-//
-// *********************************************************************
-//
-// IEEE Special Conditions:
-//
-// Denormal fault raised on denormal inputs
-// Overflow exceptions raised when appropriate for exp and expm1
-// Underflow exceptions raised when appropriate for exp and expm1
-// (Error Handling Routine called for overflow and Underflow)
-// Inexact raised when appropriate by algorithm
-//
-// expf(inf) = inf
-// expf(-inf) = +0
-// expf(SNaN) = QNaN
-// expf(QNaN) = QNaN
-// expf(0) = 1
-// expf(EM_special Values) = QNaN
-// expf(inf) = inf
-// expm1f(-inf) = -1
-// expm1f(SNaN) = QNaN
-// expm1f(QNaN) = QNaN
-// expm1f(0) = 0
-// expm1f(EM_special Values) = QNaN
-//
-// *********************************************************************
-//
-// Implementation and Algorithm Notes:
-//
-// ker_exp_64( in_FR : X,
-// in_GR : Flag,
-// in_GR : Expo_Range
-// out_FR : Y_hi,
-// out_FR : Y_lo,
-// out_FR : scale,
-// out_PR : Safe )
-//
-// On input, X is in register format and
-// Flag = 0 for exp,
-// Flag = 1 for expm1,
-//
-// On output, provided X and X_cor are real numbers, then
-//
-// scale*(Y_hi + Y_lo) approximates expf(X) if Flag is 0
-// scale*(Y_hi + Y_lo) approximates expf(X)-1 if Flag is 1
-//
-// The accuracy is sufficient for a highly accurate 64 sig.
-// bit implementation. Safe is set if there is no danger of
-// overflow/underflow when the result is composed from scale,
-// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set.
-// Otherwise, one must prepare to handle the possible exception
-// appropriately. Note that SAFE not set (false) does not mean
-// that overflow/underflow will occur; only the setting of SAFE
-// guarantees the opposite.
-//
-// **** High Level Overview ****
-//
-// The method consists of three cases.
-//
-// If |X| < Tiny use case exp_tiny;
-// else if |X| < 2^(-6) use case exp_small;
-// else use case exp_regular;
-//
-// Case exp_tiny:
-//
-// 1 + X can be used to approximate expf(X) or expf(X+X_cor);
-// X + X^2/2 can be used to approximate expf(X) - 1
-//
-// Case exp_small:
-//
-// Here, expf(X), expf(X+X_cor), and expf(X) - 1 can all be
-// appproximated by a relatively simple polynomial.
-//
-// This polynomial resembles the truncated Taylor series
-//
-// expf(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
-//
-// Case exp_regular:
-//
-// Here we use a table lookup method. The basic idea is that in
-// order to compute expf(X), we accurately decompose X into
-//
-// X = N * log(2)/(2^12) + r, |r| <= log(2)/2^13.
-//
-// Hence
-//
-// expf(X) = 2^( N / 2^12 ) * expf(r).
-//
-// The value 2^( N / 2^12 ) is obtained by simple combinations
-// of values calculated beforehand and stored in table; expf(r)
-// is approximated by a short polynomial because |r| is small.
-//
-// We elaborate this method in 4 steps.
-//
-// Step 1: Reduction
-//
-// The value 2^12/log(2) is stored as a double-extended number
-// L_Inv.
-//
-// N := round_to_nearest_integer( X * L_Inv )
-//
-// The value log(2)/2^12 is stored as two numbers L_hi and L_lo so
-// that r can be computed accurately via
-//
-// r := (X - N*L_hi) - N*L_lo
-//
-// We pick L_hi such that N*L_hi is representable in 64 sig. bits
-// and thus the FMA X - N*L_hi is error free. So r is the
-// 1 rounding error from an exact reduction with respect to
-//
-// L_hi + L_lo.
-//
-// In particular, L_hi has 30 significant bit and can be stored
-// as a double-precision number; L_lo has 64 significant bits and
-// stored as a double-extended number.
-//
-// In the case Flag = 2, we further modify r by
-//
-// r := r + X_cor.
-//
-// Step 2: Approximation
-//
-// expf(r) - 1 is approximated by a short polynomial of the form
-//
-// r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
-//
-// Step 3: Composition from Table Values
-//
-// The value 2^( N / 2^12 ) can be composed from a couple of tables
-// of precalculated values. First, express N as three integers
-// K, M_1, and M_2 as
-//
-// N = K * 2^12 + M_1 * 2^6 + M_2
-//
-// Where 0 <= M_1, M_2 < 2^6; and K can be positive or negative.
-// When N is represented in 2's complement, M_2 is simply the 6
-// lsb's, M_1 is the next 6, and K is simply N shifted right
-// arithmetically (sign extended) by 12 bits.
-//
-// Now, 2^( N / 2^12 ) is simply
-//
-// 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 )
-//
-// Clearly, 2^K needs no tabulation. The other two values are less
-// trivial because if we store each accurately to more than working
-// precision, than its product is too expensive to calculate. We
-// use the following method.
-//
-// Define two mathematical values, delta_1 and delta_2, implicitly
-// such that
-//
-// T_1 = expf( [M_1 log(2)/2^6] - delta_1 )
-// T_2 = expf( [M_2 log(2)/2^12] - delta_2 )
-//
-// are representable as 24 significant bits. To illustrate the idea,
-// we show how we define delta_1:
-//
-// T_1 := round_to_24_bits( expf( M_1 log(2)/2^6 ) )
-// delta_1 = (M_1 log(2)/2^6) - log( T_1 )
-//
-// The last equality means mathematical equality. We then tabulate
-//
-// W_1 := expf(delta_1) - 1
-// W_2 := expf(delta_2) - 1
-//
-// Both in double precision.
-//
-// From the tabulated values T_1, T_2, W_1, W_2, we compose the values
-// T and W via
-//
-// T := T_1 * T_2 ...exactly
-// W := W_1 + (1 + W_1)*W_2
-//
-// W approximates expf( delta ) - 1 where delta = delta_1 + delta_2.
-// The mathematical product of T and (W+1) is an accurate representation
-// of 2^(M_1/2^6) * 2^(M_2/2^12).
-//
-// Step 4. Reconstruction
-//
-// Finally, we can reconstruct expf(X), expf(X) - 1.
-// Because
-//
-// X = K * log(2) + (M_1*log(2)/2^6 - delta_1)
-// + (M_2*log(2)/2^12 - delta_2)
-// + delta_1 + delta_2 + r ...accurately
-// We have
-//
-// expf(X) ~=~ 2^K * ( T + T*[expf(delta_1+delta_2+r) - 1] )
-// ~=~ 2^K * ( T + T*[expf(delta + r) - 1] )
-// ~=~ 2^K * ( T + T*[(expf(delta)-1)
-// + expf(delta)*(expf(r)-1)] )
-// ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
-// ~=~ 2^K * ( Y_hi + Y_lo )
-//
-// where Y_hi = T and Y_lo = T*(W + (1+W)*poly(r))
-//
-// For expf(X)-1, we have
-//
-// expf(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
-// ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
-//
-// and we combine Y_hi + Y_lo - 2^(-N) into the form of two
-// numbers Y_hi + Y_lo carefully.
-//
-// **** Algorithm Details ****
-//
-// A careful algorithm must be used to realize the mathematical ideas
-// accurately. We describe each of the three cases. We assume SAFE
-// is preset to be TRUE.
-//
-// Case exp_tiny:
-//
-// The important points are to ensure an accurate result under
-// different rounding directions and a correct setting of the SAFE
-// flag.
-//
-// If Flag is 1, then
-// SAFE := False ...possibility of underflow
-// Scale := 1.0
-// Y_hi := X
-// Y_lo := 2^(-17000)
-// Else
-// Scale := 1.0
-// Y_hi := 1.0
-// Y_lo := X ...for different rounding modes
-// Endif
-//
-// Case exp_small:
-//
-// Here we compute a simple polynomial. To exploit parallelism, we split
-// the polynomial into several portions.
-//
-// Let r = X
-//
-// If Flag is not 1 ...i.e. expf( argument )
-//
-// rsq := r * r;
-// r4 := rsq*rsq
-// poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
-// poly_hi := r + rsq*(P_1 + r*P_2)
-// Y_lo := poly_hi + r4 * poly_lo
-// set lsb(Y_lo) to 1
-// Y_hi := 1.0
-// Scale := 1.0
-//
-// Else ...i.e. expf( argument ) - 1
-//
-// rsq := r * r
-// r4 := rsq * rsq
-// r6 := rsq * r4
-// poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
-// poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
-// Y_lo := rsq*poly_hi + poly_lo
-// set lsb(Y_lo) to 1
-// Y_hi := X
-// Scale := 1.0
-//
-// Endif
-//
-// Case exp_regular:
-//
-// The previous description contain enough information except the
-// computation of poly and the final Y_hi and Y_lo in the case for
-// expf(X)-1.
-//
-// The computation of poly for Step 2:
-//
-// rsq := r*r
-// poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
-//
-// For the case expf(X) - 1, we need to incorporate 2^(-K) into
-// Y_hi and Y_lo at the end of Step 4.
-//
-// If K > 10 then
-// Y_lo := Y_lo - 2^(-K)
-// Else
-// If K < -10 then
-// Y_lo := Y_hi + Y_lo
-// Y_hi := -2^(-K)
-// Else
-// Y_hi := Y_hi - 2^(-K)
-// End If
-// End If
//
+// API
+//*********************************************************************
+// float expm1f(float)
+//
+// Overview of operation
+//*********************************************************************
+// 1. Inputs of NaN, Inf, Zero, NatVal handled with special paths
+//
+// 2. |x| < 2^-40
+// Result = x, computed by x + x*x to handle appropriate flags and rounding
+//
+// 3. 2^-40 <= |x| < 2^-2
+// Result determined by 8th order Taylor series polynomial
+// expm1f(x) = x + A2*x^2 + ... + A8*x^8
+//
+// 4. x < -24.0
+// Here we know result is essentially -1 + eps, where eps only affects
+// rounded result. Set I.
+//
+// 5. x >= 88.7228
+// Result overflows. Set I, O, and call error support
+//
+// 6. 2^-2 <= x < 88.7228 or -24.0 <= x <= -2^-2
+// This is the main path. The algorithm is described below:
+
+// Take the input x. w is "how many log2/64 in x?"
+// w = x * 64/log2
+// NJ = int(w)
+// x = NJ*log2/64 + R
+
+// NJ = 64*n + j
+// x = n*log2 + (log2/64)*j + R
+//
+// So, exp(x) = 2^n * 2^(j/64)* exp(R)
+//
+// T = 2^n * 2^(j/64)
+// Construct 2^n
+// Get 2^(j/64) table
+// All the entries of the 2^(j/64) table are actually stored in DP
+// with exponent bits set to 0 -> multiplication by 2^n can be
+// performed by a logical "or" with the bits representing 2^n
+
+// exp(R) = 1 + (exp(R) - 1)
+// P = exp(R) - 1 approximated by Taylor series of 3rd degree
+// P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
+//
+
+// The final result is reconstructed as follows
+// expm1f(x) = T*P + (T - 1.0)
+
+// Special values
+//*********************************************************************
+// expm1f(+0) = +0.0
+// expm1f(-0) = -0.0
+
+// expm1f(+qnan) = +qnan
+// expm1f(-qnan) = -qnan
+// expm1f(+snan) = +qnan
+// expm1f(-snan) = -qnan
+
+// expm1f(-inf) = -1.0
+// expm1f(+inf) = +inf
+
+// Overflow and Underflow
+//*********************************************************************
+// expm1f(x) = largest single normal when
+// x = 88.7228 = 0x42b17217
+//
+// Underflow is handled as described in case 2 above.
+
+
+// Registers used
+//*********************************************************************
+// Floating Point registers used:
+// f8, input
+// f6,f7, f9 -> f15, f32 -> f45
+
+// General registers used:
+// r3, r20 -> r38
+
+// Predicate registers used:
+// p6 -> p15
+
+// Assembly macros
+//*********************************************************************
+// integer registers used
+// scratch
+rNJ = r3 // NOTE(review): presumably NJ = int(x*64/ln2) from the overview; its use is outside this view
+
+rExp_half = r20 // 0xfffe, biased exponent of 0.5 (used to build fA2)
+rSignexp_x = r21 // sign and exponent of x, read with getf.exp
+rExp_x = r22 // exponent of x: masked (biased) first, then bias subtracted
+rExp_mask = r23 // 0x1ffff
+rExp_bias = r24 // 0xffff
+rTmp = r25 // r25 is shared by rTmp, rM1_lim and rGt_ln
+rM1_lim = r25 // single-precision bit pattern of -24.0 (0xc1c00000)
+rGt_ln = r25
+rJ = r26 // NOTE(review): presumably j of NJ = 64*n + j -- confirm
+rN = r27 // NOTE(review): presumably n of NJ = 64*n + j -- confirm
+rTblAddr = r28 // pointer into _expf_table, post-incremented by the loads
+rLn2Div64 = r29 // double-precision bit pattern of ln(2)/64
+rRightShifter = r30 // double-precision right-shifter constant
+r64DivLn2 = r31 // double-precision bit pattern of 64/ln(2)
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10 // same register as fNint
+FR_Y = f1
+FR_RESULT = f8 // f8 also carries the input x
+// scratch
+fRightShifter = f6 // FP copy of rRightShifter
+f64DivLn2 = f7 // FP copy of r64DivLn2
+fNormX = f9 // x after fnorm.s1
+fNint = f10 // x*(64/ln2) + right shifter
+fN = f11
+fR = f12
+fLn2Div64 = f13 // FP copy of rLn2Div64
+fA2 = f14 // 0.5, built with setf.exp from rExp_half
+fA3 = f15 // 1/3!, loaded from _expf_table
+// stacked
+fP = f32
+fX3 = f33
+fT = f34
+fMIN_SGL_OFLOW_ARG = f35 // loaded from _expf_table: smallest arg that overflows
+fMAX_SGL_NORM_ARG = f36 // loaded from _expf_table: largest arg with a finite result
+fMAX_SGL_MINUS_1_ARG = f37 // -24.0, set from rM1_lim
+fA4 = f38 // f38 is shared by fA4, fA43 and fA432
+fA43 = f38
+fA432 = f38
+fRSqr = f39
+fA5 = f40
+fTmp = f41 // f41 is shared by fTmp, fGt_pln and fXsq
+fGt_pln = f41
+fXsq = f41 // x^2 for the small path
+fA7 = f42
+fA6 = f43 // f43 is shared by fA6 and fA65
+fA65 = f43 // A6*x + A5 (small path)
+fTm1 = f44
+fA8 = f45 // f45 is shared by every name from fA8 to fWre_urm_f8
+fA87 = f45 // A8*x + A7 (small path)
+fA8765 = f45
+fA8765432 = f45
+fWre_urm_f8 = f45
+
+RODATA
+.align 16
+LOCAL_OBJECT_START(_expf_table) // layout: 6 DP coefficients, 2 SP thresholds, then the 2^(j/64) table
+data8 0x3efa01a01a01a01a // A8 = 1/8!
+data8 0x3f2a01a01a01a01a // A7 = 1/7!
+data8 0x3f56c16c16c16c17 // A6 = 1/6!
+data8 0x3f81111111111111 // A5 = 1/5!
+data8 0x3fa5555555555555 // A4 = 1/4!
+data8 0x3fc5555555555555 // A3 = 1/3!
+// Single-precision argument thresholds, read as a pair by ldfps
+data4 0x42b17218 // Smallest sgl arg to overflow sgl result
+data4 0x42b17217 // Largest sgl arg to give sgl result
+//
+// 2^(j/64) table, j goes from 0 to 63; entries keep only the DP significand bits (exponent field is 0) so 2^n can be OR-ed in
+data8 0x0000000000000000 // 2^(0/64)
+data8 0x00002C9A3E778061 // 2^(1/64)
+data8 0x000059B0D3158574 // 2^(2/64)
+data8 0x0000874518759BC8 // 2^(3/64)
+data8 0x0000B5586CF9890F // 2^(4/64)
+data8 0x0000E3EC32D3D1A2 // 2^(5/64)
+data8 0x00011301D0125B51 // 2^(6/64)
+data8 0x0001429AAEA92DE0 // 2^(7/64)
+data8 0x000172B83C7D517B // 2^(8/64)
+data8 0x0001A35BEB6FCB75 // 2^(9/64)
+data8 0x0001D4873168B9AA // 2^(10/64)
+data8 0x0002063B88628CD6 // 2^(11/64)
+data8 0x0002387A6E756238 // 2^(12/64)
+data8 0x00026B4565E27CDD // 2^(13/64)
+data8 0x00029E9DF51FDEE1 // 2^(14/64)
+data8 0x0002D285A6E4030B // 2^(15/64)
+data8 0x000306FE0A31B715 // 2^(16/64)
+data8 0x00033C08B26416FF // 2^(17/64)
+data8 0x000371A7373AA9CB // 2^(18/64)
+data8 0x0003A7DB34E59FF7 // 2^(19/64)
+data8 0x0003DEA64C123422 // 2^(20/64)
+data8 0x0004160A21F72E2A // 2^(21/64)
+data8 0x00044E086061892D // 2^(22/64)
+data8 0x000486A2B5C13CD0 // 2^(23/64)
+data8 0x0004BFDAD5362A27 // 2^(24/64)
+data8 0x0004F9B2769D2CA7 // 2^(25/64)
+data8 0x0005342B569D4F82 // 2^(26/64)
+data8 0x00056F4736B527DA // 2^(27/64)
+data8 0x0005AB07DD485429 // 2^(28/64)
+data8 0x0005E76F15AD2148 // 2^(29/64)
+data8 0x0006247EB03A5585 // 2^(30/64)
+data8 0x0006623882552225 // 2^(31/64)
+data8 0x0006A09E667F3BCD // 2^(32/64)
+data8 0x0006DFB23C651A2F // 2^(33/64)
+data8 0x00071F75E8EC5F74 // 2^(34/64)
+data8 0x00075FEB564267C9 // 2^(35/64)
+data8 0x0007A11473EB0187 // 2^(36/64)
+data8 0x0007E2F336CF4E62 // 2^(37/64)
+data8 0x00082589994CCE13 // 2^(38/64)
+data8 0x000868D99B4492ED // 2^(39/64)
+data8 0x0008ACE5422AA0DB // 2^(40/64)
+data8 0x0008F1AE99157736 // 2^(41/64)
+data8 0x00093737B0CDC5E5 // 2^(42/64)
+data8 0x00097D829FDE4E50 // 2^(43/64)
+data8 0x0009C49182A3F090 // 2^(44/64)
+data8 0x000A0C667B5DE565 // 2^(45/64)
+data8 0x000A5503B23E255D // 2^(46/64)
+data8 0x000A9E6B5579FDBF // 2^(47/64)
+data8 0x000AE89F995AD3AD // 2^(48/64)
+data8 0x000B33A2B84F15FB // 2^(49/64)
+data8 0x000B7F76F2FB5E47 // 2^(50/64)
+data8 0x000BCC1E904BC1D2 // 2^(51/64)
+data8 0x000C199BDD85529C // 2^(52/64)
+data8 0x000C67F12E57D14B // 2^(53/64)
+data8 0x000CB720DCEF9069 // 2^(54/64)
+data8 0x000D072D4A07897C // 2^(55/64)
+data8 0x000D5818DCFBA487 // 2^(56/64)
+data8 0x000DA9E603DB3285 // 2^(57/64)
+data8 0x000DFC97337B9B5F // 2^(58/64)
+data8 0x000E502EE78B3FF6 // 2^(59/64)
+data8 0x000EA4AFA2A490DA // 2^(60/64)
+data8 0x000EFA1BEE615A27 // 2^(61/64)
+data8 0x000F50765B6E4540 // 2^(62/64)
+data8 0x000FA7C1819E90D8 // 2^(63/64)
+LOCAL_OBJECT_END(_expf_table)
-#include "libm_support.h"
-
-
-GR_SAVE_B0 = r60
-GR_SAVE_PFS = r59
-GR_SAVE_GP = r61
-
-GR_Parameter_X = r62
-GR_Parameter_Y = r63
-GR_Parameter_RESULT = r64
-GR_Parameter_TAG = r65
-
-FR_X = f9
-FR_Y = f1
-FR_RESULT = f99
-
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
-.align 64
-Constants_exp_64_Arg:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
-data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000
-data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
-data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
-// /* Inv_L, L_hi, L_lo */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
-
-.align 64
-Constants_exp_64_Exponents:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
-data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
-data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
-data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
-
-.align 64
-Constants_exp_64_A:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
-data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
-data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
-data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
-
-.align 64
-Constants_exp_64_P:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
-data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
-data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
-data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
-data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
-data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
-data4 0x000004C7,0x80000000,0x00003FFE,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
-
-.align 64
-Constants_exp_64_Q:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
-data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000
-data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000
-data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000
-data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000
-data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000
-data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000
-data4 0x00000000,0x80000000,0x00003FFE,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
-
-.align 64
-Constants_exp_64_T1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
-data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
-data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
-data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
-data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
-data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
-data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
-data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
-data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
-data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
-data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
-data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
-data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
-data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
-data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
-data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
-data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
-
-.align 64
-Constants_exp_64_T2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
-data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
-data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
-data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
-data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
-data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
-data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
-data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
-data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
-data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
-data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
-data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
-data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
-data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
-data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
-data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
-data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
-
-.align 64
-Constants_exp_64_W1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
-data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
-data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
-data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
-data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
-data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
-data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
-data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
-data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
-data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
-data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
-data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
-data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
-data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
-data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
-data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
-data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
-data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
-data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
-data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
-data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
-data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
-data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
-data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
-data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
-data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
-data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
-data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
-data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
-data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
-data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
-data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
-data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
-
-.align 64
-Constants_exp_64_W2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
-data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
-data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
-data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
-data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
-data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
-data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
-data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
-data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
-data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
-data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
-data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
-data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
-data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
-data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
-data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
-data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
-data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
-data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
-data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
-data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
-data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
-data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
-data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
-data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
-data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
-data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
-data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
-data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
-data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
-data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
-data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
-data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
.section .text
-.proc expm1f#
-.global expm1f#
-.align 64
-
-expm1f:
-#ifdef _LIBC
-.global __expm1f#
-__expm1f:
-#endif
-
+GLOBAL_IEEE754_ENTRY(expm1f)
-{ .mii
- alloc r32 = ar.pfs,0,30,4,0
-(p0) add r33 = 1, r0
-(p0) cmp.eq.unc p7, p0 = r0, r0
-}
-;;
-
-//
-// Set p7 true for expm1
-// Set Flag = r33 = 1 for expm1
-// These are really no longer necesary, but are a remnant
-// when this file had multiple entry points.
-// They should be carefully removed
-
-
-{ .mfi
-(p0) add r32 = 0,r0
-(p0) fnorm.s1 f9 = f8
- nop.i 0
-}
-
-{ .mfi
- nop.m 0
-//
-// Set p7 false for exp
-// Set Flag = r33 = 0 for exp
-//
-(p0) fclass.m.unc p6, p8 = f8, 0x1E7
- nop.i 0 ;;
+{ .mlx
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
}
-
-{ .mfi
- nop.m 999
-(p0) fclass.nm.unc p9, p0 = f8, 0x1FF
- nop.i 0
+{ .mlx
+ addl rTblAddr = @ltoff(_expf_table),gp
+ movl rRightShifter = 0x43E8000000000000 // DP Right Shifter
}
+;;
{ .mfi
- nop.m 999
-(p0) mov f36 = f1
- nop.i 999 ;;
-}
-
-//
-// Identify NatVals, NaNs, Infs, and Zeros.
-// Identify EM unsupporteds.
-// Save special input registers
-//
-// Create FR_X_cor = 0.0
-// GR_Flag = 0
-// GR_Expo_Range = 0 (r32) for single precision
-// FR_Scale = 1.0
-//
-
-{ .mfb
- nop.m 999
-(p0) mov f32 = f0
-(p6) br.cond.spnt EXPF_64_SPECIAL ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt EXPF_64_UNSUPPORTED ;;
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ fclass.m p14, p0 = f8 , 0x22 // test for -INF
+ mov rExp_mask = 0x1ffff // Exponent mask
}
-
-//
-// Branch out for special input values
-//
-
{ .mfi
-(p0) cmp.ne.unc p12, p13 = 0x01, r33
-(p0) fcmp.lt.unc.s0 p9,p0 = f8, f0
-(p0) cmp.eq.unc p15, p0 = r0, r0
-}
-
-//
-// Raise possible denormal operand exception
-// Normalize x
-//
-// This function computes expf( x + x_cor)
-// Input FR 1: FR_X
-// Input FR 2: FR_X_cor
-// Input GR 1: GR_Flag
-// Input GR 2: GR_Expo_Range
-// Output FR 3: FR_Y_hi
-// Output FR 4: FR_Y_lo
-// Output FR 5: FR_Scale
-// Output PR 1: PR_Safe
-
-//
-// Prepare to load constants
-// Set Safe = True
-//
-
-{ .mmi
-(p0) addl r34 = @ltoff(Constants_exp_64_Arg#),gp
-(p0) addl r40 = @ltoff(Constants_exp_64_W1#),gp
-(p0) addl r41 = @ltoff(Constants_exp_64_W2#),gp
-};;
-
-{ .mmi
- ld8 r34 = [r34]
- ld8 r40 = [r40]
-(p0) addl r50 = @ltoff(Constants_exp_64_T1#), gp
+ nop.m 0
+ fnorm.s1 fNormX = f8 // normalized x
+ nop.i 0
}
;;
-{ .mmi
- ld8 r41 = [r41]
-(p0) ldfe f37 = [r34],16
-(p0) addl r51 = @ltoff(Constants_exp_64_T2#), gp
-}
-;;
-//
-// N = fcvt.fx(float_N)
-// Set p14 if -6 > expo_X
-//
-//
-// Bias = 0x0FFFF
-// expo_X = expo_X and Mask
-//
-{ .mmi
- ld8 r50 = [r50]
-(p0) ldfe f40 = [r34],16
- nop.i 999
+{ .mfi
+ setf.d f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
+ fclass.m p9, p0 = f8 , 0x0b // test for x unorm
+ mov rExp_bias = 0xffff // Exponent bias
}
-;;
-
-{ .mlx
- nop.m 999
-(p0) movl r58 = 0x0FFFF
-};;
-
-//
-// Load W2_ptr
-// Branch to SMALL is expo_X < -6
-//
-//
-// float_N = X * L_Inv
-// expo_X = exponent of X
-// Mask = 0x1FFFF
-//
-
-{ .mmi
- ld8 r51 = [r51]
-(p0) ldfe f41 = [r34],16
-//
-// float_N = X * L_Inv
-// expo_X = exponent of X
-// Mask = 0x1FFFF
-//
- nop.i 0
-};;
-
{ .mlx
-(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp
-(p0) movl r39 = 0x1FFFF
+ // load Right Shifter to FP reg
+ setf.d fRightShifter = rRightShifter
+ movl rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
}
;;
-{ .mmi
- ld8 r34 = [r34]
-(p0) getf.exp r37 = f9
- nop.i 999
-}
-;;
-
-{ .mii
- nop.m 999
- nop.i 999
-(p0) and r37 = r37, r39 ;;
-}
-
-{ .mmi
-(p0) sub r37 = r37, r58 ;;
-(p0) cmp.gt.unc p14, p0 = -6, r37
-(p0) cmp.lt.unc p10, p0 = 14, r37 ;;
-}
-
{ .mfi
- nop.m 999
-//
-// Load L_inv
-// Set p12 true for Flag = 0 (exp)
-// Set p13 true for Flag = 1 (expm1)
-//
-(p0) fmpy.s1 f38 = f9, f37
- nop.i 999 ;;
+ ldfpd fA8, fA7 = [rTblAddr], 16
+ fcmp.eq.s1 p13, p0 = f0, f8 // test for x = 0.0
+ mov rExp_half = 0xfffe
}
-
{ .mfb
- nop.m 999
-//
-// Load L_hi
-// expo_X = expo_X - Bias
-// get W1_ptr
-//
-(p0) fcvt.fx.s1 f39 = f38
-(p14) br.cond.spnt EXPF_SMALL ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p10) br.cond.spnt EXPF_HUGE ;;
-}
-
-{ .mmi
-(p0) shladd r34 = r32,4,r34
-(p0) addl r35 = @ltoff(Constants_exp_64_A#),gp
- nop.i 999
+ setf.d fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
+ nop.f 0
+(p9) br.cond.spnt EXPM1_UNORM // Branch if x unorm
}
;;
-{ .mmi
- ld8 r35 = [r35]
- nop.m 999
- nop.i 999
+EXPM1_COMMON:
+{ .mfb
+ ldfpd fA6, fA5 = [rTblAddr], 16
+(p14) fms.s.s0 f8 = f0, f0, f1 // result if x = -inf
+(p14) br.ret.spnt b0 // exit here if x = -inf
}
;;
-//
-// Load T_1,T_2
-//
-
-{ .mmb
-(p0) ldfe f51 = [r35],16
-(p0) ld8 r45 = [r34],8
- nop.b 999 ;;
-}
-//
-// Set Safe = True if k >= big_expo_neg
-// Set Safe = False if k < big_expo_neg
-//
-
-{ .mmb
-(p0) ldfe f49 = [r35],16
-(p0) ld8 r48 = [r34],0
- nop.b 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Branch to HUGE is expo_X > 14
-//
-(p0) fcvt.xf f38 = f39
- nop.i 999 ;;
-}
-
-{ .mfi
-(p0) getf.sig r52 = f39
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) extr.u r43 = r52, 6, 6 ;;
-//
-// r = r - float_N * L_lo
-// K = extr(N_fix,12,52)
-//
-(p0) shladd r40 = r43,3,r40 ;;
-}
-
-{ .mfi
-(p0) shladd r50 = r43,2,r50
-(p0) fnma.s1 f42 = f40, f38, f9
-//
-// float_N = float(N)
-// N_fix = signficand N
-//
-(p0) extr.u r42 = r52, 0, 6
-}
-
-{ .mmi
-(p0) ldfd f43 = [r40],0 ;;
-(p0) shladd r41 = r42,3,r41
-(p0) shladd r51 = r42,2,r51
-}
-//
-// W_1_p1 = 1 + W_1
-//
-
-{ .mmi
-(p0) ldfs f44 = [r50],0 ;;
-(p0) ldfd f45 = [r41],0
-//
-// M_2 = extr(N_fix,0,6)
-// M_1 = extr(N_fix,6,6)
-// r = X - float_N * L_hi
-//
-(p0) extr r44 = r52, 12, 52
-}
-
-{ .mmi
-(p0) ldfs f46 = [r51],0 ;;
-(p0) sub r46 = r58, r44
-(p0) cmp.gt.unc p8, p15 = r44, r45
-}
-//
-// W = W_1 + W_1_p1*W_2
-// Load A_2
-// Bias_m_K = Bias - K
-//
-
-{ .mii
-(p0) ldfe f40 = [r35],16
-//
-// load A_1
-// poly = A_2 + r*A_3
-// rsq = r * r
-// neg_2_mK = exponent of Bias_m_k
-//
-(p0) add r47 = r58, r44 ;;
-//
-// Set Safe = True if k <= big_expo_pos
-// Set Safe = False if k > big_expo_pos
-// Load A_3
-//
-(p15) cmp.lt p8,p15 = r44,r48 ;;
-}
-
-{ .mmf
-(p0) setf.exp f61 = r46
-//
-// Bias_p + K = Bias + K
-// T = T_1 * T_2
-//
-(p0) setf.exp f36 = r47
-(p0) fnma.s1 f42 = f41, f38, f42 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Load W_1,W_2
-// Load big_exp_pos, load big_exp_neg
-//
-(p0) fadd.s1 f47 = f43, f1
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 f52 = f42, f51, f49
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fmpy.s1 f48 = f42, f42
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fmpy.s1 f53 = f44, f46
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 f54 = f45, f47, f43
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fneg f61 = f61
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 f52 = f42, f52, f40
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fadd.s1 f55 = f54, f1
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// W + Wp1 * poly
-//
-(p0) mov f34 = f53
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// A_1 + r * poly
-// Scale = setf_expf(Bias_p_k)
-//
-(p0) fma.s1 f52 = f48, f52, f42
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// poly = r + rsq(A_1 + r*poly)
-// Wp1 = 1 + W
-// neg_2_mK = -neg_2_mK
-//
-(p0) fma.s1 f35 = f55, f52, f54
- nop.i 999 ;;
-}
-
{ .mfb
- nop.m 999
-(p0) fmpy.s1 f35 = f35, f53
-//
-// Y_hi = T
-// Y_lo = T * (W + Wp1*poly)
-//
-(p12) br.cond.sptk EXPF_MAIN ;;
-}
-//
-// Branch if expf(x)
-// Continue for expf(x-1)
-//
-
-{ .mii
-(p0) cmp.lt.unc p12, p13 = 10, r44
- nop.i 999 ;;
-//
-// Set p12 if 10 < K, Else p13
-//
-(p13) cmp.gt.unc p13, p14 = -10, r44 ;;
+ ldfpd fA4, fA3 = [rTblAddr], 16
+ fclass.m p15, p0 = f8 , 0x1e1 // test for NaT,NaN,+Inf
+(p13) br.ret.spnt b0 // exit here if x =0.0, result is x
}
-//
-// K > 10: Y_lo = Y_lo + neg_2_mK
-// K <=10: Set p13 if -10 > K, Else set p14
-//
+;;
{ .mfi
-(p13) cmp.eq p15, p0 = r0, r0
-(p14) fadd.s1 f34 = f61, f34
- nop.i 999 ;;
+ // overflow thresholds
+ ldfps fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
+ fma.s1 fXsq = fNormX, fNormX, f0 // x^2 for small path
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
}
-
-{ .mfi
- nop.m 999
-(p12) fadd.s1 f35 = f35, f61
- nop.i 999 ;;
+{ .mlx
+ nop.m 0
+ movl rM1_lim = 0xc1c00000 // Minus -1 limit (-24.0), SP
}
+;;
{ .mfi
- nop.m 999
-(p13) fadd.s1 f35 = f35, f34
- nop.i 999
+ setf.exp fA2 = rExp_half
+ // x*(64/ln(2)) + Right Shifter
+ fma.s1 fNint = fNormX, f64DivLn2, fRightShifter
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
}
-
{ .mfb
- nop.m 999
-//
-// K <= 10 and K < -10, Set Safe = True
-// K <= 10 and K < 10, Y_lo = Y_hi + Y_lo
-// K <= 10 and K > =-10, Y_hi = Y_hi + neg_2_mk
-//
-(p13) mov f34 = f61
-(p0) br.cond.sptk EXPF_MAIN ;;
-}
-EXPF_SMALL:
-{ .mmi
-(p12) addl r35 = @ltoff(Constants_exp_64_P#), gp
-(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp
- nop.i 999
-}
-;;
-
-{ .mmi
-(p12) ld8 r35 = [r35]
- ld8 r34 = [r34]
- nop.i 999
+ nop.m 0
+(p15) fma.s.s0 f8 = f8, f1, f0 // result if x = NaT,NaN,+Inf
+(p15) br.ret.spnt b0 // exit here if x = NaT,NaN,+Inf
}
;;
-
-{ .mmi
-(p13) addl r35 = @ltoff(Constants_exp_64_Q#), gp
- nop.m 999
- nop.i 999
-}
-;;
-
-
-//
-// Return
-// K <= 10 and K < 10, Y_hi = neg_2_mk
-//
-// /*******************************************************/
-// /*********** Branch EXP_SMALL *************************/
-// /*******************************************************/
-
{ .mfi
-(p13) ld8 r35 = [r35]
-(p0) mov f42 = f9
-(p0) add r34 = 0x48,r34
+ setf.s fMAX_SGL_MINUS_1_ARG = rM1_lim // -1 threshold, -24.0
+ nop.f 0
+ cmp.gt p7, p8 = -2, rExp_x // Test |x| < 2^(-2)
}
;;
-//
-// Flag = 0
-// r4 = rsq * rsq
-//
-
{ .mfi
-(p0) ld8 r49 =[r34],0
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// Flag = 1
-//
-(p0) cmp.lt.unc p14, p0 = r37, r49 ;;
+(p7) cmp.gt.unc p6, p7 = -40, rExp_x // Test |x| < 2^(-40)
+ fma.s1 fA87 = fA8, fNormX, fA7 // Small path, A8*x+A7
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-//
-// r = X
-//
-(p0) fmpy.s1 f48 = f42, f42
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fA65 = fA6, fNormX, fA5 // Small path, A6*x+A5
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
-//
-// rsq = r * r
-//
-(p0) fmpy.s1 f50 = f48, f48
-//
-// Is input very small?
-//
-(p14) br.cond.spnt EXPF_VERY_SMALL ;;
-}
-//
-// Flag_not1: Y_hi = 1.0
-// Flag is 1: r6 = rsq * r4
-//
-
-{ .mfi
-(p12) ldfe f52 = [r35],16
-(p12) mov f34 = f1
-(p0) add r53 = 0x1,r0 ;;
-}
-
-{ .mfi
-(p13) ldfe f51 = [r35],16
-//
-// Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
-//
-(p13) mov f34 = f9
- nop.i 999 ;;
-}
-
-{ .mmf
-(p12) ldfe f53 = [r35],16
-//
-// For Flag_not_1, Y_hi = X
-// Scale = 1
-// Create 0x000...01
-//
-(p0) setf.sig f37 = r53
-(p0) mov f36 = f1 ;;
-}
-
-{ .mmi
-(p13) ldfe f52 = [r35],16 ;;
-(p12) ldfe f54 = [r35],16
- nop.i 999 ;;
-}
-
-{ .mfi
-(p13) ldfe f53 = [r35],16
-(p13) fmpy.s1 f58 = f48, f50
- nop.i 999 ;;
-}
-//
-// Flag_not1: poly_lo = P_5 + r*P_6
-// Flag_1: poly_lo = Q_6 + r*Q_7
-//
-
-{ .mmi
-(p13) ldfe f54 = [r35],16 ;;
-(p12) ldfe f55 = [r35],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p12) ldfe f56 = [r35],16 ;;
-(p13) ldfe f55 = [r35],16
- nop.i 999 ;;
-}
-
-{ .mmi
-(p12) ldfe f57 = [r35],0 ;;
-(p13) ldfe f56 = [r35],16
- nop.i 999 ;;
-}
-
-{ .mfi
-(p13) ldfe f57 = [r35],0
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// For Flag_not_1, load p5,p6,p1,p2
-// Else load p5,p6,p1,p2
-//
-(p12) fma.s1 f60 = f52, f42, f53
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s.s0 f8 = f8, f8, f8 // If |x| < 2^-40, result=x+x*x
+(p6) br.ret.spnt b0 // Exit if |x| < 2^-40
}
+;;
{ .mfi
- nop.m 999
-(p13) fma.s1 f60 = f51, f42, f52
- nop.i 999 ;;
+ nop.m 0
+ // check for overflow
+ fcmp.gt.s1 p15, p14 = fNormX, fMIN_SGL_OFLOW_ARG
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p12) fma.s1 f60 = f60, f42, f54
- nop.i 999 ;;
+ nop.m 0
+ fms.s1 fN = fNint, f1, fRightShifter // n in FP register
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p12) fma.s1 f59 = f56, f42, f57
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s1 fA43 = fA4, fNormX, fA3 // Small path, A4*x+A3
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p13) fma.s1 f60 = f42, f60, f53
- nop.i 999 ;;
+ getf.sig rNJ = fNint // bits of n, j
+(p7) fma.s1 fA8765 = fA87, fXsq, fA65 // Small path, A87*xsq+A65
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p12) fma.s1 f59 = f59, f48, f42
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+(p7) fma.s1 fX3 = fXsq, fNormX, f0 // Small path, x^3
+ // branch out if overflow
+(p15) br.cond.spnt EXPM1_CERTAIN_OVERFLOW
}
+;;
{ .mfi
- nop.m 999
-//
-// Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7)
-// Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
-// Flag_not1: poly_hi = (P_1 + r*P_2)
-//
-(p13) fmpy.s1 f60 = f60, f58
- nop.i 999 ;;
+ addl rN = 0xffff-63, rNJ // biased and shifted n
+ fnma.s1 fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
+ extr.u rJ = rNJ , 0 , 6 // bits of j
}
+;;
{ .mfi
- nop.m 999
-(p12) fma.s1 f60 = f60, f42, f55
- nop.i 999 ;;
+ shladd rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
+ // check for certain -1
+ fcmp.le.s1 p13, p0 = fNormX, fMAX_SGL_MINUS_1_ARG
+ shr rN = rN, 6 // biased n
}
-
{ .mfi
- nop.m 999
-//
-// Flag_1: poly_lo = r6 *(Q_5 + ....)
-// Flag_not1: poly_hi = r + rsq *(P_1 + r*P_2)
-//
-(p12) fma.s1 f35 = f60, f50, f59
- nop.i 999
+ nop.m 0
+(p7) fma.s1 fA432 = fA43, fNormX, fA2 // Small path, A43*x+A2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p13) fma.s1 f59 = f54, f42, f55
- nop.i 999 ;;
+ ld8 rJ = [rJ]
+ nop.f 0
+ shl rN = rN , 52 // 2^n bits in DP format
}
+;;
-{ .mfi
- nop.m 999
-//
-// Flag_not1: Y_lo = rsq* poly_hi + poly_lo
-// Flag_1: poly_lo = rsq* poly_hi + poly_lo
-//
-(p13) fma.s1 f59 = f59, f42, f56
- nop.i 999 ;;
+{ .mmi
+ or rN = rN, rJ // bits of 2^n * 2^(j/64) in DP format
+(p13) mov rTmp = 1 // Make small value for -1 path
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Flag_not_1: (P_1 + r*P_2)
-//
-(p13) fma.s1 f59 = f59, f42, f57
- nop.i 999 ;;
+ setf.d fT = rN // 2^n * 2^(j/64)
+ // check for possible overflow (only happens if input higher precision)
+(p14) fcmp.gt.s1 p14, p0 = fNormX, fMAX_SGL_NORM_ARG
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-//
-// Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2)
-//
-(p13) fma.s1 f35 = f59, f48, f60
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s1 fA8765432 = fA8765, fX3, fA432 // A8765*x^3+A432
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Create 0.000...01
-//
-(p0) for f37 = f35, f37
- nop.i 999 ;;
+(p13) setf.exp fTmp = rTmp // Make small value for -1 path
+ fma.s1 fP = fA3, fR, fA2 // A3*R + A2
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-//
-// Set lsb of Y_lo to 1
-//
-(p0) fmerge.se f35 = f35,f37
-(p0) br.cond.sptk EXPF_MAIN ;;
-}
-EXPF_VERY_SMALL:
-
-{ .mmi
- nop.m 999
-(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
- nop.i 999;;
-}
-
-{ .mfi
-(p13) ld8 r34 = [r34];
-(p12) mov f35 = f9
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+(p13) br.cond.spnt EXPM1_CERTAIN_MINUS_ONE // Branch if x <= -24.0
}
+;;
{ .mfb
- nop.m 999
-(p12) mov f34 = f1
-(p12) br.cond.sptk EXPF_MAIN ;;
-}
-
-{ .mlx
-(p13) add r34 = 8,r34
-(p13) movl r39 = 0x0FFFE ;;
-}
-//
-// Load big_exp_neg
-// Create 1/2's exponent
-//
-
-{ .mii
-(p13) setf.exp f56 = r39
-(p13) shladd r34 = r32,4,r34 ;;
- nop.i 999
-}
-//
-// Negative exponents are stored after positive
-//
-
-{ .mfi
-(p13) ld8 r45 = [r34],0
-//
-// Y_hi = x
-// Scale = 1
-//
-(p13) fmpy.s1 f35 = f9, f9
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Reset Safe if necessary
-// Create 1/2
-//
-(p13) mov f34 = f9
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s.s0 f8 = fA8765432, fXsq, fNormX // Small path,
+ // result=xsq*A8765432+x
+(p7) br.ret.spnt b0 // Exit if 2^-40 <= |x| < 2^-2
}
+;;
{ .mfi
-(p13) cmp.lt.unc p0, p15 = r37, r45
-(p13) mov f36 = f1
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fP = fP, fRSqr, fR // P = (A3*R + A2)*Rsqr + R
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
-//
-// Y_lo = x * x
-//
-(p13) fmpy.s1 f35 = f35, f56
-//
-// Y_lo = x*x/2
-//
-(p13) br.cond.sptk EXPF_MAIN ;;
-}
-EXPF_HUGE:
-
-{ .mfi
- nop.m 999
-(p0) fcmp.gt.unc.s1 p14, p0 = f9, f0
- nop.i 999
-}
-
-{ .mlx
- nop.m 999
-(p0) movl r39 = 0x15DC0 ;;
-}
-
-{ .mfi
-(p14) setf.exp f34 = r39
-(p14) mov f35 = f1
-(p14) cmp.eq p0, p15 = r0, r0 ;;
+ nop.m 0
+ fms.s1 fTm1 = fT, f1, f1 // T - 1.0
+(p14) br.cond.spnt EXPM1_POSSIBLE_OVERFLOW
}
+;;
{ .mfb
- nop.m 999
-(p14) mov f36 = f34
-//
-// If x > 0, Set Safe = False
-// If x > 0, Y_hi = 2**(24,000)
-// If x > 0, Y_lo = 1.0
-// If x > 0, Scale = 2**(24,000)
-//
-(p14) br.cond.sptk EXPF_MAIN ;;
-}
-
-{ .mlx
- nop.m 999
-(p12) movl r39 = 0xA240
-}
-
-{ .mlx
- nop.m 999
-(p12) movl r38 = 0xA1DC ;;
-}
-
-{ .mmb
-(p13) cmp.eq p15, p14 = r0, r0
-(p12) setf.exp f34 = r39
- nop.b 999 ;;
-}
-
-{ .mlx
-(p12) setf.exp f35 = r38
-(p13) movl r39 = 0xFF9C
-}
-
-{ .mfi
- nop.m 999
-(p13) fsub.s1 f34 = f0, f1
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p12) mov f36 = f34
-(p12) cmp.eq p0, p15 = r0, r0 ;;
-}
-
-{ .mfi
-(p13) setf.exp f35 = r39
-(p13) mov f36 = f1
- nop.i 999 ;;
-}
-EXPF_MAIN:
-
-{ .mfi
-(p0) cmp.ne.unc p12, p0 = 0x01, r33
-(p0) fmpy.s1 f101 = f36, f35
- nop.i 999 ;;
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fTm1
+ br.ret.sptk b0 // Result for main path
+ // minus_one_limit < x < -2^-2
+ // and +2^-2 <= x < overflow_limit
}
+;;
+// Here if x unorm
+EXPM1_UNORM:
{ .mfb
- nop.m 999
-(p0) fma.s.s0 f99 = f34, f36, f101
-(p15) br.cond.sptk EXPF_64_RETURN ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x01
- nop.i 999
-}
-
-{ .mlx
- nop.m 999
-(p0) movl r50 = 0x0000000001007F ;;
-}
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + RZ + TD (Underflows)
-//
-//
-// If (Safe) is true, then
-// Compute result using user supplied status field.
-// No overflow or underflow here, but perhaps inexact.
-// Return
-// Else
-// Determine if overflow or underflow was raised.
-// Fetch +/- overflow threshold for IEEE single, double,
-// double extended
-//
-
-{ .mfi
-(p0) setf.exp f60 = r50
-(p0) fma.s.s3 f102 = f34, f36, f101
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x40
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// For Safe, no need to check for over/under.
-// For expm1, handle errors like exp.
-//
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s.s2 f100 = f34, f36, f101
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x40
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fclass.m.unc p12, p0 = f102, 0x00F
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = f102, 0x00F
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p7) fcmp.ge.unc.s1 p10, p0 = f100, f60
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// Create largest double exponent + 1.
-// Create smallest double exponent - 1.
-//
-(p0) fcmp.ge.unc.s1 p8, p0 = f100, f60
- nop.i 999 ;;
-}
-//
-// fcmp: resultS2 >= + overflow threshold -> set (a) if true
-// fcmp: resultS2 <= - overflow threshold -> set (b) if true
-// fclass: resultS3 is denorm/unorm/0 -> set (d) if true
-//
-
-{ .mib
-(p10) mov GR_Parameter_TAG = 43
- nop.i 999
-(p10) br.cond.sptk __libm_error_region ;;
-}
-
-{ .mib
-(p8) mov GR_Parameter_TAG = 16
- nop.i 999
-(p8) br.cond.sptk __libm_error_region ;;
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk EXPM1_COMMON
}
-//
-// Report that exp overflowed
-//
-
-{ .mib
-(p12) mov GR_Parameter_TAG = 44
- nop.i 999
-(p12) br.cond.sptk __libm_error_region ;;
-}
-
-{ .mib
-(p11) mov GR_Parameter_TAG = 17
- nop.i 999
-(p11) br.cond.sptk __libm_error_region ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Report that exp underflowed
-//
-(p0) br.cond.sptk EXPF_64_RETURN ;;
-}
-EXPF_64_SPECIAL:
+;;
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p0 = f8, 0x0c3
- nop.i 999
+// here if result will be -1 and inexact, x <= -24.0
+EXPM1_CERTAIN_MINUS_ONE:
+{ .mfb
+ nop.m 0
+ fms.s.s0 f8 = fTmp, fTmp, f1 // Result -1, and Inexact set
+ br.ret.sptk b0
}
+;;
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p13, p8 = f8, 0x007
- nop.i 999 ;;
-}
+EXPM1_POSSIBLE_OVERFLOW:
-{ .mfi
- nop.m 999
-(p7) fclass.m.unc p14, p0 = f8, 0x007
- nop.i 999
-}
+// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
+// This cannot happen if input is a single, only if input higher precision.
+// Overflow is a possibility, not a certainty.
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p12, p9 = f8, 0x021
- nop.i 999 ;;
-}
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest single, then we have
+// overflow
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = f8, 0x022
- nop.i 999
+ mov rGt_ln = 0x1007f // Exponent for largest sgl + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p10, p0 = f8, 0x022
- nop.i 999 ;;
+ setf.exp fGt_pln = rGt_ln // Create largest single + 1 ulp
+ fma.s.s2 fWre_urm_f8 = fP, fT, fTm1 // Result with wre set
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// Identify +/- 0, Inf, or -Inf
-// Generate the right kind of NaN.
-//
-(p13) fadd.s.s0 f99 = f0, f1
- nop.i 999 ;;
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p14) mov f99 = f8
- nop.i 999 ;;
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
-(p6) fadd.s.s0 f99 = f8, f1
-//
-// expf(+/-0) = 1
-// expm1f(+/-0) = +/-0
-// No exceptions raised
-//
-(p6) br.cond.sptk EXPF_64_RETURN ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p14) br.cond.sptk EXPF_64_RETURN ;;
-}
-
-{ .mfi
- nop.m 999
-(p11) mov f99 = f0
- nop.i 999 ;;
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt EXPM1_CERTAIN_OVERFLOW // Branch if overflow
}
+;;
{ .mfb
- nop.m 999
-(p10) fsub.s.s1 f99 = f0, f1
-//
-// expf(-Inf) = 0
-// expm1f(-Inf) = -1
-// No exceptions raised.
-//
-(p10) br.cond.sptk EXPF_64_RETURN ;;
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fTm1
+ br.ret.sptk b0 // Exit if really no overflow
}
+;;
-{ .mfb
- nop.m 999
-(p12) fmpy.s.s1 f99 = f8, f1
-//
-// expf(+Inf) = Inf
-// No exceptions raised.
-//
-(p0) br.cond.sptk EXPF_64_RETURN ;;
+// here if overflow
+EXPM1_CERTAIN_OVERFLOW:
+{ .mmi
+ addl rTmp = 0x1FFFE, r0;;
+ setf.exp fTmp = rTmp
+ nop.i 999
}
-EXPF_64_UNSUPPORTED:
+;;
-{ .mfb
- nop.m 999
-(p0) fmpy.s.s0 f99 = f8, f0
- nop.b 0;;
+{ .mfi
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ fmerge.s FR_X = fNormX,fNormX
+ nop.i 0
}
-
-EXPF_64_RETURN:
{ .mfb
- nop.m 999
-(p0) mov f8 = f99
-(p0) br.ret.sptk b0
+ mov GR_Parameter_TAG = 43
+ fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
}
-.endp expm1f
-ASM_SIZE_DIRECTIVE(expm1f)
+;;
+GLOBAL_IEEE754_END(expm1f)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
- nop.f 0
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 999
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
- stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
.body
-{ .mib
- stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+{ .mfi
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ nop.f 0
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
}
{ .mib
- stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
+
{ .mmi
- ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
-};;
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/s_expm1l.S b/sysdeps/ia64/fpu/s_expm1l.S
index e53d3c8d7c..069856d244 100644
--- a/sysdeps/ia64/fpu/s_expm1l.S
+++ b/sysdeps/ia64/fpu/s_expm1l.S
@@ -1,10 +1,10 @@
-.file "exp_m1l.s"
+.file "expl_m1.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,15 +35,22 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial Version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 07/07/01 Improved speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
+// 03/11/03 Improved accuracy and performance, corrected missing inexact flags
+// 04/17/03 Eliminated misplaced and unused data label
//
-// *********************************************************************
+//*********************************************************************
//
// Function: Combined expl(x) and expm1l(x), where
// x
@@ -51,20 +58,20 @@
// x
// expm1l(x) = e - 1 for double-extended precision x values
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
-// f9,f32-f61, f99-f102
+// f9-f15,f32-f77
//
// General Purpose Registers:
-// r32-r61
-// r62-r65 (Used to pass arguments to error handling routine)
+// r14-r38
+// r35-r38 (Used to pass arguments to error handling routine)
//
// Predicate Registers: p6-p15
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
@@ -74,39 +81,37 @@
// (Error Handling Routine called for overflow and Underflow)
// Inexact raised when appropriate by algorithm
//
-// expl(inf) = inf
-// expl(-inf) = +0
-// expl(SNaN) = QNaN
-// expl(QNaN) = QNaN
-// expl(0) = 1
-// expl(EM_special Values) = QNaN
-// expl(inf) = inf
-// expm1l(-inf) = -1
-// expm1l(SNaN) = QNaN
-// expm1l(QNaN) = QNaN
-// expm1l(0) = 0
-// expm1l(EM_special Values) = QNaN
+// exp(inf) = inf
+// exp(-inf) = +0
+// exp(SNaN) = QNaN
+// exp(QNaN) = QNaN
+// exp(0) = 1
+// exp(EM_special Values) = QNaN
+// expm1(inf) = inf
+// expm1(-inf) = -1
+// expm1(SNaN) = QNaN
+// expm1(QNaN) = QNaN
+// expm1(0) = 0
+// expm1(EM_special Values) = QNaN
//
-// *********************************************************************
+//*********************************************************************
//
// Implementation and Algorithm Notes:
//
// ker_exp_64( in_FR : X,
-// in_GR : Flag,
-// in_GR : Expo_Range
// out_FR : Y_hi,
// out_FR : Y_lo,
// out_FR : scale,
// out_PR : Safe )
//
-// On input, X is in register format and
-// Flag = 0 for exp,
-// Flag = 1 for expm1,
+// On input, X is in register format
+// p6 for exp,
+// p7 for expm1,
//
-// On output, provided X and X_cor are real numbers, then
+// On output,
//
-// scale*(Y_hi + Y_lo) approximates expl(X) if Flag is 0
-// scale*(Y_hi + Y_lo) approximates expl(X)-1 if Flag is 1
+// scale*(Y_hi + Y_lo) approximates exp(X) if exp
+// scale*(Y_hi + Y_lo) approximates exp(X)-1 if expm1
//
// The accuracy is sufficient for a highly accurate 64 sig.
// bit implementation. Safe is set if there is no danger of
@@ -122,36 +127,36 @@
// The method consists of three cases.
//
// If |X| < Tiny use case exp_tiny;
-// else if |X| < 2^(-6) use case exp_small;
+// else if |X| < 2^(-m) use case exp_small; m=12 for exp, m=7 for expm1
// else use case exp_regular;
//
// Case exp_tiny:
//
-// 1 + X can be used to approximate expl(X) or expl(X+X_cor);
-// X + X^2/2 can be used to approximate expl(X) - 1
+// 1 + X can be used to approximate exp(X)
+// X + X^2/2 can be used to approximate exp(X) - 1
//
// Case exp_small:
//
-// Here, expl(X), expl(X+X_cor), and expl(X) - 1 can all be
+// Here, exp(X) and exp(X) - 1 can all be
// appproximated by a relatively simple polynomial.
//
// This polynomial resembles the truncated Taylor series
//
-// expl(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
+// exp(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
//
// Case exp_regular:
//
// Here we use a table lookup method. The basic idea is that in
-// order to compute expl(X), we accurately decompose X into
+// order to compute exp(X), we accurately decompose X into
//
// X = N * log(2)/(2^12) + r, |r| <= log(2)/2^13.
//
// Hence
//
-// expl(X) = 2^( N / 2^12 ) * expl(r).
+// exp(X) = 2^( N / 2^12 ) * exp(r).
//
// The value 2^( N / 2^12 ) is obtained by simple combinations
-// of values calculated beforehand and stored in table; expl(r)
+// of values calculated beforehand and stored in table; exp(r)
// is approximated by a short polynomial because |r| is small.
//
// We elaborate this method in 4 steps.
@@ -178,13 +183,9 @@
// as a double-precision number; L_lo has 64 significant bits and
// stored as a double-extended number.
//
-// In the case Flag = 2, we further modify r by
-//
-// r := r + X_cor.
-//
// Step 2: Approximation
//
-// expl(r) - 1 is approximated by a short polynomial of the form
+// exp(r) - 1 is approximated by a short polynomial of the form
//
// r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
//
@@ -213,19 +214,19 @@
// Define two mathematical values, delta_1 and delta_2, implicitly
// such that
//
-// T_1 = expl( [M_1 log(2)/2^6] - delta_1 )
-// T_2 = expl( [M_2 log(2)/2^12] - delta_2 )
+// T_1 = exp( [M_1 log(2)/2^6] - delta_1 )
+// T_2 = exp( [M_2 log(2)/2^12] - delta_2 )
//
// are representable as 24 significant bits. To illustrate the idea,
// we show how we define delta_1:
//
-// T_1 := round_to_24_bits( expl( M_1 log(2)/2^6 ) )
+// T_1 := round_to_24_bits( exp( M_1 log(2)/2^6 ) )
// delta_1 = (M_1 log(2)/2^6) - log( T_1 )
//
// The last equality means mathematical equality. We then tabulate
//
-// W_1 := expl(delta_1) - 1
-// W_2 := expl(delta_2) - 1
+// W_1 := exp(delta_1) - 1
+// W_2 := exp(delta_2) - 1
//
// Both in double precision.
//
@@ -235,13 +236,13 @@
// T := T_1 * T_2 ...exactly
// W := W_1 + (1 + W_1)*W_2
//
-// W approximates expl( delta ) - 1 where delta = delta_1 + delta_2.
+// W approximates exp( delta ) - 1 where delta = delta_1 + delta_2.
// The mathematical product of T and (W+1) is an accurate representation
// of 2^(M_1/2^6) * 2^(M_2/2^12).
//
// Step 4. Reconstruction
//
-// Finally, we can reconstruct expl(X), expl(X) - 1.
+// Finally, we can reconstruct exp(X), exp(X) - 1.
// Because
//
// X = K * log(2) + (M_1*log(2)/2^6 - delta_1)
@@ -249,18 +250,18 @@
// + delta_1 + delta_2 + r ...accurately
// We have
//
-// expl(X) ~=~ 2^K * ( T + T*[expl(delta_1+delta_2+r) - 1] )
-// ~=~ 2^K * ( T + T*[expl(delta + r) - 1] )
-// ~=~ 2^K * ( T + T*[(expl(delta)-1)
-// + expl(delta)*(expl(r)-1)] )
+// exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] )
+// ~=~ 2^K * ( T + T*[exp(delta + r) - 1] )
+// ~=~ 2^K * ( T + T*[(exp(delta)-1)
+// + exp(delta)*(exp(r)-1)] )
// ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
// ~=~ 2^K * ( Y_hi + Y_lo )
//
// where Y_hi = T and Y_lo = T*(W + (1+W)*poly(r))
//
-// For expl(X)-1, we have
+// For exp(X)-1, we have
//
-// expl(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
+// exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
// ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
//
// and we combine Y_hi + Y_lo - 2^(-N) into the form of two
@@ -278,7 +279,7 @@
// different rounding directions and a correct setting of the SAFE
// flag.
//
-// If Flag is 1, then
+// If expm1, then
// SAFE := False ...possibility of underflow
// Scale := 1.0
// Y_hi := X
@@ -296,26 +297,25 @@
//
// Let r = X
//
-// If Flag is not 1 ...i.e. expl( argument )
+// If exp ...i.e. exp( argument )
//
// rsq := r * r;
// r4 := rsq*rsq
// poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
// poly_hi := r + rsq*(P_1 + r*P_2)
// Y_lo := poly_hi + r4 * poly_lo
-// set lsb(Y_lo) to 1
// Y_hi := 1.0
// Scale := 1.0
//
-// Else ...i.e. expl( argument ) - 1
+// Else ...i.e. exp( argument ) - 1
//
// rsq := r * r
// r4 := rsq * rsq
-// r6 := rsq * r4
-// poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
-// poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
-// Y_lo := rsq*poly_hi + poly_lo
-// set lsb(Y_lo) to 1
+// poly_lo := Q_7 + r*(Q_8 + r*Q_9)
+// poly_med:= Q_3 + r*Q_4 + rsq*(Q_5 + r*Q_6)
+// poly_med:= poly_med + r4*poly_lo
+// poly_hi := Q_1 + r*Q_2
+// Y_lo := rsq*(poly_hi + rsq*poly_med)
// Y_hi := X
// Scale := 1.0
//
@@ -325,14 +325,14 @@
//
// The previous description contain enough information except the
// computation of poly and the final Y_hi and Y_lo in the case for
-// expl(X)-1.
+// exp(X)-1.
//
// The computation of poly for Step 2:
//
// rsq := r*r
// poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
//
-// For the case expl(X) - 1, we need to incorporate 2^(-K) into
+// For the case exp(X) - 1, we need to incorporate 2^(-K) into
// Y_hi and Y_lo at the end of Step 4.
//
// If K > 10 then
@@ -346,72 +346,197 @@
// End If
// End If
//
+//=======================================================
+// General Purpose Registers
+//
+GR_ad_Arg = r14
+GR_ad_A = r15
+GR_sig_inv_ln2 = r15
+GR_rshf_2to51 = r16
+GR_ad_PQ = r16
+GR_ad_Q = r16
+GR_signexp_x = r17
+GR_exp_x = r17
+GR_small_exp = r18
+GR_rshf = r18
+GR_exp_mask = r19
+GR_ad_W1 = r20
+GR_exp_2tom51 = r20
+GR_ad_W2 = r21
+GR_exp_underflow = r21
+GR_M2 = r22
+GR_huge_exp = r22
+GR_M1 = r23
+GR_huge_signif = r23
+GR_K = r24
+GR_one = r24
+GR_minus_one = r24
+GR_exp_bias = r25
+GR_ad_Limits = r26
+GR_N_fix = r26
+GR_exp_2_mk = r26
+GR_ad_P = r27
+GR_exp_2_k = r27
+GR_big_expo_neg = r28
+GR_very_small_exp = r29
+GR_exp_half = r29
+GR_ad_T1 = r30
+GR_ad_T2 = r31
-#include "libm_support.h"
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// Floating Point Registers
+//
+FR_norm_x = f9
+FR_RSHF_2TO51 = f10
+FR_INV_LN2_2TO63 = f11
+FR_W_2TO51_RSH = f12
+FR_2TOM51 = f13
+FR_RSHF = f14
+FR_Y_hi = f34
+FR_Y_lo = f35
+FR_scale = f36
+FR_tmp = f37
+FR_float_N = f38
+FR_N_signif = f39
+FR_L_hi = f40
+FR_L_lo = f41
+FR_r = f42
+FR_W1 = f43
+FR_T1 = f44
+FR_W2 = f45
+FR_T2 = f46
+FR_W1_p1 = f47
+FR_rsq = f48
+FR_A2 = f49
+FR_r4 = f50
+FR_A3 = f51
+FR_poly = f52
+FR_T = f53
+FR_W = f54
+FR_Wp1 = f55
+FR_p21 = f59
+FR_p210 = f59
+FR_p65 = f60
+FR_p654 = f60
+FR_p6543 = f60
+FR_2_mk = f61
+FR_P4Q7 = f61
+FR_P4 = f61
+FR_Q7 = f61
+FR_P3Q6 = f62
+FR_P3 = f62
+FR_Q6 = f62
+FR_q65 = f62
+FR_q6543 = f62
+FR_P2Q5 = f63
+FR_P2 = f63
+FR_Q5 = f63
+FR_P1Q4 = f64
+FR_P1 = f64
+FR_Q4 = f64
+FR_q43 = f64
+FR_Q3 = f65
+FR_Q2 = f66
+FR_q21 = f66
+FR_Q1 = f67
+FR_A1 = f68
+FR_P6Q9 = f68
+FR_P6 = f68
+FR_Q9 = f68
+FR_P5Q8 = f69
+FR_P5 = f69
+FR_Q8 = f69
+FR_q987 = f69
+FR_q98 = f69
+FR_q9876543 = f69
+FR_min_oflow_x = f70
+FR_huge_exp = f70
+FR_zero_uflow_x = f71
+FR_huge_signif = f71
+FR_huge = f72
+FR_small = f72
+FR_half = f73
+FR_T_scale = f74
+FR_result_lo = f75
+FR_W_T_scale = f76
+FR_Wp1_T_scale = f77
+FR_ftz = f77
+FR_half_x = f77
+//
-.align 64
-Constants_exp_64_Arg:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
-data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000
-data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
-data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
-// /* Inv_L, L_hi, L_lo */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
+FR_X = f9
+FR_Y = f0
+FR_RESULT = f15
-.align 64
-Constants_exp_64_Exponents:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
-data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
-data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
-data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
-.align 64
-Constants_exp_64_A:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
-data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
-data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
-data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 2^12/ln(2) is needed for the computation of N. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*2^12/ln2 into the rightmost bits of the significand.
+// The result of this fma is N_signif.
+// 2. RSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from N_signif * 2^(-51) to give
+// the integer part of N, N_fix, as a floating-point number.
+// The result of this fms is float_N.
+RODATA
.align 64
-Constants_exp_64_P:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
-data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
-data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
-data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
-data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
-data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
-data4 0x000004C7,0x80000000,0x00003FFE,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
+LOCAL_OBJECT_START(Constants_exp_64_Arg)
+//data8 0xB8AA3B295C17F0BC,0x0000400B // Inv_L = 2^12/log(2)
+data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12
+data8 0xF473DE6AF278ECE6,0x00003FD4 // L_lo = lo part log(2)/2^12
+LOCAL_OBJECT_END(Constants_exp_64_Arg)
-.align 64
-Constants_exp_64_Q:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
-data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000
-data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000
-data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000
-data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000
-data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000
-data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000
-data4 0x00000000,0x80000000,0x00003FFE,0x00000000
-// /* Reversed */
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
+LOCAL_OBJECT_START(Constants_exp_64_Limits)
+data8 0xb17217f7d1cf79ac,0x0000400c // Smallest long dbl oflow x
+data8 0xb220000000000000,0x0000c00c // Small long dbl uflow zero x
+LOCAL_OBJECT_END(Constants_exp_64_Limits)
-.align 64
-Constants_exp_64_T1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
+LOCAL_OBJECT_START(Constants_exp_64_A)
+data8 0xAAAAAAABB1B736A0,0x00003FFA // A3
+data8 0xAAAAAAAB90CD6327,0x00003FFC // A2
+data8 0xFFFFFFFFFFFFFFFF,0x00003FFD // A1
+LOCAL_OBJECT_END(Constants_exp_64_A)
+
+LOCAL_OBJECT_START(Constants_exp_64_P)
+data8 0xD00D6C8143914A8A,0x00003FF2 // P6
+data8 0xB60BC4AC30304B30,0x00003FF5 // P5
+data8 0x888888887474C518,0x00003FF8 // P4
+data8 0xAAAAAAAA8DAE729D,0x00003FFA // P3
+data8 0xAAAAAAAAAAAAAF61,0x00003FFC // P2
+data8 0x80000000000004C7,0x00003FFE // P1
+LOCAL_OBJECT_END(Constants_exp_64_P)
+
+LOCAL_OBJECT_START(Constants_exp_64_Q)
+data8 0x93F2AC5F7471F32E, 0x00003FE9 // Q9
+data8 0xB8DA0F3550B3E764, 0x00003FEC // Q8
+data8 0xD00D00D0028E89C4, 0x00003FEF // Q7
+data8 0xD00D00DAEB8C4E91, 0x00003FF2 // Q6
+data8 0xB60B60B60B60B6F5, 0x00003FF5 // Q5
+data8 0x888888888886CC23, 0x00003FF8 // Q4
+data8 0xAAAAAAAAAAAAAAAB, 0x00003FFA // Q3
+data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // Q2
+data8 0x8000000000000000, 0x00003FFE // Q1
+LOCAL_OBJECT_END(Constants_exp_64_Q)
+
+LOCAL_OBJECT_START(Constants_exp_64_T1)
data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
@@ -428,11 +553,9 @@ data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
+LOCAL_OBJECT_END(Constants_exp_64_T1)
-.align 64
-Constants_exp_64_T2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
+LOCAL_OBJECT_START(Constants_exp_64_T2)
data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
@@ -449,1124 +572,824 @@ data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
+LOCAL_OBJECT_END(Constants_exp_64_T2)
-.align 64
-Constants_exp_64_W1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
-data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
-data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
-data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
-data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
-data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
-data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
-data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
-data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
-data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
-data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
-data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
-data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
-data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
-data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
-data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
-data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
-data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
-data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
-data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
-data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
-data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
-data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
-data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
-data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
-data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
-data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
-data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
-data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
-data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
-data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
-data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
-data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
+LOCAL_OBJECT_START(Constants_exp_64_W1)
+data8 0x0000000000000000, 0xBE384454171EC4B4
+data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
+data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
+data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
+data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
+data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
+data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
+data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
+data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
+data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
+data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
+data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
+data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
+data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
+data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
+data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
+data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
+data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
+data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
+data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
+data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
+data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
+data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
+data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
+data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
+data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
+data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
+data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
+data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
+data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
+data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
+data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
+LOCAL_OBJECT_END(Constants_exp_64_W1)
-.align 64
-Constants_exp_64_W2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
-data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
-data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
-data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
-data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
-data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
-data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
-data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
-data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
-data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
-data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
-data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
-data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
-data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
-data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
-data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
-data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
-data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
-data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
-data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
-data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
-data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
-data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
-data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
-data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
-data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
-data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
-data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
-data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
-data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
-data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
-data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
-data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
-
-GR_SAVE_PFS = r59
-GR_SAVE_B0 = r60
-GR_SAVE_GP = r61
-GR_Parameter_X = r62
-GR_Parameter_Y = r63
-GR_Parameter_RESULT = r64
-GR_Parameter_TAG = r65
+LOCAL_OBJECT_START(Constants_exp_64_W2)
+data8 0x0000000000000000, 0xBE641F2537A3D7A2
+data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
+data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
+data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
+data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
+data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
+data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
+data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
+data8 0xBE56856B49BFF529, 0x3E66DD3300508651
+data8 0x3E51165FC114BC13, 0x3E53333DC453290F
+data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
+data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
+data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
+data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
+data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
+data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
+data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
+data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
+data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
+data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
+data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
+data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
+data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
+data8 0xBE559725ADE45917, 0xBE68C29C042FC476
+data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
+data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
+data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
+data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
+data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
+data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
+data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
+data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
+LOCAL_OBJECT_END(Constants_exp_64_W2)
-FR_X = f9
-FR_Y = f9
-FR_RESULT = f99
.section .text
-.proc expm1l#
-.global expm1l#
-.align 64
-expm1l:
-#ifdef _LIBC
-.global __expm1l#
-__expm1l:
-#endif
-{ .mii
-alloc r32 = ar.pfs,0,30,4,0
-(p0) add r33 = 1, r0
-(p0) cmp.eq.unc p7, p0 = r0, r0
-}
-{ .mbb
- nop.m 999
-(p0) br.cond.sptk exp_continue
- nop.b 999 ;;
-}
+
+GLOBAL_IEEE754_ENTRY(expm1l)
//
-// Set p7 true for expm1
-// Set Flag = r33 = 1 for expm1
+// Set p7 true for expm1, p6 false
//
-.endp expm1l
-ASM_SIZE_DIRECTIVE(expm1l)
-
-#ifdef _LIBC
-libm_hidden_def (__expm1l)
-#endif
-
-.section .text
-.proc expl#
-.global expl#
-.align 64
-expl:
-#ifdef _LIBC
-.global __ieee754_expl#
-__ieee754_expl:
-#endif
-{ .mii
-alloc r32 = ar.pfs,0,30,4,0
-(p0) add r33 = r0, r0
-(p0) cmp.eq.unc p0, p7 = r0, r0 ;;
+{ .mlx
+ getf.exp GR_signexp_x = f8 // Get sign and exponent of x, redo if unorm
+ movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
-exp_continue:
-{ .mfi
-(p0) add r32 = 2,r0
-(p0) fnorm.s1 f9 = f8
- nop.i 0
+{ .mlx
+ addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp
+ movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
}
+;;
+
{ .mfi
-(p0) nop.m 0
+ ld8 GR_ad_Arg = [GR_ad_Arg] // Point to Arg table
+ fclass.m p8, p0 = f8, 0x1E7 // Test x for natval, nan, inf, zero
+ cmp.eq p7, p6 = r0, r0
+}
+{ .mfb
+ mov GR_exp_half = 0x0FFFE // Exponent of 0.5, for very small path
+ fnorm.s1 FR_norm_x = f8 // Normalize x
+ br.cond.sptk exp_continue
+}
+;;
+
+GLOBAL_IEEE754_END(expm1l)
+
+GLOBAL_IEEE754_ENTRY(expl)
//
-// Set p7 false for exp
-// Set Flag = r33 = 0 for exp
+// Set p7 false for exp, p6 true
//
-(p0) fclass.m.unc p6, p8 = f8, 0x1E7
- nop.i 0;;
+{ .mlx
+ getf.exp GR_signexp_x = f8 // Get sign and exponent of x, redo if unorm
+ movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
+{ .mlx
+ addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp
+ movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
+}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.nm.unc p9, p0 = f8, 0x1FF
- nop.i 0
+ ld8 GR_ad_Arg = [GR_ad_Arg] // Point to Arg table
+ fclass.m p8, p0 = f8, 0x1E7 // Test x for natval, nan, inf, zero
+ cmp.eq p6, p7 = r0, r0
}
{ .mfi
- nop.m 999
-(p0) mov f36 = f1
- nop.i 999 ;;
+ mov GR_exp_half = 0x0FFFE // Exponent of 0.5, for very small path
+ fnorm.s1 FR_norm_x = f8 // Normalize x
+ nop.i 999
}
-{ .mfb
- nop.m 999
-//
-// Identify NatVals, NaNs, Infs, and Zeros.
-// Identify EM unsupporteds.
-// Save special input registers
-(p0) mov f32 = f0
-//
-// Create FR_X_cor = 0.0
-// GR_Flag = 0
-// GR_Expo_Range = 2 (r32) for double-extended precision
-// FR_Scale = 1.0
-//
-(p6) br.cond.spnt EXPL_64_SPECIAL ;;
+;;
+
+exp_continue:
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand
+
+{ .mfi
+ setf.sig FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63
+ fclass.nm.unc p9, p0 = f8, 0x1FF // Test x for unsupported
+ mov GR_exp_2tom51 = 0xffff-51
+}
+{ .mlx
+ setf.d FR_RSHF_2TO51 = GR_rshf_2to51 // Form const 1.1000 * 2^(63+51)
+ movl GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+}
+;;
+
+{ .mfi
+ setf.exp FR_half = GR_exp_half // Form 0.5 for very small path
+ fma.s1 FR_scale = f1,f1,f0 // Scale = 1.0
+ mov GR_exp_bias = 0x0FFFF // Set exponent bias
}
{ .mib
- nop.m 999
- nop.i 999
-(p9) br.cond.spnt EXPL_64_UNSUPPORTED ;;
+ add GR_ad_Limits = 0x20, GR_ad_Arg // Point to Limits table
+ mov GR_exp_mask = 0x1FFFF // Form exponent mask
+(p8) br.cond.spnt EXP_64_SPECIAL // Branch if natval, nan, inf, zero
}
+;;
+
{ .mfi
-(p0) cmp.ne.unc p12, p13 = 0x01, r33
-//
-// Branch out for special input values
-//
-(p0) fcmp.lt.unc.s0 p9,p0 = f8, f0
-(p0) cmp.eq.unc p15, p0 = r0, r0
+ setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N
+ nop.f 999
+ add GR_ad_A = 0x40, GR_ad_Arg // Point to A table
}
-{ .mmi
- nop.m 999
-//
-// Raise possible denormal operand exception
-// Normalize x
-//
-// This function computes expl( x + x_cor)
-// Input FR 1: FR_X
-// Input FR 2: FR_X_cor
-// Input GR 1: GR_Flag
-// Input GR 2: GR_Expo_Range
-// Output FR 3: FR_Y_hi
-// Output FR 4: FR_Y_lo
-// Output FR 5: FR_Scale
-// Output PR 1: PR_Safe
-(p0) addl r34 = @ltoff(Constants_exp_64_Arg#),gp
-(p0) addl r40 = @ltoff(Constants_exp_64_W1#),gp
-};;
-//
-// Prepare to load constants
-// Set Safe = True
-//
+{ .mib
+ setf.d FR_RSHF = GR_rshf // Form right shift const 1.1000 * 2^63
+ add GR_ad_T1 = 0x160, GR_ad_Arg // Point to T1 table
+(p9) br.cond.spnt EXP_64_UNSUPPORTED // Branch if unsupported
+}
+;;
-{ .mmi
- ld8 r34 = [r34]
- ld8 r40 = [r40]
-(p0) addl r41 = @ltoff(Constants_exp_64_W2#),gp
+.pred.rel "mutex",p6,p7
+{ .mfi
+ ldfe FR_L_hi = [GR_ad_Arg],16 // Get L_hi
+ fcmp.eq.s0 p9,p0 = f8, f0 // Dummy op to flag denormals
+(p6) add GR_ad_PQ = 0x30, GR_ad_A // Point to P table for exp
+}
+{ .mfi
+ ldfe FR_min_oflow_x = [GR_ad_Limits],16 // Get min x to cause overflow
+ fmpy.s1 FR_rsq = f8, f8 // rsq = x * x for small path
+(p7) add GR_ad_PQ = 0x90, GR_ad_A // Point to Q table for expm1
};;
{ .mmi
-(p0) ldfe f37 = [r34],16
-(p0) ld8 r41 = [r41] ;;
+ ldfe FR_L_lo = [GR_ad_Arg],16 // Get L_lo
+ ldfe FR_zero_uflow_x = [GR_ad_Limits],16 // Get x for zero uflow result
+ add GR_ad_W1 = 0x200, GR_ad_T1 // Point to W1 table
}
+;;
-//
-// N = fcvt.fx(float_N)
-// Set p14 if -6 > expo_X
-//
-//
-// Bias = 0x0FFFF
-// expo_X = expo_X and Mask
-//
-
-{ .mmi
-(p0) ldfe f40 = [r34],16
- nop.m 999
-//
-// Load L_lo
-// Set p10 if 14 < expo_X
-//
-(p0) addl r50 = @ltoff(Constants_exp_64_T1#),gp
+{ .mfi
+ ldfe FR_P6Q9 = [GR_ad_PQ],16 // P6(exp) or Q9(expm1) for small path
+ mov FR_r = FR_norm_x // r = X for small path
+ mov GR_very_small_exp = -60 // Exponent of x for very small path
}
-{ .mmi
- nop.m 999
- nop.m 999
-(p0) addl r51 = @ltoff(Constants_exp_64_T2#),gp ;;
+{ .mfi
+ add GR_ad_W2 = 0x400, GR_ad_T1 // Point to W2 table
+ nop.f 999
+(p7) mov GR_small_exp = -7 // Exponent of x for small path expm1
}
-//
-// Load W2_ptr
-// Branch to SMALL is expo_X < -6
-//
+;;
-{.mmi
-(p0) ld8 r50 = [r50]
-(p0) ld8 r51 = [r51]
-};;
+{ .mmi
+ ldfe FR_P5Q8 = [GR_ad_PQ],16 // P5(exp) or Q8(expm1) for small path
+ and GR_exp_x = GR_signexp_x, GR_exp_mask
+(p6) mov GR_small_exp = -12 // Exponent of x for small path exp
+}
+;;
-{ .mlx
-(p0) ldfe f41 = [r34],16
-//
-// float_N = X * L_Inv
-// expo_X = exponent of X
-// Mask = 0x1FFFF
-//
-(p0) movl r58 = 0x0FFFF
+// N_signif = X * Inv_log2_by_2^12
+// By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand.
+// We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing.
+{ .mfi
+ ldfe FR_P4Q7 = [GR_ad_PQ],16 // P4(exp) or Q7(expm1) for small path
+ fma.s1 FR_N_signif = FR_norm_x, FR_INV_LN2_2TO63, FR_RSHF_2TO51
+ nop.i 999
}
-{ .mlx
- nop.m 999
-(p0) movl r39 = 0x1FFFF ;;
+{ .mfi
+ sub GR_exp_x = GR_exp_x, GR_exp_bias // Get exponent
+ fmpy.s1 FR_r4 = FR_rsq, FR_rsq // Form r4 for small path
+ cmp.eq.unc p15, p0 = r0, r0 // Set Safe as default
}
+;;
+
{ .mmi
-(p0) getf.exp r37 = f9
- nop.m 999
-(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp ;;
+ ldfe FR_P3Q6 = [GR_ad_PQ],16 // P3(exp) or Q6(expm1) for small path
+ cmp.lt p14, p0 = GR_exp_x, GR_very_small_exp // Is |x| < 2^-60?
+ nop.i 999
}
-{ .mii
-(p0) ld8 r34 = [r34]
- nop.i 999
-(p0) and r37 = r37, r39 ;;
+;;
+
+{ .mfi
+ ldfe FR_P2Q5 = [GR_ad_PQ],16 // P2(exp) or Q5(expm1) for small path
+ fmpy.s1 FR_half_x = FR_half, FR_norm_x // 0.5 * x for very small path
+ cmp.lt p13, p0 = GR_exp_x, GR_small_exp // Is |x| < 2^-m?
}
-{ .mmi
-(p0) sub r37 = r37, r58 ;;
-(p0) cmp.gt.unc p14, p0 = -6, r37
-(p0) cmp.lt.unc p10, p0 = 14, r37 ;;
+{ .mib
+ nop.m 999
+ nop.i 999
+(p14) br.cond.spnt EXP_VERY_SMALL // Branch if |x| < 2^-60
}
+;;
+
{ .mfi
-(p0) nop.m 0
-//
-// Load L_inv
-// Set p12 true for Flag = 0 (exp)
-// Set p13 true for Flag = 1 (expm1)
-//
-(p0) fmpy.s1 f38 = f9, f37
- nop.i 999 ;;
+ ldfe FR_A3 = [GR_ad_A],16 // Get A3 for normal path
+ fcmp.ge.s1 p10,p0 = FR_norm_x, FR_min_oflow_x // Will result overflow?
+ mov GR_big_expo_neg = -16381 // -0x3ffd
}
{ .mfb
- nop.m 999
-//
-// Load L_hi
-// expo_X = expo_X - Bias
-// get W1_ptr
-//
-(p0) fcvt.fx.s1 f39 = f38
-(p14) br.cond.spnt EXPL_SMALL ;;
+ ldfe FR_P1Q4 = [GR_ad_PQ],16 // P1(exp) or Q4(expm1) for small path
+ nop.f 999
+(p13) br.cond.spnt EXP_SMALL // Branch if |x| < 2^-m
+ // m=12 for exp, m=7 for expm1
}
-{ .mib
- nop.m 999
- nop.i 999
-(p10) br.cond.spnt EXPL_HUGE ;;
+;;
+
+// Now we are on the main path for |x| >= 2^-m, m=12 for exp, m=7 for expm1
+//
+// float_N = round_int(N_signif)
+// The significand of N_signif contains the rounded integer part of X * 2^12/ln2,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into GR_N.
+
+// Since N_signif is scaled by 2^51, it must be multiplied by 2^-51
+// before the shift constant 1.10000 * 2^63 is subtracted to yield float_N.
+// Thus, float_N contains the floating point version of N
+
+
+{ .mfi
+ ldfe FR_A2 = [GR_ad_A],16 // Get A2 for main path
+ fcmp.lt.s1 p11,p0 = FR_norm_x, FR_zero_uflow_x // Certain zero, uflow?
+ add GR_ad_T2 = 0x100, GR_ad_T1 // Point to T2 table
}
-{ .mmi
-(p0) shladd r34 = r32,4,r34
+{ .mfi
nop.m 999
-(p0) addl r35 = @ltoff(Constants_exp_64_A#),gp ;;
-}
-//
-// Load T_1,T_2
-//
-{ .mmi
- nop.m 999
- ld8 r35 =[r35]
- nop.i 99
-};;
-{ .mmb
-(p0) ldfe f51 = [r35],16
-(p0) ld8 r45 = [r34],8
- nop.b 999 ;;
+ fms.s1 FR_float_N = FR_N_signif, FR_2TOM51, FR_RSHF // Form float_N
+ nop.i 999
}
-//
-// Set Safe = True if k >= big_expo_neg
-// Set Safe = False if k < big_expo_neg
-//
-{ .mmb
-(p0) ldfe f49 = [r35],16
-(p0) ld8 r48 = [r34],0
- nop.b 999 ;;
+;;
+
+{ .mbb
+ getf.sig GR_N_fix = FR_N_signif // Get N from significand
+(p10) br.cond.spnt EXP_OVERFLOW // Branch if result will overflow
+(p11) br.cond.spnt EXP_CERTAIN_UNDERFLOW_ZERO // Branch if certain zero, uflow
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Branch to HUGE is expo_X > 14
-//
-(p0) fcvt.xf f38 = f39
- nop.i 999 ;;
+ ldfe FR_A1 = [GR_ad_A],16 // Get A1 for main path
+ fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_norm_x // r = -L_hi * float_N + x
+ extr.u GR_M1 = GR_N_fix, 6, 6 // Extract index M_1
}
{ .mfi
-(p0) getf.sig r52 = f39
- nop.f 999
- nop.i 999 ;;
+ and GR_M2 = 0x3f, GR_N_fix // Extract index M_2
+ nop.f 999
+ nop.i 999
}
-{ .mii
- nop.m 999
-(p0) extr.u r43 = r52, 6, 6 ;;
-//
-// r = r - float_N * L_lo
-// K = extr(N_fix,12,52)
-//
-(p0) shladd r40 = r43,3,r40 ;;
+;;
+
+// N_fix is only correct up to 50 bits because of our right shift technique.
+// Actually in the normal path we will have restricted K to about 14 bits.
+// Somewhat arbitrarily we extract 32 bits.
+{ .mfi
+ shladd GR_ad_W1 = GR_M1,3,GR_ad_W1 // Point to W1
+ nop.f 999
+ extr GR_K = GR_N_fix, 12, 32 // Extract limited range K
}
{ .mfi
-(p0) shladd r50 = r43,2,r50
-(p0) fnma.s1 f42 = f40, f38, f9
-//
-// float_N = float(N)
-// N_fix = signficand N
-//
-(p0) extr.u r42 = r52, 0, 6
+ shladd GR_ad_T1 = GR_M1,2,GR_ad_T1 // Point to T1
+ nop.f 999
+ shladd GR_ad_T2 = GR_M2,2,GR_ad_T2 // Point to T2
}
+;;
+
{ .mmi
-(p0) ldfd f43 = [r40],0 ;;
-(p0) shladd r41 = r42,3,r41
-(p0) shladd r51 = r42,2,r51
-}
-//
-// W_1_p1 = 1 + W_1
-//
-{ .mmi
-(p0) ldfs f44 = [r50],0 ;;
-(p0) ldfd f45 = [r41],0
-//
-// M_2 = extr(N_fix,0,6)
-// M_1 = extr(N_fix,6,6)
-// r = X - float_N * L_hi
-//
-(p0) extr r44 = r52, 12, 52
+ ldfs FR_T1 = [GR_ad_T1],0 // Get T1
+ ldfd FR_W1 = [GR_ad_W1],0 // Get W1
+ add GR_exp_2_k = GR_exp_bias, GR_K // Form exponent of 2^k
}
+;;
+
{ .mmi
-(p0) ldfs f46 = [r51],0 ;;
-(p0) sub r46 = r58, r44
-(p0) cmp.gt.unc p8, p15 = r44, r45
-}
-//
-// W = W_1 + W_1_p1*W_2
-// Load A_2
-// Bias_m_K = Bias - K
-//
-{ .mii
-(p0) ldfe f40 = [r35],16
-//
-// load A_1
-// poly = A_2 + r*A_3
-// rsq = r * r
-// neg_2_mK = exponent of Bias_m_k
-//
-(p0) add r47 = r58, r44 ;;
-//
-// Set Safe = True if k <= big_expo_pos
-// Set Safe = False if k > big_expo_pos
-// Load A_3
-//
-(p15) cmp.lt p8,p15 = r44,r48 ;;
+ ldfs FR_T2 = [GR_ad_T2],0 // Get T2
+ shladd GR_ad_W2 = GR_M2,3,GR_ad_W2 // Point to W2
+ sub GR_exp_2_mk = GR_exp_bias, GR_K // Form exponent of 2^-k
}
+;;
+
{ .mmf
-(p0) setf.exp f61 = r46
-//
-// Bias_p + K = Bias + K
-// T = T_1 * T_2
-//
-(p0) setf.exp f36 = r47
-(p0) fnma.s1 f42 = f41, f38, f42 ;;
+ ldfd FR_W2 = [GR_ad_W2],0 // Get W2
+ setf.exp FR_scale = GR_exp_2_k // Set scale = 2^k
+ fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r // r = -L_lo * float_N + r
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Load W_1,W_2
-// Load big_exp_pos, load big_exp_neg
-//
-(p0) fadd.s1 f47 = f43, f1
- nop.i 999 ;;
+ setf.exp FR_2_mk = GR_exp_2_mk // Form 2^-k
+ fma.s1 FR_poly = FR_r, FR_A3, FR_A2 // poly = r * A3 + A2
+ cmp.lt p8,p15 = GR_K,GR_big_expo_neg // Set Safe if K >= big_expo_neg
}
{ .mfi
- nop.m 999
-(p0) fma.s1 f52 = f42, f51, f49
- nop.i 999
+ nop.m 999
+ fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmpy.s1 f48 = f42, f42
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 FR_T = FR_T1, FR_T2 // T = T1 * T2
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 f53 = f44, f46
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 FR_W1_p1 = FR_W1, f1 // W1_p1 = W1 + 1.0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 f54 = f45, f47, f43
- nop.i 999
+(p7) cmp.lt.unc p8, p9 = 10, GR_K // If expm1, set p8 if K > 10
+ fma.s1 FR_poly = FR_r, FR_poly, FR_A1 // poly = r * poly + A1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fneg f61 = f61
- nop.i 999 ;;
+(p7) cmp.eq p15, p0 = r0, r0 // If expm1, set Safe flag
+ fma.s1 FR_T_scale = FR_T, FR_scale, f0 // T_scale = T * scale
+(p9) cmp.gt.unc p9, p10 = -10, GR_K // If expm1, set p9 if K < -10
+ // If expm1, set p10 if -10<=K<=10
}
{ .mfi
- nop.m 999
-(p0) fma.s1 f52 = f42, f52, f40
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_W = FR_W2, FR_W1_p1, FR_W1 // W = W2 * (W1+1.0) + W1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fadd.s1 f55 = f54, f1
- nop.i 999
+ nop.m 999
+ mov FR_Y_hi = FR_T // Assume Y_hi = T
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// W + Wp1 * poly
-//
-(p0) mov f34 = f53
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_poly = FR_rsq, FR_poly, FR_r // poly = rsq * poly + r
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// A_1 + r * poly
-// Scale = setf_expl(Bias_p_k)
-//
-(p0) fma.s1 f52 = f48, f52, f42
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 FR_Wp1_T_scale = FR_W, FR_T_scale, FR_T_scale // (W+1)*T*scale
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// poly = r + rsq(A_1 + r*poly)
-// Wp1 = 1 + W
-// neg_2_mK = -neg_2_mK
-//
-(p0) fma.s1 f35 = f55, f52, f54
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p0) fmpy.s1 f35 = f35, f53
-//
-// Y_hi = T
-// Y_lo = T * (W + Wp1*poly)
-//
-(p12) br.cond.sptk EXPL_MAIN ;;
-}
-//
-// Branch if expl(x)
-// Continue for expl(x-1)
-//
-{ .mii
-(p0) cmp.lt.unc p12, p13 = 10, r44
- nop.i 999 ;;
-//
-// Set p12 if 10 < K, Else p13
-//
-(p13) cmp.gt.unc p13, p14 = -10, r44 ;;
+ nop.m 999
+ fma.s1 FR_W_T_scale = FR_W, FR_T_scale, f0 // W*T*scale
+ nop.i 999
}
-//
-// K > 10: Y_lo = Y_lo + neg_2_mK
-// K <=10: Set p13 if -10 > K, Else set p14
-//
+;;
+
{ .mfi
-(p13) cmp.eq p15, p0 = r0, r0
-(p14) fadd.s1 f34 = f61, f34
- nop.i 999 ;;
+ nop.m 999
+(p9) fsub.s1 FR_Y_hi = f0, FR_2_mk // If expm1, if K < -10 set Y_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p12) fadd.s1 f35 = f35, f61
- nop.i 999 ;;
+ nop.m 999
+(p10) fsub.s1 FR_Y_hi = FR_T, FR_2_mk // If expm1, if |K|<=10 set Y_hi
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p13) fadd.s1 f35 = f35, f34
- nop.i 999
-}
-{ .mfb
- nop.m 999
-//
-// K <= 10 and K < -10, Set Safe = True
-// K <= 10 and K < 10, Y_lo = Y_hi + Y_lo
-// K <= 10 and K > =-10, Y_hi = Y_hi + neg_2_mk
-//
-(p13) mov f34 = f61
-(p0) br.cond.sptk EXPL_MAIN ;;
-}
-EXPL_SMALL:
-{ .mmi
nop.m 999
-(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
-(p12) addl r35 = @ltoff(Constants_exp_64_P#),gp ;;
+ fma.s1 FR_result_lo = FR_Wp1_T_scale, FR_poly, FR_W_T_scale
+ nop.i 999
}
-.pred.rel "mutex",p12,p13
-{ .mmi
-(p12) ld8 r35=[r35]
-nop.m 999
-(p13) addl r35 = @ltoff(Constants_exp_64_Q#),gp
-};;
-{ .mmi
-(p13) ld8 r35=[r35]
-(p0) ld8 r34=[r34]
-nop.i 999
-};;
+;;
+
+.pred.rel "mutex",p8,p9
+// If K > 10 adjust result_lo = result_lo - scale * 2^-k
+// If K < -10 adjust result_lo = result_lo + scale * T
{ .mfi
-(p0) add r34 = 0x48,r34
-//
-// Return
-// K <= 10 and K < 10, Y_hi = neg_2_mk
-//
-// /*******************************************************/
-// /*********** Branch EXPL_SMALL ************************/
-// /*******************************************************/
-(p0) mov f42 = f9
- nop.i 999 ;;
+ nop.m 999
+(p8) fnma.s1 FR_result_lo = FR_scale, FR_2_mk, FR_result_lo // If K > 10
+ nop.i 999
}
-//
-// Flag = 0
-// r4 = rsq * rsq
-//
{ .mfi
-(p0) ld8 r49 =[r34],0
- nop.f 999
- nop.i 999 ;;
-}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// Flag = 1
-//
-(p0) cmp.lt.unc p14, p0 = r37, r49 ;;
+ nop.m 999
+(p9) fma.s1 FR_result_lo = FR_T_scale, f1, FR_result_lo // If K < -10
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// r = X
-//
-(p0) fmpy.s1 f48 = f42, f42
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s0 FR_tmp = FR_A1, FR_A1 // Dummy op to set inexact
+ nop.i 999
}
{ .mfb
- nop.m 999
-//
-// rsq = r * r
-//
-(p0) fmpy.s1 f50 = f48, f48
-//
-// Is input very small?
-//
-(p14) br.cond.spnt EXPL_VERY_SMALL ;;
-}
-//
-// Flag_not1: Y_hi = 1.0
-// Flag is 1: r6 = rsq * r4
-//
-{ .mfi
-(p12) ldfe f52 = [r35],16
-(p12) mov f34 = f1
-(p0) add r53 = 0x1,r0 ;;
-}
-{ .mfi
-(p13) ldfe f51 = [r35],16
-//
-// Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
-//
-(p13) mov f34 = f9
- nop.i 999 ;;
-}
-{ .mmf
-(p12) ldfe f53 = [r35],16
-//
-// For Flag_not_1, Y_hi = X
-// Scale = 1
-// Create 0x000...01
-//
-(p0) setf.sig f37 = r53
-(p0) mov f36 = f1 ;;
+ nop.m 999
+(p15) fma.s0 f8 = FR_Y_hi, FR_scale, FR_result_lo // Safe result
+(p15) br.ret.sptk b0 // Safe exit for normal path
}
-{ .mmi
-(p13) ldfe f52 = [r35],16 ;;
-(p12) ldfe f54 = [r35],16
- nop.i 999 ;;
+;;
+
+// Here if unsafe, will only be here for exp with K < big_expo_neg
+{ .mfb
+ nop.m 999
+ fma.s0 FR_RESULT = FR_Y_hi, FR_scale, FR_result_lo // Prelim result
+ br.cond.sptk EXP_POSSIBLE_UNDERFLOW // Branch to unsafe code
}
+;;
+
+
+EXP_SMALL:
+// Here if 2^-60 <= |x| < 2^-m, m=12 for exp, m=7 for expm1
{ .mfi
-(p13) ldfe f53 = [r35],16
-(p13) fmpy.s1 f58 = f48, f50
- nop.i 999 ;;
-}
-//
-// Flag_not1: poly_lo = P_5 + r*P_6
-// Flag_1: poly_lo = Q_6 + r*Q_7
-//
-{ .mmi
-(p13) ldfe f54 = [r35],16 ;;
-(p12) ldfe f55 = [r35],16
- nop.i 999 ;;
-}
-{ .mmi
-(p12) ldfe f56 = [r35],16 ;;
-(p13) ldfe f55 = [r35],16
- nop.i 999 ;;
-}
-{ .mmi
-(p12) ldfe f57 = [r35],0 ;;
-(p13) ldfe f56 = [r35],16
- nop.i 999 ;;
+(p7) ldfe FR_Q3 = [GR_ad_Q],16 // Get Q3 for small path, if expm1
+(p6) fma.s1 FR_p65 = FR_P6, FR_r, FR_P5 // If exp, p65 = P6 * r + P5
+ nop.i 999
}
{ .mfi
-(p13) ldfe f57 = [r35],0
- nop.f 999
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-//
-// For Flag_not_1, load p5,p6,p1,p2
-// Else load p5,p6,p1,p2
-//
-(p12) fma.s1 f60 = f52, f42, f53
- nop.i 999 ;;
+ mov GR_minus_one = -1
+(p7) fma.s1 FR_q98 = FR_Q9, FR_r, FR_Q8 // If expm1, q98 = Q9 * r + Q8
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p13) fma.s1 f60 = f51, f42, f52
- nop.i 999 ;;
+(p7) ldfe FR_Q2 = [GR_ad_Q],16 // Get Q2 for small path, if expm1
+(p7) fma.s1 FR_q65 = FR_Q6, FR_r, FR_Q5 // If expm1, q65 = Q6 * r + Q5
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p12) fma.s1 f60 = f60, f42, f54
- nop.i 999 ;;
+ setf.sig FR_tmp = GR_minus_one // Create value to force inexact
+(p6) fma.s1 FR_p21 = FR_P2, FR_r, FR_P1 // If exp, p21 = P2 * r + P1
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p12) fma.s1 f59 = f56, f42, f57
- nop.i 999 ;;
+(p7) ldfe FR_Q1 = [GR_ad_Q],16 // Get Q1 for small path, if expm1
+(p7) fma.s1 FR_q43 = FR_Q4, FR_r, FR_Q3 // If expm1, q43 = Q4 * r + Q3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p13) fma.s1 f60 = f42, f60, f53
- nop.i 999 ;;
+ nop.m 999
+(p6) fma.s1 FR_p654 = FR_p65, FR_r, FR_P4 // If exp, p654 = p65 * r + P4
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p12) fma.s1 f59 = f59, f48, f42
- nop.i 999 ;;
+ nop.m 999
+(p7) fma.s1 FR_q987 = FR_q98, FR_r, FR_Q7 // If expm1, q987 = q98 * r + Q7
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7)
-// Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
-// Flag_not1: poly_hi = (P_1 + r*P_2)
-//
-(p13) fmpy.s1 f60 = f60, f58
- nop.i 999 ;;
+ nop.m 999
+(p7) fma.s1 FR_q21 = FR_Q2, FR_r, FR_Q1 // If expm1, q21 = Q2 * r + Q1
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p12) fma.s1 f60 = f60, f42, f55
- nop.i 999 ;;
+ nop.m 999
+(p6) fma.s1 FR_p210 = FR_p21, FR_rsq, FR_r // If exp, p210 = p21 * rsq + r
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Flag_1: poly_lo = r6 *(Q_5 + ....)
-// Flag_not1: poly_hi = r + rsq *(P_1 + r*P_2)
-//
-(p12) fma.s1 f35 = f60, f50, f59
- nop.i 999
+ nop.m 999
+(p7) fma.s1 FR_q6543 = FR_q65, FR_rsq, FR_q43 // If expm1, q6543 = q65*r2+q43
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p13) fma.s1 f59 = f54, f42, f55
- nop.i 999 ;;
+ nop.m 999
+(p6) fma.s1 FR_p6543 = FR_p654, FR_r, FR_P3 // If exp, p6543 = p654 * r + P3
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Flag_not1: Y_lo = rsq* poly_hi + poly_lo
-// Flag_1: poly_lo = rsq* poly_hi + poly_lo
-//
-(p13) fma.s1 f59 = f59, f42, f56
- nop.i 999 ;;
+ nop.m 999
+(p7) fma.s1 FR_q9876543 = FR_q987, FR_r4, FR_q6543 // If expm1, q9876543 = ...
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Flag_not_1: (P_1 + r*P_2)
-//
-(p13) fma.s1 f59 = f59, f42, f57
- nop.i 999 ;;
+ nop.m 999
+(p6) fma.s1 FR_Y_lo = FR_p6543, FR_r4, FR_p210 // If exp, form Y_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2)
-//
-(p13) fma.s1 f35 = f59, f48, f60
- nop.i 999 ;;
+ nop.m 999
+(p7) fma.s1 FR_Y_lo = FR_q9876543, FR_rsq, FR_q21 // If expm1, form Y_lo
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Create 0.000...01
-//
-(p0) for f37 = f35, f37
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-//
-// Set lsb of Y_lo to 1
-//
-(p0) fmerge.se f35 = f35,f37
-(p0) br.cond.sptk EXPL_MAIN ;;
-}
-EXPL_VERY_SMALL:
-{ .mmi
- nop.m 999
- nop.m 999
-(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
+ nop.m 999
+ fmpy.s0 FR_tmp = FR_tmp, FR_tmp // Dummy op to set inexact
+ nop.i 999
}
+;;
+
+.pred.rel "mutex",p6,p7
{ .mfi
- nop.m 999
-(p12) mov f35 = f9
- nop.i 999 ;;
+ nop.m 999
+(p6) fma.s0 f8 = FR_Y_lo, f1, f1 // If exp, result = 1 + Y_lo
+ nop.i 999
}
{ .mfb
-(p13) ld8 r34 = [r34]
-(p12) mov f34 = f1
-(p12) br.cond.sptk EXPL_MAIN ;;
-}
-{ .mlx
-(p13) add r34 = 8,r34
-(p13) movl r39 = 0x0FFFE ;;
-}
-//
-// Load big_exp_neg
-// Create 1/2's exponent
-//
-{ .mii
-(p13) setf.exp f56 = r39
-(p13) shladd r34 = r32,4,r34 ;;
- nop.i 999
+ nop.m 999
+(p7) fma.s0 f8 = FR_Y_lo, FR_rsq, FR_norm_x // If expm1, result = Y_lo*r2+x
+ br.ret.sptk b0 // Exit for 2^-60 <= |x| < 2^-m
+ // m=12 for exp, m=7 for expm1
}
+;;
+
+
+EXP_VERY_SMALL:
//
-// Negative exponents are stored after positive
+// Here if 0 < |x| < 2^-60
+// If exp, result = 1.0 + x
+// If expm1, result = x +x*x/2, but have to check for possible underflow
//
+
{ .mfi
-(p13) ld8 r45 = [r34],0
-//
-// Y_hi = x
-// Scale = 1
-//
-(p13) fmpy.s1 f35 = f9, f9
- nop.i 999 ;;
+(p7) mov GR_exp_underflow = -16381 // Exponent for possible underflow
+(p6) fadd.s0 f8 = f1, FR_norm_x // If exp, result = 1+x
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Reset Safe if necessary
-// Create 1/2
-//
-(p13) mov f34 = f9
- nop.i 999 ;;
+ nop.m 999
+(p7) fmpy.s1 FR_result_lo = FR_half_x, FR_norm_x // If expm1 result_lo = x*x/2
+ nop.i 999
}
+;;
+
{ .mfi
-(p13) cmp.lt.unc p0, p15 = r37, r45
-(p13) mov f36 = f1
- nop.i 999 ;;
+(p7) cmp.lt.unc p0, p8 = GR_exp_x, GR_exp_underflow // Unsafe if expm1 x small
+(p7) mov FR_Y_hi = FR_norm_x // If expm1, Y_hi = x
+(p7) cmp.lt p0, p15 = GR_exp_x, GR_exp_underflow // Unsafe if expm1 x small
}
+;;
+
{ .mfb
- nop.m 999
-//
-// Y_lo = x * x
-//
-(p13) fmpy.s1 f35 = f35, f56
-//
-// Y_lo = x*x/2
-//
-(p13) br.cond.sptk EXPL_MAIN ;;
-}
-EXPL_HUGE:
-{ .mfi
- nop.m 999
-(p0) fcmp.gt.unc.s1 p14, p0 = f9, f0
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl r39 = 0x15DC0 ;;
-}
-{ .mfi
-(p14) setf.exp f34 = r39
-(p14) mov f35 = f1
-(p14) cmp.eq p0, p15 = r0, r0 ;;
+ nop.m 999
+(p8) fma.s0 f8 = FR_norm_x, f1, FR_result_lo // If expm1, result=x+x*x/2
+(p15) br.ret.sptk b0 // If Safe, exit
}
+;;
+
+// Here if expm1 and 0 < |x| < 2^-16381; may be possible underflow
{ .mfb
- nop.m 999
-(p14) mov f36 = f34
-//
-// If x > 0, Set Safe = False
-// If x > 0, Y_hi = 2**(24,000)
-// If x > 0, Y_lo = 1.0
-// If x > 0, Scale = 2**(24,000)
-//
-(p14) br.cond.sptk EXPL_MAIN ;;
-}
-{ .mlx
- nop.m 999
-(p12) movl r39 = 0xA240
-}
-{ .mlx
- nop.m 999
-(p12) movl r38 = 0xA1DC ;;
-}
-{ .mmb
-(p13) cmp.eq p15, p14 = r0, r0
-(p12) setf.exp f34 = r39
- nop.b 999 ;;
-}
-{ .mlx
-(p12) setf.exp f35 = r38
-(p13) movl r39 = 0xFF9C
-}
-{ .mfi
- nop.m 999
-(p13) fsub.s1 f34 = f0, f1
- nop.i 999 ;;
+ nop.m 999
+ fma.s0 FR_RESULT = FR_Y_hi, FR_scale, FR_result_lo // Prelim result
+ br.cond.sptk EXP_POSSIBLE_UNDERFLOW // Branch to unsafe code
}
-{ .mfi
- nop.m 999
-(p12) mov f36 = f34
-(p12) cmp.eq p0, p15 = r0, r0 ;;
+;;
+
+EXP_CERTAIN_UNDERFLOW_ZERO:
+// Here if x < zero_uflow_x
+// For exp, set result to tiny+0.0 and set I, U, and branch to error handling
+// For expm1, set result to tiny-1.0 and set I, and exit
+{ .mmi
+ alloc GR_SAVE_PFS = ar.pfs,0,3,4,0
+ nop.m 999
+ mov GR_one = 1
}
-{ .mfi
-(p13) setf.exp f35 = r39
-(p13) mov f36 = f1
- nop.i 999 ;;
+;;
+
+{ .mmi
+ setf.exp FR_small = GR_one // Form small value
+ nop.m 999
+(p6) mov GR_Parameter_TAG = 13 // Error tag for exp underflow
}
-EXPL_MAIN:
+;;
+
{ .mfi
-(p0) cmp.ne.unc p12, p0 = 0x01, r33
-(p0) fmpy.s1 f101 = f36, f35
- nop.i 999 ;;
+ nop.m 999
+ fmerge.s FR_X = f8,f8 // Save x for error call
+ nop.i 999
}
+;;
+
+.pred.rel "mutex",p6,p7
{ .mfb
- nop.m 999
-(p0) fma.s0 f99 = f34, f36, f101
-(p15) br.cond.sptk EXPL_64_RETURN ;;
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x01
- nop.i 999
+ nop.m 999
+(p6) fma.s0 FR_RESULT = FR_small, FR_small, f0 // If exp, set I,U, tiny result
+(p6) br.cond.sptk __libm_error_region // If exp, go to error handling
}
-{ .mlx
- nop.m 999
-(p0) movl r50 = 0x00000000013FFF ;;
+{ .mfb
+ nop.m 999
+(p7) fms.s0 f8 = FR_small, FR_small, f1 // If expm1, set I, result -1.0
+(p7) br.ret.sptk b0 // If expm1, exit
+}
+;;
+
+
+EXP_OVERFLOW:
+// Here if x >= min_oflow_x
+{ .mmi
+ alloc GR_SAVE_PFS = ar.pfs,0,3,4,0
+ mov GR_huge_exp = 0x1fffe
+ nop.i 999
}
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + RZ + TD (Underflows)
-//
-//
-// If (Safe) is true, then
-// Compute result using user supplied status field.
-// No overflow or underflow here, but perhaps inexact.
-// Return
-// Else
-// Determine if overflow or underflow was raised.
-// Fetch +/- overflow threshold for IEEE single, double,
-// double extended
-//
{ .mfi
-(p0) setf.exp f60 = r50
-(p0) fma.s3 f102 = f34, f36, f101
- nop.i 999
+ mov GR_huge_signif = -0x1
+ nop.f 999
+(p6) mov GR_Parameter_TAG = 12 // Error tag for exp overflow
}
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x40
- nop.i 999 ;;
+;;
+
+{ .mmf
+ setf.exp FR_huge_exp = GR_huge_exp // Create huge value
+ setf.sig FR_huge_signif = GR_huge_signif // Create huge value
+ fmerge.s FR_X = f8,f8 // Save x for error call
}
+;;
+
{ .mfi
- nop.m 999
-//
-// For Safe, no need to check for over/under.
-// For expm1, handle errors like exp.
-//
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999;;
+ nop.m 999
+ fmerge.se FR_huge = FR_huge_exp, FR_huge_signif
+(p7) mov GR_Parameter_TAG = 39 // Error tag for expm1 overflow
}
-{ .mfi
- nop.m 999
-(p0) fma.s2 f100 = f34, f36, f101
- nop.i 999 ;;
+;;
+
+{ .mfb
+ nop.m 999
+ fma.s0 FR_RESULT = FR_huge, FR_huge, FR_huge // Force I, O, and Inf
+ br.cond.sptk __libm_error_region // Branch to error handling
}
+;;
+
+
+
+EXP_POSSIBLE_UNDERFLOW:
+// Here if exp and zero_uflow_x < x < about -11356 [where k < -16381]
+// Here if expm1 and |x| < 2^-16381
{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x40
- nop.i 999 ;;
+ alloc GR_SAVE_PFS = ar.pfs,0,3,4,0
+ fsetc.s2 0x7F,0x41 // Set FTZ and disable traps
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p12, p0 = f102, 0x00F
- nop.i 999
+ nop.m 999
+ fma.s2 FR_ftz = FR_Y_hi, FR_scale, FR_result_lo // Result with FTZ
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = f102, 0x00F
- nop.i 999 ;;
+ nop.m 999
+ fsetc.s2 0x7F,0x40 // Disable traps (set s2 default)
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p7) fcmp.ge.unc.s1 p10, p0 = f100, f60
- nop.i 999
+ nop.m 999
+(p7) fclass.m.unc p12, p0 = FR_ftz, 0x00F // If expm1, FTZ result denorm, zero?
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Create largest double exponent + 1.
-// Create smallest double exponent - 1.
-//
-(p0) fcmp.ge.unc.s1 p8, p0 = f100, f60
- nop.i 999 ;;
-}
-//
-// fcmp: resultS2 >= + overflow threshold -> set (a) if true
-// fcmp: resultS2 <= - overflow threshold -> set (b) if true
-// fclass: resultS3 is denorm/unorm/0 -> set (d) if true
-//
-{ .mib
-(p10) mov GR_Parameter_TAG = 39
- nop.i 999
-(p10) br.cond.sptk __libm_error_region ;;
-}
-{ .mib
-(p8) mov GR_Parameter_TAG = 12
- nop.i 999
-(p8) br.cond.sptk __libm_error_region ;;
-}
-//
-// Report that exp overflowed
-//
-{ .mib
-(p12) mov GR_Parameter_TAG = 40
- nop.i 999
-(p12) br.cond.sptk __libm_error_region ;;
+ nop.m 999
+(p6) fclass.m.unc p11, p0 = FR_ftz, 0x00F // If exp, FTZ result denorm or zero?
+ nop.i 999
}
-{ .mib
-(p11) mov GR_Parameter_TAG = 13
- nop.i 999
-(p11) br.cond.sptk __libm_error_region ;;
+;;
+
+{ .mfb
+(p12) mov GR_Parameter_TAG = 40 // expm1 underflow
+ fmerge.s FR_X = f8,f8 // Save x for error call
+(p12) br.cond.spnt __libm_error_region // Branch on expm1 underflow
}
+;;
+
{ .mib
- nop.m 999
- nop.i 999
-//
-// Report that exp underflowed
-//
-(p0) br.cond.sptk EXPL_64_RETURN ;;
+(p11) mov GR_Parameter_TAG = 13 // exp underflow
+ nop.i 999
+(p11) br.cond.spnt __libm_error_region // Branch on exp underflow
}
-EXPL_64_SPECIAL:
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p0 = f8, 0x0c3
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p13, p8 = f8, 0x007
- nop.i 999 ;;
+;;
+
+{ .mfb
+ nop.m 999
+ mov f8 = FR_RESULT // Was safe after all
+ br.ret.sptk b0
}
+;;
+
+
+EXP_64_SPECIAL:
+// Here if x natval, nan, inf, zero
+// If x natval, +inf, or if expm1 and x zero, just return x.
+// The other cases must be tested for, and results set.
+// These cases do not generate exceptions.
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p14, p0 = f8, 0x007
- nop.i 999
+ nop.m 999
+ fclass.m p8, p0 = f8, 0x0c3 // Is x nan?
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p12, p9 = f8, 0x021
- nop.i 999 ;;
+ nop.m 999
+(p6) fclass.m.unc p13, p0 = f8, 0x007 // If exp, is x zero?
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fclass.m.unc p11, p0 = f8, 0x022
- nop.i 999
+ nop.m 999
+(p6) fclass.m.unc p11, p0 = f8, 0x022 // If exp, is x -inf?
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p7) fclass.m.unc p10, p0 = f8, 0x022
- nop.i 999 ;;
+ nop.m 999
+(p8) fadd.s0 f8 = f8, f1 // If x nan, result quietized x
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Identify +/- 0, Inf, or -Inf
-// Generate the right kind of NaN.
-//
-(p13) fadd.s0 f99 = f0, f1
- nop.i 999 ;;
+ nop.m 999
+(p7) fclass.m.unc p10, p0 = f8, 0x022 // If expm1, is x -inf?
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p14) mov f99 = f8
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p6) fadd.s0 f99 = f8, f1
-//
-// expl(+/-0) = 1
-// expm1l(+/-0) = +/-0
-// No exceptions raised
-//
-(p6) br.cond.sptk EXPL_64_RETURN ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p14) br.cond.sptk EXPL_64_RETURN ;;
+ nop.m 999
+(p13) fadd.s0 f8 = f0, f1 // If exp and x zero, result 1.0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p11) mov f99 = f0
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p10) fsub.s1 f99 = f0, f1
-//
-// expl(-Inf) = 0
-// expm1l(-Inf) = -1
-// No exceptions raised.
-//
-(p10) br.cond.sptk EXPL_64_RETURN ;;
-}
-{ .mfb
- nop.m 999
-(p12) fmpy.s1 f99 = f8, f1
-//
-// expl(+Inf) = Inf
-// No exceptions raised.
-//
-(p0) br.cond.sptk EXPL_64_RETURN ;;
+ nop.m 999
+(p11) mov f8 = f0 // If exp and x -inf, result 0
+ nop.i 999
}
-EXPL_64_UNSUPPORTED:
+;;
+
{ .mfb
- nop.m 999
-(p0) fmpy.s0 f99 = f8, f0
-(p0) br.cond.sptk EXPL_64_RETURN ;;
+ nop.m 999
+(p10) fsub.s1 f8 = f0, f1 // If expm1, x -inf, result -1.0
+ br.ret.sptk b0 // Exit special cases
}
-EXPL_64_RETURN:
+;;
+
+
+EXP_64_UNSUPPORTED:
+// Here if x unsupported type
{ .mfb
nop.m 999
-(p0) mov f8 = f99
-(p0) br.ret.sptk b0
+ fmpy.s0 f8 = f8, f0 // Return nan
+ br.ret.sptk b0
}
-.endp
-ASM_SIZE_DIRECTIVE(expl)
+;;
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(expl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -1598,9 +1421,9 @@ __libm_error_region:
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
@@ -1613,8 +1436,7 @@ __libm_error_region:
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_fabs.S b/sysdeps/ia64/fpu/s_fabs.S
index ea3908dbc3..3434389a3c 100644
--- a/sysdeps/ia64/fpu/s_fabs.S
+++ b/sysdeps/ia64/fpu/s_fabs.S
@@ -1,34 +1,82 @@
-/* Copyright (C) 2000 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <sysdep.h>
-#undef ret
-
-ENTRY (__fabs)
-{
- fabs fret0 = farg0
- br.ret.sptk.many rp
-}
-END (__fabs)
-
-strong_alias (__fabs, __fabsf)
-strong_alias (__fabs, __fabsl)
-
-weak_alias (__fabs, fabs)
-weak_alias (__fabsf, fabsf)
-weak_alias (__fabsl, fabsl)
+.file "fabs.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 02/07/02 Added __libm_fabs entry point to test in case compiler inlines
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//==============================================================
+// double fabs (double x)
+//
+// Overview of operation
+//==============================================================
+// returns absolute value of x
+
+// floating-point registers used: 1
+// f8, input
+
+.section .text
+.global __libm_fabs#
+
+.proc __libm_fabs#
+__libm_fabs: // Extra global label; its .proc ends at once with no code of its own
+.endp __libm_fabs#
+
+GLOBAL_IEEE754_ENTRY(fabs)
+
+// Dummy compare of x with f1 in status field s0: sets the invalid or denormal
+// flag and takes a fault if necessary. Predicates p6 and p7 are not read here.
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s0 p6,p7 = f8,f1 // Executed for its flag side effects only
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+ fmerge.s f8 = f0,f8 // |x|: sign taken from f0, exponent and significand from x
+ br.ret.sptk b0 ;;
+}
+
+GLOBAL_IEEE754_END(fabs)
diff --git a/sysdeps/ia64/fpu/s_fabsf.S b/sysdeps/ia64/fpu/s_fabsf.S
index 7e5abde625..71bb6da882 100644
--- a/sysdeps/ia64/fpu/s_fabsf.S
+++ b/sysdeps/ia64/fpu/s_fabsf.S
@@ -1 +1,82 @@
-/* __fabsf is in s_fabs.S. */
+.file "fabsf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 02/07/02 Added __libm_fabsf entry point to test in case compiler inlines
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//==============================================================
+// float fabsf (float x)
+//
+// Overview of operation
+//==============================================================
+// returns absolute value of x
+
+// floating-point registers used: 1
+// f8, input
+
+.section .text
+.global __libm_fabsf#
+
+.proc __libm_fabsf#
+__libm_fabsf: // Extra global label; its .proc ends at once with no code of its own
+.endp __libm_fabsf#
+
+GLOBAL_IEEE754_ENTRY(fabsf)
+
+// Dummy compare of x with f1 in status field s0: sets the invalid or denormal
+// flag and takes a fault if necessary. Predicates p6 and p7 are not read here.
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s0 p6,p7 = f8,f1 // Executed for its flag side effects only
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+ fmerge.s f8 = f0,f8 // |x|: sign taken from f0, exponent and significand from x
+ br.ret.sptk b0 ;;
+}
+
+GLOBAL_IEEE754_END(fabsf)
diff --git a/sysdeps/ia64/fpu/s_fabsl.S b/sysdeps/ia64/fpu/s_fabsl.S
index 3d7a41fe2b..a048949147 100644
--- a/sysdeps/ia64/fpu/s_fabsl.S
+++ b/sysdeps/ia64/fpu/s_fabsl.S
@@ -1 +1,82 @@
-/* __fabsl is in s_fabs.S. */
+.file "fabsl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 02/07/02 Added __libm_fabsl entry point to test in case compiler inlines
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//==============================================================
+// long double fabsl (long double x)
+//
+// Overview of operation
+//==============================================================
+// returns absolute value of x
+
+// floating-point registers used: 1
+// f8, input
+
+.section .text
+.global __libm_fabsl#
+
+.proc __libm_fabsl#
+__libm_fabsl: // Extra global label; its .proc ends at once with no code of its own
+.endp __libm_fabsl#
+
+GLOBAL_IEEE754_ENTRY(fabsl)
+
+// Dummy compare of x with f1 in status field s0: sets the invalid or denormal
+// flag and takes a fault if necessary. Predicates p6 and p7 are not read here.
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s0 p6,p7 = f8,f1 // Executed for its flag side effects only
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+ fmerge.s f8 = f0,f8 // |x|: sign taken from f0, exponent and significand from x
+ br.ret.sptk b0 ;;
+}
+
+GLOBAL_IEEE754_END(fabsl)
diff --git a/sysdeps/ia64/fpu/s_floor.S b/sysdeps/ia64/fpu/s_floor.S
index 438b0fa867..9ed9d6dcdb 100644
--- a/sysdeps/ia64/fpu/s_floor.S
+++ b/sysdeps/ia64/fpu/s_floor.S
@@ -1,10 +1,10 @@
.file "floor.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,86 +20,68 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-.align 32
-.global floor#
-
-.section .text
-.proc floor#
-.align 32
-
// History
//==============================================================
-// 2/02/00: Initial version
-// 3/22/00: Updated to improve performance
-// 6/13/00: Improved speed, fixed setting of inexact flag
-// 6/27/00: Eliminated incorrect invalid flag setting
-// 2/07/01: Corrected sign of zero result in round to -inf mode
+// 02/02/00 Initial version
+// 03/22/00 Updated to improve performance
+// 06/13/00 Improved speed, fixed setting of inexact flag
+// 06/27/00 Eliminated incorrect invalid flag setting
+// 02/07/01 Corrected sign of zero result in round to -inf mode
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/28/03 Improved performance
+//==============================================================
// API
//==============================================================
// double floor(double x)
+//==============================================================
-// general input registers:
-
-floor_GR_FFFF = r14
-floor_GR_signexp = r15
-floor_GR_exponent = r16
-floor_GR_expmask = r17
-floor_GR_bigexp = r18
-
-
-// predicate registers used:
+// general input registers:
+// r14 - r18
-// p6 ==> Input is NaN, infinity, zero
-// p7 ==> Input is denormal
-// p8 ==> Input is <0
-// p9 ==> Input is >=0
-// p10 ==> Input is already an integer (bigger than largest integer)
-// p11 ==> Input is not a large integer
-// p12 ==> Input is a smaller integer
-// p13 ==> Input is not an even integer, so inexact must be set
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
+fAdj = f12
+fPreResult = f13
-FLOOR_NORM_f8 = f9
-FLOOR_FFFF = f10
-FLOOR_INEXACT = f11
-FLOOR_FLOAT_INT_f8 = f12
-FLOOR_INT_f8 = f13
-FLOOR_adj = f14
+// predicate registers used:
+// p6 - p9
// Overview of operation
//==============================================================
-
// double floor(double x)
-// Return an integer value (represented as a double) that is the largest
+// Return an integer value (represented as a double) that is the largest
// value not greater than x
// This is x rounded toward -infinity to an integral value.
// Inexact is set if x != floor(x)
-// **************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -120,121 +102,115 @@ FLOOR_adj = f14
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-#include "libm_support.h"
-floor:
-#ifdef _LIBC
-.global __floor
-__floor:
-#endif
+.section .text
+GLOBAL_IEEE754_ENTRY(floor)
{ .mfi
- getf.exp floor_GR_signexp = f8
- fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8
- addl floor_GR_bigexp = 0x10033, r0
+ getf.exp rSignexp = f8 // Get signexp; redone at FLOOR_UNORM if x unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x10033, r0 // Biased exponent of 2^52; x is integer from there up
}
{ .mfi
- addl floor_GR_FFFF = -1,r0
- fcmp.lt.s1 p8,p9 = f8,f0
- mov floor_GR_expmask = 0x1FFFF ;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
-// p7 ==> denorm
{ .mfi
- setf.sig FLOOR_FFFF = floor_GR_FFFF
- fclass.m p7,p0 = f8, 0x0b
- nop.i 999
+ nop.m 0
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
+ nop.i 0
}
-{ .mfi
- nop.m 999
- fnorm.s1 FLOOR_NORM_f8 = f8
- nop.i 999 ;;
+{ .mfb
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt FLOOR_UNORM // Branch if x unorm
}
+;;
-// p6 ==> NAN, INF, ZERO
-{ .mfb
- nop.m 999
- fclass.m p6,p10 = f8, 0xe7
-(p7) br.cond.spnt L(FLOOR_DENORM) ;;
+FLOOR_COMMON:
+// Return here from FLOOR_UNORM
+{ .mfi
+ nop.m 0
+ fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
+ nop.i 0
}
+;;
-L(FLOOR_COMMON):
.pred.rel "mutex",p8,p9
-// Set adjustment to subtract from trunc(x) for result
-// If x<0, adjustment is -1.0
-// If x>=0, adjustment is 0.0
{ .mfi
- and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
-(p8) fnma.s1 FLOOR_adj = f1,f1,f0
- nop.i 999
+ nop.m 0
+(p8) fnma.s1 fAdj = f1, f1, f0 // If x < 0, adjustment is -1
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p9) fadd.s1 FLOOR_adj = f0,f0
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fAdj = f0, f0, f0 // If x >= 0 (not x < 0), adjustment is 0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
- nop.i 999
+ nop.m 0
+ fcvt.xf fPreResult = fXInt // trunc(x)
+ nop.i 0
}
-{ .mfi
-(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp
-(p6) fnorm.d f8 = f8
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
+;;
-{ .mfi
- nop.m 999
-(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8
- nop.i 999 ;;
+{ .mmi
+ and rExp = rSignexp, rExpMask // Get biased exponent
+;;
+ cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^52?
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p10) fnorm.d f8 = FLOOR_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.d.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^52
+ nop.i 0
}
-
-
{ .mfi
- nop.m 999
-(p11) fadd.d f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.d.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^52
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
+ nop.i 0
}
+;;
-// Set inexact if result not equal to input
{ .mfi
- nop.m 999
-(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF
- nop.i 999
+ nop.m 0
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
}
-// Set result to input if integer
{ .mfb
- nop.m 999
-(p12) fnorm.d f8 = FLOOR_NORM_f8
- br.ret.sptk b0 ;;
+ nop.m 0
+(p8) fma.d.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
+ br.ret.sptk b0 // Exit main path; the |x| >= 2^52 case also leaves here
}
+;;
+
-// Here if input denorm
-L(FLOOR_DENORM):
+FLOOR_UNORM:
+// Here if x unorm: redo signexp from the normalized input, then rejoin main path
{ .mfb
- getf.exp floor_GR_signexp = FLOOR_NORM_f8
- fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
- br.cond.sptk L(FLOOR_COMMON) ;;
+ getf.exp rSignexp = fNormX // Recompute signexp from normalized x
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag; p7 is rewritten before use
+ br.cond.sptk FLOOR_COMMON // Return to main path
}
+;;
-.endp floor
-ASM_SIZE_DIRECTIVE(floor)
+GLOBAL_IEEE754_END(floor)
diff --git a/sysdeps/ia64/fpu/s_floorf.S b/sysdeps/ia64/fpu/s_floorf.S
index 15b2bbd31d..a3f2095931 100644
--- a/sysdeps/ia64/fpu/s_floorf.S
+++ b/sysdeps/ia64/fpu/s_floorf.S
@@ -1,10 +1,10 @@
.file "floorf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,85 +20,67 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-.align 32
-.global floorf#
-
-.section .text
-.proc floorf#
-.align 32
-
// History
//==============================================================
-// 2/02/00: Initial version
-// 6/13/00: Improved speed
-// 6/27/00: Eliminated incorrect invalid flag setting
-// 2/07/01: Corrected sign of zero result in round to -inf mode
+// 02/02/00 Initial version
+// 06/13/00 Improved speed
+// 06/27/00 Eliminated incorrect invalid flag setting
+// 02/07/01 Corrected sign of zero result in round to -inf mode
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/28/03 Improved performance
+//==============================================================
// API
//==============================================================
// float floorf(float x)
+//==============================================================
-// general input registers:
-
-floor_GR_FFFF = r14
-floor_GR_signexp = r15
-floor_GR_exponent = r16
-floor_GR_expmask = r17
-floor_GR_bigexp = r18
-
-
-// predicate registers used:
+// general input registers:
+// r14 - r18
-// p6 ==> Input is NaN, infinity, zero
-// p7 ==> Input is denormal
-// p8 ==> Input is <0
-// p9 ==> Input is >=0
-// p10 ==> Input is already an integer (bigger than largest integer)
-// p11 ==> Input is not a large integer
-// p12 ==> Input is a smaller integer
-// p13 ==> Input is not an even integer, so inexact must be set
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
+fAdj = f12
+fPreResult = f13
-FLOOR_NORM_f8 = f9
-FLOOR_FFFF = f10
-FLOOR_INEXACT = f11
-FLOOR_FLOAT_INT_f8 = f12
-FLOOR_INT_f8 = f13
-FLOOR_adj = f14
+// predicate registers used:
+// p6 - p9
// Overview of operation
//==============================================================
-
// float floorf(float x)
-// Return an integer value (represented as a float) that is the largest
+// Return an integer value (represented as a float) that is the largest
// value not greater than x
// This is x rounded toward -infinity to an integral value.
// Inexact is set if x != floorf(x)
-// **************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -119,119 +101,115 @@ FLOOR_adj = f14
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-#include "libm_support.h"
-floorf:
-#ifdef _LIBC
-.global __floorf
-__floorf:
-#endif
+.section .text
+GLOBAL_IEEE754_ENTRY(floorf)
{ .mfi
- getf.exp floor_GR_signexp = f8
- fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8
- addl floor_GR_bigexp = 0x10016, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x10016, r0 // Set exponent at which is integer
}
{ .mfi
- addl floor_GR_FFFF = -1,r0
- fcmp.lt.s1 p8,p9 = f8,f0
- mov floor_GR_expmask = 0x1FFFF ;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
-// p7 ==> denorm
{ .mfi
- setf.sig FLOOR_FFFF = floor_GR_FFFF
- fclass.m p7,p0 = f8, 0x0b
- nop.i 999
+ nop.m 0
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
+ nop.i 0
}
-{ .mfi
- nop.m 999
- fnorm.s1 FLOOR_NORM_f8 = f8
- nop.i 999 ;;
+{ .mfb
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt FLOOR_UNORM // Branch if x unorm
}
+;;
-// p6 ==> NAN, INF, ZERO
-{ .mfb
- nop.m 999
- fclass.m p6,p10 = f8, 0xe7
-(p7) br.cond.spnt L(FLOOR_DENORM) ;;
+FLOOR_COMMON:
+// Return here from FLOOR_UNORM
+{ .mfi
+ nop.m 0
+ fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
+ nop.i 0
}
+;;
-L(FLOOR_COMMON):
.pred.rel "mutex",p8,p9
-// Set adjustment to subtract from trunc(x) for result
-// If x<0, adjustment is -1.0
-// If x>=0, adjustment is 0.0
{ .mfi
- and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
-(p8) fnma.s1 FLOOR_adj = f1,f1,f0
- nop.i 999
+ nop.m 0
+(p8) fnma.s1 fAdj = f1, f1, f0 // If x < 0, adjustment is -1
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p9) fadd.s1 FLOOR_adj = f0,f0
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fAdj = f0, f0, f0 // If x >= 0, adjustment is 0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
- nop.i 999
+ nop.m 0
+ fcvt.xf fPreResult = fXInt // trunc(x)
+ nop.i 0
}
-{ .mfi
-(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp
-(p6) fnorm.s f8 = f8
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
+;;
-{ .mfi
- nop.m 999
-(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8
- nop.i 999 ;;
+{ .mmi
+ and rExp = rSignexp, rExpMask // Get biased exponent
+;;
+ cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^23?
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p10) fnorm.s f8 = FLOOR_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^23
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p11) fadd.s f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^23
+ nop.i 0
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
+ nop.i 0
}
+;;
-// Set inexact if result not equal to input
{ .mfi
- nop.m 999
-(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF
- nop.i 999
+ nop.m 0
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
}
-// Set result to input if integer
{ .mfb
- nop.m 999
-(p12) fnorm.s f8 = FLOOR_NORM_f8
- br.ret.sptk b0 ;;
+ nop.m 0
+(p8) fma.s.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
+ br.ret.sptk b0 // Exit main path, 0 < |x| < 2^23
}
+;;
+
-// Here if input denorm
-L(FLOOR_DENORM):
+FLOOR_UNORM:
+// Here if x unorm
{ .mfb
- getf.exp floor_GR_signexp = FLOOR_NORM_f8
- fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
- br.cond.sptk L(FLOOR_COMMON) ;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk FLOOR_COMMON // Return to main path
}
+;;
-.endp floorf
-ASM_SIZE_DIRECTIVE(floorf)
+GLOBAL_IEEE754_END(floorf)
diff --git a/sysdeps/ia64/fpu/s_floorl.S b/sysdeps/ia64/fpu/s_floorl.S
index 294578e1a7..345c4f30dd 100644
--- a/sysdeps/ia64/fpu/s_floorl.S
+++ b/sysdeps/ia64/fpu/s_floorl.S
@@ -1,10 +1,10 @@
.file "floorl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,85 +20,67 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-.align 32
-.global floorl#
-
-.section .text
-.proc floorl#
-.align 32
-
// History
//==============================================================
-// 2/02/00: Initial version
-// 6/13/00: Improved speed
-// 6/27/00: Eliminated incorrect invalid flag setting
-// 2/07/01: Corrected sign of zero result in round to -inf mode
+// 02/02/00 Initial version
+// 06/13/00 Improved speed
+// 06/27/00 Eliminated incorrect invalid flag setting
+// 02/07/01 Corrected sign of zero result in round to -inf mode
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/28/03 Improved performance
+//==============================================================
// API
//==============================================================
// long double floorl(long double x)
+//==============================================================
-// general input registers:
-
-floor_GR_FFFF = r14
-floor_GR_signexp = r15
-floor_GR_exponent = r16
-floor_GR_expmask = r17
-floor_GR_bigexp = r18
-
-
-// predicate registers used:
+// general input registers:
+// r14 - r18
-// p6 ==> Input is NaN, infinity, zero
-// p7 ==> Input is denormal
-// p8 ==> Input is <0
-// p9 ==> Input is >=0
-// p10 ==> Input is already an integer (bigger than largest integer)
-// p11 ==> Input is not a large integer
-// p12 ==> Input is a smaller integer
-// p13 ==> Input is not an even integer, so inexact must be set
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
+fAdj = f12
+fPreResult = f13
-FLOOR_NORM_f8 = f9
-FLOOR_FFFF = f10
-FLOOR_INEXACT = f11
-FLOOR_FLOAT_INT_f8 = f12
-FLOOR_INT_f8 = f13
-FLOOR_adj = f14
+// predicate registers used:
+// p6 - p9
// Overview of operation
//==============================================================
-
// long double floorl(long double x)
-// Return an integer value (represented as a long double) that is the largest
+// Return an integer value (represented as a long double) that is the largest
// value not greater than x
// This is x rounded toward -infinity to an integral value.
// Inexact is set if x != floorl(x)
-// **************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+//==============================================================
// double_extended
// if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -119,119 +101,115 @@ FLOOR_adj = f14
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-#include "libm_support.h"
-floorl:
-#ifdef _LIBC
-.global __floorl
-__floorl:
-#endif
+.section .text
+GLOBAL_IEEE754_ENTRY(floorl)
{ .mfi
- getf.exp floor_GR_signexp = f8
- fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8
- addl floor_GR_bigexp = 0x1003e, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x1003e, r0 // Set exponent at which is integer
}
{ .mfi
- addl floor_GR_FFFF = -1,r0
- fcmp.lt.s1 p8,p9 = f8,f0
- mov floor_GR_expmask = 0x1FFFF ;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
-// p7 ==> denorm
{ .mfi
- setf.sig FLOOR_FFFF = floor_GR_FFFF
- fclass.m p7,p0 = f8, 0x0b
- nop.i 999
+ nop.m 0
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0
+ nop.i 0
}
-{ .mfi
- nop.m 999
- fnorm.s1 FLOOR_NORM_f8 = f8
- nop.i 999 ;;
+{ .mfb
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt FLOOR_UNORM // Branch if x unorm
}
+;;
-// p6 ==> NAN, INF, ZERO
-{ .mfb
- nop.m 999
- fclass.m p6,p10 = f8, 0xe7
-(p7) br.cond.spnt L(FLOOR_DENORM) ;;
+FLOOR_COMMON:
+// Return here from FLOOR_UNORM
+{ .mfi
+ nop.m 0
+ fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0
+ nop.i 0
}
+;;
-L(FLOOR_COMMON):
.pred.rel "mutex",p8,p9
-// Set adjustment to subtract from trunc(x) for result
-// If x<0, adjustment is -1.0
-// If x>=0, adjustment is 0.0
{ .mfi
- and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
-(p8) fnma.s1 FLOOR_adj = f1,f1,f0
- nop.i 999
+ nop.m 0
+(p8) fnma.s1 fAdj = f1, f1, f0 // If x < 0, adjustment is -1
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p9) fadd.s1 FLOOR_adj = f0,f0
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fAdj = f0, f0, f0 // If x >= 0, adjustment is 0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
- nop.i 999
+ nop.m 0
+ fcvt.xf fPreResult = fXInt // trunc(x)
+ nop.i 0
}
-{ .mfi
-(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp
-(p6) fnorm f8 = f8
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0
}
+;;
-{ .mfi
- nop.m 999
-(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8
- nop.i 999 ;;
+{ .mmi
+ and rExp = rSignexp, rExpMask // Get biased exponent
+;;
+ cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^63?
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p10) fnorm f8 = FLOOR_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s0 f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^63
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p11) fadd f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^63
+ nop.i 0
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
+ nop.i 0
}
+;;
-// Set inexact if result not equal to input
{ .mfi
- nop.m 999
-(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF
- nop.i 999
+ nop.m 0
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
}
-// Set result to input if integer
{ .mfb
- nop.m 999
-(p12) fnorm f8 = FLOOR_NORM_f8
- br.ret.sptk b0 ;;
+ nop.m 0
+(p8) fma.s0 f8 = fNormX, f1, f0 // If x int, result normalized x
+ br.ret.sptk b0 // Exit main path, 0 < |x| < 2^63
}
+;;
+
-// Here if input denorm
-L(FLOOR_DENORM):
+FLOOR_UNORM:
+// Here if x unorm
{ .mfb
- getf.exp floor_GR_signexp = FLOOR_NORM_f8
- fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
- br.cond.sptk L(FLOOR_COMMON) ;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk FLOOR_COMMON // Return to main path
}
+;;
-.endp floorl
-ASM_SIZE_DIRECTIVE(floorl)
+GLOBAL_IEEE754_END(floorl)
diff --git a/sysdeps/ia64/fpu/s_frexp.c b/sysdeps/ia64/fpu/s_frexp.c
index 98349bca47..c67500695f 100644
--- a/sysdeps/ia64/fpu/s_frexp.c
+++ b/sysdeps/ia64/fpu/s_frexp.c
@@ -1,8 +1,10 @@
-//
-// Copyright (C) 2000, 2001, Intel Corporation
+/* file: frexp.c */
+
+
+// Copyright (c) 2000-2002, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// Redistribution and use in source and binary forms, with or without
@@ -19,14 +21,15 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
+
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -34,22 +37,30 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
//
+// History
+//=====================================================================
+// 2/02/00 Initial version
+// 1/23/02 Calls kernel with parameter to specify 32- or 64-bit int
//
+//=====================================================================
#include "libm_support.h"
+double __libm_frexp(double, int*, int);
+
double frexp(double x, int *y)
{
-#ifdef SIZE_INT_64
- return( __libm_frexp_8(x, y) );
+#ifdef SIZE_INT_64
+ return( __libm_frexp(x, y, 1) );
#else
-#ifdef SIZE_INT_32
- return( _GI___libm_frexp_4(x, y) );
+#ifdef SIZE_INT_32
+ return( __libm_frexp(x, y, 0) );
#endif
#endif
diff --git a/sysdeps/ia64/fpu/s_frexpf.c b/sysdeps/ia64/fpu/s_frexpf.c
index f666304147..c21a21dfba 100644
--- a/sysdeps/ia64/fpu/s_frexpf.c
+++ b/sysdeps/ia64/fpu/s_frexpf.c
@@ -1,8 +1,10 @@
-//
-// Copyright (C) 2000, 2001, Intel Corporation
+/* file: frexpf.c */
+
+
+// Copyright (c) 2000-2002, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// Redistribution and use in source and binary forms, with or without
@@ -19,14 +21,15 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
+
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -34,22 +37,30 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
//
+// History
+//=====================================================================
+// 2/02/00 Initial version
+// 1/23/02 Calls kernel with parameter to specify 32- or 64-bit int
//
+//=====================================================================
#include "libm_support.h"
+float __libm_frexpf(float, int*, int);
+
float frexpf(float x, int *y)
{
-#ifdef SIZE_INT_64
- return( __libm_frexp_8f(x, y) );
+#ifdef SIZE_INT_64
+ return( __libm_frexpf(x, y, 1) );
#else
-#ifdef SIZE_INT_32
- return( _GI___libm_frexp_4f(x, y) );
+#ifdef SIZE_INT_32
+ return( __libm_frexpf(x, y, 0) );
#endif
#endif
diff --git a/sysdeps/ia64/fpu/s_frexpl.c b/sysdeps/ia64/fpu/s_frexpl.c
index 3edc971e3f..13d44ab8b5 100644
--- a/sysdeps/ia64/fpu/s_frexpl.c
+++ b/sysdeps/ia64/fpu/s_frexpl.c
@@ -1,8 +1,10 @@
-//
-// Copyright (C) 2000, 2001, Intel Corporation
+/* file: frexpl.c */
+
+
+// Copyright (c) 2000-2002, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
//
// Redistribution and use in source and binary forms, with or without
@@ -19,14 +21,15 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
+
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -34,22 +37,30 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
//
+// History
+//=====================================================================
+// 2/02/00 Initial version
+// 1/23/02 Calls kernel with parameter to specify 32- or 64-bit int
//
+//=====================================================================
#include "libm_support.h"
+long double __libm_frexpl(long double, int*, int);
+
long double frexpl(long double x, int *y)
{
-#ifdef SIZE_INT_64
- return( __libm_frexp_8l(x, y) );
+#ifdef SIZE_INT_64
+ return( __libm_frexpl(x, y, 1) );
#else
-#ifdef SIZE_INT_32
- return( _GI___libm_frexp_4l(x, y) );
+#ifdef SIZE_INT_32
+ return( __libm_frexpl(x, y, 0) );
#endif
#endif
diff --git a/sysdeps/ia64/fpu/s_ilogb.S b/sysdeps/ia64/fpu/s_ilogb.S
index 61975dd941..3f2733cabd 100644
--- a/sysdeps/ia64/fpu/s_ilogb.S
+++ b/sysdeps/ia64/fpu/s_ilogb.S
@@ -1,10 +1,10 @@
.file "ilogb.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,234 +20,248 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/03/00 Initial version
-// 5/26/00 Fix bug when x a double-extended denormal;
+// 02/03/00 Initial version
+// 05/26/00 Fix bug when x a double-extended denormal;
// if x=0 call error routine, per C9X
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 1/20/01 Fixed result for x=0, corrected error tag value.
-
-.align 32
-.global ilogb#
-
-.section .text
-.proc ilogb#
-.align 32
-
+// 01/20/01 Fixed result for x=0, corrected error tag value.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
+//
// API
//==============================================================
-// int = ilogb(double)
-
+// int ilogb( double x );
+//
// Overview of operation
//==============================================================
-// ilogb computes log2(x) as an int
+// The ilogb function extracts the exponent of x as an integer
// and returns it in r8
-
-// ilogb is similar to logb but differs in the following ways:
+//
+// ilogb is similar to logb but differs in the following ways:
// +-inf
// ilogb: returns INT_MAX
// logb: returns +inf
-// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogb: returns INT_MAX (7fffffff)
-// logb: returns QNAN (quieted SNAN)
+// logb: returns QNAN (quietized SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
-// ilogb: returns INT_MIN (80000000)
-// logb: returns -inf
-
+// ilogb: returns INT_MIN (80000000)
+// logb: returns -inf, raises the divide-by-zero exception,
+// and calls libm_error_support to set domain error
+//
// Registers used
//==============================================================
+// general registers used:
+// r26 -> r39
+// r36 -> r39 used as parameters to error path
+//
+// predicate registers used:
+// p6 -> p10
+// floating-point registers used:
+// f9, f10, f11
+// f8, input
-// general local registers:
-// ar.pfs r32
-// r33 -> r37
-// r38 -> r41 used as parameters to error path
-
-// predicate registers used:
-// p6 - x nan, inf
-// p7 - x 0
-// p8 - x norm, unorm
-// p9 - x unorm
-
-// floating-point registers used:
-// f8 - f10
-
-#include "libm_support.h"
+rExpBias = r26
+rExpMask = r27
+rSignexp_x = r28
+rExp_x = r29
+rIntMax = r30
+rExp_2to64 = r31
GR_SAVE_PFS = r32
+rTrialResult = r33
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
-GR_Parameter_X = r38
-GR_Parameter_Y = r39
-GR_Parameter_RESULT = r40
-GR_Parameter_TAG = r41
-FR_X = f8
-FR_Y = f0
-FR_RESULT = f0
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+GR_Parameter_TAG = r39
+fTmp = f9
+fNorm_x = f10
+f2to64 = f11
-ilogb:
+.section .text
+GLOBAL_LIBM_ENTRY(ilogb)
-// Form signexp of 2^64 in case need to scale denormal
-{ .mmf
- alloc r32=ar.pfs,1,5,4,0
-(p0) mov r37 = 0x1003f
-(p0) fnorm f9 = f8 ;;
+// X NORMAL
+// TrueExp_x = exp(f8) - 0xffff
+// r8 = TrueExp_x
+{ .mfi
+ getf.exp rSignexp_x = f8
+ fclass.m p8,p0 = f8, 0x0b // Test for x unorm
+ mov rExpBias = 0xffff // Exponent bias
}
-
-// Form 2^64 in case need to scale denormal
{ .mfi
-(p0) setf.exp f10 = r37
-(p0) fclass.m.unc p7, p8 = f8, 0xe3
-(p0) mov r34 = 0xffff ;;
+ nop.m 0
+ fnorm.s1 fNorm_x = f8
+ mov rExpMask = 0x1ffff // Exponent mask
}
+;;
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 0 11
-// e 3
-// X ZERO, returns INT_MIN
-// X INF or NAN, returns INT_MAX
+// Form signexp of 2^64 in case need to scale denormal
+{ .mfb
+ mov rExp_2to64 = 0x1003f
+ fclass.m p6,p9 = f8, 0x1e3 // Test x natval, nan, inf
+(p8) br.cond.spnt ILOGB_DENORM // Branch if x unorm
+}
+;;
+ILOGB_COMMON:
+// Return here from ILOGB_DENORM
{ .mfi
-(p0) mov r35 = 0x1ffff
-(p8) fclass.m.unc p6, p8 = f8, 0x07
- nop.i 999 ;;
+ and rExp_x = rSignexp_x, rExpMask // Get biased exponent
+ fclass.m p7,p10 = f8, 0x07 // Test x zero
+ nop.i 0
}
{ .mlx
- nop.m 999
-(p7) movl r8 = 0x000000007fffffff ;;
+ nop.m 0
+ movl rIntMax = 0x000000007fffffff // Form INT_MAX
}
+;;
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(ILOGB_ZERO) ;;
-}
-
-// Test for denormal
+.pred.rel "mutex",p6,p9
{ .mfi
- nop.m 999
-(p8) fclass.m.unc p9, p0 = f9, 0x0b
- nop.i 999 ;;
+(p9) sub r8 = rExp_x, rExpBias // Get true exponent for normal path
+(p6) fma.s0 fTmp = f8, f8, f0 // Dummy to set Invalid flag
+(p6) mov r8 = rIntMax // If nan, inf, return INT_MAX
+}
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt ILOGB_ZERO // Branch if x zero
+(p10) br.ret.sptk b0 // Exit if x not zero
}
+;;
-L(ILOGB_COMMON):
-// X NORMAL returns true exponent
-{ .mmi
- nop.m 999
-(p8) getf.exp r33 = f9
- nop.i 999 ;;
+
+ILOGB_DENORM:
+// Form 2^64 in case need to scale denormal
+// Check to see if double-extended denormal
+{ .mfi
+ setf.exp f2to64 = rExp_2to64
+ fclass.m p8,p0 = fNorm_x, 0x0b
+ nop.i 0
}
+;;
-// If denormal add 64 to exponent bias for scaling
-{ .mfb
-(p9) add r34 = 64, r34
- nop.f 999
-(p9) br.cond.spnt L(ILOGB_DENORM) ;;
+{ .mfi
+ nop.m 0
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
}
+;;
-{ .mmi
-(p8) and r36 = r35, r33
- nop.m 999
- nop.i 999 ;;
+// If double-extended denormal add 64 to exponent bias for scaling
+// If double-extended denormal form x * 2^64 which is normal
+{ .mfi
+(p8) add rExpBias = 64, rExpBias
+(p8) fmpy.s1 fNorm_x = fNorm_x, f2to64
+ nop.i 0
}
+;;
+// Logic is the same as normal path but use normalized input
{ .mib
-(p8) sub r8 = r36, r34
- nop.i 999
-(p0) br.ret.sptk b0 ;;
+ getf.exp rSignexp_x = fNorm_x
+ nop.i 0
+ br.cond.sptk ILOGB_COMMON // Return to main path
}
+;;
-L(ILOGB_DENORM):
-// Here if x denormal
-// Form x * 2^64 which is normal
-// Return to common code
-{ .mfb
- cmp.eq p8,p9 = r0,r0
- fmpy f9 = f9, f10
- br.cond.sptk L(ILOGB_COMMON) ;;
+ILOGB_ZERO:
+// Here if x zero
+// Return INT_MIN, call error support
+
+{ .mlx
+ alloc r32=ar.pfs,1,3,4,0
+ movl rTrialResult = 0x0000000080000000
+}
+{ .mib
+ mov GR_Parameter_TAG = 157 // Error code
+ nop.i 0
+ br.cond.sptk __libm_error_region // Call error support
}
+;;
-// X ZERO
-// return INT_MIN, call error support
-L(ILOGB_ZERO):
-{.mlx
- mov GR_Parameter_TAG = 157
-(p6) movl r33 = 0x0000000080000000 ;;
-};;
-.endp ilogb
-ASM_SIZE_DIRECTIVE(ilogb)
+GLOBAL_LIBM_END(ilogb)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
+
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
+
{ .mmi
- stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
{ .mib
- stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfd [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ stfd [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
+
{ .mmi
- mov r8 = r33 // Store result
+ mov r8 = rTrialResult
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ br.ret.sptk b0
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ilogbf.S b/sysdeps/ia64/fpu/s_ilogbf.S
index ffa6d3b672..1b6ade6148 100644
--- a/sysdeps/ia64/fpu/s_ilogbf.S
+++ b/sysdeps/ia64/fpu/s_ilogbf.S
@@ -1,10 +1,10 @@
.file "ilogbf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,234 +20,248 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/03/00 Initial version
-// 5/26/00 Fix bug when x a double-extended denormal;
+// 02/03/00 Initial version
+// 05/26/00 Fix bug when x a double-extended denormal;
// if x=0 call error routine, per C9X
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 1/20/01 Fixed result for x=0
-
-.align 32
-.global ilogbf#
-
-.section .text
-.proc ilogbf#
-.align 32
-
+// 01/20/01 Fixed result for x=0
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
+//
// API
//==============================================================
-// int = ilogbf(float)
-
+// int ilogbf( float x );
+//
// Overview of operation
//==============================================================
-// ilogbf computes log2(x) as an int
+// The ilogbf function extracts the exponent of x as an integer
// and returns it in r8
-
-// ilogbf is similar to logbf but differs in the following ways:
+//
+// ilogbf is similar to logbf but differs in the following ways:
// +-inf
// ilogbf: returns INT_MAX
// logbf: returns +inf
-// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogbf: returns INT_MAX (7fffffff)
-// logbf: returns QNAN (quieted SNAN)
+// logbf: returns QNAN (quietized SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
-// ilogbf: returns INT_MIN (80000000)
-// logbf: returns -inf
-
+// ilogbf: returns INT_MIN (80000000)
+// logbf: returns -inf, raises the divide-by-zero exception,
+// and calls libm_error_support to set domain error
+//
// Registers used
//==============================================================
+// general registers used:
+// r26 -> r39
+// r36 -> r39 used as parameters to error path
+//
+// predicate registers used:
+// p6 -> p10
+// floating-point registers used:
+// f9, f10, f11
+// f8, input
-// general local registers:
-// ar.pfs r32
-// r33 -> r37
-// r38 -> r41 used as parameters to error path
-
-// predicate registers used:
-// p6 - x nan, inf
-// p7 - x 0
-// p8 - x norm, unorm
-// p9 - x unorm
-
-// floating-point registers used:
-// f8 - f10
-
-#include "libm_support.h"
+rExpBias = r26
+rExpMask = r27
+rSignexp_x = r28
+rExp_x = r29
+rIntMax = r30
+rExp_2to64 = r31
GR_SAVE_PFS = r32
+rTrialResult = r33
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
-GR_Parameter_X = r38
-GR_Parameter_Y = r39
-GR_Parameter_RESULT = r40
-GR_Parameter_TAG = r41
-FR_X = f8
-FR_Y = f0
-FR_RESULT = f0
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+GR_Parameter_TAG = r39
+fTmp = f9
+fNorm_x = f10
+f2to64 = f11
-ilogbf:
+.section .text
+GLOBAL_LIBM_ENTRY(ilogbf)
-// Form signexp of 2^64 in case need to scale denormal
-{ .mmf
- alloc r32=ar.pfs,1,5,4,0
-(p0) mov r37 = 0x1003f
-(p0) fnorm f9 = f8 ;;
+// X NORMAL
+// TrueExp_x = exp(f8) - 0xffff
+// r8 = TrueExp_x
+{ .mfi
+ getf.exp rSignexp_x = f8
+ fclass.m p8,p0 = f8, 0x0b // Test for x unorm
+ mov rExpBias = 0xffff // Exponent bias
}
-
-// Form 2^64 in case need to scale denormal
{ .mfi
-(p0) setf.exp f10 = r37
-(p0) fclass.m.unc p7, p8 = f8, 0xe3
-(p0) mov r34 = 0xffff ;;
+ nop.m 0
+ fnorm.s1 fNorm_x = f8
+ mov rExpMask = 0x1ffff // Exponent mask
}
+;;
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 0 11
-// e 3
-// X ZERO, returns INT_MIN
-// X INF or NAN, returns INT_MAX
+// Form signexp of 2^64 in case need to scale denormal
+{ .mfb
+ mov rExp_2to64 = 0x1003f
+ fclass.m p6,p9 = f8, 0x1e3 // Test x natval, nan, inf
+(p8) br.cond.spnt ILOGB_DENORM // Branch if x unorm
+}
+;;
+ILOGB_COMMON:
+// Return here from ILOGB_DENORM
{ .mfi
-(p0) mov r35 = 0x1ffff
-(p8) fclass.m.unc p6, p8 = f8, 0x07
- nop.i 999 ;;
+ and rExp_x = rSignexp_x, rExpMask // Get biased exponent
+ fclass.m p7,p10 = f8, 0x07 // Test x zero
+ nop.i 0
}
{ .mlx
- nop.m 999
-(p7) movl r8 = 0x000000007fffffff ;;
+ nop.m 0
+ movl rIntMax = 0x000000007fffffff // Form INT_MAX
}
+;;
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(ILOGB_ZERO) ;;
-}
-
-// Test for denormal
+.pred.rel "mutex",p6,p9
{ .mfi
- nop.m 999
-(p8) fclass.m.unc p9, p0 = f9, 0x0b
- nop.i 999 ;;
+(p9) sub r8 = rExp_x, rExpBias // Get true exponent for normal path
+(p6) fma.s0 fTmp = f8, f8, f0 // Dummy to set Invalid flag
+(p6) mov r8 = rIntMax // If nan, inf, return INT_MAX
+}
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt ILOGB_ZERO // Branch if x zero
+(p10) br.ret.sptk b0 // Exit if x not zero
}
+;;
-L(ILOGB_COMMON):
-// X NORMAL returns true exponent
-{ .mmi
- nop.m 999
-(p8) getf.exp r33 = f9
- nop.i 999 ;;
+
+ILOGB_DENORM:
+// Form 2^64 in case need to scale denormal
+// Check to see if double-extended denormal
+{ .mfi
+ setf.exp f2to64 = rExp_2to64
+ fclass.m p8,p0 = fNorm_x, 0x0b
+ nop.i 0
}
+;;
-// If denormal add 64 to exponent bias for scaling
-{ .mfb
-(p9) add r34 = 64, r34
- nop.f 999
-(p9) br.cond.spnt L(ILOGB_DENORM) ;;
+{ .mfi
+ nop.m 0
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
}
+;;
-{ .mmi
-(p8) and r36 = r35, r33
- nop.m 999
- nop.i 999 ;;
+// If double-extended denormal add 64 to exponent bias for scaling
+// If double-extended denormal form x * 2^64 which is normal
+{ .mfi
+(p8) add rExpBias = 64, rExpBias
+(p8) fmpy.s1 fNorm_x = fNorm_x, f2to64
+ nop.i 0
}
+;;
+// Logic is the same as normal path but use normalized input
{ .mib
-(p8) sub r8 = r36, r34
- nop.i 999
-(p0) br.ret.sptk b0 ;;
+ getf.exp rSignexp_x = fNorm_x
+ nop.i 0
+ br.cond.sptk ILOGB_COMMON // Return to main path
}
+;;
-L(ILOGB_DENORM):
-// Here if x denormal
-// Form x * 2^64 which is normal
-// Return to common code
-{ .mfb
- cmp.eq p8,p9 = r0,r0
- fmpy f9 = f9, f10
- br.cond.sptk L(ILOGB_COMMON) ;;
+ILOGB_ZERO:
+// Here if x zero (p7 set by the fclass.m 0x07 test in ILOGB_COMMON)
+// Return INT_MIN (0x80000000) via rTrialResult, call error support
+
+{ .mlx
+ alloc r32=ar.pfs,1,3,4,0 // Frame: 1 input, 3 locals, 4 outputs, 0 rotating
+ movl rTrialResult = 0x0000000080000000 // Moved into r8 by __libm_error_region after the call
+}
+{ .mib
+ mov GR_Parameter_TAG = 158 // Error code passed to __libm_error_support
+ nop.i 0
+ br.cond.sptk __libm_error_region // Call error support
+}
+;;
-// X ZERO
-// return INT_MIN, call error support
-L(ILOGB_ZERO):
-{.mlx
- mov GR_Parameter_TAG = 158
-(p6) movl r33 = 0x0000000080000000 ;;
-};;
-.endp ilogbf
-ASM_SIZE_DIRECTIVE(ilogbf)
+GLOBAL_LIBM_END(ilogbf)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
+
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
+
{ .mmi
- stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfs [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
{ .mib
- stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfs [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ stfs [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
+
{ .mmi
- mov r8 = r33 // Store result
+ mov r8 = rTrialResult
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ br.ret.sptk b0
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ilogbl.S b/sysdeps/ia64/fpu/s_ilogbl.S
index 240da060bf..e462fb706e 100644
--- a/sysdeps/ia64/fpu/s_ilogbl.S
+++ b/sysdeps/ia64/fpu/s_ilogbl.S
@@ -1,10 +1,10 @@
.file "ilogbl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,234 +20,248 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/03/00 Initial version
-// 5/26/00 Fix bug when x a double-extended denormal;
+// 02/03/00 Initial version
+// 05/26/00 Fix bug when x a double-extended denormal;
// if x=0 call error routine, per C9X
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
-// 1/20/01 Fixed result for x=0
-
-.align 32
-.global ilogbl#
-
-.section .text
-.proc ilogbl#
-.align 32
-
+// 01/20/01 Fixed result for x=0
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
+//
// API
//==============================================================
-// int = ilogbl(double_extended)
-
+// int ilogbl( long double x );
+//
// Overview of operation
//==============================================================
-// ilogbl computes log2(x) as an int
+// The ilogbl function extracts the exponent of x as an integer
// and returns it in r8
-
-// ilogbl is similar to logbl but differs in the following ways:
+//
+// ilogbl is similar to logbl but differs in the following ways:
// +-inf
// ilogbl: returns INT_MAX
// logbl: returns +inf
-// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
// ilogbl: returns INT_MAX (7fffffff)
-// logbl: returns QNAN (quieted SNAN)
+// logbl: returns QNAN (quietized SNAN)
// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
-// ilogbl: returns INT_MIN (80000000)
-// logbl: returns -inf
-
+// ilogbl: returns INT_MIN (80000000)
+// logbl: returns -inf, raises the divide-by-zero exception,
+// and calls libm_error_support to set domain error
+//
// Registers used
//==============================================================
+// general registers used:
+// r26 -> r39
+// r36 -> r39 used as parameters to error path
+//
+// predicate registers used:
+// p6 -> p10
+// floating-point registers used:
+// f9, f10, f11
+// f8, input
-// general local registers:
-// ar.pfs r32
-// r33 -> r37
-// r38 -> r41 used as parameters to error path
-
-// predicate registers used:
-// p6 - x nan, inf
-// p7 - x 0
-// p8 - x norm, unorm
-// p9 - x unorm
-
-// floating-point registers used:
-// f8 - f10
-
-#include "libm_support.h"
+rExpBias = r26
+rExpMask = r27
+rSignexp_x = r28
+rExp_x = r29
+rIntMax = r30
+rExp_2to64 = r31
GR_SAVE_PFS = r32
+rTrialResult = r33
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
-GR_Parameter_X = r38
-GR_Parameter_Y = r39
-GR_Parameter_RESULT = r40
-GR_Parameter_TAG = r41
-FR_X = f8
-FR_Y = f0
-FR_RESULT = f0
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+GR_Parameter_TAG = r39
+fTmp = f9
+fNorm_x = f10
+f2to64 = f11
-ilogbl:
+.section .text
+GLOBAL_LIBM_ENTRY(ilogbl)
-// Form signexp of 2^64 in case need to scale denormal
-{ .mmf
- alloc r32=ar.pfs,1,5,4,0
-(p0) mov r37 = 0x1003f
-(p0) fnorm f9 = f8 ;;
+// X NORMAL
+// TrueExp_x = exp(f8) - 0xffff
+// r8 = TrueExp_x
+{ .mfi
+ getf.exp rSignexp_x = f8
+ fclass.m p8,p0 = f8, 0x0b // Test for x unorm
+ mov rExpBias = 0xffff // Exponent bias
}
-
-// Form 2^64 in case need to scale denormal
{ .mfi
-(p0) setf.exp f10 = r37
-(p0) fclass.m.unc p7, p8 = f8, 0xe3
-(p0) mov r34 = 0xffff ;;
+ nop.m 0
+ fnorm.s1 fNorm_x = f8
+ mov rExpMask = 0x1ffff // Exponent mask
}
+;;
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 0 11
-// e 3
-// X ZERO, returns INT_MIN
-// X INF or NAN, returns INT_MAX
+// Form signexp of 2^64 in case need to scale denormal
+{ .mfb
+ mov rExp_2to64 = 0x1003f
+ fclass.m p6,p9 = f8, 0x1e3 // Test x natval, nan, inf
+(p8) br.cond.spnt ILOGB_DENORM // Branch if x unorm
+}
+;;
+ILOGB_COMMON:
+// Return here from ILOGB_DENORM
{ .mfi
-(p0) mov r35 = 0x1ffff
-(p8) fclass.m.unc p6, p8 = f8, 0x07
- nop.i 999 ;;
+ and rExp_x = rSignexp_x, rExpMask // Get biased exponent
+ fclass.m p7,p10 = f8, 0x07 // Test x zero
+ nop.i 0
}
{ .mlx
- nop.m 999
-(p7) movl r8 = 0x000000007fffffff ;;
+ nop.m 0
+ movl rIntMax = 0x000000007fffffff // Form INT_MAX
}
+;;
-{ .mib
- nop.m 999
- nop.i 999
-(p6) br.cond.spnt L(ILOGB_ZERO) ;;
-}
-
-// Test for denormal
+.pred.rel "mutex",p6,p9
{ .mfi
- nop.m 999
-(p8) fclass.m.unc p9, p0 = f9, 0x0b
- nop.i 999 ;;
+(p9) sub r8 = rExp_x, rExpBias // Get true exponent for normal path
+(p6) fma.s0 fTmp = f8, f8, f0 // Dummy to set Invalid flag
+(p6) mov r8 = rIntMax // If nan, inf, return INT_MAX
+}
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt ILOGB_ZERO // Branch if x zero
+(p10) br.ret.sptk b0 // Exit if x not zero
}
+;;
-L(ILOGB_COMMON):
-// X NORMAL returns true exponent
-{ .mmi
- nop.m 999
-(p8) getf.exp r33 = f9
- nop.i 999 ;;
+
+ILOGB_DENORM:
+// Form 2^64 in case need to scale denormal
+// Check to see if double-extended denormal
+{ .mfi
+ setf.exp f2to64 = rExp_2to64
+ fclass.m p8,p0 = fNorm_x, 0x0b
+ nop.i 0
}
+;;
-// If denormal add 64 to exponent bias for scaling
-{ .mfb
-(p9) add r34 = 64, r34
- nop.f 999
-(p9) br.cond.spnt L(ILOGB_DENORM) ;;
+{ .mfi
+ nop.m 0
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
}
+;;
-{ .mmi
-(p8) and r36 = r35, r33
- nop.m 999
- nop.i 999 ;;
+// If double-extended denormal add 64 to exponent bias for scaling
+// If double-extended denormal form x * 2^64 which is normal
+{ .mfi
+(p8) add rExpBias = 64, rExpBias
+(p8) fmpy.s1 fNorm_x = fNorm_x, f2to64
+ nop.i 0
}
+;;
+// Logic is the same as normal path but use normalized input
{ .mib
-(p8) sub r8 = r36, r34
- nop.i 999
-(p0) br.ret.sptk b0 ;;
+ getf.exp rSignexp_x = fNorm_x
+ nop.i 0
+ br.cond.sptk ILOGB_COMMON // Return to main path
}
+;;
-L(ILOGB_DENORM):
-// Here if x denormal
-// Form x * 2^64 which is normal
-// Return to common code
-{ .mfb
- cmp.eq p8,p9 = r0,r0
- fmpy f9 = f9, f10
- br.cond.sptk L(ILOGB_COMMON) ;;
+ILOGB_ZERO:
+// Here if x zero (p7 set by the fclass.m 0x07 test in ILOGB_COMMON)
+// Return INT_MIN (0x80000000) via rTrialResult, call error support
+
+{ .mlx
+ alloc r32=ar.pfs,1,3,4,0 // Frame: 1 input, 3 locals, 4 outputs, 0 rotating
+ movl rTrialResult = 0x0000000080000000 // Moved into r8 by __libm_error_region after the call
+}
+{ .mib
+ mov GR_Parameter_TAG = 156 // Error code passed to __libm_error_support
+ nop.i 0
+ br.cond.sptk __libm_error_region // Call error support
+}
+;;
-// X ZERO
-// return INT_MIN, call error support
-L(ILOGB_ZERO):
-{.mlx
- mov GR_Parameter_TAG = 156
-(p6) movl r33 = 0x0000000080000000 ;;
-};;
-.endp ilogbl
-ASM_SIZE_DIRECTIVE(ilogbl)
+GLOBAL_LIBM_END(ilogbl)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
+
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
+
{ .mmi
- stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
{ .mib
- stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ stfe [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
+
{ .mmi
- mov r8 = r33 // Store result
+ mov r8 = rTrialResult
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ br.ret.sptk b0
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexp.S b/sysdeps/ia64/fpu/s_ldexp.S
deleted file mode 100644
index 4dcd671c9f..0000000000
--- a/sysdeps/ia64/fpu/s_ldexp.S
+++ /dev/null
@@ -1,380 +0,0 @@
-.file "ldexp.s"
-
-// Copyright (C) 2000, 2001, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
-//
-// History
-//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 ldex pcompletely reworked and now standalone version
-//
-// API
-//==============================================================
-// double = ldexp (double x, int n)
-// input floating point f8 and int n (r33)
-// output floating point f8
-//
-// Returns x* 2**n using an fma and detects overflow
-// and underflow.
-//
-//
-
-#include "libm_support.h"
-
-FR_Big = f6
-FR_NBig = f7
-FR_Floating_X = f8
-FR_Result = f8
-FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
-
-GR_N_Biased = r15
-GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
-GR_Bias = r20
-GR_N_as_int = r21
-
-GR_SAVE_B0 = r32
-GR_SAVE_GP = r33
-GR_SAVE_PFS = r34
-GR_Parameter_X = r35
-GR_Parameter_Y = r36
-GR_Parameter_RESULT = r37
-GR_Tag = r38
-
-.align 32
-.global ldexp
-
-.section .text
-.proc ldexp
-.align 32
-
-ldexp:
-
-//
-// Is x NAN, INF, ZERO, +-?
-// Build the exponent Bias
-//
-{ .mfi
- alloc r32=ar.pfs,1,2,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
-}
-
-//
-// Sign extend input
-// Is N zero?
-// Normalize x
-//
-{ .mfi
- cmp.eq.unc p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- sxt4 GR_N_as_int = r33
-}
-;;
-
-//
-// Normalize x
-// Branch and return special values.
-// Create -35000
-// Create 35000
-//
-{ .mfi
- addl GR_Big = 35000,r0
- nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
-}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
-
-//
-// Build the exponent Bias
-// Return x when N = 0
-//
-{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
-}
-{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
-
-//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
-// Raise Denormal operand flag with compare
-// Main path, create 2**N
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
-};;
-
-//
-// Adjust 2**N if N was very small or very large
-//
-{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch = 0x00000000000303FF
-};;
-
-
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch1= 0x00000000000103FF
-};;
-
-// Set up necessary status fields
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
-//
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x41
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999
-};;
-
-//
-// Do final operation
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-
-//
-// Check for overflow or underflow.
-// Restore s3
-// Restore s2
-//
-{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
-}
-{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
-
-//
-// Is the result zero?
-//
-{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
-{ .mfi
- addl GR_Tag = 146, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
-
-//
-// Detect masked underflow - Tiny + Inexact Only
-//
-{ .mfi
- nop.m 999
-(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
-
-//
-// Is result bigger the allowed range?
-// Branch out for underflow
-//
-{ .mfb
-(p6) addl GR_Tag = 147, r0
-(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(LDEXP_UNDERFLOW)
-};;
-
-//
-// Branch out for overflow
-//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt L(LDEXP_OVERFLOW)
-(p9) br.cond.spnt L(LDEXP_OVERFLOW)
-};;
-
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
-}
-
-.endp ldexp
-ASM_SIZE_DIRECTIVE(ldexp)
-.proc __libm_error_region
-__libm_error_region:
-
-L(LDEXP_OVERFLOW):
-L(LDEXP_UNDERFLOW):
-
-//
-// Get stack address of N
-//
-.prologue
-{ .mfi
- add GR_Parameter_Y=-32,sp
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-//
-// Adjust sp
-//
-{ .mfi
-.fframe 64
- add sp=-64,sp
- nop.f 0
- mov GR_SAVE_GP=gp
-};;
-
-//
-// Store N on stack in correct position
-// Locate the address of x on stack
-//
-{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-};;
-
-//
-// Store x on the stack.
-// Get address for result on stack.
-//
-.body
-{ .mib
- stfd [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0
-}
-{ .mib
- stfd [GR_Parameter_Y] = FR_Result
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
-};;
-
-//
-// Get location of result on stack
-//
-{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
-};;
-
-//
-// Get the new result
-//
-{ .mmi
- ldfd FR_Result = [GR_Parameter_RESULT]
-.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
-};;
-
-//
-// Restore gp, ar.pfs and return
-//
-{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
-};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.type __libm_error_support#,@function
-.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexpf.S b/sysdeps/ia64/fpu/s_ldexpf.S
deleted file mode 100644
index 36f0111fe1..0000000000
--- a/sysdeps/ia64/fpu/s_ldexpf.S
+++ /dev/null
@@ -1,379 +0,0 @@
-//.file "ldexpf.s"
-
-// Copyright (C) 2000, 2001, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
-//
-// History
-//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 ldexpf completely reworked and now standalone version
-//
-// API
-//==============================================================
-// float = ldexpf (float x, int n)
-// input floating point f8 and int n (r33)
-// output floating point f8
-//
-// Returns x* 2**n using an fma and detects overflow
-// and underflow.
-//
-//
-
-#include "libm_support.h"
-
-FR_Big = f6
-FR_NBig = f7
-FR_Floating_X = f8
-FR_Result = f8
-FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
-
-GR_N_Biased = r15
-GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
-GR_Bias = r20
-GR_N_as_int = r21
-
-GR_SAVE_B0 = r32
-GR_SAVE_GP = r33
-GR_SAVE_PFS = r34
-GR_Parameter_X = r35
-GR_Parameter_Y = r36
-GR_Parameter_RESULT = r37
-GR_Tag = r38
-
-.align 32
-.global ldexpf
-
-.section .text
-.proc ldexpf
-.align 32
-
-ldexpf:
-
-//
-// Is x NAN, INF, ZERO, +-?
-// Build the exponent Bias
-//
-{ .mfi
- alloc r32=ar.pfs,1,2,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
-}
-
-//
-// Sign extend input
-// Is N zero?
-// Normalize x
-//
-{ .mfi
- cmp.eq.unc p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- sxt4 GR_N_as_int = r33
-}
-;;
-
-//
-// Normalize x
-// Branch and return special values.
-// Create -35000
-// Create 35000
-//
-{ .mfi
- addl GR_Big = 35000,r0
- nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
-}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
-
-//
-// Build the exponent Bias
-// Return x when N = 0
-//
-{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
-}
-{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
-
-//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
-// Raise Denormal operand flag with compare
-// Main path, create 2**N
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
-};;
-
-//
-// Adjust 2**N if N was very small or very large
-//
-{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch = 0x000000000003007F
-};;
-
-
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch1= 0x000000000001007F
-};;
-
-// Set up necessary status fields
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
-//
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x41
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999
-};;
-
-//
-// Do final operation
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-
-// Check for overflow or underflow.
-// Restore s3
-// Restore s2
-//
-{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
-}
-{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
-
-//
-// Is the result zero?
-//
-{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
-{ .mfi
- addl GR_Tag = 148, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
-
-//
-// Detect masked underflow - Tiny + Inexact Only
-//
-{ .mfi
- nop.m 999
-(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
-
-//
-// Is result bigger the allowed range?
-// Branch out for underflow
-//
-{ .mfb
-(p6) addl GR_Tag = 149, r0
-(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(ldexpf_UNDERFLOW)
-};;
-
-//
-// Branch out for overflow
-//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt L(ldexpf_OVERFLOW)
-(p9) br.cond.spnt L(ldexpf_OVERFLOW)
-};;
-
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
-}
-
-.endp ldexpf
-ASM_SIZE_DIRECTIVE(ldexpf)
-.proc __libm_error_region
-__libm_error_region:
-
-L(ldexpf_OVERFLOW):
-L(ldexpf_UNDERFLOW):
-
-//
-// Get stack address of N
-//
-.prologue
-{ .mfi
- add GR_Parameter_Y=-32,sp
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-//
-// Adjust sp
-//
-{ .mfi
-.fframe 64
- add sp=-64,sp
- nop.f 0
- mov GR_SAVE_GP=gp
-};;
-
-//
-// Store N on stack in correct position
-// Locate the address of x on stack
-//
-{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-};;
-
-//
-// Store x on the stack.
-// Get address for result on stack.
-//
-.body
-{ .mib
- stfs [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0
-}
-{ .mib
- stfs [GR_Parameter_Y] = FR_Result
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
-};;
-
-//
-// Get location of result on stack
-//
-{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
-};;
-
-//
-// Get the new result
-//
-{ .mmi
- ldfs FR_Result = [GR_Parameter_RESULT]
-.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
-};;
-
-//
-// Restore gp, ar.pfs and return
-//
-{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
-};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.type __libm_error_support#,@function
-.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexpl.S b/sysdeps/ia64/fpu/s_ldexpl.S
deleted file mode 100644
index fb5d3fd452..0000000000
--- a/sysdeps/ia64/fpu/s_ldexpl.S
+++ /dev/null
@@ -1,379 +0,0 @@
-//.file "ldexpl.s"
-
-// Copyright (C) 2000, 2001, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
-//
-// History
-//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 ldexpl completely reworked and now standalone version
-//
-// API
-//==============================================================
-// double-extended = ldexpl (double-extended x, int n)
-// input floating point f8 and int n (r34)
-// output floating point f8
-//
-// Returns x* 2**n using an fma and detects overflow
-// and underflow.
-//
-//
-
-#include "libm_support.h"
-
-FR_Big = f6
-FR_NBig = f7
-FR_Floating_X = f8
-FR_Result = f8
-FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
-
-GR_N_Biased = r15
-GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
-GR_Bias = r20
-GR_N_as_int = r21
-
-GR_SAVE_B0 = r32
-GR_SAVE_GP = r33
-GR_SAVE_PFS = r34
-GR_Parameter_X = r35
-GR_Parameter_Y = r36
-GR_Parameter_RESULT = r37
-GR_Tag = r38
-
-.align 32
-.global ldexpl
-
-.section .text
-.proc ldexpl
-.align 32
-
-ldexpl:
-
-//
-// Is x NAN, INF, ZERO, +-?
-// Build the exponent Bias
-//
-{ .mfi
- alloc r32=ar.pfs,2,1,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
-}
-
-//
-// Sign extend input
-// Is N zero?
-// Normalize x
-//
-{ .mfi
- cmp.eq.unc p6,p0 = r34,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- sxt4 GR_N_as_int = r34
-}
-;;
-
-//
-// Normalize x
-// Branch and return special values.
-// Create -35000
-// Create 35000
-//
-{ .mfi
- addl GR_Big = 35000,r0
- nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
-}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
-
-//
-// Build the exponent Bias
-// Return x when N = 0
-//
-{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
-}
-{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
-
-//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
-// Raise Denormal operand flag with compare
-// Main path, create 2**N
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
-};;
-
-//
-// Adjust 2**N if N was very small or very large
-//
-{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch = 0x0000000000033FFF
-};;
-
-
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch1= 0x0000000000013FFF
-};;
-
-// Set up necessary status fields
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
-//
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x41
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999
-};;
-
-//
-// Do final operation
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-
-// Check for overflow or underflow.
-// Restore s3
-// Restore s2
-//
-{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
-}
-{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
-
-//
-// Is the result zero?
-//
-{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
-{ .mfi
- addl GR_Tag = 144, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
-
-//
-// Detect masked underflow - Tiny + Inexact Only
-//
-{ .mfi
- nop.m 999
-(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
-
-//
-// Is result bigger the allowed range?
-// Branch out for underflow
-//
-{ .mfb
-(p6) addl GR_Tag = 145, r0
-(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(ldexpl_UNDERFLOW)
-};;
-
-//
-// Branch out for overflow
-//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt L(ldexpl_OVERFLOW)
-(p9) br.cond.spnt L(ldexpl_OVERFLOW)
-};;
-
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
-}
-
-.endp ldexpl
-ASM_SIZE_DIRECTIVE(ldexpl)
-.proc __libm_error_region
-__libm_error_region:
-
-L(ldexpl_OVERFLOW):
-L(ldexpl_UNDERFLOW):
-
-//
-// Get stack address of N
-//
-.prologue
-{ .mfi
- add GR_Parameter_Y=-32,sp
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-//
-// Adjust sp
-//
-{ .mfi
-.fframe 64
- add sp=-64,sp
- nop.f 0
- mov GR_SAVE_GP=gp
-};;
-
-//
-// Store N on stack in correct position
-// Locate the address of x on stack
-//
-{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-};;
-
-//
-// Store x on the stack.
-// Get address for result on stack.
-//
-.body
-{ .mib
- stfe [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0
-}
-{ .mib
- stfe [GR_Parameter_Y] = FR_Result
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
-};;
-
-//
-// Get location of result on stack
-//
-{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
-};;
-
-//
-// Get the new result
-//
-{ .mmi
- ldfe FR_Result = [GR_Parameter_RESULT]
-.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
-};;
-
-//
-// Restore gp, ar.pfs and return
-//
-{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
-};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.type __libm_error_support#,@function
-.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_log1p.S b/sysdeps/ia64/fpu/s_log1p.S
index 0d96c14a55..cd3551984a 100644
--- a/sysdeps/ia64/fpu/s_log1p.S
+++ b/sysdeps/ia64/fpu/s_log1p.S
@@ -1,10 +1,10 @@
-.file "log1p.s"
+.file "log1p.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1608 +20,1082 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 06/29/01 Improved speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 10/02/02 Improved performance by basing on log algorithm
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/18/03 Eliminate possible WAW dependency warning
//
-// *********************************************************************
-//
-// Function: log1p(x) = ln(x+1), for double precision x values
-//
-// *********************************************************************
-//
-// Accuracy: Very accurate for double precision values
-//
-// *********************************************************************
-//
-// Resources Used:
-//
-// Floating-Point Registers: f8 (Input and Return Value)
-// f9,f33-f55,f99
-//
-// General Purpose Registers:
-// r32-r53
-// r54-r57 (Used to pass arguments to error handling routine)
-//
-// Predicate Registers: p6-p15
-//
-// *********************************************************************
-//
-// IEEE Special Conditions:
-//
-// Denormal fault raised on denormal inputs
-// Overflow exceptions cannot occur
-// Underflow exceptions raised when appropriate for log1p
-// (Error Handling Routine called for underflow)
-// Inexact raised when appropriate by algorithm
-//
-// log1p(inf) = inf
-// log1p(-inf) = QNaN
-// log1p(+/-0) = +/-0
-// log1p(-1) = -inf
-// log1p(SNaN) = QNaN
-// log1p(QNaN) = QNaN
-// log1p(EM_special Values) = QNaN
-//
-// *********************************************************************
-//
-// Computation is based on the following kernel.
-//
-// ker_log_64( in_FR : X,
-// in_FR : E,
-// in_FR : Em1,
-// in_GR : Expo_Range,
-// out_FR : Y_hi,
-// out_FR : Y_lo,
-// out_FR : Scale,
-// out_PR : Safe )
-//
-// Overview
-//
-// The method consists of three cases.
-//
-// If |X+Em1| < 2^(-80) use case log1p_small;
-// elseif |X+Em1| < 2^(-7) use case log_near1;
-// else use case log_regular;
-//
-// Case log1p_small:
-//
-// log( 1 + (X+Em1) ) can be approximated by (X+Em1).
-//
-// Case log_near1:
-//
-// log( 1 + (X+Em1) ) can be approximated by a simple polynomial
-// in W = X+Em1. This polynomial resembles the truncated Taylor
-// series W - W^/2 + W^3/3 - ...
-//
-// Case log_regular:
-//
-// Here we use a table lookup method. The basic idea is that in
-// order to compute log(Arg) for an argument Arg in [1,2), we
-// construct a value G such that G*Arg is close to 1 and that
-// log(1/G) is obtainable easily from a table of values calculated
-// beforehand. Thus
-//
-// log(Arg) = log(1/G) + log(G*Arg)
-// = log(1/G) + log(1 + (G*Arg - 1))
-//
-// Because |G*Arg - 1| is small, the second term on the right hand
-// side can be approximated by a short polynomial. We elaborate
-// this method in four steps.
-//
-// Step 0: Initialization
-//
-// We need to calculate log( E + X ). Obtain N, S_hi, S_lo such that
-//
-// E + X = 2^N * ( S_hi + S_lo ) exactly
-//
-// where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
-// that |S_lo| <= ulp(S_hi).
-//
-// Step 1: Argument Reduction
-//
-// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
-//
-// G := G_1 * G_2 * G_3
-// r := (G * S_hi - 1) + G * S_lo
-//
-// These G_j's have the property that the product is exactly
-// representable and that |r| < 2^(-12) as a result.
-//
-// Step 2: Approximation
-//
-//
-// log(1 + r) is approximated by a short polynomial poly(r).
-//
-// Step 3: Reconstruction
-//
-//
-// Finally, log( E + X ) is given by
-//
-// log( E + X ) = log( 2^N * (S_hi + S_lo) )
-// ~=~ N*log(2) + log(1/G) + log(1 + r)
-// ~=~ N*log(2) + log(1/G) + poly(r).
-//
-// **** Algorithm ****
-//
-// Case log1p_small:
-//
-// Although log(1 + (X+Em1)) is basically X+Em1, we would like to
-// preserve the inexactness nature as well as consistent behavior
-// under different rounding modes. Note that this case can only be
-// taken if E is set to be 1.0. In this case, Em1 is zero, and that
-// X can be very tiny and thus the final result can possibly underflow.
-// Thus, we compare X against a threshold that is dependent on the
-// input Expo_Range. If |X| is smaller than this threshold, we set
-// SAFE to be FALSE.
-//
-// The result is returned as Y_hi, Y_lo, and in the case of SAFE
-// is FALSE, an additional value Scale is also returned.
-//
-// W := X + Em1
-// Threshold := Threshold_Table( Expo_Range )
-// Tiny := Tiny_Table( Expo_Range )
-//
-// If ( |W| > Threshold ) then
-// Y_hi := W
-// Y_lo := -W*W
-// Else
-// Y_hi := W
-// Y_lo := -Tiny
-// Scale := 2^(-100)
-// Safe := FALSE
-// EndIf
-//
-//
-// One may think that Y_lo should be -W*W/2; however, it does not matter
-// as Y_lo will be rounded off completely except for the correct effect in
-// directed rounding. Clearly -W*W is simplier to compute. Moreover,
-// because of the difference in exponent value, Y_hi + Y_lo or
-// Y_hi + Scale*Y_lo is always inexact.
-//
-// Case log_near1:
-//
-// Here we compute a simple polynomial. To exploit parallelism, we split
-// the polynomial into two portions.
-//
-// W := X + Em1
-// Wsq := W * W
-// W4 := Wsq*Wsq
-// W6 := W4*Wsq
-// Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4))
-// Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
-// set lsb(Y_lo) to be 1
-//
-// Case log_regular:
-//
-// We present the algorithm in four steps.
-//
-// Step 0. Initialization
-// ----------------------
-//
-// Z := X + E
-// N := unbaised exponent of Z
-// S_hi := 2^(-N) * Z
-// S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
-//
-// Note that S_lo is always 0 for the case E = 0.
-//
-// Step 1. Argument Reduction
-// --------------------------
-//
-// Let
-//
-// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
-//
-// We obtain G_1, G_2, G_3 by the following steps.
-//
+// API
+//==============================================================
+// double log1p(double)
//
-// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
-// from S_hi.
+// log1p(x) = log(x+1)
//
-// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
-// to lsb = 2^(-4).
+// Overview of operation
+//==============================================================
+// Background
+// ----------
//
-// Define index_1 := [ d_1 d_2 d_3 d_4 ].
+// This algorithm is based on the fact that
+// log1p(x) = log(1+x) and
+// log(a b) = log(a) + log(b).
+// In our case we have 1+x = 2^N f, where 1 <= f < 2.
+// So
+// log(1+x) = log(2^N f) = log(2^N) + log(f) = N*log(2) + log(f)
//
-// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
-// fixed point lsb = 2^(-15).
-// Z_1 looks like z_0.z_1 z_2 ... z_15
-// Note that the fetching is done using index_1.
-// A_1 is actually not needed in the implementation
-// and is used here only to explain how is the value
-// Z_1 defined.
+// To calculate log(f) we do the following
+// log(f) = log(f * frcpa(f) / frcpa(f)) =
+// = log(f * frcpa(f)) + log(1/frcpa(f))
//
-// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
-// floating pt. Again, fetching is done using index_1. A_1
-// explains how G_1 is defined.
+// According to the definition of IA-64's frcpa instruction, its result is a
+// floating-point value that approximates 1/f using a lookup on the
+// top 8 bits of the significand of (input number + 1), with relative
+// error < 2^(-8.886). So we have the following
//
-// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
-// = 1.0 0 0 0 d_5 ... d_14
-// This is accomplised by integer multiplication.
-// It is proved that X_1 indeed always begin
-// with 1.0000 in fixed point.
+// |(1/f - frcpa(f)) / (1/f))| = |1 - f*frcpa(f)| < 1/256
//
+// and
//
-// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
-// truncated to lsb = 2^(-8). Similar to A_1,
-// A_2 is not needed in actual implementation. It
-// helps explain how some of the values are defined.
+// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
+// = log(1 + r) + T
//
-// Define index_2 := [ d_5 d_6 d_7 d_8 ].
+// The first value can be computed by a polynomial P(r) approximating
+// log(1 + r) on |r| < 1/256 and the second is a precomputed tabular
+// value defined by the top 8 bits of f.
//
-// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
-// fixed point lsb = 2^(-15). Fetch done using index_2.
-// Z_2 looks like z_0.z_1 z_2 ... z_15
+// Finally we have that log(1+x) ~ (N*log(2) + T) + P(r)
//
-// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
-// floating pt.
+// Note that if input argument is close to 0.0 (in our case it means
+// that |x| < 1/256) we can use just polynomial approximation
+// because 1+x = 2^0 * f = f = 1 + r and
+// log(1+x) = log(1 + r) ~ P(r)
//
-// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
-// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
-// This is accomplised by integer multiplication.
-// It is proved that X_2 indeed always begin
-// with 1.00000000 in fixed point.
//
+// Implementation
+// --------------
//
-// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
-// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+// 1. |x| >= 2^(-8), and x > -1
+// InvX = frcpa(x+1)
+// r = InvX*(x+1) - 1
+// P(r) = r*((r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
+// all coefficients are calculated in quad and rounded to double
+// precision. A7,A6,A5,A4 are stored in memory whereas A3 and A2
+// are created with setf.
//
-// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+// N = float(n) where n is the true unbiased exponent of x
//
-// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
-// floating pt. Fetch is done using index_3.
+// T is the tabulated value of log(1/frcpa(x)) calculated in quad precision
+// and represented by two floating-point numbers: 64-bit Thi and 32-bit Tlo.
+// To load Thi,Tlo we take bits 55 to 62 of the register-format significand
+// as the index and calculate two addresses
+// ad_Thi = Thi_table_base_addr + 8 * index
+// ad_Tlo = Tlo_table_base_addr + 4 * index
//
-// Compute G := G_1 * G_2 * G_3.
+// L1 (log(2)) is calculated in quad
+// precision and represented by two floating-point 64-bit numbers L1hi,L1lo
+// stored in memory.
//
-// This is done exactly since each of G_j only has 21 sig. bits.
+// And final result = ((L1hi*N + Thi) + (N*L1lo + Tlo)) + P(r)
//
-// Compute
//
-// r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
+// 2. 2^(-80) <= |x| < 2^(-8)
+// r = x
+// P(r) = r*((r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
+// A7,A6,A5,A4,A3,A2 are the same as in case |x| >= 1/256
//
-// thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of
-// rounding errors.
+// And the final result is
+// log(1+x) = P(r)
//
+// 3. 0 < |x| < 2^(-80)
+// Although log1p(x) is basically x, we would like to preserve the inexactness
+// nature as well as consistent behavior under different rounding modes.
+// We can do this by computing the result as
//
-// Step 2. Approximation
-// ---------------------
+// log1p(x) = x - x*x
//
-// This step computes an approximation to log( 1 + r ) where r is the
-// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
-// thus log(1+r) can be approximated by a short polynomial:
//
-// log(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+// Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
+// filtered and processed on special branches.
//
+
//
-// Step 3. Reconstruction
-// ----------------------
+// Special values
+//==============================================================
//
-// This step computes the desired result of log(X+E):
+// log1p(-1) = -inf // Call error support
//
-// log(X+E) = log( 2^N * (S_hi + S_lo) )
-// = N*log(2) + log( S_hi + S_lo )
-// = N*log(2) + log(1/G) +
-// log(1 + C*(S_hi+S_lo) - 1 )
+// log1p(+qnan) = +qnan
+// log1p(-qnan) = -qnan
+// log1p(+snan) = +qnan
+// log1p(-snan) = -qnan
//
-// log(2), log(1/G_j) are stored as pairs of (single,double) numbers:
-// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
-// single-precision numbers and the low parts are double precision
-// numbers. These have the property that
+// log1p(x),x<-1= QNAN Indefinite // Call error support
+// log1p(-inf) = QNAN Indefinite
+// log1p(+inf) = +inf
+// log1p(+/-0) = +/-0
//
-// N*log2_hi + SUM ( log1byGj_hi )
//
-// is computable exactly in double-extended precision (64 sig. bits).
-// Finally
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f7 -> f15, f32 -> f40
//
-// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
-// Y_lo := poly_hi + [ poly_lo +
-// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
-// set lsb(Y_lo) to be 1
+// General registers used:
+// r8 -> r11
+// r14 -> r20
//
+// Predicate registers used:
+// p6 -> p12
-#include "libm_support.h"
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// Assembly macros
+//==============================================================
+GR_TAG = r8 // general-register aliases; GR_TAG shares r8 with GR_ad_1
+GR_ad_1 = r8 // pointer into log_data (table base, later advanced to the ln(2) parts)
+GR_ad_2 = r9 // log_data + 16: address of the A5,A4 coefficient pair
+GR_Exp = r10 // sign+exponent field of 1+x (getf.exp)
+GR_N = r11 // true (unbiased) exponent of 1+x
-// P_7, P_6, P_5, P_4, P_3, P_2, and P_1
+GR_signexp_x = r14 // sign+exponent field of the input x (getf.exp of f8)
+GR_exp_mask = r15 // 0x1ffff: mask that strips the sign bit from a signexp value
+GR_exp_bias = r16 // 0xffff: register-format exponent bias
+GR_05 = r17 // 0xfffe: exponent pattern fed to setf.exp to create A2 = 0.5
+GR_A3 = r18 // double-precision bit pattern of coefficient A3
+GR_Sig = r19 // significand of 1+x (getf.sig)
+GR_Ind = r19 // table index = bits 55..62 of GR_Sig; shares r19 with GR_Sig
+GR_exp_x = r20 // exponent of x: biased after masking, then made unbiased
-.align 64
-Constants_P:
-ASM_TYPE_DIRECTIVE(Constants_P,@object)
-data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
-data4 0xA5E56381,0x8003B271,0x0000BFFC,0x00000000
-data4 0x73282DB0,0x9249248C,0x00003FFC,0x00000000
-data4 0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000
-data4 0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000
-data4 0x00067ED5,0x80000000,0x0000BFFD,0x00000000
-data4 0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000
-data4 0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_P)
-
-// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
-.align 64
-Constants_Q:
-ASM_TYPE_DIRECTIVE(Constants_Q,@object)
-data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
-data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
-data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
-data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
-data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
-data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_Q)
-
-// Z1 - 16 bit fixed, G1 and H1 - IEEE single
-
-.align 64
-Constants_Z_G_H_h1:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
-data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
-data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6
-data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
-data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
-data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
-data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
-data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
-data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
-data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
-data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
-data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
-data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
-data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
-data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
-data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
-data4 0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
-
-// Z2 - 16 bit fixed, G2 and H2 - IEEE single
+GR_SAVE_B0 = r33 // NOTE(review): GR_SAVE_* are not referenced in the visible part;
+GR_SAVE_PFS = r34 // presumably they preserve b0, ar.pfs, gp and sp
+GR_SAVE_GP = r35 // around the call to the error-support routine
+GR_SAVE_SP = r36 // -- confirm against the error-handling code
-.align 64
-Constants_Z_G_H_h2:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
-data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
-data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116
-data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
-data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
-data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
-data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
-data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
-data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
-data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
-data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
-data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
-data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
-data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
-data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
-data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
-data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
-
-// G3 and H3 - IEEE single and h3 -IEEE double
+GR_Parameter_X = r37 // NOTE(review): GR_Parameter_* are not referenced in the visible
+GR_Parameter_Y = r38 // part; presumably the arguments (x, y, result, error tag)
+GR_Parameter_RESULT = r39 // handed to the error-support routine
+GR_Parameter_TAG = r40 // -- confirm against the error-handling code
-.align 64
-Constants_Z_G_H_h3:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
-data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
-data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
-data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
-data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
-data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
-data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
-data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
-data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
-data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
-data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
-data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
-data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
-data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
-data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
-data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
-data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
-data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
-data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
-data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
-data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
-data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
-data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
-data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
-data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
-data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
-data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
-data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
-data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
-data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
-data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
-data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
-data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
-
-//
-// Exponent Thresholds and Tiny Thresholds
-// for 8, 11, 15, and 17 bit exponents
-//
-// Expo_Range Value
-//
-// 0 (8 bits) 2^(-126)
-// 1 (11 bits) 2^(-1022)
-// 2 (15 bits) 2^(-16382)
-// 3 (17 bits) 2^(-16382)
-//
-// Tiny_Table
-// ----------
-// Expo_Range Value
-//
-// 0 (8 bits) 2^(-16382)
-// 1 (11 bits) 2^(-16382)
-// 2 (15 bits) 2^(-16382)
-// 3 (17 bits) 2^(-16382)
-//
-.align 64
-Constants_Threshold:
-ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
-data4 0x00000000,0x80000000,0x00003F81,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00003C01,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_Threshold)
-.align 64
-Constants_1_by_LN10:
-ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
-data4 0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000
-data4 0xACCF70C8,0xD56EAABE,0x00003FBD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
+FR_NormX = f7 // floating-point aliases; FR_NormX holds the normalized input (fnorm of f8)
+FR_RcpX = f9 // frcpa approximation of 1/(1+x)
+FR_r = f10 // reduced argument: x if |x| < 1/256, else RcpX*(1+x) - 1
+FR_r2 = f11 // presumably r^2 -- not used in the visible part, confirm
+FR_r4 = f12 // presumably r^4 -- not used in the visible part, confirm
+FR_N = f13 // receives GR_N (true exponent of 1+x) in its significand via setf.sig
+FR_Ln2hi = f14 // first word of the ln(2) pair loaded from log_data
+FR_Ln2lo = f15 // second word of the ln(2) pair loaded from log_data
-FR_Input_X = f8
-FR_Neg_One = f9
-FR_E = f33
-FR_Em1 = f34
-FR_Y_hi = f34
-// Shared with Em1
-FR_Y_lo = f35
-FR_Scale = f36
-FR_X_Prime = f37
-FR_Z = f38
-FR_S_hi = f38
-// Shared with Z
-FR_W = f39
-FR_G = f40
-FR_wsq = f40
-// Shared with G
-FR_H = f41
-FR_w4 = f41
-// Shared with H
-FR_h = f42
-FR_w6 = f42
-// Shared with h
-FR_G_tmp = f43
-FR_poly_lo = f43
-// Shared with G_tmp
-FR_P8 = f43
-// Shared with G_tmp
-FR_H_tmp = f44
-FR_poly_hi = f44
- // Shared with H_tmp
-FR_P7 = f44
-// Shared with H_tmp
-FR_h_tmp = f45
-FR_rsq = f45
-// Shared with h_tmp
-FR_P6 = f45
-// Shared with h_tmp
-FR_abs_W = f46
-FR_r = f46
-// Shared with abs_W
-FR_AA = f47
-FR_log2_hi = f47
-// Shared with AA
-FR_BB = f48
-FR_log2_lo = f48
-// Shared with BB
-FR_S_lo = f49
-FR_two_negN = f50
-FR_float_N = f51
-FR_Q4 = f52
-FR_dummy = f52
-// Shared with Q4
-FR_P4 = f52
-// Shared with Q4
-FR_Threshold = f52
-// Shared with Q4
-FR_Q3 = f53
-FR_P3 = f53
-// Shared with Q3
-FR_Tiny = f53
-// Shared with Q3
-FR_Q2 = f54
-FR_P2 = f54
-// Shared with Q2
-FR_1LN10_hi = f54
-// Shared with Q2
-FR_Q1 = f55
-FR_P1 = f55
-// Shared with Q1
-FR_1LN10_lo = f55
-// Shared with Q1
-FR_P5 = f98
-FR_SCALE = f98
-FR_Output_X_tmp = f99
+FR_A7 = f32 // polynomial coefficients: A7,A6 are loaded as a pair from the start of log_data
+FR_A6 = f33 // second half of the A7,A6 pair
+FR_A5 = f34 // A5,A4 are loaded as a pair from log_data + 16
+FR_A4 = f35 // second half of the A5,A4 pair
+FR_A3 = f36 // built from the immediate in GR_A3 with setf.d (not stored in the table)
+FR_A2 = f37 // 0.5, built from GR_05 with setf.exp (not stored in the table)
-GR_Expo_Range = r32
-GR_Table_Base = r34
-GR_Table_Base1 = r35
-GR_Table_ptr = r36
-GR_Index2 = r37
-GR_signif = r38
-GR_X_0 = r39
-GR_X_1 = r40
-GR_X_2 = r41
-GR_Z_1 = r42
-GR_Z_2 = r43
-GR_N = r44
-GR_Bias = r45
-GR_M = r46
-GR_ScaleN = r47
-GR_Index3 = r48
-GR_Perturb = r49
-GR_Table_Scale = r50
+FR_Thi = f38 // presumably the "hi part" table entry selected by GR_Ind -- load not in the visible part
+FR_NxLn2hipThi = f38 // shares f38 with FR_Thi; name suggests N*Ln2hi + Thi -- confirm
+FR_NxLn2pT = f38 // shares f38 as well; name suggests N*ln(2) + T -- confirm
+FR_Tlo = f39 // presumably the "lo part" table entry selected by GR_Ind -- load not in the visible part
+FR_NxLn2lopTlo = f39 // shares f39 with FR_Tlo; name suggests N*Ln2lo + Tlo -- confirm
+FR_Xp1 = f40 // 1 + x, formed by the first fadd.s1 of the routine
-GR_SAVE_PFS = r51
-GR_SAVE_B0 = r52
-GR_SAVE_GP = r53
-GR_Parameter_X = r54
-GR_Parameter_Y = r55
-GR_Parameter_RESULT = r56
+FR_Y = f1 // alias of f1; NOTE(review): FR_Y/FR_X/FR_RESULT are not used in the visible part
+FR_X = f10 // shares f10 with FR_r
+FR_RESULT = f8 // f8 is the input register and also carries the returned value
-GR_Parameter_TAG = r57
+// Data
+//==============================================================
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(log_data)
+// coefficients of polynomial approximation
+data8 0x3FC2494104381A8E // A7
+data8 0xBFC5556D556BBB69 // A6
+data8 0x3FC999999988B5E9 // A5
+data8 0xBFCFFFFFFFF6FFF5 // A4
+//
+// hi parts of ln(1/frcpa(1+i/256)), i=0...255
+data8 0x3F60040155D5889D // 0
+data8 0x3F78121214586B54 // 1
+data8 0x3F841929F96832EF // 2
+data8 0x3F8C317384C75F06 // 3
+data8 0x3F91A6B91AC73386 // 4
+data8 0x3F95BA9A5D9AC039 // 5
+data8 0x3F99D2A8074325F3 // 6
+data8 0x3F9D6B2725979802 // 7
+data8 0x3FA0C58FA19DFAA9 // 8
+data8 0x3FA2954C78CBCE1A // 9
+data8 0x3FA4A94D2DA96C56 // 10
+data8 0x3FA67C94F2D4BB58 // 11
+data8 0x3FA85188B630F068 // 12
+data8 0x3FAA6B8ABE73AF4C // 13
+data8 0x3FAC441E06F72A9E // 14
+data8 0x3FAE1E6713606D06 // 15
+data8 0x3FAFFA6911AB9300 // 16
+data8 0x3FB0EC139C5DA600 // 17
+data8 0x3FB1DBD2643D190B // 18
+data8 0x3FB2CC7284FE5F1C // 19
+data8 0x3FB3BDF5A7D1EE64 // 20
+data8 0x3FB4B05D7AA012E0 // 21
+data8 0x3FB580DB7CEB5701 // 22
+data8 0x3FB674F089365A79 // 23
+data8 0x3FB769EF2C6B568D // 24
+data8 0x3FB85FD927506A47 // 25
+data8 0x3FB9335E5D594988 // 26
+data8 0x3FBA2B0220C8E5F4 // 27
+data8 0x3FBB0004AC1A86AB // 28
+data8 0x3FBBF968769FCA10 // 29
+data8 0x3FBCCFEDBFEE13A8 // 30
+data8 0x3FBDA727638446A2 // 31
+data8 0x3FBEA3257FE10F79 // 32
+data8 0x3FBF7BE9FEDBFDE5 // 33
+data8 0x3FC02AB352FF25F3 // 34
+data8 0x3FC097CE579D204C // 35
+data8 0x3FC1178E8227E47B // 36
+data8 0x3FC185747DBECF33 // 37
+data8 0x3FC1F3B925F25D41 // 38
+data8 0x3FC2625D1E6DDF56 // 39
+data8 0x3FC2D1610C868139 // 40
+data8 0x3FC340C59741142E // 41
+data8 0x3FC3B08B6757F2A9 // 42
+data8 0x3FC40DFB08378003 // 43
+data8 0x3FC47E74E8CA5F7C // 44
+data8 0x3FC4EF51F6466DE4 // 45
+data8 0x3FC56092E02BA516 // 46
+data8 0x3FC5D23857CD74D4 // 47
+data8 0x3FC6313A37335D76 // 48
+data8 0x3FC6A399DABBD383 // 49
+data8 0x3FC70337DD3CE41A // 50
+data8 0x3FC77654128F6127 // 51
+data8 0x3FC7E9D82A0B022D // 52
+data8 0x3FC84A6B759F512E // 53
+data8 0x3FC8AB47D5F5A30F // 54
+data8 0x3FC91FE49096581B // 55
+data8 0x3FC981634011AA75 // 56
+data8 0x3FC9F6C407089664 // 57
+data8 0x3FCA58E729348F43 // 58
+data8 0x3FCABB55C31693AC // 59
+data8 0x3FCB1E104919EFD0 // 60
+data8 0x3FCB94EE93E367CA // 61
+data8 0x3FCBF851C067555E // 62
+data8 0x3FCC5C0254BF23A5 // 63
+data8 0x3FCCC000C9DB3C52 // 64
+data8 0x3FCD244D99C85673 // 65
+data8 0x3FCD88E93FB2F450 // 66
+data8 0x3FCDEDD437EAEF00 // 67
+data8 0x3FCE530EFFE71012 // 68
+data8 0x3FCEB89A1648B971 // 69
+data8 0x3FCF1E75FADF9BDE // 70
+data8 0x3FCF84A32EAD7C35 // 71
+data8 0x3FCFEB2233EA07CD // 72
+data8 0x3FD028F9C7035C1C // 73
+data8 0x3FD05C8BE0D9635A // 74
+data8 0x3FD085EB8F8AE797 // 75
+data8 0x3FD0B9C8E32D1911 // 76
+data8 0x3FD0EDD060B78080 // 77
+data8 0x3FD122024CF0063F // 78
+data8 0x3FD14BE2927AECD4 // 79
+data8 0x3FD180618EF18ADF // 80
+data8 0x3FD1B50BBE2FC63B // 81
+data8 0x3FD1DF4CC7CF242D // 82
+data8 0x3FD214456D0EB8D4 // 83
+data8 0x3FD23EC5991EBA49 // 84
+data8 0x3FD2740D9F870AFB // 85
+data8 0x3FD29ECDABCDFA03 // 86
+data8 0x3FD2D46602ADCCEE // 87
+data8 0x3FD2FF66B04EA9D4 // 88
+data8 0x3FD335504B355A37 // 89
+data8 0x3FD360925EC44F5C // 90
+data8 0x3FD38BF1C3337E74 // 91
+data8 0x3FD3C25277333183 // 92
+data8 0x3FD3EDF463C1683E // 93
+data8 0x3FD419B423D5E8C7 // 94
+data8 0x3FD44591E0539F48 // 95
+data8 0x3FD47C9175B6F0AD // 96
+data8 0x3FD4A8B341552B09 // 97
+data8 0x3FD4D4F39089019F // 98
+data8 0x3FD501528DA1F967 // 99
+data8 0x3FD52DD06347D4F6 // 100
+data8 0x3FD55A6D3C7B8A89 // 101
+data8 0x3FD5925D2B112A59 // 102
+data8 0x3FD5BF406B543DB1 // 103
+data8 0x3FD5EC433D5C35AD // 104
+data8 0x3FD61965CDB02C1E // 105
+data8 0x3FD646A84935B2A1 // 106
+data8 0x3FD6740ADD31DE94 // 107
+data8 0x3FD6A18DB74A58C5 // 108
+data8 0x3FD6CF31058670EC // 109
+data8 0x3FD6F180E852F0B9 // 110
+data8 0x3FD71F5D71B894EF // 111
+data8 0x3FD74D5AEFD66D5C // 112
+data8 0x3FD77B79922BD37D // 113
+data8 0x3FD7A9B9889F19E2 // 114
+data8 0x3FD7D81B037EB6A6 // 115
+data8 0x3FD8069E33827230 // 116
+data8 0x3FD82996D3EF8BCA // 117
+data8 0x3FD85855776DCBFA // 118
+data8 0x3FD8873658327CCE // 119
+data8 0x3FD8AA75973AB8CE // 120
+data8 0x3FD8D992DC8824E4 // 121
+data8 0x3FD908D2EA7D9511 // 122
+data8 0x3FD92C59E79C0E56 // 123
+data8 0x3FD95BD750EE3ED2 // 124
+data8 0x3FD98B7811A3EE5B // 125
+data8 0x3FD9AF47F33D406B // 126
+data8 0x3FD9DF270C1914A7 // 127
+data8 0x3FDA0325ED14FDA4 // 128
+data8 0x3FDA33440224FA78 // 129
+data8 0x3FDA57725E80C382 // 130
+data8 0x3FDA87D0165DD199 // 131
+data8 0x3FDAAC2E6C03F895 // 132
+data8 0x3FDADCCC6FDF6A81 // 133
+data8 0x3FDB015B3EB1E790 // 134
+data8 0x3FDB323A3A635948 // 135
+data8 0x3FDB56FA04462909 // 136
+data8 0x3FDB881AA659BC93 // 137
+data8 0x3FDBAD0BEF3DB164 // 138
+data8 0x3FDBD21297781C2F // 139
+data8 0x3FDC039236F08818 // 140
+data8 0x3FDC28CB1E4D32FC // 141
+data8 0x3FDC4E19B84723C1 // 142
+data8 0x3FDC7FF9C74554C9 // 143
+data8 0x3FDCA57B64E9DB05 // 144
+data8 0x3FDCCB130A5CEBAF // 145
+data8 0x3FDCF0C0D18F326F // 146
+data8 0x3FDD232075B5A201 // 147
+data8 0x3FDD490246DEFA6B // 148
+data8 0x3FDD6EFA918D25CD // 149
+data8 0x3FDD9509707AE52F // 150
+data8 0x3FDDBB2EFE92C554 // 151
+data8 0x3FDDEE2F3445E4AE // 152
+data8 0x3FDE148A1A2726CD // 153
+data8 0x3FDE3AFC0A49FF3F // 154
+data8 0x3FDE6185206D516D // 155
+data8 0x3FDE882578823D51 // 156
+data8 0x3FDEAEDD2EAC990C // 157
+data8 0x3FDED5AC5F436BE2 // 158
+data8 0x3FDEFC9326D16AB8 // 159
+data8 0x3FDF2391A21575FF // 160
+data8 0x3FDF4AA7EE03192C // 161
+data8 0x3FDF71D627C30BB0 // 162
+data8 0x3FDF991C6CB3B379 // 163
+data8 0x3FDFC07ADA69A90F // 164
+data8 0x3FDFE7F18EB03D3E // 165
+data8 0x3FE007C053C5002E // 166
+data8 0x3FE01B942198A5A0 // 167
+data8 0x3FE02F74400C64EA // 168
+data8 0x3FE04360BE7603AC // 169
+data8 0x3FE05759AC47FE33 // 170
+data8 0x3FE06B5F1911CF51 // 171
+data8 0x3FE078BF0533C568 // 172
+data8 0x3FE08CD9687E7B0E // 173
+data8 0x3FE0A10074CF9019 // 174
+data8 0x3FE0B5343A234476 // 175
+data8 0x3FE0C974C89431CD // 176
+data8 0x3FE0DDC2305B9886 // 177
+data8 0x3FE0EB524BAFC918 // 178
+data8 0x3FE0FFB54213A475 // 179
+data8 0x3FE114253DA97D9F // 180
+data8 0x3FE128A24F1D9AFF // 181
+data8 0x3FE1365252BF0864 // 182
+data8 0x3FE14AE558B4A92D // 183
+data8 0x3FE15F85A19C765B // 184
+data8 0x3FE16D4D38C119FA // 185
+data8 0x3FE18203C20DD133 // 186
+data8 0x3FE196C7BC4B1F3A // 187
+data8 0x3FE1A4A738B7A33C // 188
+data8 0x3FE1B981C0C9653C // 189
+data8 0x3FE1CE69E8BB106A // 190
+data8 0x3FE1DC619DE06944 // 191
+data8 0x3FE1F160A2AD0DA3 // 192
+data8 0x3FE2066D7740737E // 193
+data8 0x3FE2147DBA47A393 // 194
+data8 0x3FE229A1BC5EBAC3 // 195
+data8 0x3FE237C1841A502E // 196
+data8 0x3FE24CFCE6F80D9A // 197
+data8 0x3FE25B2C55CD5762 // 198
+data8 0x3FE2707F4D5F7C40 // 199
+data8 0x3FE285E0842CA383 // 200
+data8 0x3FE294294708B773 // 201
+data8 0x3FE2A9A2670AFF0C // 202
+data8 0x3FE2B7FB2C8D1CC0 // 203
+data8 0x3FE2C65A6395F5F5 // 204
+data8 0x3FE2DBF557B0DF42 // 205
+data8 0x3FE2EA64C3F97654 // 206
+data8 0x3FE3001823684D73 // 207
+data8 0x3FE30E97E9A8B5CC // 208
+data8 0x3FE32463EBDD34E9 // 209
+data8 0x3FE332F4314AD795 // 210
+data8 0x3FE348D90E7464CF // 211
+data8 0x3FE35779F8C43D6D // 212
+data8 0x3FE36621961A6A99 // 213
+data8 0x3FE37C299F3C366A // 214
+data8 0x3FE38AE2171976E7 // 215
+data8 0x3FE399A157A603E7 // 216
+data8 0x3FE3AFCCFE77B9D1 // 217
+data8 0x3FE3BE9D503533B5 // 218
+data8 0x3FE3CD7480B4A8A2 // 219
+data8 0x3FE3E3C43918F76C // 220
+data8 0x3FE3F2ACB27ED6C6 // 221
+data8 0x3FE4019C2125CA93 // 222
+data8 0x3FE4181061389722 // 223
+data8 0x3FE42711518DF545 // 224
+data8 0x3FE436194E12B6BF // 225
+data8 0x3FE445285D68EA69 // 226
+data8 0x3FE45BCC464C893A // 227
+data8 0x3FE46AED21F117FC // 228
+data8 0x3FE47A1527E8A2D3 // 229
+data8 0x3FE489445EFFFCCB // 230
+data8 0x3FE4A018BCB69835 // 231
+data8 0x3FE4AF5A0C9D65D7 // 232
+data8 0x3FE4BEA2A5BDBE87 // 233
+data8 0x3FE4CDF28F10AC46 // 234
+data8 0x3FE4DD49CF994058 // 235
+data8 0x3FE4ECA86E64A683 // 236
+data8 0x3FE503C43CD8EB68 // 237
+data8 0x3FE513356667FC57 // 238
+data8 0x3FE522AE0738A3D7 // 239
+data8 0x3FE5322E26867857 // 240
+data8 0x3FE541B5CB979809 // 241
+data8 0x3FE55144FDBCBD62 // 242
+data8 0x3FE560DBC45153C6 // 243
+data8 0x3FE5707A26BB8C66 // 244
+data8 0x3FE587F60ED5B8FF // 245
+data8 0x3FE597A7977C8F31 // 246
+data8 0x3FE5A760D634BB8A // 247
+data8 0x3FE5B721D295F10E // 248
+data8 0x3FE5C6EA94431EF9 // 249
+data8 0x3FE5D6BB22EA86F5 // 250
+data8 0x3FE5E6938645D38F // 251
+data8 0x3FE5F673C61A2ED1 // 252
+data8 0x3FE6065BEA385926 // 253
+data8 0x3FE6164BFA7CC06B // 254
+data8 0x3FE62643FECF9742 // 255
+//
+// hi and lo parts of ln(2)
+data8 0x3FE62E42FEF00000,0x3DD473DE6AF278ED
+//
+// lo parts of ln(1/frcpa(1+i/256)), i=0...255
+data4 0x20E70672 // 0
+data4 0x1F60A5D0 // 1
+data4 0x218EABA0 // 2
+data4 0x21403104 // 3
+data4 0x20E9B54E // 4
+data4 0x21EE1382 // 5
+data4 0x226014E3 // 6
+data4 0x2095E5C9 // 7
+data4 0x228BA9D4 // 8
+data4 0x22932B86 // 9
+data4 0x22608A57 // 10
+data4 0x220209F3 // 11
+data4 0x212882CC // 12
+data4 0x220D46E2 // 13
+data4 0x21FA4C28 // 14
+data4 0x229E5BD9 // 15
+data4 0x228C9838 // 16
+data4 0x2311F954 // 17
+data4 0x221365DF // 18
+data4 0x22BD0CB3 // 19
+data4 0x223D4BB7 // 20
+data4 0x22A71BBE // 21
+data4 0x237DB2FA // 22
+data4 0x23194C9D // 23
+data4 0x22EC639E // 24
+data4 0x2367E669 // 25
+data4 0x232E1D5F // 26
+data4 0x234A639B // 27
+data4 0x2365C0E0 // 28
+data4 0x234646C1 // 29
+data4 0x220CBF9C // 30
+data4 0x22A00FD4 // 31
+data4 0x2306A3F2 // 32
+data4 0x23745A9B // 33
+data4 0x2398D756 // 34
+data4 0x23DD0B6A // 35
+data4 0x23DE338B // 36
+data4 0x23A222DF // 37
+data4 0x223164F8 // 38
+data4 0x23B4E87B // 39
+data4 0x23D6CCB8 // 40
+data4 0x220C2099 // 41
+data4 0x21B86B67 // 42
+data4 0x236D14F1 // 43
+data4 0x225A923F // 44
+data4 0x22748723 // 45
+data4 0x22200D13 // 46
+data4 0x23C296EA // 47
+data4 0x2302AC38 // 48
+data4 0x234B1996 // 49
+data4 0x2385E298 // 50
+data4 0x23175BE5 // 51
+data4 0x2193F482 // 52
+data4 0x23BFEA90 // 53
+data4 0x23D70A0C // 54
+data4 0x231CF30A // 55
+data4 0x235D9E90 // 56
+data4 0x221AD0CB // 57
+data4 0x22FAA08B // 58
+data4 0x23D29A87 // 59
+data4 0x20C4B2FE // 60
+data4 0x2381B8B7 // 61
+data4 0x23F8D9FC // 62
+data4 0x23EAAE7B // 63
+data4 0x2329E8AA // 64
+data4 0x23EC0322 // 65
+data4 0x2357FDCB // 66
+data4 0x2392A9AD // 67
+data4 0x22113B02 // 68
+data4 0x22DEE901 // 69
+data4 0x236A6D14 // 70
+data4 0x2371D33E // 71
+data4 0x2146F005 // 72
+data4 0x23230B06 // 73
+data4 0x22F1C77D // 74
+data4 0x23A89FA3 // 75
+data4 0x231D1241 // 76
+data4 0x244DA96C // 77
+data4 0x23ECBB7D // 78
+data4 0x223E42B4 // 79
+data4 0x23801BC9 // 80
+data4 0x23573263 // 81
+data4 0x227C1158 // 82
+data4 0x237BD749 // 83
+data4 0x21DDBAE9 // 84
+data4 0x23401735 // 85
+data4 0x241D9DEE // 86
+data4 0x23BC88CB // 87
+data4 0x2396D5F1 // 88
+data4 0x23FC89CF // 89
+data4 0x2414F9A2 // 90
+data4 0x2474A0F5 // 91
+data4 0x24354B60 // 92
+data4 0x23C1EB40 // 93
+data4 0x2306DD92 // 94
+data4 0x24353B6B // 95
+data4 0x23CD1701 // 96
+data4 0x237C7A1C // 97
+data4 0x245793AA // 98
+data4 0x24563695 // 99
+data4 0x23C51467 // 100
+data4 0x24476B68 // 101
+data4 0x212585A9 // 102
+data4 0x247B8293 // 103
+data4 0x2446848A // 104
+data4 0x246A53F8 // 105
+data4 0x246E496D // 106
+data4 0x23ED1D36 // 107
+data4 0x2314C258 // 108
+data4 0x233244A7 // 109
+data4 0x245B7AF0 // 110
+data4 0x24247130 // 111
+data4 0x22D67B38 // 112
+data4 0x2449F620 // 113
+data4 0x23BBC8B8 // 114
+data4 0x237D3BA0 // 115
+data4 0x245E8F13 // 116
+data4 0x2435573F // 117
+data4 0x242DE666 // 118
+data4 0x2463BC10 // 119
+data4 0x2466587D // 120
+data4 0x2408144B // 121
+data4 0x2405F0E5 // 122
+data4 0x22381CFF // 123
+data4 0x24154F9B // 124
+data4 0x23A4E96E // 125
+data4 0x24052967 // 126
+data4 0x2406963F // 127
+data4 0x23F7D3CB // 128
+data4 0x2448AFF4 // 129
+data4 0x24657A21 // 130
+data4 0x22FBC230 // 131
+data4 0x243C8DEA // 132
+data4 0x225DC4B7 // 133
+data4 0x23496EBF // 134
+data4 0x237C2B2B // 135
+data4 0x23A4A5B1 // 136
+data4 0x2394E9D1 // 137
+data4 0x244BC950 // 138
+data4 0x23C7448F // 139
+data4 0x2404A1AD // 140
+data4 0x246511D5 // 141
+data4 0x24246526 // 142
+data4 0x23111F57 // 143
+data4 0x22868951 // 144
+data4 0x243EB77F // 145
+data4 0x239F3DFF // 146
+data4 0x23089666 // 147
+data4 0x23EBFA6A // 148
+data4 0x23C51312 // 149
+data4 0x23E1DD5E // 150
+data4 0x232C0944 // 151
+data4 0x246A741F // 152
+data4 0x2414DF8D // 153
+data4 0x247B5546 // 154
+data4 0x2415C980 // 155
+data4 0x24324ABD // 156
+data4 0x234EB5E5 // 157
+data4 0x2465E43E // 158
+data4 0x242840D1 // 159
+data4 0x24444057 // 160
+data4 0x245E56F0 // 161
+data4 0x21AE30F8 // 162
+data4 0x23FB3283 // 163
+data4 0x247A4D07 // 164
+data4 0x22AE314D // 165
+data4 0x246B7727 // 166
+data4 0x24EAD526 // 167
+data4 0x24B41DC9 // 168
+data4 0x24EE8062 // 169
+data4 0x24A0C7C4 // 170
+data4 0x24E8DA67 // 171
+data4 0x231120F7 // 172
+data4 0x24401FFB // 173
+data4 0x2412DD09 // 174
+data4 0x248C131A // 175
+data4 0x24C0A7CE // 176
+data4 0x243DD4C8 // 177
+data4 0x24457FEB // 178
+data4 0x24DEEFBB // 179
+data4 0x243C70AE // 180
+data4 0x23E7A6FA // 181
+data4 0x24C2D311 // 182
+data4 0x23026255 // 183
+data4 0x2437C9B9 // 184
+data4 0x246BA847 // 185
+data4 0x2420B448 // 186
+data4 0x24C4CF5A // 187
+data4 0x242C4981 // 188
+data4 0x24DE1525 // 189
+data4 0x24F5CC33 // 190
+data4 0x235A85DA // 191
+data4 0x24A0B64F // 192
+data4 0x244BA0A4 // 193
+data4 0x24AAF30A // 194
+data4 0x244C86F9 // 195
+data4 0x246D5B82 // 196
+data4 0x24529347 // 197
+data4 0x240DD008 // 198
+data4 0x24E98790 // 199
+data4 0x2489B0CE // 200
+data4 0x22BC29AC // 201
+data4 0x23F37C7A // 202
+data4 0x24987FE8 // 203
+data4 0x22AFE20B // 204
+data4 0x24C8D7C2 // 205
+data4 0x24B28B7D // 206
+data4 0x23B6B271 // 207
+data4 0x24C77CB6 // 208
+data4 0x24EF1DCA // 209
+data4 0x24A4F0AC // 210
+data4 0x24CF113E // 211
+data4 0x2496BBAB // 212
+data4 0x23C7CC8A // 213
+data4 0x23AE3961 // 214
+data4 0x2410A895 // 215
+data4 0x23CE3114 // 216
+data4 0x2308247D // 217
+data4 0x240045E9 // 218
+data4 0x24974F60 // 219
+data4 0x242CB39F // 220
+data4 0x24AB8D69 // 221
+data4 0x23436788 // 222
+data4 0x24305E9E // 223
+data4 0x243E71A9 // 224
+data4 0x23C2A6B3 // 225
+data4 0x23FFE6CF // 226
+data4 0x2322D801 // 227
+data4 0x24515F21 // 228
+data4 0x2412A0D6 // 229
+data4 0x24E60D44 // 230
+data4 0x240D9251 // 231
+data4 0x247076E2 // 232
+data4 0x229B101B // 233
+data4 0x247B12DE // 234
+data4 0x244B9127 // 235
+data4 0x2499EC42 // 236
+data4 0x21FC3963 // 237
+data4 0x23E53266 // 238
+data4 0x24CE102D // 239
+data4 0x23CC45D2 // 240
+data4 0x2333171D // 241
+data4 0x246B3533 // 242
+data4 0x24931129 // 243
+data4 0x24405FFA // 244
+data4 0x24CF464D // 245
+data4 0x237095CD // 246
+data4 0x24F86CBD // 247
+data4 0x24E2D84B // 248
+data4 0x21ACBB44 // 249
+data4 0x24F43A8C // 250
+data4 0x249DB931 // 251
+data4 0x24A385EF // 252
+data4 0x238B1279 // 253
+data4 0x2436213E // 254
+data4 0x24F18A3B // 255
+LOCAL_OBJECT_END(log_data)
+
+
+// Code
+//==============================================================
.section .text
-.proc log1p#
-.global log1p#
-.align 64
-log1p:
-#ifdef _LIBC
-.global __log1p
-__log1p:
-#endif
-
+GLOBAL_IEEE754_ENTRY(log1p)
{ .mfi
-alloc r32 = ar.pfs,0,22,4,0
-(p0) fsub.s1 FR_Neg_One = f0,f1
-(p0) cmp.eq.unc p7, p0 = r0, r0
+ getf.exp GR_signexp_x = f8 // if x is unorm then must recompute
+ fadd.s1 FR_Xp1 = f8, f1 // Form 1+x
+ mov GR_05 = 0xfffe
}
-
-{ .mfi
-(p0) cmp.ne.unc p14, p0 = r0, r0
-(p0) fnorm.s1 FR_X_Prime = FR_Input_X
-(p0) cmp.eq.unc p15, p0 = r0, r0 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
-(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF
- nop.i 999
+{ .mlx
+ addl GR_ad_1 = @ltoff(log_data),gp
+ movl GR_A3 = 0x3fd5555555555557 // double precision memory
+ // representation of A3
}
;;
{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f0
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fadd FR_Em1 = f0,f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fadd FR_E = f0,f1
- nop.i 999 ;;
+ ld8 GR_ad_1 = [GR_ad_1]
+ fclass.m p8,p0 = f8,0xb // Is x unorm?
+ mov GR_exp_mask = 0x1ffff
}
-
{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, FR_Neg_One
- nop.i 999
+ nop.m 0
+ fnorm.s1 FR_NormX = f8 // Normalize x
+ mov GR_exp_bias = 0xffff
}
+;;
{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, FR_Neg_One
- nop.i 999
-}
-
-
-L(LOG_BEGIN):
-
-{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_Z = FR_X_Prime, FR_E
- nop.i 999
+ setf.exp FR_A2 = GR_05 // create A2 = 0.5
+ fclass.m p9,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
+ nop.i 0
}
-
-{ .mlx
- nop.m 999
-(p0) movl GR_Table_Scale = 0x0000000000000018 ;;
-}
-
-{ .mmi
- nop.m 999
-//
-// Create E = 1 and Em1 = 0
-// Check for X == 0, meaning log(1+0)
-// Check for X < -1, meaning log(negative)
-// Check for X == -1, meaning log(0)
-// Normalize x
-// Identify NatVals, NaNs, Infs.
-// Identify EM unsupporteds.
-// Identify Negative values - us S1 so as
-// not to raise denormal operand exception
-// Set p15 to true for log1p
-// Set p14 to false for log1p
-// Set p7 true for log and log1p
-//
-(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fmax.s1 FR_AA = FR_X_Prime, FR_E
- nop.i 999 ;;
+{ .mib
+ setf.d FR_A3 = GR_A3 // create A3
+ add GR_ad_2 = 16,GR_ad_1 // address of A5,A4
+(p8) br.cond.spnt log1p_unorm // Branch if x=unorm
}
+;;
+log1p_common:
{ .mfi
- ld8 GR_Table_Base = [GR_Table_Base]
-(p0) fmin.s1 FR_BB = FR_X_Prime, FR_E
- nop.i 999
+ nop.m 0
+ frcpa.s1 FR_RcpX,p0 = f1,FR_Xp1
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p0) fadd.s1 FR_W = FR_X_Prime, FR_Em1
-//
-// Begin load of constants base
-// FR_Z = Z = |x| + E
-// FR_W = W = |x| + Em1
-// AA = fmax(|x|,E)
-// BB = fmin(|x|,E)
-//
-(p6) br.cond.spnt L(LOG_64_special) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p10) br.cond.spnt L(LOG_64_unsupported) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p13) br.cond.spnt L(LOG_64_negative) ;;
-}
-
-{ .mib
-(p0) getf.sig GR_signif = FR_Z
- nop.i 999
-(p9) br.cond.spnt L(LOG_64_one) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p8) br.cond.spnt L(LOG_64_zero) ;;
+ nop.m 0
+(p9) fma.d.s0 f8 = f8,f1,f0 // set V-flag
+(p9) br.ret.spnt b0 // exit for NaN, NaT and +Inf
}
+;;
{ .mfi
-(p0) getf.exp GR_N = FR_Z
-//
-// Raise possible denormal operand exception
-// Create Bias
-//
-// This function computes ln( x + e )
-// Input FR 1: FR_X = FR_Input_X
-// Input FR 2: FR_E = FR_E
-// Input FR 3: FR_Em1 = FR_Em1
-// Input GR 1: GR_Expo_Range = GR_Expo_Range = 1
-// Output FR 4: FR_Y_hi
-// Output FR 5: FR_Y_lo
-// Output FR 6: FR_Scale
-// Output PR 7: PR_Safe
-//
-(p0) fsub.s1 FR_S_lo = FR_AA, FR_Z
-//
-// signif = getf.sig(Z)
-// abs_W = fabs(w)
-//
-(p0) extr.u GR_Table_ptr = GR_signif, 59, 4 ;;
+ getf.exp GR_Exp = FR_Xp1 // signexp of x+1
+ fclass.m p10,p0 = FR_Xp1,0x3A // is 1+x < 0?
+ and GR_exp_x = GR_exp_mask, GR_signexp_x // biased exponent of x
}
-
{ .mfi
- nop.m 999
-(p0) fmerge.se FR_S_hi = f1,FR_Z
-(p0) extr.u GR_X_0 = GR_signif, 49, 15
-}
-
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp
- nop.i 999
+ ldfpd FR_A7,FR_A6 = [GR_ad_1]
+ nop.f 0
+ nop.i 0
}
;;
-{ .mlx
- ld8 GR_Table_Base1 = [GR_Table_Base1]
-(p0) movl GR_Bias = 0x000000000000FFFF ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fabs FR_abs_W = FR_W
-(p0) pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0
-}
-
{ .mfi
- nop.m 999
-//
-// Branch out for special input values
-//
-(p0) fcmp.lt.unc.s0 p8, p0 = FR_Input_X, f0
- nop.i 999 ;;
+ getf.sig GR_Sig = FR_Xp1 // get significand to calculate index
+ // for Thi,Tlo if |x| >= 2^-8
+ fcmp.eq.s1 p12,p0 = f8,f0 // is x equal to 0?
+ sub GR_exp_x = GR_exp_x, GR_exp_bias // true exponent of x
}
+;;
{ .mfi
- nop.m 999
-//
-// X_0 = extr.u(signif,49,15)
-// Index1 = extr.u(signif,59,4)
-//
-(p0) fadd.s1 FR_S_lo = FR_S_lo, FR_BB
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// Offset_to_Z1 = 24 * Index1
-// For performance, don't use result
-// for 3 or 4 cycles.
-//
-(p0) add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;;
+ sub GR_N = GR_Exp,GR_exp_bias // true exponent of x+1
+ fcmp.eq.s1 p11,p0 = FR_Xp1,f0 // is x = -1?
+ cmp.gt p6,p7 = -8, GR_exp_x // Is |x| < 2^-8
}
-//
-// Add Base to Offset for Z1
-// Create Bias
-
-{ .mmi
-(p0) ld4 GR_Z_1 = [GR_Table_ptr],4 ;;
-(p0) ldfs FR_G = [GR_Table_ptr],4
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfs FR_H = [GR_Table_ptr],8 ;;
-(p0) ldfd FR_h = [GR_Table_ptr],0
-(p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
+{ .mfb
+ ldfpd FR_A5,FR_A4 = [GR_ad_2],16
+ nop.f 0
+(p10) br.cond.spnt log1p_lt_minus_1 // jump if x < -1
}
-//
-// Load Z_1
-// Get Base of Table2
-//
+;;
+// p6 is true if |x| < 1/256
+// p7 is true if |x| >= 1/256
+.pred.rel "mutex",p6,p7
{ .mfi
-(p0) getf.exp GR_M = FR_abs_W
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// M = getf.exp(abs_W)
-// S_lo = AA - Z
-// X_1 = pmpyshr2(X_0,Z_1,15)
-//
-(p0) sub GR_M = GR_M, GR_Bias ;;
+(p7) add GR_ad_1 = 0x820,GR_ad_1 // address of log(2) parts
+(p6) fms.s1 FR_r = f8,f1,f0 // range reduction for |x|<1/256
+(p6) cmp.gt.unc p10,p0 = -80, GR_exp_x // Is |x| < 2^-80
}
-//
-// M = M - Bias
-// Load G1
-// N = getf.exp(Z)
-//
-
-{ .mii
-(p0) cmp.gt.unc p11, p0 = -80, GR_M
-(p0) cmp.gt.unc p12, p0 = -7, GR_M ;;
-(p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;;
-}
-
-{ .mib
- nop.m 999
-//
-// if -80 > M, set p11
-// Index2 = extr.u(X_1,6,4)
-// if -7 > M, set p12
-// Load H1
-//
-(p0) pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0
-(p11) br.cond.spnt L(log1p_small) ;;
+{ .mfb
+(p7) setf.sig FR_N = GR_N // copy unbiased exponent of x to the
+ // significand field of FR_N (GR_N is the exponent of 1+x)
+(p7) fms.s1 FR_r = FR_RcpX,FR_Xp1,f1 // range reduction for |x|>=1/256
+(p12) br.ret.spnt b0 // exit for x=0, return x
}
+;;
{ .mib
- nop.m 999
- nop.i 999
-(p12) br.cond.spnt L(log1p_near) ;;
-}
-
-{ .mii
-(p0) sub GR_N = GR_N, GR_Bias
-//
-// poly_lo = r * poly_lo
-//
-(p0) add GR_Perturb = 0x1, r0 ;;
-(p0) sub GR_ScaleN = GR_Bias, GR_N
-}
-
-{ .mii
-(p0) setf.sig FR_float_N = GR_N
- nop.i 999 ;;
-//
-// Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
-// Load h1
-// S_lo = S_lo + BB
-// Branch for -80 > M
-//
-(p0) add GR_Index2 = GR_Index2, GR_Table_Base1
-}
-
-{ .mmi
-(p0) setf.exp FR_two_negN = GR_ScaleN
- nop.m 999
-(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp
-};;
-
-//
-// Index2 points to Z2
-// Branch for -7 > M
-//
-
-{ .mmb
-(p0) ld4 GR_Z_2 = [GR_Index2],4
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.b 999 ;;
-}
-(p0) nop.i 999
-//
-// Load Z_2
-// N = N - Bias
-// Tablebase points to Table3
-//
-
-{ .mmi
-(p0) ldfs FR_G_tmp = [GR_Index2],4 ;;
-//
-// Load G_2
-// pmpyshr2 X_2= (X_1,Z_2,15)
-// float_N = setf.sig(N)
-// ScaleN = Bias - N
-//
-(p0) ldfs FR_H_tmp = [GR_Index2],8
- nop.i 999 ;;
-}
-//
-// Load H_2
-// two_negN = setf.exp(scaleN)
-// G = G_1 * G_2
-//
-
-{ .mfi
-(p0) ldfd FR_h_tmp = [GR_Index2],0
- nop.f 999
-(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;;
-//
-// Load h_2
-// H = H_1 + H_2
-// h = h_1 + h_2
-// Index3 = extr.u(X_2,1,5)
-//
-(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Base
+(p7) ldfpd FR_Ln2hi,FR_Ln2lo = [GR_ad_1],16
+(p7) extr.u GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
+(p11) br.cond.spnt log1p_eq_minus_1 // jump if x = -1
}
-
-{ .mmi
- nop.m 999
- nop.m 999
-//
-// float_N = fcvt.xf(float_N)
-// load G3
-//
-(p0) addl GR_Table_Base = @ltoff(Constants_Q#),gp ;;
-}
-
-{ .mfi
-ld8 GR_Table_Base = [GR_Table_Base]
-nop.f 999
-nop.i 999
-} ;;
-
-{ .mfi
-(p0) ldfe FR_log2_hi = [GR_Table_Base],16
-(p0) fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN
- nop.i 999 ;;
-}
-
-{ .mmf
- nop.m 999
-//
-// G = G3 * G
-// Load h3
-// Load log2_hi
-// H = H + H3
-//
-(p0) ldfe FR_log2_lo = [GR_Table_Base],16
-(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp ;;
-}
-
-{ .mmf
-(p0) ldfs FR_G_tmp = [GR_Index3],4
-//
-// h = h + h3
-// r = G * S_hi + 1
-// Load log2_lo
-//
-(p0) ldfe FR_Q4 = [GR_Table_Base],16
-(p0) fadd.s1 FR_h = FR_h, FR_h_tmp ;;
-}
-
-{ .mfi
-(p0) ldfe FR_Q3 = [GR_Table_Base],16
-(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
- nop.i 999 ;;
-}
-
-{ .mmf
-(p0) ldfs FR_H_tmp = [GR_Index3],4
-(p0) ldfe FR_Q2 = [GR_Table_Base],16
-//
-// Comput Index for Table3
-// S_lo = S_lo * two_negN
-//
-(p0) fcvt.xf FR_float_N = FR_float_N ;;
-}
-//
-// If S_lo == 0, set p8 false
-// Load H3
-// Load ptr to table of polynomial coeff.
-//
+;;
{ .mmf
-(p0) ldfd FR_h_tmp = [GR_Index3],0
-(p0) ldfe FR_Q1 = [GR_Table_Base],0
-(p0) fcmp.eq.unc.s1 p0, p8 = FR_S_lo, f0 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fms.s1 FR_r = FR_G, FR_S_hi, f1
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_h = FR_h, FR_h_tmp
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Load Q4
-// Load Q3
-// Load Q2
-// Load Q1
-//
-(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// poly_lo = r * Q4 + Q3
-// rsq = r* r
-//
-(p0) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// If (S_lo!=0) r = s_lo * G + r
-//
-(p0) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
- nop.i 999
-}
-//
-// Create a 0x00000....01
-// poly_lo = poly_lo * rsq + h
-//
-
-{ .mfi
-(p0) setf.sig FR_dummy = GR_Perturb
-(p0) fmpy.s1 FR_rsq = FR_r, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// h = N * log2_lo + h
-// Y_hi = n * log2_hi + H
-//
-(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// poly_lo = r * poly_o + Q2
-// poly_hi = Q1 * rsq + r
-//
-(p0) fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p0) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo
-//
-// Create the FR for a binary "or"
-// Y_lo = poly_hi + poly_lo
-//
-// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
-//
-// Turn the lsb of Y_lo ON
-//
-// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
-//
-// Merge the new lsb into Y_lo, for alone doesn't
-//
-(p0) br.cond.sptk L(LOG_main) ;;
-}
-
-
-L(log1p_near):
-
-{ .mmi
- nop.m 999
- nop.m 999
-// /*******************************************************/
-// /*********** Branch log1p_near ************************/
-// /*******************************************************/
-(p0) addl GR_Table_Base = @ltoff(Constants_P#),gp ;;
-}
-//
-// Load base address of poly. coeff.
-//
-{.mmi
- nop.m 999
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.i 999
-};;
-
-{ .mmb
-(p0) add GR_Table_ptr = 0x40,GR_Table_Base
-//
-// Address tables with separate pointers
-//
-(p0) ldfe FR_P8 = [GR_Table_Base],16
- nop.b 999 ;;
+(p7) shladd GR_ad_2 = GR_Ind,3,GR_ad_2 // address of Thi
+(p7) shladd GR_ad_1 = GR_Ind,2,GR_ad_1 // address of Tlo
+(p10) fnma.d.s0 f8 = f8,f8,f8 // If |x| very small, result=x-x*x
}
+;;
{ .mmb
-(p0) ldfe FR_P4 = [GR_Table_ptr],16
-//
-// Load P4
-// Load P8
-//
-(p0) ldfe FR_P7 = [GR_Table_Base],16
- nop.b 999 ;;
-}
-
-{ .mmf
-(p0) ldfe FR_P3 = [GR_Table_ptr],16
-//
-// Load P3
-// Load P7
-//
-(p0) ldfe FR_P6 = [GR_Table_Base],16
-(p0) fmpy.s1 FR_wsq = FR_W, FR_W ;;
-}
-
-{ .mfi
-(p0) ldfe FR_P2 = [GR_Table_ptr],16
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3
- nop.i 999
-}
-//
-// Load P2
-// Load P6
-// Wsq = w * w
-// Y_hi = p4 * w + p3
-//
-
-{ .mfi
-(p0) ldfe FR_P5 = [GR_Table_Base],16
-(p0) fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7
- nop.i 999 ;;
-}
-
-{ .mfi
-(p0) ldfe FR_P1 = [GR_Table_ptr],16
-//
-// Load P1
-// Load P5
-// Y_lo = p8 * w + P7
-//
-(p0) fmpy.s1 FR_w4 = FR_wsq, FR_wsq
- nop.i 999 ;;
+(p7) ldfd FR_Thi = [GR_ad_2]
+(p7) ldfs FR_Tlo = [GR_ad_1]
+(p10) br.ret.spnt b0 // Exit if |x| < 2^(-80)
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2
- nop.i 999
+ nop.m 0
+ fma.s1 FR_r2 = FR_r,FR_r,f0 // r^2
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6
-(p0) add GR_Perturb = 0x1, r0 ;;
+ nop.m 0
+ fma.s1 FR_A2 = FR_A3,FR_r,FR_A2 // A3*r+A2 (multiply-add, same form as the A7*r+A6 and A5*r+A4 terms)
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-//
-// w4 = w2 * w2
-// Y_hi = y_hi * w + p2
-// Y_lo = y_lo * w + p6
-// Create perturbation bit
-//
-(p0) fmpy.s1 FR_w6 = FR_w4, FR_wsq
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 FR_A6 = FR_A7,FR_r,FR_A6 // A7*r+A6
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1
- nop.i 999
+ nop.m 0
+ fma.s1 FR_A4 = FR_A5,FR_r,FR_A4 // A5*r+A4
+ nop.i 0
}
-//
-// Y_hi = y_hi * w + p1
-// w6 = w4 * w2
-//
+;;
{ .mfi
-(p0) setf.sig FR_Q4 = GR_Perturb
-(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5
- nop.i 999 ;;
+ nop.m 0
+(p7) fcvt.xf FR_N = FR_N
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_wsq,FR_Y_hi, FR_W
- nop.i 999
-}
-
-{ .mfb
- nop.m 999
-//
-// Y_hi = y_hi * wsq + w
-// Y_lo = y_lo * w + p5
-//
-(p0) fmpy.s1 FR_Y_lo = FR_w6, FR_Y_lo
-//
-// Y_lo = y_lo * w6
-//
-// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
-//
-// Set lsb on: Taken out to improve performance
-//
-// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
-//
-// Make sure it's on in Y_lo also. Taken out to improve
-// performance
-//
-(p0) br.cond.sptk L(LOG_main) ;;
-}
-
-
-L(log1p_small):
-
-{ .mmi
- nop.m 999
- nop.m 999
-// /*******************************************************/
-// /*********** Branch log1p_small ***********************/
-// /*******************************************************/
-(p0) addl GR_Table_Base = @ltoff(Constants_Threshold#),gp
+ nop.m 0
+ fma.s1 FR_r4 = FR_r2,FR_r2,f0 // r^4
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p0) mov FR_Em1 = FR_W
-(p0) cmp.eq.unc p7, p0 = r0, r0 ;;
-}
-
-{ .mlx
- ld8 GR_Table_Base = [GR_Table_Base]
-(p0) movl GR_Expo_Range = 0x0000000000000002 ;;
-}
-//
-// Set Safe to true
-// Set Expo_Range = 0 for single
-// Set Expo_Range = 2 for double
-// Set Expo_Range = 4 for double-extended
-//
-
-{ .mmi
-(p0) shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;;
-(p0) ldfe FR_Threshold = [GR_Table_Base],16
- nop.i 999
-}
-
-{ .mlx
- nop.m 999
-(p0) movl GR_Bias = 0x000000000000FF9B ;;
+ nop.m 0
+ // (A3*r+A2)*r^2+r
+ fma.s1 FR_A2 = FR_A2,FR_r2,FR_r
+ nop.i 0
}
+;;
{ .mfi
-(p0) ldfe FR_Tiny = [GR_Table_Base],0
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ // (A7*r+A6)*r^2+(A5*r+A4)
+ fma.s1 FR_A4 = FR_A6,FR_r2,FR_A4
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fcmp.gt.unc.s1 p13, p12 = FR_abs_W, FR_Threshold
- nop.i 999 ;;
+ nop.m 0
+ // N*Ln2hi+Thi
+(p7) fma.s1 FR_NxLn2hipThi = FR_N,FR_Ln2hi,FR_Thi
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W
- nop.i 999
+ nop.m 0
+ // N*Ln2lo+Tlo
+(p7) fma.s1 FR_NxLn2lopTlo = FR_N,FR_Ln2lo,FR_Tlo
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p13) fadd FR_SCALE = f0, f1
- nop.i 999 ;;
+ nop.m 0
+(p7) fma.s1 f8 = FR_A4,FR_r4,FR_A2 // P(r) if |x| >= 1/256
+ nop.i 0
}
-
{ .mfi
- nop.m 999
-(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny
-(p12) cmp.ne.unc p7, p0 = r0, r0
+ nop.m 0
+ // (N*Ln2hi+Thi) + (N*Ln2lo+Tlo)
+(p7) fma.s1 FR_NxLn2pT = FR_NxLn2hipThi,f1,FR_NxLn2lopTlo
+ nop.i 0
}
+;;
+.pred.rel "mutex",p6,p7
{ .mfi
-(p12) setf.exp FR_SCALE = GR_Bias
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.d.s0 f8 = FR_A4,FR_r4,FR_A2 // result if 2^(-80) <= |x| < 1/256
+ nop.i 0
}
-
-//
-// Set p7 to SAFE = FALSE
-// Set Scale = 2^-100
-//
{ .mfb
- nop.m 999
-(p0) fma.d.s0 FR_Input_X = FR_Y_lo,FR_SCALE,FR_Y_hi
-(p0) br.ret.sptk b0
+ nop.m 0
+(p7) fma.d.s0 f8 = f8,f1,FR_NxLn2pT // result if |x| >= 1/256
+ br.ret.sptk b0 // Exit if |x| >= 2^(-80)
}
;;
-L(LOG_64_one):
-
+.align 32
+log1p_unorm:
+// Here if x=unorm
{ .mfb
- nop.m 999
-(p0) fmpy.d.s0 FR_Input_X = FR_Input_X, f0
-(p0) br.ret.sptk b0
+ getf.exp GR_signexp_x = FR_NormX // recompute biased exponent
+ nop.f 0
+ br.cond.sptk log1p_common
}
;;
-//
-// Raise divide by zero for +/-0 input.
-//
-L(LOG_64_zero):
-
+.align 32
+log1p_eq_minus_1:
+// Here if x=-1
{ .mfi
-(p0) mov GR_Parameter_TAG = 140
-//
-// If we have log1p(0), return -Inf.
-//
-(p0) fsub.s0 FR_Output_X_tmp = f0, f1
- nop.i 999 ;;
+ nop.m 0
+ fmerge.s FR_X = f8,f8 // keep input argument for subsequent
+ // call of __libm_error_support#
+ nop.i 0
}
-{ .mfb
- nop.m 999
-(p0) frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
-(p0) br.cond.sptk L(LOG_ERROR_Support) ;;
-}
-
-L(LOG_64_special):
+;;
{ .mfi
- nop.m 999
-//
-// Return -Inf or value from handler.
-//
-(p0) fclass.m.unc p7, p0 = FR_Input_X, 0x1E1
- nop.i 999 ;;
+ mov GR_TAG = 140 // set libm error in case of log1p(-1).
+ frcpa.s0 f8,p0 = f8,f0 // log1p(-1) should be equal to -INF.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of f8/f0.
+ nop.i 0
}
-{ .mfb
- nop.m 999
-//
-// Check for Natval, QNan, SNaN, +Inf
-//
-(p7) fmpy.d.s0 f8 = FR_Input_X, f1
-//
-// For SNaN raise invalid and return QNaN.
-// For QNaN raise invalid and return QNaN.
-// For +Inf return +Inf.
-//
-(p7) br.ret.sptk b0
+{ .mib
+ nop.m 0
+ nop.i 0
+ br.cond.sptk log_libm_err
}
;;
-//
-// For -Inf raise invalid and return QNaN.
-//
-
-{ .mfb
-(p0) mov GR_Parameter_TAG = 141
-(p0) fmpy.d.s0 FR_Output_X_tmp = FR_Input_X, f0
-(p0) br.cond.sptk L(LOG_ERROR_Support) ;;
+.align 32
+log1p_lt_minus_1:
+// Here if x < -1
+{ .mfi
+ nop.m 0
+ fmerge.s FR_X = f8,f8
+ nop.i 0
}
+;;
-//
-// Report that log1p(-Inf) computed
-//
-
-L(LOG_64_unsupported):
-
-//
-// Return generated NaN or other value .
-//
-
-{ .mfb
- nop.m 999
-(p0) fmpy.d.s0 FR_Input_X = FR_Input_X, f0
-(p0) br.ret.sptk b0 ;;
+{ .mfi
+ mov GR_TAG = 141 // set libm error in case of x < -1.
+ frcpa.s0 f8,p0 = f0,f0 // log1p(x) x < -1 should be equal to NaN.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of f0/f0 i.e. NaN.
+ nop.i 0
}
+;;
-L(LOG_64_negative):
-
-{ .mfi
- nop.m 999
-//
-// Deal with x < 0 in a special way
-//
-(p0) frcpa.s0 FR_Output_X_tmp, p8 = f0, f0
-//
-// Deal with x < 0 in a special way - raise
-// invalid and produce QNaN indefinite.
-//
-(p0) mov GR_Parameter_TAG = 141
+.align 32
+log_libm_err:
+{ .mmi
+ alloc r32 = ar.pfs,1,4,4,0
+ mov GR_Parameter_TAG = GR_TAG
+ nop.i 0
}
+;;
-.endp log1p#
-ASM_SIZE_DIRECTIVE(log1p)
+GLOBAL_IEEE754_END(log1p)
-.proc __libm_error_region
-__libm_error_region:
-L(LOG_ERROR_Support):
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y = -32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp = -64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP = gp // Save gp
};;
-
-
-// (2)
{ .mmi
- stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0 = b0 // Save b0
};;
-
.body
-// (3)
{ .mib
- stfd [GR_Parameter_X] =FR_Input_X // STORE Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = FR_Output_X_tmp // STORE Parameter 3 on stack
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-
-// (4)
{ .mmi
- ldfd FR_Input_X = [GR_Parameter_RESULT] // Get return result off stack
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.proc __libm_LOG_main
-__libm_LOG_main:
-L(LOG_main):
-
-//
-// kernel_log_64 computes ln(X + E)
-//
-
-{ .mfi
- nop.m 999
-(p7) fadd.d.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
- nop.i 999
-}
-
-{ .mmi
- nop.m 999
- nop.m 999
-(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;;
-}
-
-{ .mmi
- nop.m 999
-(p14) ld8 GR_Table_Base = [GR_Table_Base]
- nop.i 999
-};;
-
-{ .mmi
-(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;;
-(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p14) fma.s1 FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p14) fma.d.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp
-(p0) br.ret.sptk b0 ;;
-}
-.endp __libm_LOG_main
-ASM_SIZE_DIRECTIVE(__libm_LOG_main)
-
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
diff --git a/sysdeps/ia64/fpu/s_log1pf.S b/sysdeps/ia64/fpu/s_log1pf.S
index 8aff9b895a..a148d4b272 100644
--- a/sysdeps/ia64/fpu/s_log1pf.S
+++ b/sysdeps/ia64/fpu/s_log1pf.S
@@ -1,10 +1,10 @@
-.file "log1pf.s"
+.file "log1pf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,1610 +20,768 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 06/29/01 Improved speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 10/02/02 Improved performance by basing on log algorithm
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/18/03 Eliminate possible WAW dependency warning
//
-// *********************************************************************
-//
-// Function: log1pf(x) = ln(x+1), for single precision values
-//
-// *********************************************************************
-//
-// Accuracy: Very accurate for single precision values
-//
-// *********************************************************************
-//
-// Resources Used:
-//
-// Floating-Point Registers: f8 (Input and Return Value)
-// f9,f33-f55,f99
-//
-// General Purpose Registers:
-// r32-r53
-// r54-r57 (Used to pass arguments to error handling routine)
-//
-// Predicate Registers: p6-p15
-//
-// *********************************************************************
-//
-// IEEE Special Conditions:
-//
-// Denormal fault raised on denormal inputs
-// Overflow exceptions cannot occur
-// Underflow exceptions raised when appropriate for log1pf
-// (Error Handling Routine called for underflow)
-// Inexact raised when appropriate by algorithm
-//
-// log1pf(inf) = inf
-// log1pf(-inf) = QNaN
-// log1pf(+/-0) = +/-0
-// log1pf(-1) = -inf
-// log1pf(SNaN) = QNaN
-// log1pf(QNaN) = QNaN
-// log1pf(EM_special Values) = QNaN
-//
-// *********************************************************************
-//
-// Computation is based on the following kernel.
-//
-// ker_log_64( in_FR : X,
-// in_FR : E,
-// in_FR : Em1,
-// in_GR : Expo_Range,
-// out_FR : Y_hi,
-// out_FR : Y_lo,
-// out_FR : Scale,
-// out_PR : Safe )
-//
-// Overview
-//
-// The method consists of three cases.
-//
-// If |X+Em1| < 2^(-80) use case log1pf_small;
-// elseif |X+Em1| < 2^(-7) use case log_near1;
-// else use case log_regular;
-//
-// Case log1pf_small:
-//
-// log( 1 + (X+Em1) ) can be approximated by (X+Em1).
-//
-// Case log_near1:
-//
-// log( 1 + (X+Em1) ) can be approximated by a simple polynomial
-// in W = X+Em1. This polynomial resembles the truncated Taylor
-// series W - W^/2 + W^3/3 - ...
-//
-// Case log_regular:
-//
-// Here we use a table lookup method. The basic idea is that in
-// order to compute log(Arg) for an argument Arg in [1,2), we
-// construct a value G such that G*Arg is close to 1 and that
-// log(1/G) is obtainable easily from a table of values calculated
-// beforehand. Thus
-//
-// log(Arg) = log(1/G) + log(G*Arg)
-// = log(1/G) + log(1 + (G*Arg - 1))
-//
-// Because |G*Arg - 1| is small, the second term on the right hand
-// side can be approximated by a short polynomial. We elaborate
-// this method in four steps.
-//
-// Step 0: Initialization
-//
-// We need to calculate log( E + X ). Obtain N, S_hi, S_lo such that
-//
-// E + X = 2^N * ( S_hi + S_lo ) exactly
-//
-// where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
-// that |S_lo| <= ulp(S_hi).
-//
-// Step 1: Argument Reduction
-//
-// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
-//
-// G := G_1 * G_2 * G_3
-// r := (G * S_hi - 1) + G * S_lo
-//
-// These G_j's have the property that the product is exactly
-// representable and that |r| < 2^(-12) as a result.
-//
-// Step 2: Approximation
-//
-//
-// log(1 + r) is approximated by a short polynomial poly(r).
-//
-// Step 3: Reconstruction
-//
-//
-// Finally, log( E + X ) is given by
-//
-// log( E + X ) = log( 2^N * (S_hi + S_lo) )
-// ~=~ N*log(2) + log(1/G) + log(1 + r)
-// ~=~ N*log(2) + log(1/G) + poly(r).
-//
-// **** Algorithm ****
-//
-// Case log1pf_small:
-//
-// Although log(1 + (X+Em1)) is basically X+Em1, we would like to
-// preserve the inexactness nature as well as consistent behavior
-// under different rounding modes. Note that this case can only be
-// taken if E is set to be 1.0. In this case, Em1 is zero, and that
-// X can be very tiny and thus the final result can possibly underflow.
-// Thus, we compare X against a threshold that is dependent on the
-// input Expo_Range. If |X| is smaller than this threshold, we set
-// SAFE to be FALSE.
-//
-// The result is returned as Y_hi, Y_lo, and in the case of SAFE
-// is FALSE, an additional value Scale is also returned.
-//
-// W := X + Em1
-// Threshold := Threshold_Table( Expo_Range )
-// Tiny := Tiny_Table( Expo_Range )
-//
-// If ( |W| > Threshold ) then
-// Y_hi := W
-// Y_lo := -W*W
-// Else
-// Y_hi := W
-// Y_lo := -Tiny
-// Scale := 2^(-100)
-// Safe := FALSE
-// EndIf
-//
-//
-// One may think that Y_lo should be -W*W/2; however, it does not matter
-// as Y_lo will be rounded off completely except for the correct effect in
-// directed rounding. Clearly -W*W is simplier to compute. Moreover,
-// because of the difference in exponent value, Y_hi + Y_lo or
-// Y_hi + Scale*Y_lo is always inexact.
-//
-// Case log_near1:
-//
-// Here we compute a simple polynomial. To exploit parallelism, we split
-// the polynomial into two portions.
-//
-// W := X + Em1
-// Wsq := W * W
-// W4 := Wsq*Wsq
-// W6 := W4*Wsq
-// Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4))
-// Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
-// set lsb(Y_lo) to be 1
-//
-// Case log_regular:
-//
-// We present the algorithm in four steps.
-//
-// Step 0. Initialization
-// ----------------------
-//
-// Z := X + E
-// N := unbaised exponent of Z
-// S_hi := 2^(-N) * Z
-// S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
-//
-// Note that S_lo is always 0 for the case E = 0.
-//
-// Step 1. Argument Reduction
-// --------------------------
-//
-// Let
-//
-// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
-//
-// We obtain G_1, G_2, G_3 by the following steps.
-//
+// API
+//==============================================================
+// float log1pf(float)
//
-// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
-// from S_hi.
+// log1p(x) = log(x+1)
//
-// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
-// to lsb = 2^(-4).
+// Overview of operation
+//==============================================================
+// Background
+// ----------
//
-// Define index_1 := [ d_1 d_2 d_3 d_4 ].
+// This algorithm is based on the fact that
+// log1p(x) = log(1+x) and
+// log(a b) = log(a) + log(b).
+// In our case we have 1+x = 2^N f, where 1 <= f < 2.
+// So
+// log(1+x) = log(2^N f) = log(2^N) + log(f) = N*log(2) + log(f)
//
-// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
-// fixed point lsb = 2^(-15).
-// Z_1 looks like z_0.z_1 z_2 ... z_15
-// Note that the fetching is done using index_1.
-// A_1 is actually not needed in the implementation
-// and is used here only to explain how is the value
-// Z_1 defined.
+// To calculate log(f) we do the following
+// log(f) = log(f * frcpa(f) / frcpa(f)) =
+// = log(f * frcpa(f)) + log(1/frcpa(f))
//
-// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
-// floating pt. Again, fetching is done using index_1. A_1
-// explains how G_1 is defined.
+// By the definition of IA-64's frcpa instruction, its result is a
+// floating-point value that approximates 1/f using a lookup on the
+// top 8 bits of the significand of its input (here x + 1), with relative
+// error < 2^(-8.886). So we have the following
//
-// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
-// = 1.0 0 0 0 d_5 ... d_14
-// This is accomplised by integer multiplication.
-// It is proved that X_1 indeed always begin
-// with 1.0000 in fixed point.
+// |(1/f - frcpa(f)) / (1/f)| = |1 - f*frcpa(f)| < 1/256
//
+// and
//
-// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
-// truncated to lsb = 2^(-8). Similar to A_1,
-// A_2 is not needed in actual implementation. It
-// helps explain how some of the values are defined.
+// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
+// = log(1 + r) + T
//
-// Define index_2 := [ d_5 d_6 d_7 d_8 ].
+// The first value can be computed by polynomial P(r) approximating
+// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
+// value defined by top 8 bit of f.
//
-// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
-// fixed point lsb = 2^(-15). Fetch done using index_2.
-// Z_2 looks like z_0.z_1 z_2 ... z_15
+// Finally we have that log(1+x) ~ (N*log(2) + T) + P(r)
//
-// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
-// floating pt.
+// Note that if input argument is close to 0.0 (in our case it means
+// that |x| < 1/256) we can use just polynomial approximation
+// because 1+x = 2^0 * f = f = 1 + r and
+// log(1+x) = log(1 + r) ~ P(r)
//
-// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
-// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
-// This is accomplised by integer multiplication.
-// It is proved that X_2 indeed always begin
-// with 1.00000000 in fixed point.
//
+// Implementation
+// --------------
//
-// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
-// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+// 1. |x| >= 2^(-8), and x > -1
+// InvX = frcpa(x+1)
+// r = InvX*(x+1) - 1
+// P(r) = r*((1 - A2*r) + r^2*(A3 - A4*r)) = r*P2(r),
+// A4,A3,A2 are created with setf instruction.
+// We use Taylor series and so A4 = 1/4, A3 = 1/3,
+// A2 = 1/2 rounded to double.
//
-// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+// N = float(n) where n is true unbiased exponent of x
//
-// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
-// floating pt. Fetch is done using index_3.
+// T is tabular value of log(1/frcpa(x)) calculated in quad precision
+// and rounded to double. To load T we get bits from 55 to 62 of register
+// format significand as index and calculate address
+// ad_T = table_base_addr + 8 * index
//
-// Compute G := G_1 * G_2 * G_3.
+// L1 (log(2)) is calculated in quad precision and rounded to double;
+// it's created with setf
//
-// This is done exactly since each of G_j only has 21 sig. bits.
+// And final result = P2(r)*r + (T + N*L1)
//
-// Compute
//
-// r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
+// 2. 2^(-40) <= |x| < 2^(-8)
+// r = x
+// P(r) = r*((1 - A2*r) + r^2*(A3 - A4*r)) = r*P2(r),
+// A4,A3,A2 are the same as in case |x| >= 1/256
//
-// thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of
-// rounding errors.
+// And final result = P2(r)*r
//
+// 3. 0 < |x| < 2^(-40)
+// Although log1p(x) is basically x, we would like to preserve the inexactness
+// nature as well as consistent behavior under different rounding modes.
+// We can do this by computing the result as
//
-// Step 2. Approximation
-// ---------------------
+// log1p(x) = x - x*x
//
-// This step computes an approximation to log( 1 + r ) where r is the
-// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
-// thus log(1+r) can be approximated by a short polynomial:
//
-// log(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+// Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
+// filtered and processed on special branches.
//
+
//
-// Step 3. Reconstruction
-// ----------------------
+// Special values
+//==============================================================
//
-// This step computes the desired result of log(X+E):
+// log1p(-1) = -inf // Call error support
//
-// log(X+E) = log( 2^N * (S_hi + S_lo) )
-// = N*log(2) + log( S_hi + S_lo )
-// = N*log(2) + log(1/G) +
-// log(1 + C*(S_hi+S_lo) - 1 )
+// log1p(+qnan) = +qnan
+// log1p(-qnan) = -qnan
+// log1p(+snan) = +qnan
+// log1p(-snan) = -qnan
//
-// log(2), log(1/G_j) are stored as pairs of (single,double) numbers:
-// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
-// single-precision numbers and the low parts are double precision
-// numbers. These have the property that
+// log1p(x),x<-1= QNAN Indefinite // Call error support
+// log1p(-inf) = QNAN Indefinite
+// log1p(+inf) = +inf
+// log1p(+/-0) = +/-0
//
-// N*log2_hi + SUM ( log1byGj_hi )
//
-// is computable exactly in double-extended precision (64 sig. bits).
-// Finally
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f7 -> f15, f32 -> f36
//
-// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
-// Y_lo := poly_hi + [ poly_lo +
-// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
-// set lsb(Y_lo) to be 1
+// General registers used:
+// r8 -> r11
+// r14 -> r22
//
+// Predicate registers used:
+// p6 -> p12
-#include "libm_support.h"
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// Assembly macros
+//==============================================================
+GR_TAG = r8
+GR_ad_T = r9
+GR_Exp = r10
+GR_N = r11
-// P_7, P_6, P_5, P_4, P_3, P_2, and P_1
+GR_signexp_x = r14
+GR_exp_mask = r15
+GR_exp_bias = r16
+GR_05 = r17
+GR_A3 = r18
+GR_Sig = r19
+GR_Ind = r19
+GR_exp_x = r20
+GR_Ln2 = r21
+GR_025 = r22
-.align 64
-Constants_P:
-ASM_TYPE_DIRECTIVE(Constants_P,@object)
-data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
-data4 0xA5E56381,0x8003B271,0x0000BFFC,0x00000000
-data4 0x73282DB0,0x9249248C,0x00003FFC,0x00000000
-data4 0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000
-data4 0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000
-data4 0x00067ED5,0x80000000,0x0000BFFD,0x00000000
-data4 0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000
-data4 0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_P)
-
-// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
-.align 64
-Constants_Q:
-ASM_TYPE_DIRECTIVE(Constants_Q,@object)
-data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
-data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
-data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
-data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
-data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
-data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_Q)
-
-// Z1 - 16 bit fixed, G1 and H1 - IEEE single
-
-.align 64
-Constants_Z_G_H_h1:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
-data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
-data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6
-data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
-data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
-data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
-data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
-data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
-data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
-data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
-data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
-data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
-data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
-data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
-data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
-data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
-data4 0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
-
-// Z2 - 16 bit fixed, G2 and H2 - IEEE single
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
-.align 64
-Constants_Z_G_H_h2:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
-data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
-data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116
-data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
-data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
-data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
-data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
-data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
-data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
-data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
-data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
-data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
-data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
-data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
-data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
-data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
-data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
-
-// G3 and H3 - IEEE single and h3 -IEEE double
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
-.align 64
-Constants_Z_G_H_h3:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
-data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
-data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
-data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
-data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
-data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
-data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
-data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
-data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
-data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
-data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
-data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
-data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
-data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
-data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
-data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
-data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
-data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
-data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
-data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
-data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
-data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
-data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
-data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
-data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
-data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
-data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
-data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
-data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
-data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
-data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
-data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
-data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
-
-//
-// Exponent Thresholds and Tiny Thresholds
-// for 8, 11, 15, and 17 bit exponents
-//
-// Expo_Range Value
-//
-// 0 (8 bits) 2^(-126)
-// 1 (11 bits) 2^(-1022)
-// 2 (15 bits) 2^(-16382)
-// 3 (17 bits) 2^(-16382)
-//
-// Tiny_Table
-// ----------
-// Expo_Range Value
-//
-// 0 (8 bits) 2^(-16382)
-// 1 (11 bits) 2^(-16382)
-// 2 (15 bits) 2^(-16382)
-// 3 (17 bits) 2^(-16382)
-//
-.align 64
-Constants_Threshold:
-ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
-data4 0x00000000,0x80000000,0x00003F81,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00003C01,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_Threshold)
-.align 64
-Constants_1_by_LN10:
-ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
-data4 0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000
-data4 0xACCF70C8,0xD56EAABE,0x00003FBD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
+FR_NormX = f7
+FR_RcpX = f9
+FR_r = f10
+FR_r2 = f11
+FR_r4 = f12
+FR_N = f13
+FR_Ln2 = f14
+FR_Xp1 = f15
-FR_Input_X = f8
-FR_Neg_One = f9
-FR_E = f33
-FR_Em1 = f34
-FR_Y_hi = f34
-// Shared with Em1
-FR_Y_lo = f35
-FR_Scale = f36
-FR_X_Prime = f37
-FR_Z = f38
-FR_S_hi = f38
-// Shared with Z
-FR_W = f39
-FR_G = f40
-FR_wsq = f40
-// Shared with G
-FR_H = f41
-FR_w4 = f41
-// Shared with H
-FR_h = f42
-FR_w6 = f42
-// Shared with h
-FR_G_tmp = f43
-FR_poly_lo = f43
-// Shared with G_tmp
-FR_P8 = f43
-// Shared with G_tmp
-FR_H_tmp = f44
-FR_poly_hi = f44
- // Shared with H_tmp
-FR_P7 = f44
-// Shared with H_tmp
-FR_h_tmp = f45
-FR_rsq = f45
-// Shared with h_tmp
-FR_P6 = f45
-// Shared with h_tmp
-FR_abs_W = f46
-FR_r = f46
-// Shared with abs_W
-FR_AA = f47
-FR_log2_hi = f47
-// Shared with AA
-FR_BB = f48
-FR_log2_lo = f48
-// Shared with BB
-FR_S_lo = f49
-FR_two_negN = f50
-FR_float_N = f51
-FR_Q4 = f52
-FR_dummy = f52
-// Shared with Q4
-FR_P4 = f52
-// Shared with Q4
-FR_Threshold = f52
-// Shared with Q4
-FR_Q3 = f53
-FR_P3 = f53
-// Shared with Q3
-FR_Tiny = f53
-// Shared with Q3
-FR_Q2 = f54
-FR_P2 = f54
-// Shared with Q2
-FR_1LN10_hi = f54
-// Shared with Q2
-FR_Q1 = f55
-FR_P1 = f55
-// Shared with Q1
-FR_1LN10_lo = f55
-// Shared with Q1
-FR_P5 = f98
-FR_SCALE = f98
-FR_Output_X_tmp = f99
+FR_A4 = f33
+FR_A3 = f34
+FR_A2 = f35
-GR_Expo_Range = r32
-GR_Table_Base = r34
-GR_Table_Base1 = r35
-GR_Table_ptr = r36
-GR_Index2 = r37
-GR_signif = r38
-GR_X_0 = r39
-GR_X_1 = r40
-GR_X_2 = r41
-GR_Z_1 = r42
-GR_Z_2 = r43
-GR_N = r44
-GR_Bias = r45
-GR_M = r46
-GR_ScaleN = r47
-GR_Index3 = r48
-GR_Perturb = r49
-GR_Table_Scale = r50
+FR_T = f36
+FR_NxLn2pT = f36
-GR_SAVE_PFS = r51
-GR_SAVE_B0 = r52
-GR_SAVE_GP = r53
-GR_Parameter_X = r54
-GR_Parameter_Y = r55
-GR_Parameter_RESULT = r56
+FR_Y = f1
+FR_X = f10
+FR_RESULT = f8
-GR_Parameter_TAG = r57
+// Data
+//==============================================================
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(log_data)
+// ln(1/frcpa(1+i/256)), i=0...255
+data8 0x3F60040155D5889E // 0
+data8 0x3F78121214586B54 // 1
+data8 0x3F841929F96832F0 // 2
+data8 0x3F8C317384C75F06 // 3
+data8 0x3F91A6B91AC73386 // 4
+data8 0x3F95BA9A5D9AC039 // 5
+data8 0x3F99D2A8074325F4 // 6
+data8 0x3F9D6B2725979802 // 7
+data8 0x3FA0C58FA19DFAAA // 8
+data8 0x3FA2954C78CBCE1B // 9
+data8 0x3FA4A94D2DA96C56 // 10
+data8 0x3FA67C94F2D4BB58 // 11
+data8 0x3FA85188B630F068 // 12
+data8 0x3FAA6B8ABE73AF4C // 13
+data8 0x3FAC441E06F72A9E // 14
+data8 0x3FAE1E6713606D07 // 15
+data8 0x3FAFFA6911AB9301 // 16
+data8 0x3FB0EC139C5DA601 // 17
+data8 0x3FB1DBD2643D190B // 18
+data8 0x3FB2CC7284FE5F1C // 19
+data8 0x3FB3BDF5A7D1EE64 // 20
+data8 0x3FB4B05D7AA012E0 // 21
+data8 0x3FB580DB7CEB5702 // 22
+data8 0x3FB674F089365A7A // 23
+data8 0x3FB769EF2C6B568D // 24
+data8 0x3FB85FD927506A48 // 25
+data8 0x3FB9335E5D594989 // 26
+data8 0x3FBA2B0220C8E5F5 // 27
+data8 0x3FBB0004AC1A86AC // 28
+data8 0x3FBBF968769FCA11 // 29
+data8 0x3FBCCFEDBFEE13A8 // 30
+data8 0x3FBDA727638446A2 // 31
+data8 0x3FBEA3257FE10F7A // 32
+data8 0x3FBF7BE9FEDBFDE6 // 33
+data8 0x3FC02AB352FF25F4 // 34
+data8 0x3FC097CE579D204D // 35
+data8 0x3FC1178E8227E47C // 36
+data8 0x3FC185747DBECF34 // 37
+data8 0x3FC1F3B925F25D41 // 38
+data8 0x3FC2625D1E6DDF57 // 39
+data8 0x3FC2D1610C86813A // 40
+data8 0x3FC340C59741142E // 41
+data8 0x3FC3B08B6757F2A9 // 42
+data8 0x3FC40DFB08378003 // 43
+data8 0x3FC47E74E8CA5F7C // 44
+data8 0x3FC4EF51F6466DE4 // 45
+data8 0x3FC56092E02BA516 // 46
+data8 0x3FC5D23857CD74D5 // 47
+data8 0x3FC6313A37335D76 // 48
+data8 0x3FC6A399DABBD383 // 49
+data8 0x3FC70337DD3CE41B // 50
+data8 0x3FC77654128F6127 // 51
+data8 0x3FC7E9D82A0B022D // 52
+data8 0x3FC84A6B759F512F // 53
+data8 0x3FC8AB47D5F5A310 // 54
+data8 0x3FC91FE49096581B // 55
+data8 0x3FC981634011AA75 // 56
+data8 0x3FC9F6C407089664 // 57
+data8 0x3FCA58E729348F43 // 58
+data8 0x3FCABB55C31693AD // 59
+data8 0x3FCB1E104919EFD0 // 60
+data8 0x3FCB94EE93E367CB // 61
+data8 0x3FCBF851C067555F // 62
+data8 0x3FCC5C0254BF23A6 // 63
+data8 0x3FCCC000C9DB3C52 // 64
+data8 0x3FCD244D99C85674 // 65
+data8 0x3FCD88E93FB2F450 // 66
+data8 0x3FCDEDD437EAEF01 // 67
+data8 0x3FCE530EFFE71012 // 68
+data8 0x3FCEB89A1648B971 // 69
+data8 0x3FCF1E75FADF9BDE // 70
+data8 0x3FCF84A32EAD7C35 // 71
+data8 0x3FCFEB2233EA07CD // 72
+data8 0x3FD028F9C7035C1C // 73
+data8 0x3FD05C8BE0D9635A // 74
+data8 0x3FD085EB8F8AE797 // 75
+data8 0x3FD0B9C8E32D1911 // 76
+data8 0x3FD0EDD060B78081 // 77
+data8 0x3FD122024CF0063F // 78
+data8 0x3FD14BE2927AECD4 // 79
+data8 0x3FD180618EF18ADF // 80
+data8 0x3FD1B50BBE2FC63B // 81
+data8 0x3FD1DF4CC7CF242D // 82
+data8 0x3FD214456D0EB8D4 // 83
+data8 0x3FD23EC5991EBA49 // 84
+data8 0x3FD2740D9F870AFB // 85
+data8 0x3FD29ECDABCDFA04 // 86
+data8 0x3FD2D46602ADCCEE // 87
+data8 0x3FD2FF66B04EA9D4 // 88
+data8 0x3FD335504B355A37 // 89
+data8 0x3FD360925EC44F5D // 90
+data8 0x3FD38BF1C3337E75 // 91
+data8 0x3FD3C25277333184 // 92
+data8 0x3FD3EDF463C1683E // 93
+data8 0x3FD419B423D5E8C7 // 94
+data8 0x3FD44591E0539F49 // 95
+data8 0x3FD47C9175B6F0AD // 96
+data8 0x3FD4A8B341552B09 // 97
+data8 0x3FD4D4F3908901A0 // 98
+data8 0x3FD501528DA1F968 // 99
+data8 0x3FD52DD06347D4F6 // 100
+data8 0x3FD55A6D3C7B8A8A // 101
+data8 0x3FD5925D2B112A59 // 102
+data8 0x3FD5BF406B543DB2 // 103
+data8 0x3FD5EC433D5C35AE // 104
+data8 0x3FD61965CDB02C1F // 105
+data8 0x3FD646A84935B2A2 // 106
+data8 0x3FD6740ADD31DE94 // 107
+data8 0x3FD6A18DB74A58C5 // 108
+data8 0x3FD6CF31058670EC // 109
+data8 0x3FD6F180E852F0BA // 110
+data8 0x3FD71F5D71B894F0 // 111
+data8 0x3FD74D5AEFD66D5C // 112
+data8 0x3FD77B79922BD37E // 113
+data8 0x3FD7A9B9889F19E2 // 114
+data8 0x3FD7D81B037EB6A6 // 115
+data8 0x3FD8069E33827231 // 116
+data8 0x3FD82996D3EF8BCB // 117
+data8 0x3FD85855776DCBFB // 118
+data8 0x3FD8873658327CCF // 119
+data8 0x3FD8AA75973AB8CF // 120
+data8 0x3FD8D992DC8824E5 // 121
+data8 0x3FD908D2EA7D9512 // 122
+data8 0x3FD92C59E79C0E56 // 123
+data8 0x3FD95BD750EE3ED3 // 124
+data8 0x3FD98B7811A3EE5B // 125
+data8 0x3FD9AF47F33D406C // 126
+data8 0x3FD9DF270C1914A8 // 127
+data8 0x3FDA0325ED14FDA4 // 128
+data8 0x3FDA33440224FA79 // 129
+data8 0x3FDA57725E80C383 // 130
+data8 0x3FDA87D0165DD199 // 131
+data8 0x3FDAAC2E6C03F896 // 132
+data8 0x3FDADCCC6FDF6A81 // 133
+data8 0x3FDB015B3EB1E790 // 134
+data8 0x3FDB323A3A635948 // 135
+data8 0x3FDB56FA04462909 // 136
+data8 0x3FDB881AA659BC93 // 137
+data8 0x3FDBAD0BEF3DB165 // 138
+data8 0x3FDBD21297781C2F // 139
+data8 0x3FDC039236F08819 // 140
+data8 0x3FDC28CB1E4D32FD // 141
+data8 0x3FDC4E19B84723C2 // 142
+data8 0x3FDC7FF9C74554C9 // 143
+data8 0x3FDCA57B64E9DB05 // 144
+data8 0x3FDCCB130A5CEBB0 // 145
+data8 0x3FDCF0C0D18F326F // 146
+data8 0x3FDD232075B5A201 // 147
+data8 0x3FDD490246DEFA6B // 148
+data8 0x3FDD6EFA918D25CD // 149
+data8 0x3FDD9509707AE52F // 150
+data8 0x3FDDBB2EFE92C554 // 151
+data8 0x3FDDEE2F3445E4AF // 152
+data8 0x3FDE148A1A2726CE // 153
+data8 0x3FDE3AFC0A49FF40 // 154
+data8 0x3FDE6185206D516E // 155
+data8 0x3FDE882578823D52 // 156
+data8 0x3FDEAEDD2EAC990C // 157
+data8 0x3FDED5AC5F436BE3 // 158
+data8 0x3FDEFC9326D16AB9 // 159
+data8 0x3FDF2391A2157600 // 160
+data8 0x3FDF4AA7EE03192D // 161
+data8 0x3FDF71D627C30BB0 // 162
+data8 0x3FDF991C6CB3B379 // 163
+data8 0x3FDFC07ADA69A910 // 164
+data8 0x3FDFE7F18EB03D3E // 165
+data8 0x3FE007C053C5002E // 166
+data8 0x3FE01B942198A5A1 // 167
+data8 0x3FE02F74400C64EB // 168
+data8 0x3FE04360BE7603AD // 169
+data8 0x3FE05759AC47FE34 // 170
+data8 0x3FE06B5F1911CF52 // 171
+data8 0x3FE078BF0533C568 // 172
+data8 0x3FE08CD9687E7B0E // 173
+data8 0x3FE0A10074CF9019 // 174
+data8 0x3FE0B5343A234477 // 175
+data8 0x3FE0C974C89431CE // 176
+data8 0x3FE0DDC2305B9886 // 177
+data8 0x3FE0EB524BAFC918 // 178
+data8 0x3FE0FFB54213A476 // 179
+data8 0x3FE114253DA97D9F // 180
+data8 0x3FE128A24F1D9AFF // 181
+data8 0x3FE1365252BF0865 // 182
+data8 0x3FE14AE558B4A92D // 183
+data8 0x3FE15F85A19C765B // 184
+data8 0x3FE16D4D38C119FA // 185
+data8 0x3FE18203C20DD133 // 186
+data8 0x3FE196C7BC4B1F3B // 187
+data8 0x3FE1A4A738B7A33C // 188
+data8 0x3FE1B981C0C9653D // 189
+data8 0x3FE1CE69E8BB106B // 190
+data8 0x3FE1DC619DE06944 // 191
+data8 0x3FE1F160A2AD0DA4 // 192
+data8 0x3FE2066D7740737E // 193
+data8 0x3FE2147DBA47A394 // 194
+data8 0x3FE229A1BC5EBAC3 // 195
+data8 0x3FE237C1841A502E // 196
+data8 0x3FE24CFCE6F80D9A // 197
+data8 0x3FE25B2C55CD5762 // 198
+data8 0x3FE2707F4D5F7C41 // 199
+data8 0x3FE285E0842CA384 // 200
+data8 0x3FE294294708B773 // 201
+data8 0x3FE2A9A2670AFF0C // 202
+data8 0x3FE2B7FB2C8D1CC1 // 203
+data8 0x3FE2C65A6395F5F5 // 204
+data8 0x3FE2DBF557B0DF43 // 205
+data8 0x3FE2EA64C3F97655 // 206
+data8 0x3FE3001823684D73 // 207
+data8 0x3FE30E97E9A8B5CD // 208
+data8 0x3FE32463EBDD34EA // 209
+data8 0x3FE332F4314AD796 // 210
+data8 0x3FE348D90E7464D0 // 211
+data8 0x3FE35779F8C43D6E // 212
+data8 0x3FE36621961A6A99 // 213
+data8 0x3FE37C299F3C366A // 214
+data8 0x3FE38AE2171976E7 // 215
+data8 0x3FE399A157A603E7 // 216
+data8 0x3FE3AFCCFE77B9D1 // 217
+data8 0x3FE3BE9D503533B5 // 218
+data8 0x3FE3CD7480B4A8A3 // 219
+data8 0x3FE3E3C43918F76C // 220
+data8 0x3FE3F2ACB27ED6C7 // 221
+data8 0x3FE4019C2125CA93 // 222
+data8 0x3FE4181061389722 // 223
+data8 0x3FE42711518DF545 // 224
+data8 0x3FE436194E12B6BF // 225
+data8 0x3FE445285D68EA69 // 226
+data8 0x3FE45BCC464C893A // 227
+data8 0x3FE46AED21F117FC // 228
+data8 0x3FE47A1527E8A2D3 // 229
+data8 0x3FE489445EFFFCCC // 230
+data8 0x3FE4A018BCB69835 // 231
+data8 0x3FE4AF5A0C9D65D7 // 232
+data8 0x3FE4BEA2A5BDBE87 // 233
+data8 0x3FE4CDF28F10AC46 // 234
+data8 0x3FE4DD49CF994058 // 235
+data8 0x3FE4ECA86E64A684 // 236
+data8 0x3FE503C43CD8EB68 // 237
+data8 0x3FE513356667FC57 // 238
+data8 0x3FE522AE0738A3D8 // 239
+data8 0x3FE5322E26867857 // 240
+data8 0x3FE541B5CB979809 // 241
+data8 0x3FE55144FDBCBD62 // 242
+data8 0x3FE560DBC45153C7 // 243
+data8 0x3FE5707A26BB8C66 // 244
+data8 0x3FE587F60ED5B900 // 245
+data8 0x3FE597A7977C8F31 // 246
+data8 0x3FE5A760D634BB8B // 247
+data8 0x3FE5B721D295F10F // 248
+data8 0x3FE5C6EA94431EF9 // 249
+data8 0x3FE5D6BB22EA86F6 // 250
+data8 0x3FE5E6938645D390 // 251
+data8 0x3FE5F673C61A2ED2 // 252
+data8 0x3FE6065BEA385926 // 253
+data8 0x3FE6164BFA7CC06B // 254
+data8 0x3FE62643FECF9743 // 255
+LOCAL_OBJECT_END(log_data)
+
+
+// Code
+//==============================================================
.section .text
-.proc log1pf#
-.global log1pf#
-.align 64
-log1pf:
-#ifdef _LIBC
-.global __log1pf
-__log1pf:
-#endif
-
-{ .mfi
-alloc r32 = ar.pfs,0,22,4,0
-(p0) fsub.s1 FR_Neg_One = f0,f1
-(p0) cmp.eq.unc p7, p0 = r0, r0
-}
-
+GLOBAL_IEEE754_ENTRY(log1pf)
{ .mfi
-(p0) cmp.ne.unc p14, p0 = r0, r0
-(p0) fnorm.s1 FR_X_Prime = FR_Input_X
-(p0) cmp.eq.unc p15, p0 = r0, r0 ;;
+ getf.exp GR_signexp_x = f8 // if x is unorm then must recompute
+ fadd.s1 FR_Xp1 = f8, f1 // Form 1+x
+ mov GR_05 = 0xfffe
}
-
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
- nop.i 999
+{ .mlx
+ addl GR_ad_T = @ltoff(log_data),gp
+ movl GR_A3 = 0x3fd5555555555555 // double precision memory
+ // representation of A3
}
;;
{ .mfi
- nop.m 999
-(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF
- nop.i 999
+ ld8 GR_ad_T = [GR_ad_T]
+ fclass.m p8,p0 = f8,0xb // Is x unorm?
+ mov GR_exp_mask = 0x1ffff
}
-;;
-
{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f0
- nop.i 999
+ mov GR_025 = 0xfffd // Exponent of 0.25
+ fnorm.s1 FR_NormX = f8 // Normalize x
+ mov GR_exp_bias = 0xffff
}
+;;
{ .mfi
- nop.m 999
-(p0) fadd FR_Em1 = f0,f0
- nop.i 999 ;;
+ setf.exp FR_A2 = GR_05 // create A2 = 0.5
+ fclass.m p9,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p0) fadd FR_E = f0,f1
- nop.i 999 ;;
+{ .mib
+ setf.d FR_A3 = GR_A3 // create A3
+ nop.i 0
+(p8) br.cond.spnt log1p_unorm // Branch if x=unorm
}
+;;
+log1p_common:
{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, FR_Neg_One
- nop.i 999
+ setf.exp FR_A4 = GR_025 // create A4 = 0.25
+ frcpa.s1 FR_RcpX,p0 = f1,FR_Xp1
+ nop.i 0
}
-
-{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, FR_Neg_One
- nop.i 999
+{ .mfb
+ nop.m 0
+(p9) fma.s.s0 f8 = f8,f1,f0 // set V-flag
+(p9) br.ret.spnt b0 // exit for NaN, NaT and +Inf
}
-
-
-L(LOG_BEGIN):
+;;
{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_Z = FR_X_Prime, FR_E
- nop.i 999
+ getf.exp GR_Exp = FR_Xp1 // signexp of x+1
+ fclass.m p10,p0 = FR_Xp1,0x3A // is 1+x < 0?
+ and GR_exp_x = GR_exp_mask, GR_signexp_x // biased exponent of x
}
-
{ .mlx
- nop.m 999
-(p0) movl GR_Table_Scale = 0x0000000000000018 ;;
-}
-
-{ .mmi
- nop.m 999
-//
-// Create E = 1 and Em1 = 0
-// Check for X == 0, meaning log(1+0)
-// Check for X < -1, meaning log(negative)
-// Check for X == -1, meaning log(0)
-// Normalize x
-// Identify NatVals, NaNs, Infs.
-// Identify EM unsupporteds.
-// Identify Negative values - us S1 so as
-// not to raise denormal operand exception
-// Set p15 to true for log1pf
-// Set p14 to false for log1pf
-// Set p7 true for log and log1pf
-//
-(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp
- nop.i 999
+ nop.m 0
+ movl GR_Ln2 = 0x3FE62E42FEFA39EF // double precision memory
+ // representation of log(2)
}
+;;
{ .mfi
- nop.m 999
-(p0) fmax.s1 FR_AA = FR_X_Prime, FR_E
- nop.i 999 ;;
+ getf.sig GR_Sig = FR_Xp1 // get significand to calculate index
+ // for T if |x| >= 2^-8
+ fcmp.eq.s1 p12,p0 = f8,f0 // is x equal to 0?
+ sub GR_exp_x = GR_exp_x, GR_exp_bias // true exponent of x
}
+;;
{ .mfi
- ld8 GR_Table_Base = [GR_Table_Base]
-(p0) fmin.s1 FR_BB = FR_X_Prime, FR_E
- nop.i 999
+ sub GR_N = GR_Exp,GR_exp_bias // true exponent of x+1
+ fcmp.eq.s1 p11,p0 = FR_Xp1,f0 // is x = -1?
+ cmp.gt p6,p7 = -8, GR_exp_x // Is |x| < 2^-8
}
-
{ .mfb
- nop.m 999
-(p0) fadd.s1 FR_W = FR_X_Prime, FR_Em1
-//
-// Begin load of constants base
-// FR_Z = Z = |x| + E
-// FR_W = W = |x| + Em1
-// AA = fmax(|x|,E)
-// BB = fmin(|x|,E)
-//
-(p6) br.cond.spnt L(LOG_64_special) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p10) br.cond.spnt L(LOG_64_unsupported) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p13) br.cond.spnt L(LOG_64_negative) ;;
-}
-
-{ .mib
-(p0) getf.sig GR_signif = FR_Z
- nop.i 999
-(p9) br.cond.spnt L(LOG_64_one) ;;
-}
-
-{ .mib
- nop.m 999
- nop.i 999
-(p8) br.cond.spnt L(LOG_64_zero) ;;
-}
-
-{ .mfi
-(p0) getf.exp GR_N = FR_Z
-//
-// Raise possible denormal operand exception
-// Create Bias
-//
-// This function computes ln( x + e )
-// Input FR 1: FR_X = FR_Input_X
-// Input FR 2: FR_E = FR_E
-// Input FR 3: FR_Em1 = FR_Em1
-// Input GR 1: GR_Expo_Range = GR_Expo_Range = 1
-// Output FR 4: FR_Y_hi
-// Output FR 5: FR_Y_lo
-// Output FR 6: FR_Scale
-// Output PR 7: PR_Safe
-//
-(p0) fsub.s1 FR_S_lo = FR_AA, FR_Z
-//
-// signif = getf.sig(Z)
-// abs_W = fabs(w)
-//
-(p0) extr.u GR_Table_ptr = GR_signif, 59, 4 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fmerge.se FR_S_hi = f1,FR_Z
-(p0) extr.u GR_X_0 = GR_signif, 49, 15
-}
-
-{ .mmi
- nop.m 999
-(p0) addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp
- nop.i 999
+ nop.m 0
+ nop.f 0
+(p10) br.cond.spnt log1p_lt_minus_1 // jump if x < -1
}
;;
-{ .mlx
- ld8 GR_Table_Base1 = [GR_Table_Base1]
-(p0) movl GR_Bias = 0x000000000000FFFF ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fabs FR_abs_W = FR_W
-(p0) pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0
-}
-
-{ .mfi
- nop.m 999
-//
-// Branch out for special input values
-//
-(p0) fcmp.lt.unc.s0 p8, p0 = FR_Input_X, f0
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// X_0 = extr.u(signif,49,15)
-// Index1 = extr.u(signif,59,4)
-//
-(p0) fadd.s1 FR_S_lo = FR_S_lo, FR_BB
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// Offset_to_Z1 = 24 * Index1
-// For performance, don't use result
-// for 3 or 4 cycles.
-//
-(p0) add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;;
-}
-//
-// Add Base to Offset for Z1
-// Create Bias
-
-{ .mmi
-(p0) ld4 GR_Z_1 = [GR_Table_ptr],4 ;;
-(p0) ldfs FR_G = [GR_Table_ptr],4
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) ldfs FR_H = [GR_Table_ptr],8 ;;
-(p0) ldfd FR_h = [GR_Table_ptr],0
-(p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
-}
-//
-// Load Z_1
-// Get Base of Table2
-//
-
+// p6 is true if |x| < 1/256
+// p7 is true if |x| >= 1/256
+.pred.rel "mutex",p6,p7
{ .mfi
-(p0) getf.exp GR_M = FR_abs_W
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// M = getf.exp(abs_W)
-// S_lo = AA - Z
-// X_1 = pmpyshr2(X_0,Z_1,15)
-//
-(p0) sub GR_M = GR_M, GR_Bias ;;
+ nop.m 0
+(p6) fms.s1 FR_r = f8,f1,f0 // range reduction for |x|<1/256
+(p6) cmp.gt.unc p10,p0 = -40, GR_exp_x // Is |x| < 2^-40
}
-//
-// M = M - Bias
-// Load G1
-// N = getf.exp(Z)
-//
-
-{ .mii
-(p0) cmp.gt.unc p11, p0 = -80, GR_M
-(p0) cmp.gt.unc p12, p0 = -7, GR_M ;;
-(p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;;
-}
-
-{ .mib
- nop.m 999
-//
-// if -80 > M, set p11
-// Index2 = extr.u(X_1,6,4)
-// if -7 > M, set p12
-// Load H1
-//
-(p0) pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0
-(p11) br.cond.spnt L(log1pf_small) ;;
+{ .mfb
+(p7) setf.sig FR_N = GR_N // copy unbiased exponent of x+1 to the
+ // significand field of FR_N
+(p7) fms.s1 FR_r = FR_RcpX,FR_Xp1,f1 // range reduction for |x|>=1/256
+(p12) br.ret.spnt b0 // exit for x=0, return x
}
+;;
{ .mib
- nop.m 999
- nop.i 999
-(p12) br.cond.spnt L(log1pf_near) ;;
-}
-
-{ .mii
-(p0) sub GR_N = GR_N, GR_Bias
-//
-// poly_lo = r * poly_lo
-//
-(p0) add GR_Perturb = 0x1, r0 ;;
-(p0) sub GR_ScaleN = GR_Bias, GR_N
-}
-
-{ .mii
-(p0) setf.sig FR_float_N = GR_N
- nop.i 999 ;;
-//
-// Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
-// Load h1
-// S_lo = S_lo + BB
-// Branch for -80 > M
-//
-(p0) add GR_Index2 = GR_Index2, GR_Table_Base1
-}
-
-{ .mmi
-(p0) setf.exp FR_two_negN = GR_ScaleN
- nop.m 999
-(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp
-};;
-
-//
-// Index2 points to Z2
-// Branch for -7 > M
-//
-
-{ .mmb
-(p0) ld4 GR_Z_2 = [GR_Index2],4
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.b 999 ;;
-}
-(p0) nop.i 999
-//
-// Load Z_2
-// N = N - Bias
-// Tablebase points to Table3
-//
-
-{ .mmi
-(p0) ldfs FR_G_tmp = [GR_Index2],4 ;;
-//
-// Load G_2
-// pmpyshr2 X_2= (X_1,Z_2,15)
-// float_N = setf.sig(N)
-// ScaleN = Bias - N
-//
-(p0) ldfs FR_H_tmp = [GR_Index2],8
- nop.i 999 ;;
-}
-//
-// Load H_2
-// two_negN = setf.exp(scaleN)
-// G = G_1 * G_2
-//
-
-{ .mfi
-(p0) ldfd FR_h_tmp = [GR_Index2],0
- nop.f 999
-(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;;
-}
-
-{ .mii
- nop.m 999
-(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;;
-//
-// Load h_2
-// H = H_1 + H_2
-// h = h_1 + h_2
-// Index3 = extr.u(X_2,1,5)
-//
-(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Base
-}
-
-{ .mmi
- nop.m 999
- nop.m 999
-//
-// float_N = fcvt.xf(float_N)
-// load G3
-//
-(p0) addl GR_Table_Base = @ltoff(Constants_Q#),gp ;;
-}
-
-{ .mfi
-ld8 GR_Table_Base = [GR_Table_Base]
-nop.f 999
-nop.i 999
-} ;;
-
-{ .mfi
-(p0) ldfe FR_log2_hi = [GR_Table_Base],16
-(p0) fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN
- nop.i 999 ;;
-}
-
-{ .mmf
- nop.m 999
-//
-// G = G3 * G
-// Load h3
-// Load log2_hi
-// H = H + H3
-//
-(p0) ldfe FR_log2_lo = [GR_Table_Base],16
-(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp ;;
-}
-
-{ .mmf
-(p0) ldfs FR_G_tmp = [GR_Index3],4
-//
-// h = h + h3
-// r = G * S_hi + 1
-// Load log2_lo
-//
-(p0) ldfe FR_Q4 = [GR_Table_Base],16
-(p0) fadd.s1 FR_h = FR_h, FR_h_tmp ;;
-}
-
-{ .mfi
-(p0) ldfe FR_Q3 = [GR_Table_Base],16
-(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
- nop.i 999 ;;
-}
-
-{ .mmf
-(p0) ldfs FR_H_tmp = [GR_Index3],4
-(p0) ldfe FR_Q2 = [GR_Table_Base],16
-//
-// Comput Index for Table3
-// S_lo = S_lo * two_negN
-//
-(p0) fcvt.xf FR_float_N = FR_float_N ;;
+ setf.d FR_Ln2 = GR_Ln2 // create log(2)
+(p7) extr.u GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
+(p11) br.cond.spnt log1p_eq_minus_1 // jump if x = -1
}
-//
-// If S_lo == 0, set p8 false
-// Load H3
-// Load ptr to table of polynomial coeff.
-//
+;;
{ .mmf
-(p0) ldfd FR_h_tmp = [GR_Index3],0
-(p0) ldfe FR_Q1 = [GR_Table_Base],0
-(p0) fcmp.eq.unc.s1 p0, p8 = FR_S_lo, f0 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fms.s1 FR_r = FR_G, FR_S_hi, f1
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_h = FR_h, FR_h_tmp
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// Load Q4
-// Load Q3
-// Load Q2
-// Load Q1
-//
-(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-//
-// poly_lo = r * Q4 + Q3
-// rsq = r* r
-//
-(p0) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// If (S_lo!=0) r = s_lo * G + r
-//
-(p0) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
- nop.i 999
-}
-//
-// Create a 0x00000....01
-// poly_lo = poly_lo * rsq + h
-//
-
-{ .mfi
-(p0) setf.sig FR_dummy = GR_Perturb
-(p0) fmpy.s1 FR_rsq = FR_r, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// h = N * log2_lo + h
-// Y_hi = n * log2_hi + H
-//
-(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// poly_lo = r * poly_o + Q2
-// poly_hi = Q1 * rsq + r
-//
-(p0) fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p0) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo
-//
-// Create the FR for a binary "or"
-// Y_lo = poly_hi + poly_lo
-//
-// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
-//
-// Turn the lsb of Y_lo ON
-//
-// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
-//
-// Merge the new lsb into Y_lo, for alone doesn't
-//
-(p0) br.cond.sptk L(LOG_main) ;;
-}
-
-
-L(log1pf_near):
-
-{ .mmi
- nop.m 999
- nop.m 999
-// /*******************************************************/
-// /*********** Branch log1pf_near ************************/
-// /*******************************************************/
-(p0) addl GR_Table_Base = @ltoff(Constants_P#),gp ;;
-}
-//
-// Load base address of poly. coeff.
-//
-{.mmi
- nop.m 999
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.i 999
-};;
-
-{ .mmb
-(p0) add GR_Table_ptr = 0x40,GR_Table_Base
-//
-// Address tables with separate pointers
-//
-(p0) ldfe FR_P8 = [GR_Table_Base],16
- nop.b 999 ;;
+(p7) shladd GR_ad_T = GR_Ind,3,GR_ad_T // address of T
+ nop.m 0
+(p10) fnma.s.s0 f8 = f8,f8,f8 // If |x| very small, result=x-x*x
}
+;;
{ .mmb
-(p0) ldfe FR_P4 = [GR_Table_ptr],16
-//
-// Load P4
-// Load P8
-//
-(p0) ldfe FR_P7 = [GR_Table_Base],16
- nop.b 999 ;;
-}
-
-{ .mmf
-(p0) ldfe FR_P3 = [GR_Table_ptr],16
-//
-// Load P3
-// Load P7
-//
-(p0) ldfe FR_P6 = [GR_Table_Base],16
-(p0) fmpy.s1 FR_wsq = FR_W, FR_W ;;
-}
-
-{ .mfi
-(p0) ldfe FR_P2 = [GR_Table_ptr],16
- nop.f 999
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3
- nop.i 999
-}
-//
-// Load P2
-// Load P6
-// Wsq = w * w
-// Y_hi = p4 * w + p3
-//
-
-{ .mfi
-(p0) ldfe FR_P5 = [GR_Table_Base],16
-(p0) fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7
- nop.i 999 ;;
-}
-
-{ .mfi
-(p0) ldfe FR_P1 = [GR_Table_ptr],16
-//
-// Load P1
-// Load P5
-// Y_lo = p8 * w + P7
-//
-(p0) fmpy.s1 FR_w4 = FR_wsq, FR_wsq
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2
- nop.i 999
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6
-(p0) add GR_Perturb = 0x1, r0 ;;
-}
-
-{ .mfi
- nop.m 999
-//
-// w4 = w2 * w2
-// Y_hi = y_hi * w + p2
-// Y_lo = y_lo * w + p6
-// Create perturbation bit
-//
-(p0) fmpy.s1 FR_w6 = FR_w4, FR_wsq
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1
- nop.i 999
-}
-//
-// Y_hi = y_hi * w + p1
-// w6 = w4 * w2
-//
-
-{ .mfi
-(p0) setf.sig FR_Q4 = GR_Perturb
-(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_wsq,FR_Y_hi, FR_W
- nop.i 999
-}
-
-{ .mfb
- nop.m 999
-//
-// Y_hi = y_hi * wsq + w
-// Y_lo = y_lo * w + p5
-//
-(p0) fmpy.s1 FR_Y_lo = FR_w6, FR_Y_lo
-//
-// Y_lo = y_lo * w6
-//
-// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
-//
-// Set lsb on: Taken out to improve performance
-//
-// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
-//
-// Make sure it's on in Y_lo also. Taken out to improve
-// performance
-//
-(p0) br.cond.sptk L(LOG_main) ;;
-}
-
-
-L(log1pf_small):
-
-{ .mmi
- nop.m 999
- nop.m 999
-// /*******************************************************/
-// /*********** Branch log1pf_small ***********************/
-// /*******************************************************/
-(p0) addl GR_Table_Base = @ltoff(Constants_Threshold#),gp
+(p7) ldfd FR_T = [GR_ad_T]
+ nop.m 0
+(p10) br.ret.spnt b0 // Exit if |x| < 2^-40
}
+;;
{ .mfi
- nop.m 999
-(p0) mov FR_Em1 = FR_W
-(p0) cmp.eq.unc p7, p0 = r0, r0 ;;
-}
-
-{ .mlx
- ld8 GR_Table_Base = [GR_Table_Base]
-(p0) movl GR_Expo_Range = 0x0000000000000002 ;;
-}
-//
-// Set Safe to true
-// Set Expo_Range = 0 for single
-// Set Expo_Range = 2 for double
-// Set Expo_Range = 4 for double-extended
-//
-
-{ .mmi
-(p0) shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;;
-(p0) ldfe FR_Threshold = [GR_Table_Base],16
- nop.i 999
+ nop.m 0
+ fma.s1 FR_r2 = FR_r,FR_r,f0 // r^2
+ nop.i 0
}
-
-{ .mlx
- nop.m 999
-(p0) movl GR_Bias = 0x000000000000FF9B ;;
-}
-
{ .mfi
-(p0) ldfe FR_Tiny = [GR_Table_Base],0
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fnma.s1 FR_A2 = FR_A2,FR_r,f1 // 1.0 - A2*r
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p0) fcmp.gt.unc.s1 p13, p12 = FR_abs_W, FR_Threshold
- nop.i 999 ;;
+ nop.m 0
+ fnma.s1 FR_A3 = FR_A4,FR_r,FR_A3 // A3 - A4*r
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W
- nop.i 999
+ nop.m 0
+(p7) fcvt.xf FR_N = FR_N
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p13) fadd FR_SCALE = f0, f1
- nop.i 999 ;;
+ nop.m 0
+ // (A3*r+A2)*r^2+r
+ fma.s1 FR_A2 = FR_A3,FR_r2,FR_A2 // (A4*r+A3)*r^2+(A2*r+1)
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny
-(p12) cmp.ne.unc p7, p0 = r0, r0
+ nop.m 0
+ // N*Ln2hi+T
+(p7) fma.s1 FR_NxLn2pT = FR_N,FR_Ln2,FR_T
+ nop.i 0
}
+;;
+.pred.rel "mutex",p6,p7
{ .mfi
-(p12) setf.exp FR_SCALE = GR_Bias
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+(p6) fma.s.s0 f8 = FR_A2,FR_r,f0 // result if 2^(-40) <= |x| < 1/256
+ nop.i 0
}
-
-//
-// Set p7 to SAFE = FALSE
-// Set Scale = 2^-100
-//
{ .mfb
- nop.m 999
-(p0) fma.s.s0 FR_Input_X = FR_Y_lo,FR_SCALE,FR_Y_hi
-(p0) br.ret.sptk b0
+ nop.m 0
+(p7) fma.s.s0 f8 = FR_A2,FR_r,FR_NxLn2pT // result if |x| >= 1/256
+ br.ret.sptk b0 // Exit if |x| >= 2^(-40)
}
;;
-L(LOG_64_one):
-
+.align 32
+log1p_unorm:
+// Here if x=unorm
{ .mfb
- nop.m 999
-(p0) fmpy.s.s0 FR_Input_X = FR_Input_X, f0
-(p0) br.ret.sptk b0
+ getf.exp GR_signexp_x = FR_NormX // recompute biased exponent
+ nop.f 0
+ br.cond.sptk log1p_common
}
;;
-//
-// Raise divide by zero for +/-0 input.
-//
-
-L(LOG_64_zero):
+.align 32
+log1p_eq_minus_1:
+// Here if x=-1
{ .mfi
-(p0) mov GR_Parameter_TAG = 142
-//
-// If we have log1pf(0), return -Inf.
-//
-(p0) fsub.s0 FR_Output_X_tmp = f0, f1
- nop.i 999 ;;
+ nop.m 0
+ fmerge.s FR_X = f8,f8 // keep input argument for subsequent
+ // call of __libm_error_support#
+ nop.i 0
}
-{ .mfb
- nop.m 999
-(p0) frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
-(p0) br.cond.sptk L(LOG_ERROR_Support) ;;
-}
-
-L(LOG_64_special):
+;;
{ .mfi
- nop.m 999
-//
-// Return -Inf or value from handler.
-//
-(p0) fclass.m.unc p7, p0 = FR_Input_X, 0x1E1
- nop.i 999 ;;
+ mov GR_TAG = 142 // set libm error in case of log1p(-1).
+ frcpa.s0 f8,p0 = f8,f0 // log1p(-1) should be equal to -INF.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of f8/f0.
+ nop.i 0
}
-
-{ .mfb
- nop.m 999
-//
-// Check for Natval, QNan, SNaN, +Inf
-//
-(p7) fmpy.s.s0 f8 = FR_Input_X, f1
-//
-// For SNaN raise invalid and return QNaN.
-// For QNaN raise invalid and return QNaN.
-// For +Inf return +Inf.
-//
-(p7) br.ret.sptk b0
+{ .mib
+ nop.m 0
+ nop.i 0
+ br.cond.sptk log_libm_err
}
;;
-//
-// For -Inf raise invalid and return QNaN.
-//
-
-{ .mfb
-(p0) mov GR_Parameter_TAG = 143
-(p0) fmpy.s.s0 FR_Output_X_tmp = FR_Input_X, f0
-(p0) br.cond.sptk L(LOG_ERROR_Support) ;;
+.align 32
+log1p_lt_minus_1:
+// Here if x < -1
+{ .mfi
+ nop.m 0
+ fmerge.s FR_X = f8,f8
+ nop.i 0
}
+;;
-//
-// Report that log1pf(-Inf) computed
-//
-
-L(LOG_64_unsupported):
-
-//
-// Return generated NaN or other value .
-//
-
-{ .mfb
- nop.m 999
-(p0) fmpy.s.s0 FR_Input_X = FR_Input_X, f0
-(p0) br.ret.sptk b0 ;;
+{ .mfi
+ mov GR_TAG = 143 // set libm error in case of x < -1.
+ frcpa.s0 f8,p0 = f0,f0 // log1p(x) x < -1 should be equal to NaN.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of f0/f0 i.e. NaN.
+ nop.i 0
}
+;;
-L(LOG_64_negative):
-
-{ .mfi
- nop.m 999
-//
-// Deal with x < 0 in a special way
-//
-(p0) frcpa.s0 FR_Output_X_tmp, p8 = f0, f0
-//
-// Deal with x < 0 in a special way - raise
-// invalid and produce QNaN indefinite.
-//
-(p0) mov GR_Parameter_TAG = 143;;
+.align 32
+log_libm_err:
+{ .mmi
+ alloc r32 = ar.pfs,1,4,4,0
+ mov GR_Parameter_TAG = GR_TAG
+ nop.i 0
}
+;;
-.endp log1pf#
-ASM_SIZE_DIRECTIVE(log1pf)
+GLOBAL_IEEE754_END(log1pf)
-.proc __libm_error_region
-__libm_error_region:
-L(LOG_ERROR_Support):
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
-
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y = -32,sp // Parameter 2 value
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp = -64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP = gp // Save gp
};;
-
-
-// (2)
{ .mmi
- stfs [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0 = b0 // Save b0
};;
-
.body
-// (3)
{ .mib
- stfs [GR_Parameter_X] =FR_Input_X // STORE Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = FR_Output_X_tmp // STORE Parameter 3 on stack
+ stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-
-// (4)
{ .mmi
- ldfs FR_Input_X = [GR_Parameter_RESULT] // Get return result off stack
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-
-.proc __libm_LOG_main
-__libm_LOG_main:
-L(LOG_main):
-
-//
-// kernel_log_64 computes ln(X + E)
-//
-
-{ .mfi
- nop.m 999
-(p7) fadd.s.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
- nop.i 999
-}
-
-{ .mmi
- nop.m 999
- nop.m 999
-(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;;
-}
-
-{ .mmi
- nop.m 999
-(p14) ld8 GR_Table_Base = [GR_Table_Base]
- nop.i 999
-};;
-
-{ .mmi
-(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;;
-(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi
- nop.i 999 ;;
-}
-
-{ .mfi
- nop.m 999
-(p14) fma.s1 FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp
- nop.i 999 ;;
-}
-
-{ .mfb
- nop.m 999
-(p14) fma.s.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp
-(p0) br.ret.sptk b0 ;;
-}
-.endp __libm_LOG_main
-ASM_SIZE_DIRECTIVE(__libm_LOG_main)
-
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
.global __libm_error_support#
+
diff --git a/sysdeps/ia64/fpu/s_log1pl.S b/sysdeps/ia64/fpu/s_log1pl.S
index 7cd3f7834c..d392a58edf 100644
--- a/sysdeps/ia64/fpu/s_log1pl.S
+++ b/sysdeps/ia64/fpu/s_log1pl.S
@@ -1,10 +1,10 @@
.file "log1pl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,55 +35,49 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// *********************************************************************
+//*********************************************************************
//
// History:
-// 2/02/00 hand-optimized
-// 4/04/00 Unwind support added
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/21/01 Removed logl and log10l, putting them in a separate file
+// 06/29/01 Improved speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
//
-// *********************************************************************
+//*********************************************************************
//
-// *********************************************************************
+//*********************************************************************
//
-// Function: Combined logl(x), log1pl(x), and log10l(x) where
-// logl(x) = ln(x), for double-extended precision x values
-// log1pl(x) = ln(x+1), for double-extended precision x values
-// log10l(x) = log (x), for double-extended precision x values
-// 10
+// Function: log1pl(x) = ln(x+1), for double-extended precision x values
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
-// f9,f33-f55,f99
+// f34-f82
//
// General Purpose Registers:
-// r32-r53
-// r54-r57 (Used to pass arguments to error handling routine)
+// r32-r56
+// r53-r56 (Used to pass arguments to error handling routine)
//
-// Predicate Registers: p6-p15
+// Predicate Registers: p6-p13
//
-// *********************************************************************
+//*********************************************************************
//
// IEEE Special Conditions:
//
-// Denormal fault raised on denormal inputs
+// Denormal fault raised on denormal inputs
// Overflow exceptions cannot occur
// Underflow exceptions raised when appropriate for log1p
-// (Error Handling Routine called for underflow)
// Inexact raised when appropriate by algorithm
//
-// logl(inf) = inf
-// logl(-inf) = QNaN
-// logl(+/-0) = -inf
-// logl(SNaN) = QNaN
-// logl(QNaN) = QNaN
-// logl(EM_special Values) = QNaN
// log1pl(inf) = inf
// log1pl(-inf) = QNaN
// log1pl(+/-0) = +/-0
@@ -91,54 +85,37 @@
// log1pl(SNaN) = QNaN
// log1pl(QNaN) = QNaN
// log1pl(EM_special Values) = QNaN
-// log10l(inf) = inf
-// log10l(-inf) = QNaN
-// log10l(+/-0) = -inf
-// log10l(SNaN) = QNaN
-// log10l(QNaN) = QNaN
-// log10l(EM_special Values) = QNaN
-//
-// *********************************************************************
-//
-// Computation is based on the following kernel.
-//
-// ker_log_64( in_FR : X,
-// in_FR : E,
-// in_FR : Em1,
-// in_GR : Expo_Range,
-// out_FR : Y_hi,
-// out_FR : Y_lo,
-// out_FR : Scale,
-// out_PR : Safe )
-//
+//
+//*********************************************************************
+//
// Overview
//
// The method consists of three cases.
//
-// If |X+Em1| < 2^(-80) use case log1pl_small;
-// elseif |X+Em1| < 2^(-7) use case log_near1;
-// else use case log_regular;
+// If |X| < 2^(-80) use case log1p_small;
+// elseif |X| < 2^(-7) use case log_near1;
+// else use case log_regular;
//
-// Case log1pl_small:
+// Case log1p_small:
//
-// logl( 1 + (X+Em1) ) can be approximated by (X+Em1).
+// log1pl( X ) = logl( X+1 ) can be approximated by X
//
// Case log_near1:
//
-// logl( 1 + (X+Em1) ) can be approximated by a simple polynomial
-// in W = X+Em1. This polynomial resembles the truncated Taylor
+// log1pl( X ) = log( X+1 ) can be approximated by a simple polynomial
+// in W = X. This polynomial resembles the truncated Taylor
// series W - W^2/2 + W^3/3 - ...
//
// Case log_regular:
//
// Here we use a table lookup method. The basic idea is that in
-// order to compute logl(Arg) for an argument Arg in [1,2), we
-// construct a value G such that G*Arg is close to 1 and that
+// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
+// we construct a value G such that G*Arg is close to 1 and that
// logl(1/G) is obtainable easily from a table of values calculated
// beforehand. Thus
//
-// logl(Arg) = logl(1/G) + logl(G*Arg)
-// = logl(1/G) + logl(1 + (G*Arg - 1))
+// logl(Arg) = logl(1/G) + logl(G*Arg)
+// = logl(1/G) + logl(1 + (G*Arg - 1))
//
// Because |G*Arg - 1| is small, the second term on the right hand
// side can be approximated by a short polynomial. We elaborate
@@ -146,9 +123,9 @@
//
// Step 0: Initialization
//
-// We need to calculate logl( E + X ). Obtain N, S_hi, S_lo such that
+// We need to calculate logl( X+1 ). Obtain N, S_hi, S_lo such that
//
-// E + X = 2^N * ( S_hi + S_lo ) exactly
+// X+1 = 2^N * ( S_hi + S_lo ) exactly
//
// where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
// that |S_lo| <= ulp(S_hi).
@@ -157,8 +134,8 @@
//
// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
//
-// G := G_1 * G_2 * G_3
-// r := (G * S_hi - 1) + G * S_lo
+// G := G_1 * G_2 * G_3
+// r := (G * S_hi - 1) + G * S_lo
//
// These G_j's have the property that the product is exactly
// representable and that |r| < 2^(-12) as a result.
@@ -171,61 +148,34 @@
// Step 3: Reconstruction
//
//
-// Finally, logl( E + X ) is given by
+// Finally, log1pl( X ) = logl( X+1 ) is given by
//
-// logl( E + X ) = logl( 2^N * (S_hi + S_lo) )
+// logl( X+1 ) = logl( 2^N * (S_hi + S_lo) )
// ~=~ N*logl(2) + logl(1/G) + logl(1 + r)
// ~=~ N*logl(2) + logl(1/G) + poly(r).
//
// **** Algorithm ****
//
-// Case log1pl_small:
-//
-// Although logl(1 + (X+Em1)) is basically X+Em1, we would like to
-// preserve the inexactness nature as well as consistent behavior
-// under different rounding modes. Note that this case can only be
-// taken if E is set to be 1.0. In this case, Em1 is zero, and that
-// X can be very tiny and thus the final result can possibly underflow.
-// Thus, we compare X against a threshold that is dependent on the
-// input Expo_Range. If |X| is smaller than this threshold, we set
-// SAFE to be FALSE.
-//
-// The result is returned as Y_hi, Y_lo, and in the case of SAFE
-// is FALSE, an additional value Scale is also returned.
-//
-// W := X + Em1
-// Threshold := Threshold_Table( Expo_Range )
-// Tiny := Tiny_Table( Expo_Range )
-//
-// If ( |W| > Threshold ) then
-// Y_hi := W
-// Y_lo := -W*W
-// Else
-// Y_hi := W
-// Y_lo := -Tiny
-// Scale := 2^(-100)
-// Safe := FALSE
-// EndIf
-//
-//
-// One may think that Y_lo should be -W*W/2; however, it does not matter
-// as Y_lo will be rounded off completely except for the correct effect in
-// directed rounding. Clearly -W*W is simplier to compute. Moreover,
-// because of the difference in exponent value, Y_hi + Y_lo or
-// Y_hi + Scale*Y_lo is always inexact.
+// Case log1p_small:
+//
+// Although log1pl(X) is basically X, we would like to preserve the inexactness
+// nature as well as consistent behavior under different rounding modes.
+// We can do this by computing the result as
+//
+// log1pl(X) = X - X*X
+//
//
// Case log_near1:
//
// Here we compute a simple polynomial. To exploit parallelism, we split
// the polynomial into two portions.
//
-// W := X + Em1
-// Wsq := W * W
-// W4 := Wsq*Wsq
-// W6 := W4*Wsq
-// Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4))
-// Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
-// set lsb(Y_lo) to be 1
+// W := X
+// Wsq := W * W
+// W4 := Wsq*Wsq
+// W6 := W4*Wsq
+// Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4))
+// Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
//
// Case log_regular:
//
@@ -234,89 +184,87 @@
// Step 0. Initialization
// ----------------------
//
-// Z := X + E
+// Z := X + 1
// N := unbiased exponent of Z
// S_hi := 2^(-N) * Z
-// S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
-//
-// Note that S_lo is always 0 for the case E = 0.
+// S_lo := 2^(-N) * { (max(X,1)-Z) + min(X,1) }
//
// Step 1. Argument Reduction
// --------------------------
//
// Let
//
-// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
+// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
//
// We obtain G_1, G_2, G_3 by the following steps.
//
//
-// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
-// from S_hi.
+// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
+// from S_hi.
//
-// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
-// to lsb = 2^(-4).
+// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
+// to lsb = 2^(-4).
//
-// Define index_1 := [ d_1 d_2 d_3 d_4 ].
+// Define index_1 := [ d_1 d_2 d_3 d_4 ].
//
-// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
-// fixed point lsb = 2^(-15).
-// Z_1 looks like z_0.z_1 z_2 ... z_15
-// Note that the fetching is done using index_1.
-// A_1 is actually not needed in the implementation
-// and is used here only to explain how is the value
-// Z_1 defined.
+// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
+// fixed point lsb = 2^(-15).
+// Z_1 looks like z_0.z_1 z_2 ... z_15
+// Note that the fetching is done using index_1.
+// A_1 is actually not needed in the implementation
+// and is used here only to explain how the value
+// Z_1 is defined.
//
-// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
-// floating pt. Again, fetching is done using index_1. A_1
-// explains how G_1 is defined.
+// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
+// floating pt. Again, fetching is done using index_1. A_1
+// explains how G_1 is defined.
//
-// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
-// = 1.0 0 0 0 d_5 ... d_14
-// This is accomplised by integer multiplication.
-// It is proved that X_1 indeed always begin
-// with 1.0000 in fixed point.
+// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 d_5 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_1 indeed always begins
+// with 1.0000 in fixed point.
//
//
-// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
-// truncated to lsb = 2^(-8). Similar to A_1,
-// A_2 is not needed in actual implementation. It
-// helps explain how some of the values are defined.
+// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
+// truncated to lsb = 2^(-8). Similar to A_1,
+// A_2 is not needed in actual implementation. It
+// helps explain how some of the values are defined.
//
-// Define index_2 := [ d_5 d_6 d_7 d_8 ].
+// Define index_2 := [ d_5 d_6 d_7 d_8 ].
//
-// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
-// fixed point lsb = 2^(-15). Fetch done using index_2.
-// Z_2 looks like z_0.z_1 z_2 ... z_15
+// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
+// fixed point lsb = 2^(-15). Fetch done using index_2.
+// Z_2 looks like z_0.z_1 z_2 ... z_15
//
-// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
-// floating pt.
+// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
+// floating pt.
//
-// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
-// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_2 indeed always begins
+// with 1.00000000 in fixed point.
+// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+// This is accomplised by integer multiplication.
+// It is proved that X_2 indeed always begin
+// with 1.00000000 in fixed point.
//
//
-// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
-// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
+// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
//
-// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
//
-// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
-// floating pt. Fetch is done using index_3.
+// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
+// floating pt. Fetch is done using index_3.
//
-// Compute G := G_1 * G_2 * G_3.
+// Compute G := G_1 * G_2 * G_3.
//
-// This is done exactly since each of G_j only has 21 sig. bits.
+// This is done exactly since each of G_j only has 21 sig. bits.
//
-// Compute
+// Compute
//
-// r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
+// r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
//
-// thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of
-// rounding errors.
+// Thus r approximates G*(S_hi + S_lo) - 1 to within a couple of
+// rounding errors.
//
//
// Step 2. Approximation
@@ -326,1258 +274,878 @@
// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
// thus logl(1+r) can be approximated by a short polynomial:
//
-// logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+// logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
//
//
// Step 3. Reconstruction
// ----------------------
//
-// This step computes the desired result of logl(X+E):
+// This step computes the desired result of logl(X+1):
//
-// logl(X+E) = logl( 2^N * (S_hi + S_lo) )
-// = N*logl(2) + logl( S_hi + S_lo )
-// = N*logl(2) + logl(1/G) +
-// logl(1 + C*(S_hi+S_lo) - 1 )
+// logl(X+1) = logl( 2^N * (S_hi + S_lo) )
+// = N*logl(2) + logl( S_hi + S_lo )
+// = N*logl(2) + logl(1/G) +
+// logl(1 + G * ( S_hi + S_lo ) - 1 )
//
// logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:
// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
// single-precision numbers and the low parts are double precision
// numbers. These have the property that
//
-// N*log2_hi + SUM ( log1byGj_hi )
+// N*log2_hi + SUM ( log1byGj_hi )
//
// is computable exactly in double-extended precision (64 sig. bits).
// Finally
//
-// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
-// Y_lo := poly_hi + [ poly_lo +
-// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
-// set lsb(Y_lo) to be 1
+// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
+// Y_lo := poly_hi + [ poly_lo +
+// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
//
-#include "libm_support.h"
+RODATA
+.align 64
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
-// P_7, P_6, P_5, P_4, P_3, P_2, and P_1
+// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
+
+LOCAL_OBJECT_START(Constants_P)
+//data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
+//data4 0xA5E56381,0x8003B271,0x0000BFFC,0x00000000
+//data4 0x73282DB0,0x9249248C,0x00003FFC,0x00000000
+//data4 0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000
+//data4 0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000
+//data4 0x00067ED5,0x80000000,0x0000BFFD,0x00000000
+//data4 0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000
+//data4 0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000
+data8 0xE3936754EFD62B15,0x00003FFB
+data8 0x8003B271A5E56381,0x0000BFFC
+data8 0x9249248C73282DB0,0x00003FFC
+data8 0xAAAAAA9F47305052,0x0000BFFC
+data8 0xCCCCCCCCCCD17FC9,0x00003FFC
+data8 0x8000000000067ED5,0x0000BFFD
+data8 0xAAAAAAAAAAAAAAAA,0x00003FFD
+data8 0xFFFFFFFFFFFFFFFE,0x0000BFFD
+LOCAL_OBJECT_END(Constants_P)
-.align 64
-Constants_P:
-ASM_TYPE_DIRECTIVE(Constants_P,@object)
-data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
-data4 0xA5E56381,0x8003B271,0x0000BFFC,0x00000000
-data4 0x73282DB0,0x9249248C,0x00003FFC,0x00000000
-data4 0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000
-data4 0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000
-data4 0x00067ED5,0x80000000,0x0000BFFD,0x00000000
-data4 0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000
-data4 0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_P)
-
// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
-.align 64
-Constants_Q:
-ASM_TYPE_DIRECTIVE(Constants_Q,@object)
-data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
-data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
-data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
-data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
-data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
-data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_Q)
-
-// Z1 - 16 bit fixed, G1 and H1 - IEEE single
-
-.align 64
-Constants_Z_G_H_h1:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
-data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
-data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6
-data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
-data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
-data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
-data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
-data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
-data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
-data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
-data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
-data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
-data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
-data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
-data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
-data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
-data4 0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
-
-// Z2 - 16 bit fixed, G2 and H2 - IEEE single
-
-.align 64
-Constants_Z_G_H_h2:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
-data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
-data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116
-data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
-data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
-data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
-data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
-data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
-data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
-data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
-data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
-data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
-data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
-data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
-data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
-data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
-data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
-
-// G3 and H3 - IEEE single and h3 -IEEE double
-
-.align 64
-Constants_Z_G_H_h3:
-ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
-data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
-data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
-data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
-data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
-data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
-data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
-data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
-data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
-data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
-data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
-data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
-data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
-data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
-data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
-data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
-data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
-data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
-data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
-data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
-data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
-data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
-data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
-data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
-data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
-data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
-data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
-data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
-data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
-data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
-data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
-data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
-data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
-ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
+LOCAL_OBJECT_START(Constants_Q) // Six 16-byte entries, read sequentially through GR_ad_q with ldfe
+//data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
+//data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
+//data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
+//data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
+//data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
+//data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
+data8 0xB172180000000000,0x00003FFE // log2_hi: leading bits of ln(2), low 40 significand bits zero
+data8 0x82E308654361C4C6,0x0000BFE2 // log2_lo: small negative correction so that hi + lo = ln(2)
+data8 0xCCCCCAF2328833CB,0x00003FFC // Q_4, approx +1/5
+data8 0x80000077A9D4BAFB,0x0000BFFD // Q_3, approx -1/4
+data8 0xAAAAAAAAAAABE3D2,0x00003FFD // Q_2, approx +1/3
+data8 0xFFFFFFFFFFFFDAB7,0x0000BFFD // Q_1, approx -1/2
+LOCAL_OBJECT_END(Constants_Q) // 0x60 bytes; Constants_1_by_LN10 must follow directly (code uses GR_ad_q + 0x060)
+
+// 1/ln10_hi, 1/ln10_lo
+
+LOCAL_OBJECT_START(Constants_1_by_LN10) // Two 16-byte entries; address is formed in GR_ad_ln10
+//data4 0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000
+//data4 0xACCF70C8,0xD56EAABE,0x00003FBB,0x00000000
+data8 0xDE5BD8A937287195,0x00003FFD // 1/ln10_hi, approx 0.4343
+data8 0xD56EAABEACCF70C8,0x00003FBB // 1/ln10_lo, low-order correction (exponent 2^-68)
+LOCAL_OBJECT_END(Constants_1_by_LN10) // 0x20 bytes; P + Q + this = 0x100, matching the -0x100 offset from Constants_Z_1
+
+
+// Z1 - 16 bit fixed
-//
-// Exponent Thresholds and Tiny Thresholds
-// for 8, 11, 15, and 17 bit exponents
-//
-// Expo_Range Value
-//
-// 0 (8 bits) 2^(-126)
-// 1 (11 bits) 2^(-1022)
-// 2 (15 bits) 2^(-16382)
-// 3 (17 bits) 2^(-16382)
-//
-// Tiny_Table
-// ----------
-// Expo_Range Value
-//
-// 0 (8 bits) 2^(-16382)
-// 1 (11 bits) 2^(-16382)
-// 2 (15 bits) 2^(-16382)
-// 3 (17 bits) 2^(-16382)
-//
+LOCAL_OBJECT_START(Constants_Z_1) // 16 four-byte words; entry i is selected by GR_Index1 (shladd by 2) and read with ld4
+data4 0x00008000 // i = 0: 2^15
+data4 0x00007879 // i = 1: approx 2^15 * 16/17; later entries follow 2^15 * 16/(16+i)
+data4 0x000071C8 // i = 2
+data4 0x00006BCB // i = 3
+data4 0x00006667 // i = 4
+data4 0x00006187 // i = 5
+data4 0x00005D18 // i = 6
+data4 0x0000590C // i = 7
+data4 0x00005556 // i = 8
+data4 0x000051EC // i = 9
+data4 0x00004EC5 // i = 10
+data4 0x00004BDB // i = 11
+data4 0x00004925 // i = 12
+data4 0x0000469F // i = 13
+data4 0x00004445 // i = 14
+data4 0x00004211 // i = 15: approx 2^15 * 16/31
+LOCAL_OBJECT_END(Constants_Z_1) // 0x40 bytes; Constants_G_H_h1 is addressed as GR_ad_z_1 + 0x040
-.align 64
-Constants_Threshold:
-ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
-data4 0x00000000,0x80000000,0x00003F81,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00003C01,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-data4 0x00000000,0x80000000,0x00000001,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_Threshold)
+// G1 and H1 - IEEE single and h1 - IEEE double
-.align 64
-Constants_1_by_LN10:
-ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
-data4 0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000
-data4 0xACCF70C8,0xD56EAABE,0x00003FBB,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
-
-FR_Input_X = f8
-FR_Neg_One = f9
-FR_E = f33
-FR_Em1 = f34
-FR_Y_hi = f34
-// Shared with Em1
-FR_Y_lo = f35
-FR_Scale = f36
-FR_X_Prime = f37
-FR_Z = f38
-FR_S_hi = f38
-// Shared with Z
-FR_W = f39
-FR_G = f40
-FR_wsq = f40
-// Shared with G
-FR_H = f41
-FR_w4 = f41
-// Shared with H
-FR_h = f42
-FR_w6 = f42
-// Shared with h
-FR_G_tmp = f43
-FR_poly_lo = f43
-// Shared with G_tmp
-FR_P8 = f43
-// Shared with G_tmp
-FR_H_tmp = f44
-FR_poly_hi = f44
- // Shared with H_tmp
-FR_P7 = f44
-// Shared with H_tmp
-FR_h_tmp = f45
-FR_rsq = f45
-// Shared with h_tmp
-FR_P6 = f45
-// Shared with h_tmp
-FR_abs_W = f46
-FR_r = f46
-// Shared with abs_W
-FR_AA = f47
-FR_log2_hi = f47
-// Shared with AA
-FR_BB = f48
-FR_log2_lo = f48
-// Shared with BB
-FR_S_lo = f49
-FR_two_negN = f50
-FR_float_N = f51
-FR_Q4 = f52
-FR_dummy = f52
-// Shared with Q4
-FR_P4 = f52
-// Shared with Q4
-FR_Threshold = f52
-// Shared with Q4
-FR_Q3 = f53
-FR_P3 = f53
-// Shared with Q3
-FR_Tiny = f53
-// Shared with Q3
-FR_Q2 = f54
-FR_P2 = f54
-// Shared with Q2
-FR_1LN10_hi = f54
-// Shared with Q2
-FR_Q1 = f55
-FR_P1 = f55
-// Shared with Q1
-FR_1LN10_lo = f55
-// Shared with Q1
-FR_P5 = f98
-FR_SCALE = f98
-FR_Output_X_tmp = f99
-
-GR_Expo_Range = r32
-GR_Table_Base = r34
-GR_Table_Base1 = r35
-GR_Table_ptr = r36
-GR_Index2 = r37
-GR_signif = r38
-GR_X_0 = r39
-GR_X_1 = r40
-GR_X_2 = r41
-GR_Z_1 = r42
-GR_Z_2 = r43
-GR_N = r44
-GR_Bias = r45
-GR_M = r46
-GR_ScaleN = r47
-GR_Index3 = r48
-GR_Perturb = r49
-GR_Table_Scale = r50
+LOCAL_OBJECT_START(Constants_G_H_h1) // 16 entries of 16 bytes, selected by GR_Index1 (shladd by 4)
+data4 0x3F800000,0x00000000 // entry 0: G_1, H_1 as two IEEE singles (loaded together with ldfps)
+data8 0x0000000000000000 // entry 0: h_1 as IEEE double (loaded with ldfd)
+data4 0x3F70F0F0,0x3D785196
+data8 0x3DA163A6617D741C
+data4 0x3F638E38,0x3DF13843
+data8 0x3E2C55E6CBD3D5BB
+data4 0x3F579430,0x3E2FF9A0
+data8 0xBE3EB0BFD86EA5E7
+data4 0x3F4CCCC8,0x3E647FD6
+data8 0x3E2E6A8C86B12760
+data4 0x3F430C30,0x3E8B3AE7
+data8 0x3E47574C5C0739BA
+data4 0x3F3A2E88,0x3EA30C68
+data8 0x3E20E30F13E8AF2F
+data4 0x3F321640,0x3EB9CEC8
+data8 0xBE42885BF2C630BD
+data4 0x3F2AAAA8,0x3ECF9927
+data8 0x3E497F3497E577C6
+data4 0x3F23D708,0x3EE47FC5
+data8 0x3E3E6A6EA6B0A5AB
+data4 0x3F1D89D8,0x3EF8947D
+data8 0xBDF43E3CD328D9BE
+data4 0x3F17B420,0x3F05F3A1
+data8 0x3E4094C30ADB090A
+data4 0x3F124920,0x3F0F4303
+data8 0xBE28FBB2FC1FE510
+data4 0x3F0D3DC8,0x3F183EBF
+data8 0x3E3A789510FDE3FA
+data4 0x3F088888,0x3F20EC80
+data8 0x3E508CE57CC8C98F
+data4 0x3F042108,0x3F29516A
+data8 0xBE534874A223106C
+LOCAL_OBJECT_END(Constants_G_H_h1) // 0x100 bytes; Constants_Z_2 is addressed as GR_ad_z_1 + 0x140
-//
-// Added for unwind support
-//
+// Z2 - 16 bit fixed
+
+LOCAL_OBJECT_START(Constants_Z_2) // 16 four-byte words; entry i is selected by GR_Index2 (shladd by 2) and read with ld4
+data4 0x00008000 // i = 0: 2^15
+data4 0x00007F81 // i = 1: approx 2^15 * 256/257; later entries follow 2^15 * 256/(256+i)
+data4 0x00007F02 // i = 2
+data4 0x00007E85 // i = 3
+data4 0x00007E08 // i = 4
+data4 0x00007D8D // i = 5
+data4 0x00007D12 // i = 6
+data4 0x00007C98 // i = 7
+data4 0x00007C20 // i = 8
+data4 0x00007BA8 // i = 9
+data4 0x00007B31 // i = 10
+data4 0x00007ABB // i = 11
+data4 0x00007A45 // i = 12
+data4 0x000079D1 // i = 13
+data4 0x0000795D // i = 14
+data4 0x000078EB // i = 15: approx 2^15 * 256/271
+LOCAL_OBJECT_END(Constants_Z_2) // 0x40 bytes; Constants_G_H_h2 is addressed as GR_ad_z_1 + 0x180
+
+// G2 and H2 - IEEE single and h2 - IEEE double
+
+LOCAL_OBJECT_START(Constants_G_H_h2) // 16 entries of 16 bytes, selected by GR_Index2 (shladd by 4)
+data4 0x3F800000,0x00000000 // entry 0: G_2, H_2 as two IEEE singles (loaded together with ldfps)
+data8 0x0000000000000000 // entry 0: h_2 as IEEE double (loaded with ldfd)
+data4 0x3F7F00F8,0x3B7F875D
+data8 0x3DB5A11622C42273
+data4 0x3F7E03F8,0x3BFF015B
+data8 0x3DE620CF21F86ED3
+data4 0x3F7D08E0,0x3C3EE393
+data8 0xBDAFA07E484F34ED
+data4 0x3F7C0FC0,0x3C7E0586
+data8 0xBDFE07F03860BCF6
+data4 0x3F7B1880,0x3C9E75D2
+data8 0x3DEA370FA78093D6
+data4 0x3F7A2328,0x3CBDC97A
+data8 0x3DFF579172A753D0
+data4 0x3F792FB0,0x3CDCFE47
+data8 0x3DFEBE6CA7EF896B
+data4 0x3F783E08,0x3CFC15D0
+data8 0x3E0CF156409ECB43
+data4 0x3F774E38,0x3D0D874D
+data8 0xBE0B6F97FFEF71DF
+data4 0x3F766038,0x3D1CF49B
+data8 0xBE0804835D59EEE8
+data4 0x3F757400,0x3D2C531D
+data8 0x3E1F91E9A9192A74
+data4 0x3F748988,0x3D3BA322
+data8 0xBE139A06BF72A8CD
+data4 0x3F73A0D0,0x3D4AE46F
+data8 0x3E1D9202F8FBA6CF
+data4 0x3F72B9D0,0x3D5A1756
+data8 0xBE1DCCC4BA796223
+data4 0x3F71D488,0x3D693B9D
+data8 0xBE049391B6B7C239
+LOCAL_OBJECT_END(Constants_G_H_h2) // 0x100 bytes; Constants_G_H_h3 is addressed as GR_ad_z_1 + 0x280
+
+// G3 and H3 - IEEE single and h3 - IEEE double
+
+LOCAL_OBJECT_START(Constants_G_H_h3) // 32 entries of 16 bytes, selected by the 5-bit GR_Index3 (shladd by 4)
+data4 0x3F7FFC00,0x38800100 // entry 0: G_3, H_3 as two IEEE singles (loaded together with ldfps)
+data8 0x3D355595562224CD // entry 0: h_3 as IEEE double (loaded with ldfd)
+data4 0x3F7FF400,0x39400480
+data8 0x3D8200A206136FF6
+data4 0x3F7FEC00,0x39A00640
+data8 0x3DA4D68DE8DE9AF0
+data4 0x3F7FE400,0x39E00C41
+data8 0xBD8B4291B10238DC
+data4 0x3F7FDC00,0x3A100A21
+data8 0xBD89CCB83B1952CA
+data4 0x3F7FD400,0x3A300F22
+data8 0xBDB107071DC46826
+data4 0x3F7FCC08,0x3A4FF51C
+data8 0x3DB6FCB9F43307DB
+data4 0x3F7FC408,0x3A6FFC1D
+data8 0xBD9B7C4762DC7872
+data4 0x3F7FBC10,0x3A87F20B
+data8 0xBDC3725E3F89154A
+data4 0x3F7FB410,0x3A97F68B
+data8 0xBD93519D62B9D392
+data4 0x3F7FAC18,0x3AA7EB86
+data8 0x3DC184410F21BD9D
+data4 0x3F7FA420,0x3AB7E101
+data8 0xBDA64B952245E0A6
+data4 0x3F7F9C20,0x3AC7E701
+data8 0x3DB4B0ECAABB34B8
+data4 0x3F7F9428,0x3AD7DD7B
+data8 0x3D9923376DC40A7E
+data4 0x3F7F8C30,0x3AE7D474
+data8 0x3DC6E17B4F2083D3
+data4 0x3F7F8438,0x3AF7CBED
+data8 0x3DAE314B811D4394
+data4 0x3F7F7C40,0x3B03E1F3
+data8 0xBDD46F21B08F2DB1
+data4 0x3F7F7448,0x3B0BDE2F
+data8 0xBDDC30A46D34522B
+data4 0x3F7F6C50,0x3B13DAAA
+data8 0x3DCB0070B1F473DB
+data4 0x3F7F6458,0x3B1BD766
+data8 0xBDD65DDC6AD282FD
+data4 0x3F7F5C68,0x3B23CC5C
+data8 0xBDCDAB83F153761A
+data4 0x3F7F5470,0x3B2BC997
+data8 0xBDDADA40341D0F8F
+data4 0x3F7F4C78,0x3B33C711
+data8 0x3DCD1BD7EBC394E8
+data4 0x3F7F4488,0x3B3BBCC6
+data8 0xBDC3532B52E3E695
+data4 0x3F7F3C90,0x3B43BAC0
+data8 0xBDA3961EE846B3DE
+data4 0x3F7F34A0,0x3B4BB0F4
+data8 0xBDDADF06785778D4
+data4 0x3F7F2CA8,0x3B53AF6D
+data8 0x3DCC3ED1E55CE212
+data4 0x3F7F24B8,0x3B5BA620
+data8 0xBDBA31039E382C15
+data4 0x3F7F1CC8,0x3B639D12
+data8 0x3D635A0B5C5AF197
+data4 0x3F7F14D8,0x3B6B9444
+data8 0xBDDCCB1971D34EFC
+data4 0x3F7F0CE0,0x3B7393BC
+data8 0x3DC7450252CD7ADA
+data4 0x3F7F04F0,0x3B7B8B6D
+data8 0xBDB68F177D7F2A42
+LOCAL_OBJECT_END(Constants_G_H_h3) // last table in the fixed-order group
-GR_SAVE_PFS = r51
-GR_SAVE_B0 = r52
-GR_SAVE_GP = r53
-GR_Parameter_X = r54
-GR_Parameter_Y = r55
-GR_Parameter_RESULT = r56
-GR_Parameter_TAG = r57
+
+// Floating Point Registers
+
+FR_Input_X = f8 // input x; f8 is also written with the result on the |x| < 2^-80 return
+
+FR_Y_hi = f34
+FR_Y_lo = f35
+
+FR_Scale = f36
+FR_X_Prime = f37 // normalized copy of the input (fnorm.s1)
+FR_S_hi = f38
+FR_W = f39
+FR_G = f40
+
+FR_H = f41
+FR_wsq = f42
+FR_w4 = f43
+FR_h = f44
+FR_w6 = f45
+
+FR_G2 = f46
+FR_H2 = f47
+FR_poly_lo = f48
+FR_P8 = f49
+FR_poly_hi = f50
+
+FR_P7 = f51
+FR_h2 = f52
+FR_rsq = f53
+FR_P6 = f54
+FR_r = f55
+
+FR_log2_hi = f56
+FR_log2_lo = f57
+FR_p87 = f58 // f58 is shared by FR_p87, FR_p876 and FR_p8765
+FR_p876 = f58 // NOTE(review): presumably successive partial sums; their use is outside this view
+FR_p8765 = f58
+FR_float_N = f59
+FR_Q4 = f60
+
+FR_p43 = f61 // f61 is shared by FR_p43, FR_p432 and FR_p4321
+FR_p432 = f61
+FR_p4321 = f61
+FR_P4 = f62
+FR_G3 = f63
+FR_H3 = f64
+FR_h3 = f65
+
+FR_Q3 = f66
+FR_P3 = f67
+FR_Q2 = f68
+FR_P2 = f69
+FR_1LN10_hi = f70
+
+FR_Q1 = f71
+FR_P1 = f72
+FR_1LN10_lo = f73
+FR_P5 = f74
+FR_rcub = f75
+
+FR_Output_X_tmp = f76
+FR_Neg_One = f77 // holds -1.0 (built with fmerge.ns from f1)
+FR_Z = f78 // x + 1
+FR_AA = f79 // max(x, 1.0)
+FR_BB = f80 // min(x, 1.0)
+FR_S_lo = f81
+FR_2_to_minus_N = f82
FR_X = f8
FR_Y = f0
-FR_RESULT = f99
+FR_RESULT = f76 // same register as FR_Output_X_tmp
-.section .text
-.proc logl#
-.global logl#
-.align 64
-logl:
-#ifdef _LIBC
-.global __ieee754_logl
-__ieee754_logl:
-#endif
-{ .mfi
-alloc r32 = ar.pfs,0,22,4,0
-(p0) fnorm.s1 FR_X_Prime = FR_Input_X
-(p0) cmp.eq.unc p7, p0 = r0, r0
-}
-{ .mfi
-(p0) cmp.ne.unc p14, p0 = r0, r0
-(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
-(p0) cmp.ne.unc p15, p0 = r0, r0 ;;
-}
-{ .mfi
- nop.m 0
-(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF
- nop.i 0
-}
-{ .mfi
-nop.m 999
-(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, f0
- nop.i 0
-}
-{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, f0
- nop.i 0
-}
-{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f1
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fsub.s1 FR_Em1 = f0,f1
- nop.i 999
-}
-{ .mfb
- nop.m 999
-(p0) fadd FR_E = f0,f0
-//
-// Create E = 0 and Em1 = -1
-// Check for X == 1, meaning logl(1)
-// Check for X < 0, meaning logl(negative)
-// Check for X == 0, meaning logl(0)
-// Identify NatVals, NaNs, Infs.
-// Identify EM unsupporteds.
-// Identify Negative values - us S1 so as
-// not to raise denormal operand exception
-// Set p15 to false for log
-// Set p14 to false for log
-// Set p7 true for log and log1p
-//
-(p0) br.cond.sptk L(LOGL_BEGIN) ;;
-}
-.endp logl
-ASM_SIZE_DIRECTIVE(logl)
+// General Purpose Registers
-.section .text
-.proc log10l#
-.global log10l#
-.align 64
-log10l:
-#ifdef _LIBC
-.global __ieee754_log10l
-__ieee754_log10l:
-#endif
-{ .mfi
-alloc r32 = ar.pfs,0,22,4,0
-(p0) fadd FR_E = f0,f0
- nop.i 0
-}
-{ .mfi
- nop.m 0
-(p0) fsub.s1 FR_Em1 = f0,f1
- nop.i 0
-}
-{ .mfi
-(p0) cmp.ne.unc p15, p0 = r0, r0
-(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f1
- nop.i 0
-}
-{ .mfi
-(p0) cmp.eq.unc p14, p0 = r0, r0
-(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, f0
-(p0) cmp.ne.unc p7, p0 = r0, r0 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
- nop.i 999
-}
-{ .mfb
- nop.m 999
-(p0) fnorm.s1 FR_X_Prime = FR_Input_X
-//
-// Create E = 0 and Em1 = -1
-// Check for X == 1, meaning logl(1)
-// Check for X < 0, meaning logl(negative)
-// Check for X == 0, meaning logl(0)
-// Identify NatVals, NaNs, Infs.
-// Identify EM unsupporteds.
-// Identify Negative values - us S1 so as
-// Identify Negative values - us S1 so as
-// not to raise denormal operand exception
-// Set p15 to false for log10
-// Set p14 to true for log10
-// Set p7 to false for log10
-//
-(p0) br.cond.sptk L(LOGL_BEGIN) ;;
-}
+GR_ad_p = r33 // running pointer into Constants_P
+GR_Index1 = r34 // 4 significand bits (extr.u signif, 59, 4)
+GR_Index2 = r35 // bits 6-9 of X_1
+GR_signif = r36 // significand of x+1
+GR_X_0 = r37 // high 15 significand bits
+GR_X_1 = r38
+GR_X_2 = r39 // r39 is shared with GR_minus_N
+GR_minus_N = r39
+GR_Z_1 = r40
+GR_Z_2 = r41
+GR_N = r42 // exponent of x+1, bias removed after the subtract
+GR_Bias = r43 // 0x0FFFF
+GR_M = r44 // exponent field of w = x
+GR_Index3 = r45 // r45 is shared with GR_exp_2tom80
+GR_exp_2tom80 = r45 // 0x0ffaf
+GR_ad_p2 = r46 // second pointer into Constants_P, starting at P_4
+GR_exp_mask = r47 // 0x1FFFF
+GR_exp_2tom7 = r48 // 0x0fff8
+GR_ad_ln10 = r49
+GR_ad_tbl_1 = r50 // pointer into Constants_G_H_h1
+GR_ad_tbl_2 = r51 // pointer into Constants_G_H_h2
+GR_ad_tbl_3 = r52 // pointer into Constants_G_H_h3
+GR_ad_q = r53 // running pointer into Constants_Q
+GR_ad_z_1 = r54 // pointer into Constants_Z_1; base for the other table addresses
+GR_ad_z_2 = r55 // pointer into Constants_Z_2
+GR_ad_z_3 = r56
+GR_minus_N = r39 // repeats the definition above with the same register
+
+//
+// Added for unwind support
+//
-.endp log10l
-ASM_SIZE_DIRECTIVE(log10l)
+GR_SAVE_PFS = r50 // r50-r56 are also named by GR_ad_tbl_1 .. GR_ad_z_3
+GR_SAVE_B0 = r51
+GR_SAVE_GP = r52
+GR_Parameter_X = r53 // r53-r56 lie in the four output registers of alloc r32 = ar.pfs,0,21,4,0
+GR_Parameter_Y = r54
+GR_Parameter_RESULT = r55
+GR_Parameter_TAG = r56
.section .text
-.proc log1pl#
-.global log1pl#
-.align 64
-log1pl:
-#ifdef _LIBC
-.global __log1pl
-__log1pl:
-#endif
+GLOBAL_IEEE754_ENTRY(log1pl)
{ .mfi
-alloc r32 = ar.pfs,0,22,4,0
-(p0) fsub.s1 FR_Neg_One = f0,f1
-(p0) cmp.eq.unc p7, p0 = r0, r0
-}
-{ .mfi
-(p0) cmp.ne.unc p14, p0 = r0, r0
-(p0) fnorm.s1 FR_X_Prime = FR_Input_X
-(p0) cmp.eq.unc p15, p0 = r0, r0 ;;
+ alloc r32 = ar.pfs,0,21,4,0
+ fclass.m p6, p0 = FR_Input_X, 0x1E3 // Test for natval, nan, inf
+ nop.i 999
}
{ .mfi
- nop.m 0
-(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
- nop.i 0
+ addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp
+ fma.s1 FR_Z = FR_Input_X, f1, f1 // x+1
+ nop.i 999
}
+;;
+
{ .mfi
nop.m 999
-(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF
- nop.i 0
+ fmerge.ns FR_Neg_One = f1, f1 // Form -1.0
+ nop.i 999
}
{ .mfi
nop.m 999
-(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f0
- nop.i 0
+ fnorm.s1 FR_X_Prime = FR_Input_X // Normalize x
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fadd FR_Em1 = f0,f0
- nop.i 999 ;;
+ ld8 GR_ad_z_1 = [GR_ad_z_1] // Get pointer to Constants_Z_1
+ nop.f 999
+ mov GR_exp_2tom7 = 0x0fff8 // Exponent of 2^-7
}
-{ .mfi
- nop.m 999
-(p0) fadd FR_E = f0,f1
- nop.i 999 ;;
+;;
+
+{ .mfb
+ getf.sig GR_signif = FR_Z // Get significand of x+1
+ fcmp.eq.s1 p9, p0 = FR_Input_X, f0 // Test for x=0
+(p6) br.cond.spnt LOG1P_special // Branch for nan, inf, natval
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, FR_Neg_One
- nop.i 999
+ add GR_ad_tbl_1 = 0x040, GR_ad_z_1 // Point to Constants_G_H_h1
+ fcmp.lt.s1 p13, p0 = FR_X_Prime, FR_Neg_One // Test for x<-1
+ add GR_ad_p = -0x100, GR_ad_z_1 // Point to Constants_P
}
{ .mfi
- nop.m 999
-(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, FR_Neg_One
- nop.i 999
+ add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2
+ nop.f 999
+ add GR_ad_tbl_2 = 0x180, GR_ad_z_1 // Point to Constants_G_H_h2
}
-L(LOGL_BEGIN):
+;;
+
{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_Z = FR_X_Prime, FR_E
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Table_Scale = 0x0000000000000018 ;;
+ add GR_ad_q = 0x080, GR_ad_p // Point to Constants_Q
+ fcmp.eq.s1 p8, p0 = FR_X_Prime, FR_Neg_One // Test for x=-1
+ extr.u GR_Index1 = GR_signif, 59, 4 // Get high 4 bits of signif
}
-{ .mmi
- nop.m 999
- nop.m 999
-//
-// Create E = 1 and Em1 = 0
-// Check for X == 0, meaning logl(1+0)
-// Check for X < -1, meaning logl(negative)
-// Check for X == -1, meaning logl(0)
-// Normalize x
-// Identify NatVals, NaNs, Infs.
-// Identify EM unsupporteds.
-// Identify Negative values - us S1 so as
-// not to raise denormal operand exception
-// Set p15 to true for log1p
-// Set p14 to false for log1p
-// Set p7 true for log and log1p
-//
-(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp
+{ .mfb
+ add GR_ad_tbl_3 = 0x280, GR_ad_z_1 // Point to Constants_G_H_h3
+ nop.f 999
+(p9) br.ret.spnt b0 // Exit if x=0, return input
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmax.s1 FR_AA = FR_X_Prime, FR_E
- nop.i 999 ;;
+ shladd GR_ad_z_1 = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
+ fclass.nm p10, p0 = FR_Input_X, 0x1FF // Test for unsupported
+ extr.u GR_X_0 = GR_signif, 49, 15 // Get high 15 bits of significand
}
{ .mfi
- ld8 GR_Table_Base = [GR_Table_Base]
-(p0) fmin.s1 FR_BB = FR_X_Prime, FR_E
- nop.i 999
-}
-{ .mfb
- nop.m 999
-(p0) fadd.s1 FR_W = FR_X_Prime, FR_Em1
-//
-// Begin load of constants base
-// FR_Z = Z = |x| + E
-// FR_W = W = |x| + Em1
-// AA = fmax(|x|,E)
-// BB = fmin(|x|,E)
-//
-(p6) br.cond.spnt L(LOGL_64_special) ;;
+ ldfe FR_P8 = [GR_ad_p],16 // Load P_8 for near1 path
+ fsub.s1 FR_W = FR_X_Prime, f0 // W = x
+ add GR_ad_ln10 = 0x060, GR_ad_q // Point to Constants_1_by_LN10
}
-{ .mib
- nop.m 999
- nop.i 999
-(p10) br.cond.spnt L(LOGL_64_unsupported) ;;
+;;
+
+{ .mfi
+ ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
+ fmax.s1 FR_AA = FR_X_Prime, f1 // For S_lo, form AA = max(X,1.0)
+ mov GR_exp_mask = 0x1FFFF // Create exponent mask
}
{ .mib
- nop.m 999
- nop.i 999
-(p13) br.cond.spnt L(LOGL_64_negative) ;;
+ shladd GR_ad_tbl_1 = GR_Index1, 4, GR_ad_tbl_1 // Point to G_1
+ mov GR_Bias = 0x0FFFF // Create exponent bias
+(p13) br.cond.spnt LOG1P_LT_Minus_1 // Branch if x<-1
}
-{ .mib
-(p0) getf.sig GR_signif = FR_Z
- nop.i 999
-(p9) br.cond.spnt L(LOGL_64_one) ;;
+;;
+
+{ .mfb
+ ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
+ fmerge.se FR_S_hi = f1,FR_Z // Form |x+1|
+(p8) br.cond.spnt LOG1P_EQ_Minus_1 // Branch if x=-1
}
-{ .mib
- nop.m 999
- nop.i 999
-(p8) br.cond.spnt L(LOGL_64_zero) ;;
+;;
+
+{ .mmb
+ getf.exp GR_N = FR_Z // Get N = exponent of x+1
+ ldfd FR_h = [GR_ad_tbl_1] // Load h_1
+(p10) br.cond.spnt LOG1P_unsupported // Branch for unsupported type
}
+;;
+
{ .mfi
-(p0) getf.exp GR_N = FR_Z
-//
-// Raise possible denormal operand exception
-// Create Bias
-//
-// This function computes ln( x + e )
-// Input FR 1: FR_X = FR_Input_X
-// Input FR 2: FR_E = FR_E
-// Input FR 3: FR_Em1 = FR_Em1
-// Input GR 1: GR_Expo_Range = GR_Expo_Range = 1
-// Output FR 4: FR_Y_hi
-// Output FR 5: FR_Y_lo
-// Output FR 6: FR_Scale
-// Output PR 7: PR_Safe
-//
-(p0) fsub.s1 FR_S_lo = FR_AA, FR_Z
+ ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
+ fcmp.eq.s0 p8, p0 = FR_Input_X, f0 // Dummy op to flag denormals
+ pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // Get bits 30-15 of X_0 * Z_1
+}
+;;
+
//
-// signif = getf.sig(Z)
-// abs_W = fabs(w)
+// For performance, don't use result of pmpyshr2.u for 4 cycles.
//
-(p0) extr.u GR_Table_ptr = GR_signif, 59, 4 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fmerge.se FR_S_hi = f1,FR_Z
-(p0) extr.u GR_X_0 = GR_signif, 49, 15
-}
{ .mmi
- nop.m 999
- nop.m 999
-(p0) addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp ;;
-}
-{ .mlx
- ld8 GR_Table_Base1 = [GR_Table_Base1]
-(p0) movl GR_Bias = 0x000000000000FFFF ;;
-}
-{ .mfi
- nop.m 999
-(p0) fabs FR_abs_W = FR_W
-(p0) pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0
-}
-{ .mfi
- nop.m 999
-//
-// Branch out for special input values
-//
-(p0) fcmp.lt.unc.s0 p8, p0 = FR_Input_X, f0
- nop.i 999 ;;
+ ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
+ sub GR_N = GR_N, GR_Bias
+ mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80
}
+;;
+
{ .mfi
- nop.m 999
-//
-// X_0 = extr.u(signif,49,15)
-// Index1 = extr.u(signif,59,4)
-//
-(p0) fadd.s1 FR_S_lo = FR_S_lo, FR_BB
- nop.i 999 ;;
+ ldfe FR_Q4 = [GR_ad_q],16 // Load Q4
+ fms.s1 FR_S_lo = FR_AA, f1, FR_Z // Form S_lo = AA - Z
+ sub GR_minus_N = GR_Bias, GR_N // Form exponent of 2^(-N)
}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// Offset_to_Z1 = 24 * Index1
-// For performance, don't use result
-// for 3 or 4 cycles.
-//
-(p0) add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;;
+;;
+
+{ .mmf
+ ldfe FR_Q3 = [GR_ad_q],16 // Load Q3
+ setf.sig FR_float_N = GR_N // Put integer N into rightmost significand
+ fmin.s1 FR_BB = FR_X_Prime, f1 // For S_lo, form BB = min(X,1.0)
}
-//
-// Add Base to Offset for Z1
-// Create Bias
+;;
+
{ .mmi
-(p0) ld4 GR_Z_1 = [GR_Table_ptr],4 ;;
-(p0) ldfs FR_G = [GR_Table_ptr],4
- nop.i 999 ;;
+ getf.exp GR_M = FR_W // Get signexp of w = x
+ ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
+ extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
}
+;;
+
{ .mmi
-(p0) ldfs FR_H = [GR_Table_ptr],8 ;;
-(p0) ldfd FR_h = [GR_Table_ptr],0
-(p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
-}
-//
-// Load Z_1
-// Get Base of Table2
-//
-{ .mfi
-(p0) getf.exp GR_M = FR_abs_W
- nop.f 999
- nop.i 999 ;;
-}
-{ .mii
- nop.m 999
- nop.i 999 ;;
-//
-// M = getf.exp(abs_W)
-// S_lo = AA - Z
-// X_1 = pmpyshr2(X_0,Z_1,15)
-//
-(p0) sub GR_M = GR_M, GR_Bias ;;
-}
-//
-// M = M - Bias
-// Load G1
-// N = getf.exp(Z)
-//
-{ .mii
-(p0) cmp.gt.unc p11, p0 = -80, GR_M
-(p0) cmp.gt.unc p12, p0 = -7, GR_M ;;
-(p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;;
-}
-{ .mib
- nop.m 999
-//
-// if -80 > M, set p11
-// Index2 = extr.u(X_1,6,4)
-// if -7 > M, set p12
-// Load H1
-//
-(p0) pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0
-(p11) br.cond.spnt L(log1pl_small) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p12) br.cond.spnt L(log1pl_near) ;;
-}
-{ .mii
-(p0) sub GR_N = GR_N, GR_Bias
-//
-// poly_lo = r * poly_lo
-//
-(p0) add GR_Perturb = 0x1, r0 ;;
-(p0) sub GR_ScaleN = GR_Bias, GR_N
-}
-{ .mii
-(p0) setf.sig FR_float_N = GR_N
- nop.i 999 ;;
-//
-// Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
-// Load h1
-// S_lo = S_lo + BB
-// Branch for -80 > M
-//
-(p0) add GR_Index2 = GR_Index2, GR_Table_Base1
+ ldfe FR_Q1 = [GR_ad_q] // Load Q1
+ shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
+ add GR_ad_p2 = 0x30,GR_ad_p // Point to P_4
}
+;;
+
{ .mmi
-(p0) setf.exp FR_two_negN = GR_ScaleN
- nop.m 999
-(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp ;;
+ ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
+ shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2 // Point to G_2
+ and GR_M = GR_exp_mask, GR_M // Get exponent of w = x
}
-//
-// Index2 points to Z2
-// Branch for -7 > M
-//
-{ .mmb
-(p0) ld4 GR_Z_2 = [GR_Index2],4
-(p0) ld8 GR_Table_Base = [GR_Table_Base]
- nop.b 999 ;;
-}
-(p0) nop.i 999
-//
-// Load Z_2
-// N = N - Bias
-// Tablebase points to Table3
-//
+;;
+
{ .mmi
-(p0) ldfs FR_G_tmp = [GR_Index2],4 ;;
-//
-// Load G_2
-// pmpyshr2 X_2= (X_1,Z_2,15)
-// float_N = setf.sig(N)
-// ScaleN = Bias - N
-//
-(p0) ldfs FR_H_tmp = [GR_Index2],8
- nop.i 999 ;;
+ ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
+ cmp.lt p8, p9 = GR_M, GR_exp_2tom7 // Test |x| < 2^-7
+ cmp.lt p7, p0 = GR_M, GR_exp_2tom80 // Test |x| < 2^-80
}
-//
-// Load H_2
-// two_negN = setf.exp(scaleN)
-// G = G_1 * G_2
-//
+;;
+
+// Small path is separate code
+// p7 is for the small path: |x| < 2^-80
+// near1 and regular paths are merged.
+// p8 is for the near1 path: |x| < 2^-7
+// p9 is for regular path: |x| >= 2^-7
+
{ .mfi
-(p0) ldfd FR_h_tmp = [GR_Index2],0
- nop.f 999
-(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;;
+ ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2
+ nop.f 999
+ nop.i 999
}
-{ .mii
- nop.m 999
-(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;;
-//
-// Load h_2
-// H = H_1 + H_2
-// h = h_1 + h_2
-// Index3 = extr.u(X_2,1,5)
-//
-(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Base
+{ .mfb
+(p9) setf.exp FR_2_to_minus_N = GR_minus_N // Form 2^(-N)
+(p7) fnma.s0 f8 = FR_X_Prime, FR_X_Prime, FR_X_Prime // Result x - x*x
+(p7) br.ret.spnt b0 // Branch if |x| < 2^-80
}
+;;
+
{ .mmi
- nop.m 999
- nop.m 999
-//
-// float_N = fcvt.xf(float_N)
-// load G3
-//
-(p0) addl GR_Table_Base = @ltoff(Constants_Q#),gp ;;
+(p8) ldfe FR_P7 = [GR_ad_p],16 // Load P_7 for near1 path
+(p8) ldfe FR_P4 = [GR_ad_p2],16 // Load P_4 for near1 path
+(p9) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1 * Z_2
}
-{ .mmi
- nop.m 999
- ld8 GR_Table_Base = [GR_Table_Base]
- nop.i 999
-};;
+;;
-{ .mfi
-(p0) ldfe FR_log2_hi = [GR_Table_Base],16
-(p0) fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN
- nop.i 999 ;;
-}
-{ .mmf
- nop.m 999
//
-// G = G3 * G
-// Load h3
-// Load log2_hi
-// H = H + H3
+// For performance, don't use result of pmpyshr2.u for 4 cycles.
//
-(p0) ldfe FR_log2_lo = [GR_Table_Base],16
-(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp ;;
-}
{ .mmf
-(p0) ldfs FR_G_tmp = [GR_Index3],4
-//
-// h = h + h3
-// r = G * S_hi + 1
-// Load log2_lo
-//
-(p0) ldfe FR_Q4 = [GR_Table_Base],16
-(p0) fadd.s1 FR_h = FR_h, FR_h_tmp ;;
-}
-{ .mfi
-(p0) ldfe FR_Q3 = [GR_Table_Base],16
-(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
- nop.i 999 ;;
+(p8) ldfe FR_P6 = [GR_ad_p],16 // Load P_6 for near1 path
+(p8) ldfe FR_P3 = [GR_ad_p2],16 // Load P_3 for near1 path
+(p9) fma.s1 FR_S_lo = FR_S_lo, f1, FR_BB // S_lo = S_lo + BB
}
+;;
+
{ .mmf
-(p0) ldfs FR_H_tmp = [GR_Index3],4
-(p0) ldfe FR_Q2 = [GR_Table_Base],16
-//
-// Comput Index for Table3
-// S_lo = S_lo * two_negN
-//
-(p0) fcvt.xf FR_float_N = FR_float_N ;;
+(p8) ldfe FR_P5 = [GR_ad_p],16 // Load P_5 for near1 path
+(p8) ldfe FR_P2 = [GR_ad_p2],16 // Load P_2 for near1 path
+(p8) fmpy.s1 FR_wsq = FR_W, FR_W // wsq = w * w for near1 path
}
-//
-// If S_lo == 0, set p8 false
-// Load H3
-// Load ptr to table of polynomial coeff.
-//
-{ .mmf
-(p0) ldfd FR_h_tmp = [GR_Index3],0
-(p0) ldfe FR_Q1 = [GR_Table_Base],0
-(p0) fcmp.eq.unc.s1 p0, p8 = FR_S_lo, f0 ;;
+;;
+
+{ .mmi
+(p8) ldfe FR_P1 = [GR_ad_p2],16 ;; // Load P_1 for near1 path
+ nop.m 999
+(p9) extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp
- nop.i 999 ;;
+(p9) shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 // Point to G_3
+(p9) fcvt.xf FR_float_N = FR_float_N
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
- nop.i 999 ;;
+(p9) ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3
+ nop.f 999
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fms.s1 FR_r = FR_G, FR_S_hi, f1
- nop.i 999
+(p9) ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
+(p9) fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fadd.s1 FR_h = FR_h, FR_h_tmp
- nop.i 999 ;;
+ nop.m 999
+(p9) fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
+ nop.i 999
}
-{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H
- nop.i 999 ;;
+;;
+
+{ .mmf
+ nop.m 999
+ nop.m 999
+(p9) fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Load Q4
-// Load Q3
-// Load Q2
-// Load Q1
-//
-(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r
- nop.i 999
+ nop.m 999
+(p8) fmpy.s1 FR_w4 = FR_wsq, FR_wsq // w4 = w^4 for near1 path
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// poly_lo = r * Q4 + Q3
-// rsq = r* r
-//
-(p0) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h
- nop.i 999 ;;
+ nop.m 999
+(p8) fma.s1 FR_p87 = FR_W, FR_P8, FR_P7 // p87 = w * P8 + P7
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// If (S_lo!=0) r = s_lo * G + r
-//
-(p0) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_S_lo = FR_S_lo, FR_2_to_minus_N, f0 // S_lo = S_lo * 2^(-N)
+ nop.i 999
}
-//
-// Create a 0x00000....01
-// poly_lo = poly_lo * rsq + h
-//
{ .mfi
-(p0) setf.sig FR_dummy = GR_Perturb
-(p0) fmpy.s1 FR_rsq = FR_r, FR_r
- nop.i 999 ;;
+ nop.m 999
+(p8) fma.s1 FR_p43 = FR_W, FR_P4, FR_P3 // p43 = w * P4 + P3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// h = N * log2_lo + h
-// Y_hi = n * log2_hi + H
-//
-(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
- nop.i 999
+ nop.m 999
+(p9) fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
- nop.i 999 ;;
+ nop.m 999
+(p9) fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// poly_lo = r * poly_o + Q2
-// poly_hi = Q1 * rsq + r
-//
-(p0) fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r
- nop.i 999 ;;
+ nop.m 999
+(p9) fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p0) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo
-//
-// Create the FR for a binary "or"
-// Y_lo = poly_hi + poly_lo
-//
-// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
-//
-// Turn the lsb of Y_lo ON
-//
-// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
-//
-// Merge the new lsb into Y_lo, for alone doesn't
-//
-(p0) br.cond.sptk LOGL_main ;;
-}
-L(log1pl_near):
-{ .mmi
- nop.m 999
- nop.m 999
-// /*******************************************************/
-// /*********** Branch log1pl_near ************************/
-// /*******************************************************/
-(p0) addl GR_Table_Base = @ltoff(Constants_P#),gp ;;
-}
-{ .mmi
nop.m 999
- ld8 GR_Table_Base = [GR_Table_Base]
+(p8) fmpy.s1 FR_w6 = FR_w4, FR_wsq // w6 = w^6 for near1 path
nop.i 999
-};;
-//
-// Load base address of poly. coeff.
-//
-{ .mmb
-(p0) add GR_Table_ptr = 0x40,GR_Table_Base
-//
-// Address tables with separate pointers
-//
-(p0) ldfe FR_P8 = [GR_Table_Base],16
- nop.b 999 ;;
-}
-{ .mmb
-(p0) ldfe FR_P4 = [GR_Table_ptr],16
-//
-// Load P4
-// Load P8
-//
-(p0) ldfe FR_P7 = [GR_Table_Base],16
- nop.b 999 ;;
-}
-{ .mmf
-(p0) ldfe FR_P3 = [GR_Table_ptr],16
-//
-// Load P3
-// Load P7
-//
-(p0) ldfe FR_P6 = [GR_Table_Base],16
-(p0) fmpy.s1 FR_wsq = FR_W, FR_W ;;
}
+;;
+
{ .mfi
-(p0) ldfe FR_P2 = [GR_Table_ptr],16
- nop.f 999
- nop.i 999 ;;
+ nop.m 999
+(p8) fma.s1 FR_p432 = FR_W, FR_p43, FR_P2 // p432 = w * p43 + P2
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3
- nop.i 999
+ nop.m 999
+(p8) fma.s1 FR_p876 = FR_W, FR_p87, FR_P6 // p876 = w * p87 + P6
+ nop.i 999
}
-//
-// Load P2
-// Load P6
-// Wsq = w * w
-// Y_hi = p4 * w + p3
-//
+;;
+
{ .mfi
-(p0) ldfe FR_P5 = [GR_Table_Base],16
-(p0) fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7
- nop.i 999 ;;
+ nop.m 999
+(p9) fms.s1 FR_r = FR_G, FR_S_hi, f1 // r = G * S_hi - 1
+ nop.i 999
}
{ .mfi
-(p0) ldfe FR_P1 = [GR_Table_ptr],16
-//
-// Load P1
-// Load P5
-// Y_lo = p8 * w + P7
-//
-(p0) fmpy.s1 FR_w4 = FR_wsq, FR_wsq
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi = N * log2_hi + H
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2
- nop.i 999
+ nop.m 999
+(p9) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h // h = N * log2_lo + h
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6
-(p0) add GR_Perturb = 0x1, r0 ;;
+ nop.m 999
+(p9) fma.s1 FR_r = FR_G, FR_S_lo, FR_r // r = G * S_lo + (G * S_hi - 1)
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// w4 = w2 * w2
-// Y_hi = y_hi * w + p2
-// Y_lo = y_lo * w + p6
-// Create perturbation bit
-//
-(p0) fmpy.s1 FR_w6 = FR_w4, FR_wsq
- nop.i 999 ;;
+ nop.m 999
+(p8) fma.s1 FR_p4321 = FR_W, FR_p432, FR_P1 // p4321 = w * p432 + P1
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1
- nop.i 999
+ nop.m 999
+(p8) fma.s1 FR_p8765 = FR_W, FR_p876, FR_P5 // p8765 = w * p876 + P5
+ nop.i 999
}
-//
-// Y_hi = y_hi * w + p1
-// w6 = w4 * w2
-//
+;;
+
{ .mfi
-(p0) setf.sig FR_Q4 = GR_Perturb
-(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 // poly_lo = r * Q4 + Q3
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_dummy = FR_wsq,FR_Y_hi, f0
- nop.i 999
+ nop.m 999
+(p9) fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fma.s1 FR_Y_hi = FR_W,f1,f0
- nop.i 999
-};;
-{ .mfb
- nop.m 999
-//
-// Y_hi = w
-// Y_lo = y_lo * w + p5
-//
-(p0) fma.s1 FR_Y_lo = FR_w6, FR_Y_lo,FR_dummy
-//
-// Y_lo = y_lo * w6 + y_high order part.
-//
-// performance
-//
-(p0) br.cond.sptk LOGL_main ;;
-}
-L(log1pl_small):
-{ .mmi
- nop.m 999
-// /*******************************************************/
-// /*********** Branch log1pl_small ***********************/
-// /*******************************************************/
-(p0) addl GR_Table_Base = @ltoff(Constants_Threshold#),gp
+ nop.m 999
+(p8) fma.s1 FR_Y_lo = FR_wsq, FR_p4321, f0 // Y_lo = wsq * p4321
+ nop.i 999
}
{ .mfi
nop.m 999
-(p0) mov FR_Em1 = FR_W
-(p0) cmp.eq.unc p7, p0 = r0, r0 ;;
-}
-{ .mlx
- ld8 GR_Table_Base = [GR_Table_Base]
-(p0) movl GR_Expo_Range = 0x0000000000000004 ;;
-}
-//
-// Set Safe to true
-// Set Expo_Range = 0 for single
-// Set Expo_Range = 2 for double
-// Set Expo_Range = 4 for double-extended
-//
-{ .mmi
-(p0) shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;;
-(p0) ldfe FR_Threshold = [GR_Table_Base],16
- nop.i 999
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Bias = 0x000000000000FF9B ;;
+(p8) fma.s1 FR_Y_hi = FR_W, f1, f0 // Y_hi = w for near1 path
+ nop.i 999
}
+;;
+
{ .mfi
-(p0) ldfe FR_Tiny = [GR_Table_Base],0
- nop.f 999
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo = poly_lo * r + Q2
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fcmp.gt.unc.s1 p13, p12 = FR_abs_W, FR_Threshold
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W
- nop.i 999
+ nop.m 999
+(p8) fma.s1 FR_Y_lo = FR_w6, FR_p8765,FR_Y_lo // Y_lo = w6 * p8765 + w2 * p4321
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p13) fadd FR_SCALE = f0, f1
- nop.i 999 ;;
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r // poly_hi = Q1 * rsq + r
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny
-(p12) cmp.ne.unc p7, p0 = r0, r0
+ nop.m 999
+(p9) fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h // poly_lo = poly_lo*r^3 + h
+ nop.i 999
}
+;;
+
{ .mfi
-(p12) setf.exp FR_SCALE = GR_Bias
- nop.f 999
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-//
-// Set p7 to SAFE = FALSE
-// Set Scale = 2^-100
-//
-(p0) fma.s0 f8 = FR_Y_lo,FR_SCALE,FR_Y_hi
-(p0) br.ret.sptk b0 ;;
+ nop.m 999
+(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo
+ nop.i 999
}
-L(LOGL_64_one):
+;;
+
+// Remainder of code is common for near1 and regular paths
{ .mfb
- nop.m 999
-(p0) fmpy.s0 f8 = FR_Input_X, f0
-(p0) br.ret.sptk b0 ;;
+ nop.m 999
+ fadd.s0 f8 = FR_Y_lo,FR_Y_hi // Result=Y_lo+Y_hi
+ br.ret.sptk b0 // Common exit for near1 and regular paths, |x| >= 2^-80
}
-//
-// Raise divide by zero for +/-0 input.
-//
-L(LOGL_64_zero):
-{ .mfi
-(p0) mov GR_Parameter_TAG = 0
+;;
+
+
+// Here if x=-1
+LOG1P_EQ_Minus_1:
//
-// If we have logl(1), log10l(1) or log1pl(0), return 0.
+// If x=-1 raise divide by zero and return -inf
//
-(p0) fsub.s0 FR_Output_X_tmp = f0, f1
- nop.i 999 ;;
-}
-{ .mii
-(p14) mov GR_Parameter_TAG = 6
- nop.i 999 ;;
-(p15) mov GR_Parameter_TAG = 138 ;;
-}
-{ .mfb
- nop.m 999
-(p0) frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
-(p0) br.cond.sptk __libm_error_region ;;
+{ .mfi
+ mov GR_Parameter_TAG = 138
+ fsub.s1 FR_Output_X_tmp = f0, f1
+ nop.i 999
}
+;;
+
{ .mfb
- nop.m 999
-//
-// Report that logl(0) computed
-// { .mfb
-(p0) mov FR_Input_X = FR_Output_X_tmp
-(p0) br.ret.sptk b0 ;;
+ nop.m 999
+ frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
+ br.cond.sptk __libm_error_region
}
+;;
-L(LOGL_64_special):
+LOG1P_special:
{ .mfi
- nop.m 999
-//
-// Return -Inf or value from handler.
-//
-(p0) fclass.m.unc p7, p0 = FR_Input_X, 0x1E1
- nop.i 999 ;;
+ nop.m 999
+ fclass.m.unc p8, p0 = FR_Input_X, 0x1E1 // Test for natval, nan, +inf
+ nop.i 999
}
-{ .mfb
- nop.m 999
-//
-// Check for Natval, QNan, SNaN, +Inf
-//
-(p7) fmpy.s0 f8 = FR_Input_X, f1
+;;
+
//
// For SNaN raise invalid and return QNaN.
// For QNaN raise invalid and return QNaN.
// For +Inf return +Inf.
//
-(p7) br.ret.sptk b0 ;;
+{ .mfb
+ nop.m 999
+(p8) fmpy.s0 f8 = FR_Input_X, f1
+(p8) br.ret.sptk b0 // Return for natval, nan, +inf
}
+;;
+
//
// For -Inf raise invalid and return QNaN.
//
-{ .mii
-(p0) mov GR_Parameter_TAG = 1
- nop.i 999 ;;
-(p14) mov GR_Parameter_TAG = 7 ;;
-}
-{ .mfi
-(p15) mov GR_Parameter_TAG = 139
- nop.f 999
- nop.i 999 ;;
-}
{ .mfb
- nop.m 999
-(p0) fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0
-(p0) br.cond.sptk __libm_error_region ;;
+ mov GR_Parameter_TAG = 139
+ fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0
+ br.cond.sptk __libm_error_region
}
-//
-// Report that logl(-Inf) computed
-// Report that log10l(-Inf) computed
-// Report that log1p(-Inf) computed
-//
-{ .mfb
- nop.m 0
-(p0) mov FR_Input_X = FR_Output_X_tmp
-(p0) br.ret.sptk b0 ;;
-}
-L(LOGL_64_unsupported):
-{ .mfb
- nop.m 999
+;;
+
+
+LOG1P_unsupported:
//
-// Return generated NaN or other value .
+// Return generated NaN or other value.
//
-(p0) fmpy.s0 f8 = FR_Input_X, f0
-(p0) br.ret.sptk b0 ;;
+{ .mfb
+ nop.m 999
+ fmpy.s0 f8 = FR_Input_X, f0
+ br.ret.sptk b0
}
-L(LOGL_64_negative):
-{ .mfi
- nop.m 999
-//
-// Deal with x < 0 in a special way
-//
-(p0) frcpa.s0 FR_Output_X_tmp, p8 = f0, f0
+;;
+
+// Here if -inf < x < -1
+LOG1P_LT_Minus_1:
//
-// Deal with x < 0 in a special way - raise
+// Deal with x < -1 in a special way - raise
// invalid and produce QNaN indefinite.
//
-(p0) mov GR_Parameter_TAG = 1 ;;
-}
-{ .mii
-(p14) mov GR_Parameter_TAG = 7
- nop.i 999 ;;
-(p15) mov GR_Parameter_TAG = 139
+{ .mfb
+ mov GR_Parameter_TAG = 139
+ frcpa.s0 FR_Output_X_tmp, p8 = f0, f0
+ br.cond.sptk __libm_error_region
}
-.endp log1pl
-ASM_SIZE_DIRECTIVE(log1pl)
+;;
+
-.proc __libm_error_region
-__libm_error_region:
+GLOBAL_IEEE754_END(log1pl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -1609,8 +1177,8 @@ __libm_error_region:
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
{ .mmi
- nop.m 0
- nop.m 0
+ nop.m 999
+ nop.m 999
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
@@ -1625,52 +1193,7 @@ __libm_error_region:
br.ret.sptk b0 // Return
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.proc LOGL_main
-LOGL_main:
-{ .mfi
- nop.m 999
-//
-// kernel_log_64 computes ln(X + E)
-//
-(p7) fadd.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
- nop.i 0
-}
-{ .mmi
- nop.m 999
- nop.m 999
-(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;;
-}
-{ .mmi
- nop.m 999
-(p14) ld8 GR_Table_Base = [GR_Table_Base]
- nop.i 999
-};;
-
-{ .mmi
-(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;;
-(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p14) fma.s1 FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp
- nop.i 999 ;;
-}
-{ .mfb
- nop.m 999
-(p14) fma.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp
-(p0) br.ret.sptk b0 ;;
-}
-.endp LOGL_main
-ASM_SIZE_DIRECTIVE(LOGL_main)
+LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_logb.S b/sysdeps/ia64/fpu/s_logb.S
index 76c4fe778e..dfe581a826 100644
--- a/sysdeps/ia64/fpu/s_logb.S
+++ b/sysdeps/ia64/fpu/s_logb.S
@@ -1,10 +1,10 @@
.file "logb.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,41 +20,43 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 2/16/00 Modified to conform to C9X
-// 3/16/00 Improved speed
-// 4/04/00 Unwind support added
-// 5/30/00 Fixed bug when x double-extended denormal
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 02/16/00 Modified to conform to C9X
+// 03/16/00 Improved speed
+// 04/04/00 Unwind support added
+// 05/30/00 Fixed bug when x double-extended denormal
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
//
// API
//==============================================================
-// double logb( double x);
+// double logb( double x );
//
// Overview of operation
//==============================================================
-// The logb function extracts the exponent of x as an integer in
-// floating-point format.
+// The logb function extracts the exponent of x as an integer in
+// floating-point format.
// logb computes log2 of x as a double
//
// logb is similar to ilogb but differs in the following ways:
@@ -71,217 +73,168 @@
//
// Registers used
//==============================================================
-// general registers used:
-// ar.pfs r32
-// r33 -> r37
-// r38 -> r41 used as parameters to error path
+// general registers used:
+// r26 -> r38
+// r35 -> r38 used as parameters to error path
//
-// predicate registers used:
+// predicate registers used:
// p6, p7, p8
-// floating-point registers used:
+// floating-point registers used:
// f9, f10, f11
// f8, input
-#include "libm_support.h"
+rExpBias = r26
+rExpMask = r27
+rSignexp_x = r28
+rExp_x = r29
+rTrueExp_x = r30
+rExp_2to64 = r31
-GR_SAVE_B0 = r34
-GR_SAVE_GP = r35
GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
-GR_Parameter_X = r38
-GR_Parameter_Y = r39
-GR_Parameter_RESULT = r40
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
-.align 32
-.global logb#
+fExp_in_signif = f9
+fNorm_x = f10
+fFloat_Exp = f10
+f2to64 = f11
.section .text
-.proc logb#
-.align 32
+GLOBAL_LIBM_ENTRY(logb)
-
-logb:
-
-// qnan snan inf norm unorm 0 -+
-// 0 0 0 0 1 0 11
-// 0 b
-{ .mfi
- alloc r32=ar.pfs,1,5,4,0
-(p0) fclass.m.unc p8,p0 = f8, 0x0b
- nop.i 999
-}
// X NORMAL
-// r37 = exp(f8) - - 0xffff
-// sig(f8) = r37
+// TrueExp_x = exp(f8) - 0xffff
+// sig = TrueExp_x
// f8 = convert_to_fp (sig))
{ .mfi
-(p0) getf.exp r35 = f8
-(p0) fnorm f10=f8
- nop.i 999 ;;
+ getf.exp rSignexp_x = f8
+ fclass.m p8,p0 = f8, 0x0b // Test for x unorm
+ mov rExpBias = 0xffff // Exponent bias
}
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 0 11
-// e 3
-{ .mmf
-(p0) mov r33 = 0xffff
-(p0) mov r34 = 0x1ffff
-(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNorm_x = f8
+ mov rExpMask = 0x1ffff // Exponent mask
}
+;;
+// Form signexp of 2^64 in case we need to scale a denormal
{ .mfb
-(p0) and r36 = r35, r34
-(p0) fclass.m.unc p7,p0 = f8, 0x07
-(p8) br.cond.spnt L(LOGB_DENORM) ;;
+ mov rExp_2to64 = 0x1003f
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p8) br.cond.spnt LOGB_DENORM // Branch if x unorm
}
+;;
-{ .mib
-(p0) sub r37 = r36, r33
- nop.i 999
-(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
+LOGB_COMMON:
+// Return here from LOGB_DENORM
+{ .mfi
+ and rExp_x = rSignexp_x, rExpMask // Get biased exponent
+ fclass.m p7,p0 = f8, 0x07 // Test x zero
+ nop.i 0
}
+;;
-{ .mib
-(p0) setf.sig f9 = r37
- nop.i 999
-(p7) br.cond.spnt L(LOGB_ZERO) ;;
+// X NAN or INFINITY, return f8 * f8
+{ .mfb
+ sub rTrueExp_x = rExp_x, rExpBias // Get true exponent
+(p6) fma.d.s0 f8= f8,f8,f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
-{ .mfi
- nop.m 999
-(p0) fcvt.xf f10 = f9
- nop.i 999 ;;
+{ .mib
+ setf.sig fExp_in_signif = rTrueExp_x // Exponent as integer in fp
+ nop.i 999
+(p7) br.cond.spnt LOGB_ZERO
}
+;;
+// Result can be represented in less than 24 bits, so no precision completer
+// is needed.
{ .mfb
- nop.m 999
-(p0) fnorm.d f8 = f10
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ fcvt.xf f8 = fExp_in_signif
+ br.ret.sptk b0 // Exit main path, 0 < |x| < inf
}
+;;
-L(LOGB_DENORM):
-// Form signexp of 2^64 in case need to scale denormal
+LOGB_DENORM:
+// Form 2^64 in case we need to scale a denormal
// Check to see if double-extended denormal
{ .mfi
-(p0) mov r38 = 0x1003f
-(p0) fclass.m.unc p8,p0 = f10, 0x0b
- nop.i 999 ;;
+ setf.exp f2to64 = rExp_2to64
+ fclass.m p8,p0 = fNorm_x, 0x0b
+ nop.i 0
}
+;;
-// Form 2^64 in case need to scale denormal
{ .mfi
-(p0) setf.exp f11 = r38
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
}
+;;
// If double-extended denormal add 64 to exponent bias for scaling
// If double-extended denormal form x * 2^64 which is normal
{ .mfi
-(p8) add r33 = 64, r33
-(p8) fmpy f10 = f10, f11
- nop.i 999 ;;
+(p8) add rExpBias = 64, rExpBias
+(p8) fmpy.s1 fNorm_x = fNorm_x, f2to64
+ nop.i 0
}
+;;
// Logic is the same as normal path but use normalized input
-{ .mmi
-(p0) getf.exp r35 = f10 ;;
- nop.m 999
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) and r36 = r35, r34 ;;
-(p0) sub r37 = r36, r33
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) setf.sig f9 = r37
- nop.m 999
- nop.i 999 ;;
+{ .mib
+ getf.exp rSignexp_x = fNorm_x
+ nop.i 0
+ br.cond.sptk LOGB_COMMON // Return to main path
}
+;;
-{ .mfi
- nop.m 999
-(p0) fcvt.xf f10 = f9
- nop.i 999 ;;
-}
+LOGB_ZERO:
+// Here if x zero
+// f10 = -|f8|
+// f9 = 1.0/f10 = -1.0/|f8| = -inf
-{ .mfb
- nop.m 999
-(p0) fnorm.d f8 = f10
-(p0) br.ret.sptk b0 ;;
+{ .mmf
+ alloc r32=ar.pfs,1,2,4,0
+ mov GR_Parameter_TAG = 151 // Error code
+ fmerge.ns f10 = f0,f8
}
+;;
-L(LOGB_NAN_INF):
-
-// X NAN or INFINITY, return f8 * f8
{ .mfb
- nop.m 999
-(p0) fma.d f8= f8,f8,f0
-(p0) br.ret.sptk b0 ;;
-}
-
-.endp logb#
-ASM_SIZE_DIRECTIVE(logb)
-
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
-
-.proc __libm_error_region
-__libm_error_region:
-L(LOGB_ZERO):
-.prologue
-
-// f9 = |f8|
-// f10 = -f9 = -|f8|
-// f9 = 1.0/f10 = -1.0/-|f8|
-
-{ .mfi
- mov r41 = 151 // Error code
-(p0) fmerge.s f9 = f0,f8
- nop.i 999
+ nop.m 0
+ frcpa.s0 f9,p6 = f1,f10 // Produce -inf, Z flag
+ br.cond.sptk __libm_error_region // Call error support
}
;;
+GLOBAL_LIBM_END(logb)
-{ .mfi
- nop.m 999
- fmerge.ns f10 = f0,f9
- nop.i 999
-}
-;;
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
-// (1)
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
- frcpa f9,p6 = f1,f10
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
-
-// (2)
{ .mmi
stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
@@ -290,38 +243,38 @@ L(LOGB_ZERO):
};;
.body
-// (3)
{ .mib
stfd [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
- nop.b 0
+ nop.b 0
}
{ .mib
stfd [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
-// (4)
{ .mmi
ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_logbf.S b/sysdeps/ia64/fpu/s_logbf.S
index f2f671f892..1d605cd97c 100644
--- a/sysdeps/ia64/fpu/s_logbf.S
+++ b/sysdeps/ia64/fpu/s_logbf.S
@@ -1,10 +1,10 @@
.file "logbf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,44 +20,46 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 2/16/00 Modified to conform to C9X
-// 3/16/00 Improved speed
-// 4/04/00 Unwind support added
-// 5/30/00 Fixed bug when x double-extended denormal
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 02/16/00 Modified to conform to C9X
+// 03/16/00 Improved speed
+// 04/04/00 Unwind support added
+// 05/30/00 Fixed bug when x double-extended denormal
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
//
// API
//==============================================================
-// float logbf( float x);
+// float logbf( float x );
//
// Overview of operation
//==============================================================
-// The logbf function extracts the exponent of x as an integer in
-// floating-point format.
+// The logbf function extracts the exponent of x as an integer in
+// floating-point format.
// logbf computes log2 of x as a float
-
-// logbf is similar to ilogbf but differs in the following ways:
+//
+// logbf is similar to ilogbf but differs in the following ways:
// +-inf
// ilogbf: returns INT_MAX
// logbf: returns +inf
@@ -71,243 +73,207 @@
//
// Registers used
//==============================================================
-// general registers used:
-// ar.pfs r32
-// r33 -> r37
-// r38 -> r41 used as parameters to error path
+// general registers used:
+// r26 -> r38
+// r35 -> r38 used as parameters to error path
//
-// predicate registers used:
+// predicate registers used:
// p6, p7, p8
-//
-// floating-point registers used:
+// floating-point registers used:
// f9, f10, f11
// f8, input
-#include "libm_support.h"
-
-GR_SAVE_B0 = r34
-// r40 is address of table of coefficients
-GR_SAVE_PFS = r32
-GR_SAVE_GP = r35
+rExpBias = r26
+rExpMask = r27
+rSignexp_x = r28
+rExp_x = r29
+rTrueExp_x = r30
+rExp_2to64 = r31
-GR_Parameter_X = r38
-GR_Parameter_Y = r39
-GR_Parameter_RESULT = r40
-GR_Parameter_TAG = r41
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
-FR_X = f8
-FR_Y = f0
-FR_RESULT = f10
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
-
-.align 32
-.global logbf#
+fExp_in_signif = f9
+fNorm_x = f10
+fFloat_Exp = f10
+f2to64 = f11
.section .text
-.proc logbf#
-.align 32
-
+GLOBAL_LIBM_ENTRY(logbf)
-logbf:
-
-// qnan snan inf norm unorm 0 -+
-// 0 0 0 0 1 0 11
-// 0 b
-{ .mfi
- alloc r32=ar.pfs,1,5,4,0
-(p0) fclass.m.unc p8,p0 = f8, 0x0b
- nop.i 999
-}
// X NORMAL
-// r37 = exp(f8) - - 0xffff
-// sig(f8) = r37
+// TrueExp_x = exp(f8) - 0xffff
+// sig = TrueExp_x
// f8 = convert_to_fp (sig))
{ .mfi
-(p0) getf.exp r35 = f8
-(p0) fnorm f10=f8
- nop.i 999 ;;
+ getf.exp rSignexp_x = f8
+ fclass.m p8,p0 = f8, 0x0b // Test for x unorm
+ mov rExpBias = 0xffff // Exponent bias
}
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 0 11
-// e 3
-{ .mmf
-(p0) mov r33 = 0xffff
-(p0) mov r34 = 0x1ffff
-(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNorm_x = f8
+ mov rExpMask = 0x1ffff // Exponent mask
}
+;;
+// Form signexp of 2^64 in case we need to scale a denormal
{ .mfb
-(p0) and r36 = r35, r34
-(p0) fclass.m.unc p7,p0 = f8, 0x07
-(p8) br.cond.spnt L(LOGB_DENORM) ;;
+ mov rExp_2to64 = 0x1003f
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p8) br.cond.spnt LOGB_DENORM // Branch if x unorm
}
+;;
-{ .mib
-(p0) sub r37 = r36, r33
- nop.i 999
-(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
+LOGB_COMMON:
+// Return here from LOGB_DENORM
+{ .mfi
+ and rExp_x = rSignexp_x, rExpMask // Get biased exponent
+ fclass.m p7,p0 = f8, 0x07 // Test x zero
+ nop.i 0
}
+;;
-{ .mib
-(p0) setf.sig f9 = r37
- nop.i 999
-(p7) br.cond.spnt L(LOGB_ZERO) ;;
+// X NAN or INFINITY, return f8 * f8
+{ .mfb
+ sub rTrueExp_x = rExp_x, rExpBias // Get true exponent
+(p6) fma.s.s0 f8= f8,f8,f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
-{ .mfi
- nop.m 999
-(p0) fcvt.xf f10 = f9
- nop.i 999 ;;
+{ .mib
+ setf.sig fExp_in_signif = rTrueExp_x // Exponent as integer in fp
+ nop.i 999
+(p7) br.cond.spnt LOGB_ZERO
}
+;;
+// Result can be represented in less than 24 bits, so no precision completer
+// is needed.
{ .mfb
- nop.m 999
-(p0) fnorm.s f8 = f10
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ fcvt.xf f8 = fExp_in_signif
+ br.ret.sptk b0 // Exit main path, 0 < |x| < inf
}
+;;
-L(LOGB_DENORM):
-// Form signexp of 2^64 in case need to scale denormal
+LOGB_DENORM:
+// Form 2^64 in case we need to scale a denormal
// Check to see if double-extended denormal
{ .mfi
-(p0) mov r38 = 0x1003f
-(p0) fclass.m.unc p8,p0 = f10, 0x0b
- nop.i 999 ;;
+ setf.exp f2to64 = rExp_2to64
+ fclass.m p8,p0 = fNorm_x, 0x0b
+ nop.i 0
}
+;;
-// Form 2^64 in case need to scale denormal
{ .mfi
-(p0) setf.exp f11 = r38
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
}
+;;
// If double-extended denormal add 64 to exponent bias for scaling
// If double-extended denormal form x * 2^64 which is normal
{ .mfi
-(p8) add r33 = 64, r33
-(p8) fmpy f10 = f10, f11
- nop.i 999 ;;
+(p8) add rExpBias = 64, rExpBias
+(p8) fmpy.s1 fNorm_x = fNorm_x, f2to64
+ nop.i 0
}
+;;
// Logic is the same as normal path but use normalized input
-{ .mmi
-(p0) getf.exp r35 = f10 ;;
- nop.m 999
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) and r36 = r35, r34 ;;
-(p0) sub r37 = r36, r33
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) setf.sig f9 = r37
- nop.m 999
- nop.i 999 ;;
+{ .mib
+ getf.exp rSignexp_x = fNorm_x
+ nop.i 0
+ br.cond.sptk LOGB_COMMON // Return to main path
}
+;;
-{ .mfi
- nop.m 999
-(p0) fcvt.xf f10 = f9
- nop.i 999 ;;
-}
+LOGB_ZERO:
+// Here if x zero
+// f10 = -|f8|
+// f9 = 1.0/f10 = -1.0/|f8| = -inf
-{ .mfb
- nop.m 999
-(p0) fnorm.s f8 = f10
-(p0) br.ret.sptk b0 ;;
+{ .mmf
+ alloc r32=ar.pfs,1,2,4,0
+ mov GR_Parameter_TAG = 152 // Error code
+ fmerge.ns f10 = f0,f8
}
+;;
-L(LOGB_NAN_INF):
-
-// X NAN or INFINITY, return f8 * f8
{ .mfb
- nop.m 999
-(p0) fma.s f8= f8,f8,f0
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ frcpa.s0 f9,p6 = f1,f10 // Produce -inf, Z flag
+ br.cond.sptk __libm_error_region // Call error support
}
+;;
-L(LOGB_ZERO):
-
-// X ZERO
-// return -1.0/fabs(f8)=-inf, set divide-by-zero flag, call error support
-{ .mfi
- nop.m 999
-(p0) fmerge.s f9 = f0,f8
- nop.i 999 ;;
-}
+GLOBAL_LIBM_END(logbf)
-{ .mfi
- nop.m 999
-(p0) fmerge.ns f10 = f0,f9
- nop.i 999 ;;
-}
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
{ .mfi
- nop.m 999
-(p0) frcpa f10,p6 = f1,f10
- nop.i 999 ;;
-}
-
-.endp logbf
-ASM_SIZE_DIRECTIVE(logbf)
-
-
-.proc __libm_error_region
-__libm_error_region:
-.prologue
-{ .mii
- add GR_Parameter_Y=-32,sp // Parameter 2 value
-(p0) mov GR_Parameter_TAG = 152
+ add GR_Parameter_Y=-32,sp // Parameter 2 address
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
+
{ .mmi
- stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfs [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
{ .mib
- stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfs [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ stfs [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
+
{ .mmi
ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
+ mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
-};;
+ br.ret.sptk b0
+};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/s_logbl.S b/sysdeps/ia64/fpu/s_logbl.S
index 38b131f3aa..6a08e94201 100644
--- a/sysdeps/ia64/fpu/s_logbl.S
+++ b/sysdeps/ia64/fpu/s_logbl.S
@@ -1,10 +1,10 @@
.file "logbl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,44 +20,46 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00 Initial version
-// 2/16/00 Modified to conform to C9X
-// 3/16/00 Improved speed
-// 4/04/00 Unwind support added
-// 5/30/00 Fixed bug when x double-extended denormal
-// 8/15/00 Bundle added after call to __libm_error_support to properly
+// 02/02/00 Initial version
+// 02/16/00 Modified to conform to C9X
+// 03/16/00 Improved speed
+// 04/04/00 Unwind support added
+// 05/30/00 Fixed bug when x double-extended denormal
+// 08/15/00 Bundle added after call to __libm_error_support to properly
// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
//
// API
//==============================================================
-// long double logbl( long double x);
+// long double logbl( long double x );
//
// Overview of operation
//==============================================================
-// The logbl function extracts the exponent of x as an integer in
-// floating-point format.
+// The logbl function extracts the exponent of x as an integer in
+// floating-point format.
// logbl computes floor(log2(|x|)) as a long double
//
-// logbl is similar to ilogbl but differs in the following ways:
+// logbl is similar to ilogbl but differs in the following ways:
// +-inf
// ilogbl: returns INT_MAX
// logbl: returns +inf
@@ -71,229 +73,208 @@
//
// Registers used
//==============================================================
-// general registers used:
-// ar.pfs r32
-// r33 -> r37
-// r38 -> r41 used as parameters to error path
+// general registers used:
+// r26 -> r38
+// r35 -> r38 used as parameters to error path
//
-// predicate registers used:
+// predicate registers used:
// p6, p7, p8
-//
-// floating-point registers used:
+// floating-point registers used:
// f9, f10, f11
// f8, input
-#include "libm_support.h"
+rExpBias = r26
+rExpMask = r27
+rSignexp_x = r28
+rExp_x = r29
+rTrueExp_x = r30
+rExp_2to64 = r31
GR_SAVE_PFS = r32
-GR_SAVE_B0 = r34
-GR_SAVE_GP = r35
-GR_Parameter_X = r38
-GR_Parameter_Y = r39
-GR_Parameter_RESULT = r40
-GR_Parameter_TAG = r41
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
-FR_X = f8
-FR_Y = f0
-FR_RESULT = f10
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
-.align 32
-.global logbl#
+fExp_in_signif = f9
+fNorm_x = f10
+fFloat_Exp = f10
+f2to64 = f11
.section .text
-.proc logbl#
-.align 32
-
+GLOBAL_LIBM_ENTRY(logbl)
-logbl:
-
-// qnan snan inf norm unorm 0 -+
-// 0 0 0 0 1 0 11
-// 0 b
-{ .mfi
- alloc r32=ar.pfs,1,5,4,0
-(p0) fclass.m.unc p8,p0 = f8, 0x0b
- nop.i 999
-}
// X NORMAL
-// r37 = exp(f8) - - 0xffff
-// sig(f8) = r37
+// TrueExp_x = exp(f8) - 0xffff
+// sig = TrueExp_x
// f8 = convert_to_fp (sig)
{ .mfi
-(p0) getf.exp r35 = f8
-(p0) fnorm f10=f8
- nop.i 999 ;;
+ getf.exp rSignexp_x = f8
+ fclass.m p8,p0 = f8, 0x0b // Test for x unorm
+ mov rExpBias = 0xffff // Exponent bias
}
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 0 11
-// e 3
-{ .mmf
-(p0) mov r33 = 0xffff
-(p0) mov r34 = 0x1ffff
-(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNorm_x = f8
+ mov rExpMask = 0x1ffff // Exponent mask
}
+;;
+// Form signexp of 2^64 in case need to scale denormal
{ .mfb
-(p0) and r36 = r35, r34
-(p0) fclass.m.unc p7,p0 = f8, 0x07
-(p8) br.cond.spnt L(LOGB_DENORM) ;;
+ mov rExp_2to64 = 0x1003f
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p8) br.cond.spnt LOGB_DENORM // Branch if x unorm
}
+;;
-{ .mib
-(p0) sub r37 = r36, r33
- nop.i 999
-(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
+LOGB_COMMON:
+// Return here from LOGB_DENORM
+{ .mfi
+ and rExp_x = rSignexp_x, rExpMask // Get biased exponent
+ fclass.m p7,p0 = f8, 0x07 // Test x zero
+ nop.i 0
}
+;;
+
+// X NAN or INFINITY, return f8 * f8
+{ .mfb
+ sub rTrueExp_x = rExp_x, rExpBias // Get true exponent
+(p6) fma.s0 f8= f8,f8,f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
+}
+;;
{ .mib
-(p0) setf.sig f9 = r37
+ setf.sig fExp_in_signif = rTrueExp_x // Exponent as integer in fp
nop.i 999
-(p7) br.cond.spnt L(LOGB_ZERO) ;;
-}
-{ .mfi
- nop.m 999
-(p0) fcvt.xf f10 = f9
- nop.i 999 ;;
+(p7) br.cond.spnt LOGB_ZERO
}
+;;
+// Result can be represented in less than 24 bits, so no precision completer
+// is needed.
{ .mfb
- nop.m 999
-(p0) fnorm f8 = f10
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ fcvt.xf f8 = fExp_in_signif
+ br.ret.sptk b0 // Exit main path, 0 < |x| < inf
}
+;;
-L(LOGB_DENORM):
-// Form signexp of 2^64 in case need to scale denormal
+LOGB_DENORM:
+// Form 2^64 in case need to scale denormal
// Check to see if double-extended denormal
{ .mfi
-(p0) mov r38 = 0x1003f
-(p0) fclass.m.unc p8,p0 = f10, 0x0b
- nop.i 999 ;;
+ setf.exp f2to64 = rExp_2to64
+ fclass.m p8,p0 = fNorm_x, 0x0b
+ nop.i 0
}
+;;
-// Form 2^64 in case need to scale denormal
{ .mfi
-(p0) setf.exp f11 = r38
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
}
+;;
// If double-extended denormal add 64 to exponent bias for scaling
// If double-extended denormal form x * 2^64 which is normal
{ .mfi
-(p8) add r33 = 64, r33
-(p8) fmpy f10 = f10, f11
- nop.i 999 ;;
+(p8) add rExpBias = 64, rExpBias
+(p8) fmpy.s1 fNorm_x = fNorm_x, f2to64
+ nop.i 0
}
+;;
// Logic is the same as normal path but use normalized input
-{ .mmi
-(p0) getf.exp r35 = f10 ;;
- nop.m 999
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) and r36 = r35, r34 ;;
-(p0) sub r37 = r36, r33
- nop.i 999 ;;
-}
-
-{ .mmi
-(p0) setf.sig f9 = r37
- nop.m 999
- nop.i 999 ;;
+{ .mib
+ getf.exp rSignexp_x = fNorm_x
+ nop.i 0
+ br.cond.sptk LOGB_COMMON // Return to main path
}
+;;
-{ .mfi
- nop.m 999
-(p0) fcvt.xf f10 = f9
- nop.i 999 ;;
-}
+LOGB_ZERO:
+// Here if x zero
+// f10 = -|f8|
+// f9 = 1.0/f10 = -1.0/|f8| = -inf
-{ .mfb
- nop.m 999
-(p0) fnorm f8 = f10
-(p0) br.ret.sptk b0 ;;
+{ .mmf
+ alloc r32=ar.pfs,1,2,4,0
+ mov GR_Parameter_TAG = 150 // Error code
+ fmerge.ns f10 = f0,f8
}
+;;
-L(LOGB_NAN_INF):
-
-// X NAN or INFINITY, return f8 * f8
{ .mfb
- nop.m 999
-(p0) fma f8= f8,f8,f0
-(p0) br.ret.sptk b0 ;;
+ nop.m 0
+ frcpa.s0 f9,p6 = f1,f10 // Produce -inf, Z flag
+ br.cond.sptk __libm_error_region // Call error support
}
+;;
-L(LOGB_ZERO):
-{.mfi
- nop.m 0
-(p0) frcpa.s0 f10,p6 = f1,f0
- nop.i 0
-};;
-{.mfi
- mov GR_Parameter_TAG = 150
-(p0) fms.s1 f10 = f0,f0,f10
- nop.i 0
-};;
-// X ZERO
-// return -1.0/fabs(f8)=-inf, set divide-by-zero flag, call error support
-.endp logbl
-ASM_SIZE_DIRECTIVE(logbl)
+GLOBAL_LIBM_END(logbl)
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
+
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
+ add GR_Parameter_Y=-32,sp // Parameter 2 address
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
+ add sp=-64,sp // Create new stack
nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ mov GR_SAVE_GP=gp // Save gp
};;
+
{ .mmi
- stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
+ stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
{ .mib
- stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ stfe [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
+ br.call.sptk b0=__libm_error_support# // Call error handling function
};;
+
{ .mmi
- nop.m 0
- nop.m 0
add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
};;
+
{ .mmi
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
+
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
+ br.ret.sptk b0
};;
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+LOCAL_LIBM_END(__libm_error_region)
+
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_modf.S b/sysdeps/ia64/fpu/s_modf.S
index e8e672adfe..2008bbfc5c 100644
--- a/sysdeps/ia64/fpu/s_modf.S
+++ b/sysdeps/ia64/fpu/s_modf.S
@@ -1,10 +1,10 @@
.file "modf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,14 +35,16 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 4/04/00: Improved speed, corrected result for NaN input
+// 02/02/00 Initial version
+// 04/04/00 Improved speed, corrected result for NaN input
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// qnans nor for inputs larger than 2^63.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -97,8 +99,6 @@
// p13 --------------------------------------------------->|
//
-#include "libm_support.h"
-
// floating-point registers used:
MODF_NORM_F8 = f9
MODF_FRACTION_PART = f10
@@ -115,23 +115,17 @@ modf_exp = r18
// r33 = iptr
-.align 32
-.global modf#
-
.section .text
-.proc modf#
-.align 32
-
+GLOBAL_LIBM_ENTRY(modf)
// Main path is p9, p11, p8 FALSE and p12 TRUE
// Assume input is normalized and get signexp
// Normalize input just in case
// Form exponent bias
-modf:
{ .mfi
getf.exp modf_signexp = f8
- fnorm MODF_NORM_F8 = f8
+ fnorm.s0 MODF_NORM_F8 = f8
addl modf_GR_FFFF = 0xffff, r0
}
// Get integer part of input
@@ -176,10 +170,10 @@ modf:
{ .mfb
(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
(p6) fclass.m.unc p6,p7 = f8, 0x23
-(p8) br.cond.spnt L(MODF_DENORM) ;;
+(p8) br.cond.spnt MODF_DENORM ;;
}
-L(MODF_COMMON):
+MODF_COMMON:
// For HUGE set fraction to signed 0
{ .mfi
nop.m 999
@@ -189,7 +183,7 @@ L(MODF_COMMON):
// For HUGE set integer part to normalized input
{ .mfi
nop.m 999
-(p9) fnorm.d MODF_INTEGER_PART = MODF_NORM_F8
+(p9) fnorm.d.s0 MODF_INTEGER_PART = MODF_NORM_F8
nop.i 999 ;;
}
@@ -201,7 +195,7 @@ L(MODF_COMMON):
}
{ .mfi
nop.m 999
-(p11) fnorm.d f8 = MODF_NORM_F8
+(p11) fnorm.d.s0 f8 = MODF_NORM_F8
nop.i 999 ;;
}
@@ -242,7 +236,7 @@ L(MODF_COMMON):
// For NORMAL test if fraction part is zero; if so append correct sign
{ .mfi
nop.m 999
-(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+(p12) fcmp.eq.unc.s0 p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
nop.i 999 ;;
}
@@ -259,7 +253,7 @@ L(MODF_COMMON):
br.ret.sptk b0 ;;
}
-L(MODF_DENORM):
+MODF_DENORM:
// If x unorm get signexp from normalized input
// If x unorm get integer part from normalized input
{ .mfi
@@ -278,8 +272,7 @@ L(MODF_DENORM):
{ .mfb
(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
nop.f 999
- br.cond.spnt L(MODF_COMMON) ;;
+ br.cond.spnt MODF_COMMON ;;
}
-.endp modf
-ASM_SIZE_DIRECTIVE(modf)
+GLOBAL_LIBM_END(modf)
diff --git a/sysdeps/ia64/fpu/s_modff.S b/sysdeps/ia64/fpu/s_modff.S
index 6aa43c884d..edc1120971 100644
--- a/sysdeps/ia64/fpu/s_modff.S
+++ b/sysdeps/ia64/fpu/s_modff.S
@@ -1,10 +1,10 @@
.file "modff.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,14 +35,16 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 4/04/00: Improved speed, corrected result for NaN input
+// 02/02/00 Initial version
+// 04/04/00 Improved speed, corrected result for NaN input
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// qnans nor for inputs larger than 2^63.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -97,8 +99,6 @@
// p13 --------------------------------------------------->|
//
-#include "libm_support.h"
-
// floating-point registers used:
MODF_NORM_F8 = f9
MODF_FRACTION_PART = f10
@@ -115,23 +115,17 @@ modf_exp = r18
// r33 = iptr
-.align 32
-.global modff#
-
.section .text
-.proc modff#
-.align 32
-
+GLOBAL_LIBM_ENTRY(modff)
// Main path is p9, p11, p8 FALSE and p12 TRUE
// Assume input is normalized and get signexp
// Normalize input just in case
// Form exponent bias
-modff:
{ .mfi
getf.exp modf_signexp = f8
- fnorm MODF_NORM_F8 = f8
+ fnorm.s0 MODF_NORM_F8 = f8
addl modf_GR_FFFF = 0xffff, r0
}
// Get integer part of input
@@ -176,10 +170,10 @@ modff:
{ .mfb
(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
(p6) fclass.m.unc p6,p7 = f8, 0x23
-(p8) br.cond.spnt L(MODF_DENORM) ;;
+(p8) br.cond.spnt MODF_DENORM ;;
}
-L(MODF_COMMON):
+MODF_COMMON:
// For HUGE set fraction to signed 0
{ .mfi
nop.m 999
@@ -189,7 +183,7 @@ L(MODF_COMMON):
// For HUGE set integer part to normalized input
{ .mfi
nop.m 999
-(p9) fnorm.s MODF_INTEGER_PART = MODF_NORM_F8
+(p9) fnorm.s.s0 MODF_INTEGER_PART = MODF_NORM_F8
nop.i 999 ;;
}
@@ -201,7 +195,7 @@ L(MODF_COMMON):
}
{ .mfi
nop.m 999
-(p11) fnorm.s f8 = MODF_NORM_F8
+(p11) fnorm.s.s0 f8 = MODF_NORM_F8
nop.i 999 ;;
}
@@ -242,7 +236,7 @@ L(MODF_COMMON):
// For NORMAL test if fraction part is zero; if so append correct sign
{ .mfi
nop.m 999
-(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+(p12) fcmp.eq.unc.s0 p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
nop.i 999 ;;
}
@@ -259,7 +253,7 @@ L(MODF_COMMON):
br.ret.sptk b0 ;;
}
-L(MODF_DENORM):
+MODF_DENORM:
// If x unorm get signexp from normalized input
// If x unorm get integer part from normalized input
{ .mfi
@@ -278,8 +272,7 @@ L(MODF_DENORM):
{ .mfb
(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
nop.f 999
- br.cond.spnt L(MODF_COMMON) ;;
+ br.cond.spnt MODF_COMMON ;;
}
-.endp modff
-ASM_SIZE_DIRECTIVE(modff)
+GLOBAL_LIBM_END(modff)
diff --git a/sysdeps/ia64/fpu/s_modfl.S b/sysdeps/ia64/fpu/s_modfl.S
index b5eb509adf..eaf410cb6c 100644
--- a/sysdeps/ia64/fpu/s_modfl.S
+++ b/sysdeps/ia64/fpu/s_modfl.S
@@ -1,10 +1,10 @@
.file "modfl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,15 +35,17 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 4/04/00: Improved speed, corrected result for NaN input
-// 5/30/00 Fixed bug for exponent 0x1003e
+// 02/02/00 Initial version
+// 04/04/00 Improved speed, corrected result for NaN input
+// 05/30/00 Fixed bug for exponent 0x1003e
// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
// qnans nor for inputs larger than 2^63.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -92,8 +94,6 @@
// p13 --------------------------------------------------->|
//
-#include "libm_support.h"
-
// floating-point registers used:
MODF_NORM_F8 = f9
MODF_FRACTION_PART = f10
@@ -110,23 +110,17 @@ modf_exp = r18
// r34 = iptr
-.align 32
-.global modfl#
-
.section .text
-.proc modfl#
-.align 32
-
+GLOBAL_LIBM_ENTRY(modfl)
// Main path is p9, p11, p8 FALSE and p12 TRUE
// Assume input is normalized and get signexp
// Normalize input just in case
// Form exponent bias
-modfl:
{ .mfi
getf.exp modf_signexp = f8
- fnorm MODF_NORM_F8 = f8
+ fnorm.s0 MODF_NORM_F8 = f8
addl modf_GR_FFFF = 0xffff, r0
}
// Get integer part of input
@@ -171,10 +165,10 @@ modfl:
{ .mfb
(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
(p6) fclass.m.unc p6,p7 = f8, 0x23
-(p8) br.cond.spnt L(MODF_DENORM) ;;
+(p8) br.cond.spnt MODF_DENORM ;;
}
-L(MODF_COMMON):
+MODF_COMMON:
// For HUGE set fraction to signed 0
{ .mfi
nop.m 999
@@ -184,7 +178,7 @@ L(MODF_COMMON):
// For HUGE set integer part to normalized input
{ .mfi
nop.m 999
-(p9) fnorm MODF_INTEGER_PART = MODF_NORM_F8
+(p9) fnorm.s0 MODF_INTEGER_PART = MODF_NORM_F8
nop.i 999 ;;
}
@@ -196,7 +190,7 @@ L(MODF_COMMON):
}
{ .mfi
nop.m 999
-(p11) fnorm f8 = MODF_NORM_F8
+(p11) fnorm.s0 f8 = MODF_NORM_F8
nop.i 999 ;;
}
@@ -237,7 +231,7 @@ L(MODF_COMMON):
// For NORMAL test if fraction part is zero; if so append correct sign
{ .mfi
nop.m 999
-(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+(p12) fcmp.eq.unc.s0 p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
nop.i 999 ;;
}
@@ -254,7 +248,7 @@ L(MODF_COMMON):
br.ret.sptk b0 ;;
}
-L(MODF_DENORM):
+MODF_DENORM:
// If x unorm get signexp from normalized input
// If x unorm get integer part from normalized input
{ .mfi
@@ -273,8 +267,7 @@ L(MODF_DENORM):
{ .mfb
(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
nop.f 999
- br.cond.spnt L(MODF_COMMON) ;;
+ br.cond.spnt MODF_COMMON ;;
}
-.endp modfl
-ASM_SIZE_DIRECTIVE(modfl)
+GLOBAL_LIBM_END(modfl)
diff --git a/sysdeps/ia64/fpu/s_nearbyint.S b/sysdeps/ia64/fpu/s_nearbyint.S
index 6ee01ea260..cba74e61d3 100644
--- a/sysdeps/ia64/fpu/s_nearbyint.S
+++ b/sysdeps/ia64/fpu/s_nearbyint.S
@@ -1,11 +1,10 @@
.file "nearbyint.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,20 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 10/19/2000: Created
-// 2/08/01 Corrected behavior for all rounding modes.
+// 10/19/00 Created
+// 02/08/01 Corrected behavior for all rounding modes.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//==============================================================
//
// API
//==============================================================
// double nearbyint(double x)
-
-#include "libm_support.h"
-
//
// general registers used:
//
@@ -110,15 +108,8 @@ NEARBYINT_INT_f8 = f11
// 1 1 1 0 0 1 11 0xe7
-.align 32
-.global nearbyint#
-
.section .text
-.proc nearbyint#
-.align 32
-
-
-nearbyint:
+GLOBAL_LIBM_ENTRY(nearbyint)
{ .mfi
mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
@@ -141,7 +132,7 @@ nearbyint:
{ .mfb
nop.m 999
-(p6) fnorm.d f8 = f8
+(p6) fnorm.d.s0 f8 = f8
(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
;;
}
@@ -177,11 +168,11 @@ nearbyint:
// Check to see if s0 rounding mode is round to nearest. If not then set s2
// rounding mode to that of s0 and repeat conversions.
-L(NEARBYINT_COMMON):
+NEARBYINT_COMMON:
{ .mfb
cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
(p6) fclass.m.unc p9,p10 = NEARBYINT_FLOAT_INT_f8, 0x07 // Test for result=0
-(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest
;;
}
@@ -200,13 +191,13 @@ L(NEARBYINT_COMMON):
}
{ .mfb
nop.m 999
-(p10) fnorm.d f8 = NEARBYINT_FLOAT_INT_f8
+(p10) fnorm.d.s0 f8 = NEARBYINT_FLOAT_INT_f8
br.ret.sptk b0
;;
}
-L(NEARBYINT_NOT_ROUND_NEAREST):
+NEARBYINT_NOT_ROUND_NEAREST:
// Set rounding mode of s2 to that of s0
{ .mfi
mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
@@ -225,10 +216,9 @@ L(NEARBYINT_NOT_ROUND_NEAREST):
{ .mfb
nop.m 999
fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- br.cond.sptk L(NEARBYINT_COMMON)
+ br.cond.sptk NEARBYINT_COMMON
;;
}
-.endp nearbyint
-ASM_SIZE_DIRECTIVE(nearbyint)
+GLOBAL_LIBM_END(nearbyint)
diff --git a/sysdeps/ia64/fpu/s_nearbyintf.S b/sysdeps/ia64/fpu/s_nearbyintf.S
index 7050ddc52c..6471232513 100644
--- a/sysdeps/ia64/fpu/s_nearbyintf.S
+++ b/sysdeps/ia64/fpu/s_nearbyintf.S
@@ -1,11 +1,10 @@
.file "nearbyintf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,20 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 10/19/2000: Created
-// 2/08/01 Corrected behavior for all rounding modes.
+// 10/19/00 Created
+// 02/08/01 Corrected behavior for all rounding modes.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//==============================================================
//
// API
//==============================================================
// float nearbyintf(float x)
-
-#include "libm_support.h"
-
//
// general registers used:
//
@@ -110,15 +108,8 @@ NEARBYINT_INT_f8 = f11
// 1 1 1 0 0 1 11 0xe7
-.align 32
-.global nearbyintf#
-
.section .text
-.proc nearbyintf#
-.align 32
-
-
-nearbyintf:
+GLOBAL_LIBM_ENTRY(nearbyintf)
{ .mfi
mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
@@ -141,7 +132,7 @@ nearbyintf:
{ .mfb
nop.m 999
-(p6) fnorm.s f8 = f8
+(p6) fnorm.s.s0 f8 = f8
(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
;;
}
@@ -177,11 +168,11 @@ nearbyintf:
// Check to see if s0 rounding mode is round to nearest. If not then set s2
// rounding mode to that of s0 and repeat conversions.
-L(NEARBYINT_COMMON):
+NEARBYINT_COMMON:
{ .mfb
cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
(p6) fclass.m.unc p9,p10 = NEARBYINT_FLOAT_INT_f8, 0x07 // Test for result=0
-(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest
;;
}
@@ -200,13 +191,13 @@ L(NEARBYINT_COMMON):
}
{ .mfb
nop.m 999
-(p10) fnorm.s f8 = NEARBYINT_FLOAT_INT_f8
+(p10) fnorm.s.s0 f8 = NEARBYINT_FLOAT_INT_f8
br.ret.sptk b0
;;
}
-L(NEARBYINT_NOT_ROUND_NEAREST):
+NEARBYINT_NOT_ROUND_NEAREST:
// Set rounding mode of s2 to that of s0
{ .mfi
mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
@@ -225,10 +216,9 @@ L(NEARBYINT_NOT_ROUND_NEAREST):
{ .mfb
nop.m 999
fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- br.cond.sptk L(NEARBYINT_COMMON)
+ br.cond.sptk NEARBYINT_COMMON
;;
}
-.endp nearbyintf
-ASM_SIZE_DIRECTIVE(nearbyintf)
+GLOBAL_LIBM_END(nearbyintf)
diff --git a/sysdeps/ia64/fpu/s_nearbyintl.S b/sysdeps/ia64/fpu/s_nearbyintl.S
index 95ba6ab260..9c4c2e4f16 100644
--- a/sysdeps/ia64/fpu/s_nearbyintl.S
+++ b/sysdeps/ia64/fpu/s_nearbyintl.S
@@ -1,11 +1,10 @@
.file "nearbyintl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -36,20 +35,19 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 10/19/2000: Created
-// 2/08/01 Corrected behavior for all rounding modes.
+// 10/19/00 Created
+// 02/08/01 Corrected behavior for all rounding modes.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//==============================================================
//
// API
//==============================================================
// long double nearbyintl(long double x)
-
-#include "libm_support.h"
-
//
// general registers used:
//
@@ -111,15 +109,8 @@ NEARBYINT_SIGNED_FLOAT_INT_f8 = f12
// 1 1 1 0 0 1 11 0xe7
-.align 32
-.global nearbyintl#
-
.section .text
-.proc nearbyintl#
-.align 32
-
-
-nearbyintl:
+GLOBAL_LIBM_ENTRY(nearbyintl)
{ .mfi
mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
@@ -142,7 +133,7 @@ nearbyintl:
{ .mfb
nop.m 999
-(p6) fnorm f8 = f8
+(p6) fnorm.s0 f8 = f8
(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
;;
}
@@ -180,11 +171,11 @@ nearbyintl:
// rounding mode to that of s0 and repeat conversions.
// Must merge the original sign for cases where the result is zero or the input
// is the largest that still has a fraction (0x1007dfffffffffff)
-L(NEARBYINT_COMMON):
+NEARBYINT_COMMON:
{ .mfb
cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
(p6) fmerge.s NEARBYINT_SIGNED_FLOAT_INT_f8 = f8, NEARBYINT_FLOAT_INT_f8
-(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest
;;
}
@@ -197,13 +188,13 @@ L(NEARBYINT_COMMON):
{ .mfb
nop.m 999
-(p6) fnorm f8 = NEARBYINT_SIGNED_FLOAT_INT_f8
+(p6) fnorm.s0 f8 = NEARBYINT_SIGNED_FLOAT_INT_f8
br.ret.sptk b0
;;
}
-L(NEARBYINT_NOT_ROUND_NEAREST):
+NEARBYINT_NOT_ROUND_NEAREST:
// Set rounding mode of s2 to that of s0
{ .mfi
mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
@@ -222,10 +213,9 @@ L(NEARBYINT_NOT_ROUND_NEAREST):
{ .mfb
nop.m 999
fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- br.cond.sptk L(NEARBYINT_COMMON)
+ br.cond.sptk NEARBYINT_COMMON
;;
}
-.endp nearbyintl
-ASM_SIZE_DIRECTIVE(nearbyintl)
+GLOBAL_LIBM_END(nearbyintl)
diff --git a/sysdeps/ia64/fpu/s_nextafterl.c b/sysdeps/ia64/fpu/s_nextafterl.c
deleted file mode 100644
index f59f16848f..0000000000
--- a/sysdeps/ia64/fpu/s_nextafterl.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/i386/fpu/s_nextafterl.c>
diff --git a/sysdeps/ia64/fpu/s_nexttoward.c b/sysdeps/ia64/fpu/s_nexttoward.c
deleted file mode 100644
index aee2bb5895..0000000000
--- a/sysdeps/ia64/fpu/s_nexttoward.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/i386/fpu/s_nexttoward.c>
diff --git a/sysdeps/ia64/fpu/s_nexttowardf.c b/sysdeps/ia64/fpu/s_nexttowardf.c
deleted file mode 100644
index 55e95f6916..0000000000
--- a/sysdeps/ia64/fpu/s_nexttowardf.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/i386/fpu/s_nexttowardf.c>
diff --git a/sysdeps/ia64/fpu/s_rint.S b/sysdeps/ia64/fpu/s_rint.S
index d04f06a31f..1735d9b498 100644
--- a/sysdeps/ia64/fpu/s_rint.S
+++ b/sysdeps/ia64/fpu/s_rint.S
@@ -1,10 +1,10 @@
.file "rint.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,74 +20,68 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 2/08/01 Corrected behavior for all rounding modes.
-//
+// 02/02/00 Initial version
+// 02/08/01 Corrected behavior for all rounding modes.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
+//==============================================================
+
// API
//==============================================================
// double rint(double x)
+//==============================================================
-#include "libm_support.h"
-
-//
-// general registers used:
-//
-rint_GR_FFFF = r14
-rint_GR_signexp = r15
-rint_GR_exponent = r16
-rint_GR_17ones = r17
-rint_GR_10033 = r18
-rint_GR_fpsr = r19
-rint_GR_rcs0 = r20
-rint_GR_rcs0_mask = r21
+// general input registers:
+// r14 - r21
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+rFpsr = r19
+rRcs0 = r20
+rRcs0Mask = r21
-// predicate registers used:
-// p6-11
+// floating-point registers:
+// f8 - f11
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
-RINT_NORM_f8 = f9
-RINT_FFFF = f10
-RINT_INEXACT = f11
-RINT_FLOAT_INT_f8 = f12
-RINT_INT_f8 = f13
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// double rint(double x)
-// Return an integer value (represented as a double) that is x rounded to integer in current
-// rounding mode
+// Return an integer value (represented as a double) that is x
+// rounded to integer in current rounding mode
// Inexact is set if x != rint(x)
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+//==============================================================
// double_extended
-// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
@@ -100,155 +94,136 @@ RINT_INT_f8 = f13
// So input is an integer value already.
// single
-// if the exponent is >= 10016 => 17(true) = 23(decimal)
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-
-.align 32
-.global rint#
-
.section .text
-.proc rint#
-.align 32
-
-
-rint:
-#ifdef _LIBC
-.global __rint
-.type __rint,@function
-__rint:
-#endif
+GLOBAL_IEEE754_ENTRY(rint)
{ .mfi
- mov rint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
- fcvt.fx.s1 RINT_INT_f8 = f8
- addl rint_GR_10033 = 0x10033, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x10033, r0 // Set exponent at which is integer
}
{ .mfi
- mov rint_GR_FFFF = -1
- fnorm.s1 RINT_NORM_f8 = f8
- mov rint_GR_17ones = 0x1FFFF
-;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
{ .mfi
- setf.sig RINT_FFFF = rint_GR_FFFF
- fclass.m.unc p6,p0 = f8, 0xe7
- mov rint_GR_rcs0_mask = 0x0c00
-;;
+ mov rFpsr = ar40 // Read fpsr -- check rc.s0
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p6) fnorm.d f8 = f8
-(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
-;;
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt RINT_UNORM // Branch if x unorm
}
-
-{ .mfi
- nop.m 999
- fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8
- nop.i 999
;;
+
+
+RINT_COMMON:
+// Return here from RINT_UNORM
+{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
{ .mfi
- getf.exp rint_GR_signexp = RINT_NORM_f8
- fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
- nop.i 999
-;;
+ mov rRcs0Mask = 0x0c00 // Mask for rc.s0
+ fcvt.xf f8 = fXInt // Result assume |x| < 2^52
+ cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^52?
}
-
-
-{ .mii
- nop.m 999
- nop.i 999
- and rint_GR_exponent = rint_GR_signexp, rint_GR_17ones
;;
-}
-{ .mmi
- cmp.ge.unc p7,p6 = rint_GR_exponent, rint_GR_10033
- and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr
- nop.i 999
-;;
+// We must correct result if |x| >= 2^52
+{ .mfi
+ nop.m 0
+(p7) fma.d.s0 f8 = fNormX, f1, f0 // If |x| >= 2^52, result x
+ nop.i 0
}
-
-// Check to see if s0 rounding mode is round to nearest. If not then set s2
-// rounding mode to that of s0 and repeat conversions.
-L(RINT_COMMON):
-{ .mfb
- cmp.ne p11,p0 = rint_GR_rcs0, r0
-(p6) fclass.m.unc p9,p10 = RINT_FLOAT_INT_f8, 0x07 // Test for result=0
-(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
;;
-}
{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc.s1 p0,p8 = RINT_FLOAT_INT_f8, RINT_NORM_f8
- nop.i 999
+ nop.m 0
+ fcmp.eq.unc.s1 p0, p9 = f8, fNormX // Is result = x ?
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p7) fnorm.d.s0 f8 = f8
- nop.i 999
-;;
+ nop.m 0
+(p8) fmerge.s f8 = fNormX, f8 // Make sure sign rint(x) = sign x
+ nop.i 0
}
+;;
-// If result is zero, merge sign of input
{ .mfi
- nop.m 999
-(p9) fmerge.s f8 = f8, RINT_FLOAT_INT_f8
- nop.i 999
+(p8) and rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0
+ nop.f 0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p10) fnorm.d f8 = RINT_FLOAT_INT_f8
- nop.i 999
;;
+
+// If |x| < 2^52 we must test for other rounding modes
+{ .mfi
+(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // Test for other rounding modes
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
+}
+{ .mbb
+ nop.m 0
+(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest
+ br.ret.sptk b0 // Exit main path if round nearest
}
+;;
+
+
+RINT_UNORM:
+// Here if x unorm
{ .mfb
- nop.m 999
-(p8) fmpy.s0 RINT_INEXACT = RINT_FFFF,RINT_FFFF // Dummy to set inexact
- br.ret.sptk b0
-;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk RINT_COMMON // Return to main path
}
+;;
-L(RINT_NOT_ROUND_NEAREST):
-// Set rounding mode of s2 to that of s0
+RINT_NOT_ROUND_NEAREST:
+// Here if not round to nearest, and |x| < 2^52
+// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
{ .mfi
- mov rint_GR_rcs0 = r0 // Clear so we don't come back here
- fsetc.s2 0x7f, 0x40
- nop.i 999
-;;
+ nop.m 0
+ fsetc.s2 0x7f, 0x40
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcvt.fx.s2 RINT_INT_f8 = f8
- nop.i 999
+ nop.m 0
+ fcvt.fx.s2 fXInt = fNormX // Convert to int in significand
+ nop.i 0
+}
;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf f8 = fXInt // Expected result
+ nop.i 0
}
+;;
+// Be sure sign of result = sign of input. Fixes cases where result is 0.
{ .mfb
- nop.m 999
- fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8
- br.cond.sptk L(RINT_COMMON)
-;;
+ nop.m 0
+ fmerge.s f8 = fNormX, f8
+ br.ret.sptk b0 // Exit main path
}
+;;
-
-.endp rint
-ASM_SIZE_DIRECTIVE(rint)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__rint)
-#endif
+GLOBAL_IEEE754_END(rint)
diff --git a/sysdeps/ia64/fpu/s_rintf.S b/sysdeps/ia64/fpu/s_rintf.S
index 73cb98a048..05d6b411f2 100644
--- a/sysdeps/ia64/fpu/s_rintf.S
+++ b/sysdeps/ia64/fpu/s_rintf.S
@@ -1,10 +1,10 @@
.file "rintf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,74 +20,68 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 2/08/01 Corrected behavior for all rounding modes.
-//
+// 02/02/00 Initial version
+// 02/08/01 Corrected behavior for all rounding modes.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
+//==============================================================
+
// API
//==============================================================
// float rintf(float x)
+//==============================================================
-#include "libm_support.h"
-
-//
-// general registers used:
-//
-rint_GR_FFFF = r14
-rint_GR_signexp = r15
-rint_GR_exponent = r16
-rint_GR_17ones = r17
-rint_GR_10033 = r18
-rint_GR_fpsr = r19
-rint_GR_rcs0 = r20
-rint_GR_rcs0_mask = r21
+// general input registers:
+// r14 - r21
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+rFpsr = r19
+rRcs0 = r20
+rRcs0Mask = r21
-// predicate registers used:
-// p6-11
+// floating-point registers:
+// f8 - f11
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
-RINT_NORM_f8 = f9
-RINT_FFFF = f10
-RINT_INEXACT = f11
-RINT_FLOAT_INT_f8 = f12
-RINT_INT_f8 = f13
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// float rintf(float x)
-// Return an integer value (represented as a float) that is x rounded to integer in current
-// rounding mode
-// Inexact is set if x != rintf(x)
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+// Return an integer value (represented as a float) that is x
+// rounded to integer in current rounding mode
+// Inexact is set if x != rintf(x)
+//==============================================================
// double_extended
-// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
@@ -100,155 +94,136 @@ RINT_INT_f8 = f13
// So input is an integer value already.
// single
-// if the exponent is >= 10016 => 17(true) = 23(decimal)
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-
-.align 32
-.global rintf#
-
.section .text
-.proc rintf#
-.align 32
-
-
-rintf:
-#ifdef _LIBC
-.global __rintf
-.type __rintf,@function
-__rintf:
-#endif
+GLOBAL_IEEE754_ENTRY(rintf)
{ .mfi
- mov rint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
- fcvt.fx.s1 RINT_INT_f8 = f8
- addl rint_GR_10033 = 0x10016, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x10016, r0 // Set exponent at which is integer
}
{ .mfi
- mov rint_GR_FFFF = -1
- fnorm.s1 RINT_NORM_f8 = f8
- mov rint_GR_17ones = 0x1FFFF
-;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
{ .mfi
- setf.sig RINT_FFFF = rint_GR_FFFF
- fclass.m.unc p6,p0 = f8, 0xe7
- mov rint_GR_rcs0_mask = 0x0c00
-;;
+ mov rFpsr = ar40 // Read fpsr -- check rc.s0
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p6) fnorm.s f8 = f8
-(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
-;;
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt RINT_UNORM // Branch if x unorm
}
-
-{ .mfi
- nop.m 999
- fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8
- nop.i 999
;;
+
+
+RINT_COMMON:
+// Return here from RINT_UNORM
+{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
{ .mfi
- getf.exp rint_GR_signexp = RINT_NORM_f8
- fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
- nop.i 999
-;;
+ mov rRcs0Mask = 0x0c00 // Mask for rc.s0
+ fcvt.xf f8 = fXInt // Result assume |x| < 2^23
+ cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^23?
}
-
-
-{ .mii
- nop.m 999
- nop.i 999
- and rint_GR_exponent = rint_GR_signexp, rint_GR_17ones
;;
-}
-{ .mmi
- cmp.ge.unc p7,p6 = rint_GR_exponent, rint_GR_10033
- and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr
- nop.i 999
-;;
+// We must correct result if |x| >= 2^23
+{ .mfi
+ nop.m 0
+(p7) fma.s.s0 f8 = fNormX, f1, f0 // If |x| >= 2^23, result x
+ nop.i 0
}
-
-// Check to see if s0 rounding mode is round to nearest. If not then set s2
-// rounding mode to that of s0 and repeat conversions.
-L(RINT_COMMON):
-{ .mfb
- cmp.ne p11,p0 = rint_GR_rcs0, r0
-(p6) fclass.m.unc p9,p10 = RINT_FLOAT_INT_f8, 0x07 // Test for result=0
-(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
;;
-}
{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc.s1 p0,p8 = RINT_FLOAT_INT_f8, RINT_NORM_f8
- nop.i 999
+ nop.m 0
+ fcmp.eq.unc.s1 p0, p9 = f8, fNormX // Is result = x ?
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p7) fnorm.s.s0 f8 = f8
- nop.i 999
-;;
+ nop.m 0
+(p8) fmerge.s f8 = fNormX, f8 // Make sure sign rint(x) = sign x
+ nop.i 0
}
+;;
-// If result is zero, merge sign of input
{ .mfi
- nop.m 999
-(p9) fmerge.s f8 = f8, RINT_FLOAT_INT_f8
- nop.i 999
+(p8) and rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0
+ nop.f 0
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p10) fnorm.s f8 = RINT_FLOAT_INT_f8
- nop.i 999
;;
+
+// If |x| < 2^23 we must test for other rounding modes
+{ .mfi
+(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // Test for other rounding modes
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
+}
+{ .mbb
+ nop.m 0
+(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest
+ br.ret.sptk b0 // Exit main path if round nearest
}
+;;
+
+
+RINT_UNORM:
+// Here if x unorm
{ .mfb
- nop.m 999
-(p8) fmpy.s0 RINT_INEXACT = RINT_FFFF,RINT_FFFF // Dummy to set inexact
- br.ret.sptk b0
-;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk RINT_COMMON // Return to main path
}
+;;
-L(RINT_NOT_ROUND_NEAREST):
-// Set rounding mode of s2 to that of s0
+RINT_NOT_ROUND_NEAREST:
+// Here if not round to nearest, and |x| < 2^23
+// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
{ .mfi
- mov rint_GR_rcs0 = r0 // Clear so we don't come back here
- fsetc.s2 0x7f, 0x40
- nop.i 999
-;;
+ nop.m 0
+ fsetc.s2 0x7f, 0x40
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcvt.fx.s2 RINT_INT_f8 = f8
- nop.i 999
+ nop.m 0
+ fcvt.fx.s2 fXInt = fNormX // Convert to int in significand
+ nop.i 0
+}
;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf f8 = fXInt // Expected result
+ nop.i 0
}
+;;
+// Be sure sign of result = sign of input. Fixes cases where result is 0.
{ .mfb
- nop.m 999
- fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8
- br.cond.sptk L(RINT_COMMON)
-;;
+ nop.m 0
+ fmerge.s f8 = fNormX, f8
+ br.ret.sptk b0 // Exit main path
}
+;;
-
-.endp rintf
-ASM_SIZE_DIRECTIVE(rintf)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__rintf)
-#endif
+GLOBAL_IEEE754_END(rintf)
diff --git a/sysdeps/ia64/fpu/s_rintl.S b/sysdeps/ia64/fpu/s_rintl.S
index 857e8d5208..b5402149ec 100644
--- a/sysdeps/ia64/fpu/s_rintl.S
+++ b/sysdeps/ia64/fpu/s_rintl.S
@@ -1,10 +1,10 @@
.file "rintl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,76 +20,68 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 5/24/00 Fixed case of 2^63 - 1 + 0.5 (0x1007dffffffffffffffff)
-// 2/08/01 Corrected behavior for all rounding modes.
-//
+// 02/02/00 Initial version
+// 02/08/01 Corrected behavior for all rounding modes.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
+//==============================================================
+
// API
//==============================================================
// long double rintl(long double x)
+//==============================================================
-#include "libm_support.h"
-
-//
-// general registers used:
-//
-rint_GR_FFFF = r14
-rint_GR_signexp = r15
-rint_GR_exponent = r16
-rint_GR_17ones = r17
-rint_GR_10033 = r18
-rint_GR_fpsr = r19
-rint_GR_rcs0 = r20
-rint_GR_rcs0_mask = r21
+// general input registers:
+// r14 - r21
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rM1 = r18
+rFpsr = r19
+rRcs0 = r20
+rRcs0Mask = r21
-// predicate registers used:
-// p6-11
+// floating-point registers:
+// f8 - f11
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
+fTmp = f11
-RINT_NORM_f8 = f9
-RINT_FFFF = f10
-RINT_INEXACT = f11
-RINT_FLOAT_INT_f8 = f12
-RINT_INT_f8 = f13
-RINT_SIGNED_FLOAT_INT_f8 = f14
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// long double rintl(long double x)
-// Return an integer value (represented as a long double) that is x rounded to integer in current
-// rounding mode
-// Inexact is set if x != rintl(x)
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+// Return an integer value (represented as a long double) that is x
+// rounded to integer in current rounding mode
+// Inexact is set if x != rintl(x)
+//==============================================================
// double_extended
-// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
@@ -102,151 +94,136 @@ RINT_SIGNED_FLOAT_INT_f8 = f14
// So input is an integer value already.
// single
-// if the exponent is >= 10016 => 17(true) = 23(decimal)
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-
-.align 32
-.global rintl#
-
.section .text
-.proc rintl#
-.align 32
-
-
-rintl:
-#ifdef _LIBC
-.global __rintl
-.type __rintl,@function
-__rintl:
-#endif
+GLOBAL_IEEE754_ENTRY(rintl)
{ .mfi
- mov rint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
- fcvt.fx.s1 RINT_INT_f8 = f8
- addl rint_GR_10033 = 0x1003e, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x1003e, r0 // Set exponent at which is integer
}
{ .mfi
- mov rint_GR_FFFF = -1
- fnorm.s1 RINT_NORM_f8 = f8
- mov rint_GR_17ones = 0x1FFFF
-;;
+ mov rM1 = -1 // Set all ones
+ fcvt.fx.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
{ .mfi
- setf.sig RINT_FFFF = rint_GR_FFFF
- fclass.m.unc p6,p0 = f8, 0xe7
- mov rint_GR_rcs0_mask = 0x0c00
-;;
+ mov rFpsr = ar40 // Read fpsr -- check rc.s0
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p6) fnorm f8 = f8
-(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
-;;
+ setf.sig fTmp = rM1 // Make const for setting inexact
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt RINT_UNORM // Branch if x unorm
}
-
-{ .mfi
- nop.m 999
- fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8
- nop.i 999
;;
+
+
+RINT_COMMON:
+// Return here from RINT_UNORM
+{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
{ .mfi
- getf.exp rint_GR_signexp = RINT_NORM_f8
- fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
- nop.i 999
-;;
+ mov rRcs0Mask = 0x0c00 // Mask for rc.s0
+ fcvt.xf f8 = fXInt // Result assume |x| < 2^63
+ cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^63?
}
-
-
-{ .mii
- nop.m 999
- nop.i 999
- and rint_GR_exponent = rint_GR_signexp, rint_GR_17ones
;;
-}
-{ .mmi
- cmp.ge.unc p7,p6 = rint_GR_exponent, rint_GR_10033
- and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr
- nop.i 999
-;;
+// We must correct result if |x| >= 2^63
+{ .mfi
+ nop.m 0
+(p7) fma.s0 f8 = fNormX, f1, f0 // If |x| >= 2^63, result x
+ nop.i 0
}
-
-// Check to see if s0 rounding mode is round to nearest. If not then set s2
-// rounding mode to that of s0 and repeat conversions.
-// Must merge the original sign for cases where the result is zero or the input
-// is the largest that still has a fraction (0x1007dfffffffffff)
-L(RINT_COMMON):
-{ .mfb
- cmp.ne p11,p0 = rint_GR_rcs0, r0
-(p6) fmerge.s RINT_SIGNED_FLOAT_INT_f8 = f8, RINT_FLOAT_INT_f8
-(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
;;
-}
{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc.s1 p0,p8 = RINT_FLOAT_INT_f8, RINT_NORM_f8
- nop.i 999
+ nop.m 0
+ fcmp.eq.unc.s1 p0, p9 = f8, fNormX // Is result = x ?
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p7) fnorm.s0 f8 = f8
- nop.i 999
-;;
+ nop.m 0
+(p8) fmerge.s f8 = fNormX, f8 // Make sure sign rint(x) = sign x
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
-(p6) fnorm f8 = RINT_SIGNED_FLOAT_INT_f8
- nop.i 999
+(p8) and rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0
+ nop.f 0
+ nop.i 0
+}
;;
+
+// If |x| < 2^63 we must test for other rounding modes
+{ .mfi
+(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // Test for other rounding modes
+(p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact
+ nop.i 0
+}
+{ .mbb
+ nop.m 0
+(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest
+ br.ret.sptk b0 // Exit main path if round nearest
}
+;;
+
+
+RINT_UNORM:
+// Here if x unorm
{ .mfb
- nop.m 999
-(p8) fmpy.s0 RINT_INEXACT = RINT_FFFF,RINT_FFFF // Dummy to set inexact
- br.ret.sptk b0
-;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk RINT_COMMON // Return to main path
}
+;;
-L(RINT_NOT_ROUND_NEAREST):
-// Set rounding mode of s2 to that of s0
+RINT_NOT_ROUND_NEAREST:
+// Here if not round to nearest, and |x| < 2^63
+// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
{ .mfi
- mov rint_GR_rcs0 = r0 // Clear so we don't come back here
- fsetc.s2 0x7f, 0x40
- nop.i 999
-;;
+ nop.m 0
+ fsetc.s2 0x7f, 0x40
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcvt.fx.s2 RINT_INT_f8 = f8
- nop.i 999
+ nop.m 0
+ fcvt.fx.s2 fXInt = fNormX // Convert to int in significand
+ nop.i 0
+}
;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf f8 = fXInt // Expected result
+ nop.i 0
}
+;;
+// Be sure sign of result = sign of input. Fixes cases where result is 0.
{ .mfb
- nop.m 999
- fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8
- br.cond.sptk L(RINT_COMMON)
-;;
+ nop.m 0
+ fmerge.s f8 = fNormX, f8
+ br.ret.sptk b0 // Exit main path
}
+;;
-
-.endp rintl
-ASM_SIZE_DIRECTIVE(rintl)
-#ifdef _LIBC
-ASM_SIZE_DIRECTIVE(__rintl)
-#endif
+GLOBAL_IEEE754_END(rintl)
diff --git a/sysdeps/ia64/fpu/s_round.S b/sysdeps/ia64/fpu/s_round.S
index b08ede1740..04033b4aa2 100644
--- a/sysdeps/ia64/fpu/s_round.S
+++ b/sysdeps/ia64/fpu/s_round.S
@@ -1,11 +1,10 @@
.file "round.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,229 +20,202 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 10/25/2000: Created
+// 10/25/00 Initial version
+// 06/14/01 Changed cmp to an equivalent form
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance and reduced code size
+// 04/18/03 Eliminate possible WAW dependency warning
//==============================================================
-//
+
// API
//==============================================================
// double round(double x)
-//
+//==============================================================
-#include "libm_support.h"
+// general input registers:
+// r14 - r19
-// general input registers:
-//
-round_GR_half = r14
-round_GR_big = r15
-round_GR_expmask = r16
-round_GR_signexp = r17
-round_GR_exp = r18
-round_GR_expdiff = r19
-
-// predicate registers used:
-// p6 - p10
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rExpHalf = r18
+rExpMHalf = r19
+
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXtruncInt = f9
+fNormX = f10
+fHalf = f11
+fMHalf = f12
+fRem = f13
-ROUND_NORM_f8 = f9
-ROUND_TRUNC_f8 = f10
-ROUND_RINT_f8 = f11
-ROUND_FLOAT_TRUNC_f8 = f12
-ROUND_FLOAT_RINT_f8 = f13
-ROUND_REMAINDER = f14
-ROUND_HALF = f15
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// double round(double x)
-// Return an integer value (represented as a double) that is x
-// rounded to nearest integer, halfway cases rounded away from
-// zero.
+// Return an integer value (represented as a double) that is x
+// rounded to nearest integer, halfway cases rounded away from
+// zero.
// if x>0 result = trunc(x+0.5)
// if x<0 result = trunc(x-0.5)
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
+//
+//==============================================================
-// If x is NAN, ZERO, INFINITY, or >= 2^52 then return
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
-.align 32
-.global round#
.section .text
-.proc round#
-.align 32
-
+GLOBAL_LIBM_ENTRY(round)
-round:
-
-// Get exponent for +0.5
-// Truncate x to integer
{ .mfi
- addl round_GR_half = 0x0fffe, r0
- fcvt.fx.trunc.s1 ROUND_TRUNC_f8 = f8
- nop.i 999
-}
-
-// Get signexp of x
-// Normalize input
-// Form exponent mask
-{ .mfi
- getf.exp round_GR_signexp = f8
- fnorm ROUND_NORM_f8 = f8
- addl round_GR_expmask = 0x1ffff, r0 ;;
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fcvt.fx.trunc.s1 fXtruncInt = f8 // Convert to int in significand
+ addl rBigexp = 0x10033, r0 // Set exponent at which is integer
}
-
-// Form +0.5
-// Round x to integer
{ .mfi
- setf.exp ROUND_HALF = round_GR_half
- fcvt.fx.s1 ROUND_RINT_f8 = f8
- nop.i 999 ;;
+ mov rExpHalf = 0x0FFFE // Form sign and exponent of 0.5
+ fnorm.s1 fNormX = f8 // Normalize input
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
-// Get exp of x
-// Test for NAN, INF, ZERO
-// Get exponent at which input has no fractional part
-{ .mfi
- and round_GR_exp = round_GR_expmask, round_GR_signexp
- fclass.m p8,p9 = f8,0xe7
- addl round_GR_big = 0x10033, r0 ;;
-}
-
-// Get exp-bigexp
-// If exp is so big there is no fractional part, then turn on p8, off p9
-{ .mmi
- sub round_GR_expdiff = round_GR_exp, round_GR_big ;;
-#ifdef _LIBC
-(p9) cmp.lt.or.andcm p8,p9 = r0, round_GR_expdiff
-#else
-(p9) cmp.ge.or.andcm p8,p9 = round_GR_expdiff, r0
-#endif
- nop.i 999 ;;
-}
-
-// Set p6 if x<0, else set p7
-{ .mfi
- nop.m 999
-(p9) fcmp.lt.unc p6,p7 = f8,f0
- nop.i 999
+;;
+
+{ .mmf
+ setf.exp fHalf = rExpHalf // Form 0.5
+ mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
}
-
-// If NAN, INF, ZERO, or no fractional part, result is just normalized input
-{ .mfi
- nop.m 999
-(p8) fnorm.d.s0 f8 = f8
- nop.i 999 ;;
+;;
+
+{ .mfb
+ setf.exp fMHalf = rExpMHalf // Form -0.5
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p7) br.cond.spnt ROUND_UNORM // Branch if x unorm
}
+;;
-// Float the truncated integer
+ROUND_COMMON:
+// Return here from ROUND_UNORM
{ .mfi
- nop.m 999
-(p9) fcvt.xf ROUND_FLOAT_TRUNC_f8 = ROUND_TRUNC_f8
- nop.i 999 ;;
+ nop.m 0
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0
+ nop.i 0
+}
+{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
-// Float the rounded integer to get preliminary result
{ .mfi
- nop.m 999
-(p9) fcvt.xf ROUND_FLOAT_RINT_f8 = ROUND_RINT_f8
- nop.i 999 ;;
-}
-
-// If x<0 and the difference of the truncated input minus the input is 0.5
-// then result = truncated input - 1.0
-// Else if x>0 and the difference of the input minus truncated input is 0.5
-// then result = truncated input + 1.0
-// Else
-// result = rounded input
-// Endif
-{ .mfi
- nop.m 999
-(p6) fsub.s1 ROUND_REMAINDER = ROUND_FLOAT_TRUNC_f8, ROUND_NORM_f8
- nop.i 999
+ cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
+ fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^52
+ cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^52?
}
-
{ .mfi
- nop.m 999
-(p7) fsub.s1 ROUND_REMAINDER = ROUND_NORM_f8, ROUND_FLOAT_TRUNC_f8
- nop.i 999 ;;
+ cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
+ nop.f 0
+ nop.i 0
}
+;;
-// Assume preliminary result is rounded integer
+// We must correct result if |x| < 0.5, or |x| >= 2^52
+.pred.rel "mutex",p6,p7
{ .mfi
- nop.m 999
-(p9) fnorm.d.s0 f8 = ROUND_FLOAT_RINT_f8
- nop.i 999
+ nop.m 0
+(p6) fmerge.s f8 = fNormX, f0 // If |x| < 0.5, result sgn(x)*0
+ nop.i 0
}
-
-// If x<0, test if result=0
-{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc p10,p0 = ROUND_FLOAT_RINT_f8,f0
- nop.i 999 ;;
+{ .mfb
+(p7) cmp.eq p10,p0 = r0, r0 // Also turn on p10 if |x| >= 2^52
+(p7) fma.d.s0 f8 = fNormX, f1, f0 // If |x| >= 2^52, result x
+(p10) br.ret.spnt b0 // Exit |x| < 0.5 or |x| >= 2^52
}
+;;
-// If x<0 and result=0, set result=-0
+// Here if 0.5 <= |x| < 2^52
{ .mfi
- nop.m 999
-(p10) fmerge.ns f8 = f1,f8
- nop.i 999
+ nop.m 0
+ fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
+ nop.i 0
}
-
-// If x<0, test if remainder=0.5
+;;
+
{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc p6,p0 = ROUND_REMAINDER, ROUND_HALF
- nop.i 999 ;;
+ nop.m 0
+(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf
+ nop.i 0
}
-
-// If x>0, test if remainder=0.5
{ .mfi
- nop.m 999
-(p7) fcmp.eq.unc p7,p0 = ROUND_REMAINDER, ROUND_HALF
- nop.i 999 ;;
+ nop.m 0
+(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf
+ nop.i 0
}
+;;
-// If x<0 and remainder=0.5, result=truncated-1.0
-// If x>0 and remainder=0.5, result=truncated+1.0
-// Exit
-.pred.rel "mutex",p6,p7
+// If x < 0 and remainder <= -0.5, then subtract 1 from result
+// If x > 0 and remainder >= +0.5, then add 1 to result
+.pred.rel "mutex",p8,p9
{ .mfi
- nop.m 999
-(p6) fsub.d.s0 f8 = ROUND_FLOAT_TRUNC_f8,f1
- nop.i 999
+ nop.m 0
+(p8) fms.d.s0 f8 = f8, f1, f1
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p7) fadd.d.s0 f8 = ROUND_FLOAT_TRUNC_f8,f1
- br.ret.sptk b0 ;;
+ nop.m 0
+(p9) fma.d.s0 f8 = f8, f1, f1
+ br.ret.sptk b0
+}
+;;
+
+
+ROUND_UNORM:
+// Here if x unorm
+{ .mfb
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk ROUND_COMMON // Return to main path
}
+;;
-.endp round
-ASM_SIZE_DIRECTIVE(round)
+GLOBAL_LIBM_END(round)
diff --git a/sysdeps/ia64/fpu/s_roundf.S b/sysdeps/ia64/fpu/s_roundf.S
index 42ee60b218..1e8dc78777 100644
--- a/sysdeps/ia64/fpu/s_roundf.S
+++ b/sysdeps/ia64/fpu/s_roundf.S
@@ -1,11 +1,10 @@
.file "roundf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,229 +20,202 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 10/25/2000: Created
+// 10/25/00 Initial version
+// 06/14/01 Changed cmp to an equivalent form
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance and reduced code size
+// 04/18/03 Eliminate possible WAW dependency warning
//==============================================================
-//
+
// API
//==============================================================
// float roundf(float x)
-//
+//==============================================================
-#include "libm_support.h"
+// general input registers:
+// r14 - r19
-// general input registers:
-//
-roundf_GR_half = r14
-roundf_GR_big = r15
-roundf_GR_expmask = r16
-roundf_GR_signexp = r17
-roundf_GR_exp = r18
-roundf_GR_expdiff = r19
-
-// predicate registers used:
-// p6 - p10
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rExpHalf = r18
+rExpMHalf = r19
+
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXtruncInt = f9
+fNormX = f10
+fHalf = f11
+fMHalf = f12
+fRem = f13
-ROUNDF_NORM_f8 = f9
-ROUNDF_TRUNC_f8 = f10
-ROUNDF_RINT_f8 = f11
-ROUNDF_FLOAT_TRUNC_f8 = f12
-ROUNDF_FLOAT_RINT_f8 = f13
-ROUNDF_REMAINDER = f14
-ROUNDF_HALF = f15
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// float roundf(float x)
-// Return an integer value (represented as a float) that is x
-// rounded to nearest integer, halfway cases rounded away from
-// zero.
+// Return an integer value (represented as a float) that is x
+// rounded to nearest integer, halfway cases rounded away from
+// zero.
// if x>0 result = trunc(x+0.5)
// if x<0 result = trunc(x-0.5)
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
+//
+//==============================================================
-// If x is NAN, ZERO, INFINITY, or >= 2^23 then return
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
-.align 32
-.global roundf#
.section .text
-.proc roundf#
-.align 32
-
+GLOBAL_LIBM_ENTRY(roundf)
-roundf:
-
-// Get exponent for +0.5
-// Truncate x to integer
{ .mfi
- addl roundf_GR_half = 0x0fffe, r0
- fcvt.fx.trunc.s1 ROUNDF_TRUNC_f8 = f8
- nop.i 999
-}
-
-// Get signexp of x
-// Normalize input
-// Form exponent mask
-{ .mfi
- getf.exp roundf_GR_signexp = f8
- fnorm ROUNDF_NORM_f8 = f8
- addl roundf_GR_expmask = 0x1ffff, r0 ;;
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fcvt.fx.trunc.s1 fXtruncInt = f8 // Convert to int in significand
+ addl rBigexp = 0x10016, r0 // Set exponent at which is integer
}
-
-// Form +0.5
-// Round x to integer
{ .mfi
- setf.exp ROUNDF_HALF = roundf_GR_half
- fcvt.fx.s1 ROUNDF_RINT_f8 = f8
- nop.i 999 ;;
+ mov rExpHalf = 0x0FFFE // Form sign and exponent of 0.5
+ fnorm.s1 fNormX = f8 // Normalize input
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
-// Get exp of x
-// Test for NAN, INF, ZERO
-// Get exponent at which input has no fractional part
-{ .mfi
- and roundf_GR_exp = roundf_GR_expmask, roundf_GR_signexp
- fclass.m p8,p9 = f8,0xe7
- addl roundf_GR_big = 0x10016, r0 ;;
-}
-
-// Get exp-bigexp
-// If exp is so big there is no fractional part, then turn on p8, off p9
-{ .mmi
- sub roundf_GR_expdiff = roundf_GR_exp, roundf_GR_big ;;
-#ifdef _LIBC
-(p9) cmp.lt.or.andcm p8,p9 = r0, roundf_GR_expdiff
-#else
-(p9) cmp.ge.or.andcm p8,p9 = roundf_GR_expdiff, r0
-#endif
- nop.i 999 ;;
-}
-
-// Set p6 if x<0, else set p7
-{ .mfi
- nop.m 999
-(p9) fcmp.lt.unc p6,p7 = f8,f0
- nop.i 999
+;;
+
+{ .mmf
+ setf.exp fHalf = rExpHalf // Form 0.5
+ mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
}
-
-// If NAN, INF, ZERO, or no fractional part, result is just normalized input
-{ .mfi
- nop.m 999
-(p8) fnorm.s.s0 f8 = f8
- nop.i 999 ;;
+;;
+
+{ .mfb
+ setf.exp fMHalf = rExpMHalf // Form -0.5
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p7) br.cond.spnt ROUND_UNORM // Branch if x unorm
}
+;;
-// Float the truncated integer
+ROUND_COMMON:
+// Return here from ROUND_UNORM
{ .mfi
- nop.m 999
-(p9) fcvt.xf ROUNDF_FLOAT_TRUNC_f8 = ROUNDF_TRUNC_f8
- nop.i 999 ;;
+ nop.m 0
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0
+ nop.i 0
+}
+{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
-// Float the rounded integer to get preliminary result
{ .mfi
- nop.m 999
-(p9) fcvt.xf ROUNDF_FLOAT_RINT_f8 = ROUNDF_RINT_f8
- nop.i 999 ;;
-}
-
-// If x<0 and the difference of the truncated input minus the input is 0.5
-// then result = truncated input - 1.0
-// Else if x>0 and the difference of the input minus truncated input is 0.5
-// then result = truncated input + 1.0
-// Else
-// result = rounded input
-// Endif
-{ .mfi
- nop.m 999
-(p6) fsub.s1 ROUNDF_REMAINDER = ROUNDF_FLOAT_TRUNC_f8, ROUNDF_NORM_f8
- nop.i 999
+ cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
+ fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^23
+ cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^23?
}
-
{ .mfi
- nop.m 999
-(p7) fsub.s1 ROUNDF_REMAINDER = ROUNDF_NORM_f8, ROUNDF_FLOAT_TRUNC_f8
- nop.i 999 ;;
+ cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
+ nop.f 0
+ nop.i 0
}
+;;
-// Assume preliminary result is rounded integer
+// We must correct result if |x| < 0.5, or |x| >= 2^23
+.pred.rel "mutex",p6,p7
{ .mfi
- nop.m 999
-(p9) fnorm.s.s0 f8 = ROUNDF_FLOAT_RINT_f8
- nop.i 999
+ nop.m 0
+(p6) fmerge.s f8 = fNormX, f0 // If |x| < 0.5, result sgn(x)*0
+ nop.i 0
}
-
-// If x<0, test if result=0
-{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc p10,p0 = ROUNDF_FLOAT_RINT_f8,f0
- nop.i 999 ;;
+{ .mfb
+(p7) cmp.eq p10,p0 = r0, r0 // Also turn on p10 if |x| >= 2^23
+(p7) fma.s.s0 f8 = fNormX, f1, f0 // If |x| >= 2^23, result x
+(p10) br.ret.spnt b0 // Exit |x| < 0.5 or |x| >= 2^23
}
+;;
-// If x<0 and result=0, set result=-0
+// Here if 0.5 <= |x| < 2^23
{ .mfi
- nop.m 999
-(p10) fmerge.ns f8 = f1,f8
- nop.i 999
+ nop.m 0
+ fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
+ nop.i 0
}
-
-// If x<0, test if remainder=0.5
+;;
+
{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc p6,p0 = ROUNDF_REMAINDER, ROUNDF_HALF
- nop.i 999 ;;
+ nop.m 0
+(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf
+ nop.i 0
}
-
-// If x>0, test if remainder=0.5
{ .mfi
- nop.m 999
-(p7) fcmp.eq.unc p7,p0 = ROUNDF_REMAINDER, ROUNDF_HALF
- nop.i 999 ;;
+ nop.m 0
+(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf
+ nop.i 0
}
+;;
-// If x<0 and remainder=0.5, result=truncated-1.0
-// If x>0 and remainder=0.5, result=truncated+1.0
-// Exit
-.pred.rel "mutex",p6,p7
+// If x < 0 and remainder <= -0.5, then subtract 1 from result
+// If x > 0 and remainder >= +0.5, then add 1 to result
+.pred.rel "mutex",p8,p9
{ .mfi
- nop.m 999
-(p6) fsub.s.s0 f8 = ROUNDF_FLOAT_TRUNC_f8,f1
- nop.i 999
+ nop.m 0
+(p8) fms.s.s0 f8 = f8, f1, f1
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p7) fadd.s.s0 f8 = ROUNDF_FLOAT_TRUNC_f8,f1
- br.ret.sptk b0 ;;
+ nop.m 0
+(p9) fma.s.s0 f8 = f8, f1, f1
+ br.ret.sptk b0
+}
+;;
+
+
+ROUND_UNORM:
+// Here if x unorm
+{ .mfb
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk ROUND_COMMON // Return to main path
}
+;;
-.endp roundf
-ASM_SIZE_DIRECTIVE(roundf)
+GLOBAL_LIBM_END(roundf)
diff --git a/sysdeps/ia64/fpu/s_roundl.S b/sysdeps/ia64/fpu/s_roundl.S
index b30f590917..79dff00c06 100644
--- a/sysdeps/ia64/fpu/s_roundl.S
+++ b/sysdeps/ia64/fpu/s_roundl.S
@@ -1,11 +1,10 @@
.file "roundl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,229 +20,202 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 10/25/2000: Created
+// 10/25/00 Initial version
+// 06/14/01 Changed cmp to an equivalent form
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance and reduced code size
+// 04/18/03 Eliminate possible WAW dependency warning
//==============================================================
-//
+
// API
//==============================================================
// long double roundl(long double x)
-//
+//==============================================================
-#include "libm_support.h"
+// general input registers:
+// r14 - r19
-// general input registers:
-//
-roundl_GR_half = r14
-roundl_GR_big = r15
-roundl_GR_expmask = r16
-roundl_GR_signexp = r17
-roundl_GR_exp = r18
-roundl_GR_expdiff = r19
-
-// predicate registers used:
-// p6 - p10
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rExpHalf = r18
+rExpMHalf = r19
+
+// floating-point registers:
+// f8 - f13
-// floating-point registers used:
+fXtruncInt = f9
+fNormX = f10
+fHalf = f11
+fMHalf = f12
+fRem = f13
-ROUNDL_NORM_f8 = f9
-ROUNDL_TRUNC_f8 = f10
-ROUNDL_RINT_f8 = f11
-ROUNDL_FLOAT_TRUNC_f8 = f12
-ROUNDL_FLOAT_RINT_f8 = f13
-ROUNDL_REMAINDER = f14
-ROUNDL_HALF = f15
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// long double roundl(long double x)
-// Return an integer value (represented as a long double) that is x
-// rounded to nearest integer, halfway cases rounded away from
-// zero.
+// Return an integer value (represented as a long double) that is x
+// rounded to nearest integer, halfway cases rounded away from
+// zero.
// if x>0 result = trunc(x+0.5)
// if x<0 result = trunc(x-0.5)
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
+//
+//==============================================================
-// If x is NAN, ZERO, INFINITY, or >= 2^63 then return
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
-.align 32
-.global roundl#
.section .text
-.proc roundl#
-.align 32
-
+GLOBAL_LIBM_ENTRY(roundl)
-roundl:
-
-// Get exponent for +0.5
-// Truncate x to integer
{ .mfi
- addl roundl_GR_half = 0x0fffe, r0
- fcvt.fx.trunc.s1 ROUNDL_TRUNC_f8 = f8
- nop.i 999
-}
-
-// Get signexp of x
-// Normalize input
-// Form exponent mask
-{ .mfi
- getf.exp roundl_GR_signexp = f8
- fnorm ROUNDL_NORM_f8 = f8
- addl roundl_GR_expmask = 0x1ffff, r0 ;;
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fcvt.fx.trunc.s1 fXtruncInt = f8 // Convert to int in significand
+ addl rBigexp = 0x1003e, r0 // Set exponent at which x is integer
}
-
-// Form +0.5
-// Round x to integer
{ .mfi
- setf.exp ROUNDL_HALF = roundl_GR_half
- fcvt.fx.s1 ROUNDL_RINT_f8 = f8
- nop.i 999 ;;
+ mov rExpHalf = 0x0FFFE // Form sign and exponent of 0.5
+ fnorm.s1 fNormX = f8 // Normalize input
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
-// Get exp of x
-// Test for NAN, INF, ZERO
-// Get exponent at which input has no fractional part
-{ .mfi
- and roundl_GR_exp = roundl_GR_expmask, roundl_GR_signexp
- fclass.m p8,p9 = f8,0xe7
- addl roundl_GR_big = 0x1003e, r0 ;;
-}
-
-// Get exp-bigexp
-// If exp is so big there is no fractional part, then turn on p8, off p9
-{ .mmi
- sub roundl_GR_expdiff = roundl_GR_exp, roundl_GR_big ;;
-#ifdef _LIBC
-(p9) cmp.lt.or.andcm p8,p9 = r0, roundl_GR_expdiff
-#else
-(p9) cmp.ge.or.andcm p8,p9 = roundl_GR_expdiff, r0
-#endif
- nop.i 999 ;;
-}
-
-// Set p6 if x<0, else set p7
-{ .mfi
- nop.m 999
-(p9) fcmp.lt.unc p6,p7 = f8,f0
- nop.i 999
+;;
+
+{ .mmf
+ setf.exp fHalf = rExpHalf // Form 0.5
+ mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
}
-
-// If NAN, INF, ZERO, or no fractional part, result is just normalized input
-{ .mfi
- nop.m 999
-(p8) fnorm.s0 f8 = f8
- nop.i 999 ;;
+;;
+
+{ .mfb
+ setf.exp fMHalf = rExpMHalf // Form -0.5
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p7) br.cond.spnt ROUND_UNORM // Branch if x unorm
}
+;;
-// Float the truncated integer
+ROUND_COMMON:
+// Return here from ROUND_UNORM
{ .mfi
- nop.m 999
-(p9) fcvt.xf ROUNDL_FLOAT_TRUNC_f8 = ROUNDL_TRUNC_f8
- nop.i 999 ;;
+ nop.m 0
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0
+ nop.i 0
+}
+{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
-// Float the rounded integer to get preliminary result
{ .mfi
- nop.m 999
-(p9) fcvt.xf ROUNDL_FLOAT_RINT_f8 = ROUNDL_RINT_f8
- nop.i 999 ;;
-}
-
-// If x<0 and the difference of the truncated input minus the input is 0.5
-// then result = truncated input - 1.0
-// Else if x>0 and the difference of the input minus truncated input is 0.5
-// then result = truncated input + 1.0
-// Else
-// result = rounded input
-// Endif
-{ .mfi
- nop.m 999
-(p6) fsub.s1 ROUNDL_REMAINDER = ROUNDL_FLOAT_TRUNC_f8, ROUNDL_NORM_f8
- nop.i 999
+ cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
+ fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^63
+ cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^63?
}
-
{ .mfi
- nop.m 999
-(p7) fsub.s1 ROUNDL_REMAINDER = ROUNDL_NORM_f8, ROUNDL_FLOAT_TRUNC_f8
- nop.i 999 ;;
+ cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
+ nop.f 0
+ nop.i 0
}
+;;
-// Assume preliminary result is rounded integer
+// We must correct result if |x| < 0.5, or |x| >= 2^63
+.pred.rel "mutex",p6,p7
{ .mfi
- nop.m 999
-(p9) fnorm.s0 f8 = ROUNDL_FLOAT_RINT_f8
- nop.i 999
+ nop.m 0
+(p6) fmerge.s f8 = fNormX, f0 // If |x| < 0.5, result sgn(x)*0
+ nop.i 0
}
-
-// If x<0, test if result=0
-{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc p10,p0 = ROUNDL_FLOAT_RINT_f8,f0
- nop.i 999 ;;
+{ .mfb
+(p7) cmp.eq p10,p0 = r0, r0 // Also turn on p10 if |x| >= 2^63
+(p7) fma.s0 f8 = fNormX, f1, f0 // If |x| >= 2^63, result x
+(p10) br.ret.spnt b0 // Exit |x| < 0.5 or |x| >= 2^63
}
+;;
-// If x<0 and result=0, set result=-0
+// Here if 0.5 <= |x| < 2^63
{ .mfi
- nop.m 999
-(p10) fmerge.ns f8 = f1,f8
- nop.i 999
+ nop.m 0
+ fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
+ nop.i 0
}
-
-// If x<0, test if remainder=0.5
+;;
+
{ .mfi
- nop.m 999
-(p6) fcmp.eq.unc p6,p0 = ROUNDL_REMAINDER, ROUNDL_HALF
- nop.i 999 ;;
+ nop.m 0
+(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf
+ nop.i 0
}
-
-// If x>0, test if remainder=0.5
{ .mfi
- nop.m 999
-(p7) fcmp.eq.unc p7,p0 = ROUNDL_REMAINDER, ROUNDL_HALF
- nop.i 999 ;;
+ nop.m 0
+(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf
+ nop.i 0
}
+;;
-// If x<0 and remainder=0.5, result=truncated-1.0
-// If x>0 and remainder=0.5, result=truncated+1.0
-// Exit
-.pred.rel "mutex",p6,p7
+// If x < 0 and remainder <= -0.5, then subtract 1 from result
+// If x > 0 and remainder >= +0.5, then add 1 to result
+.pred.rel "mutex",p8,p9
{ .mfi
- nop.m 999
-(p6) fsub.s0 f8 = ROUNDL_FLOAT_TRUNC_f8,f1
- nop.i 999
+ nop.m 0
+(p8) fms.s0 f8 = f8, f1, f1
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p7) fadd.s0 f8 = ROUNDL_FLOAT_TRUNC_f8,f1
- br.ret.sptk b0 ;;
+ nop.m 0
+(p9) fma.s0 f8 = f8, f1, f1
+ br.ret.sptk b0
+}
+;;
+
+
+ROUND_UNORM:
+// Here if x unorm
+{ .mfb
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk ROUND_COMMON // Return to main path
}
+;;
-.endp roundl
-ASM_SIZE_DIRECTIVE(roundl)
+GLOBAL_LIBM_END(roundl)
diff --git a/sysdeps/ia64/fpu/s_scalbn.S b/sysdeps/ia64/fpu/s_scalbn.S
deleted file mode 100644
index 50d14b4e30..0000000000
--- a/sysdeps/ia64/fpu/s_scalbn.S
+++ /dev/null
@@ -1,379 +0,0 @@
-.file "scalbn.s"
-
-// Copyright (C) 2000, 2001, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
-//
-// History
-//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 Scalbn completely reworked and now standalone version
-//
-// API
-//==============================================================
-// double = scalbn (double x, int n)
-// input floating point f8 and int n (r33)
-// output floating point f8
-//
-// Returns x* 2**n using an fma and detects overflow
-// and underflow.
-//
-//
-
-#include "libm_support.h"
-
-FR_Big = f6
-FR_NBig = f7
-FR_Floating_X = f8
-FR_Result = f8
-FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
-
-GR_N_Biased = r15
-GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
-GR_Bias = r20
-GR_N_as_int = r21
-
-GR_SAVE_B0 = r32
-GR_SAVE_GP = r33
-GR_SAVE_PFS = r34
-GR_Parameter_X = r35
-GR_Parameter_Y = r36
-GR_Parameter_RESULT = r37
-GR_Tag = r38
-
-.align 32
-.global scalbn
-
-.section .text
-.proc scalbn
-.align 32
-
-scalbn:
-
-//
-// Is x NAN, INF, ZERO, +-?
-// Build the exponent Bias
-//
-{ .mfi
- alloc r32=ar.pfs,1,2,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
-}
-
-//
-// Sign extend input
-// Is N zero?
-// Normalize x
-//
-{ .mfi
- cmp.eq.unc p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- sxt4 GR_N_as_int = r33
-}
-;;
-
-//
-// Normalize x
-// Branch and return special values.
-// Create -35000
-// Create 35000
-//
-{ .mfi
- addl GR_Big = 35000,r0
- nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
-}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
-
-//
-// Build the exponent Bias
-// Return x when N = 0
-//
-{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
-}
-{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
-
-//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
-// Raise Denormal operand flag with compare
-// Main path, create 2**N
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
-};;
-
-//
-// Adjust 2**N if N was very small or very large
-//
-{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch = 0x00000000000303FF
-};;
-
-
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch1= 0x00000000000103FF
-};;
-
-// Set up necessary status fields
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
-//
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x41
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999
-};;
-
-//
-// Do final operation
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-
-// Check for overflow or underflow.
-// Restore s3
-// Restore s2
-//
-{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
-}
-{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
-
-//
-// Is the result zero?
-//
-{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
-{ .mfi
- addl GR_Tag = 176, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
-
-//
-// Detect masked underflow - Tiny + Inexact Only
-//
-{ .mfi
- nop.m 999
-(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
-
-//
-// Is result bigger the allowed range?
-// Branch out for underflow
-//
-{ .mfb
-(p6) addl GR_Tag = 177, r0
-(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(SCALBN_UNDERFLOW)
-};;
-
-//
-// Branch out for overflow
-//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt L(SCALBN_OVERFLOW)
-(p9) br.cond.spnt L(SCALBN_OVERFLOW)
-};;
-
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
-}
-
-.endp scalbn
-ASM_SIZE_DIRECTIVE(scalbn)
-.proc __libm_error_region
-__libm_error_region:
-
-L(SCALBN_OVERFLOW):
-L(SCALBN_UNDERFLOW):
-
-//
-// Get stack address of N
-//
-.prologue
-{ .mfi
- add GR_Parameter_Y=-32,sp
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-//
-// Adjust sp
-//
-{ .mfi
-.fframe 64
- add sp=-64,sp
- nop.f 0
- mov GR_SAVE_GP=gp
-};;
-
-//
-// Store N on stack in correct position
-// Locate the address of x on stack
-//
-{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-};;
-
-//
-// Store x on the stack.
-// Get address for result on stack.
-//
-.body
-{ .mib
- stfd [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0
-}
-{ .mib
- stfd [GR_Parameter_Y] = FR_Result
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
-};;
-
-//
-// Get location of result on stack
-//
-{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
-};;
-
-//
-// Get the new result
-//
-{ .mmi
- ldfd FR_Result = [GR_Parameter_RESULT]
-.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
-};;
-
-//
-// Restore gp, ar.pfs and return
-//
-{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
-};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(scalbn)
-
-.type __libm_error_support#,@function
-.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_scalbnf.S b/sysdeps/ia64/fpu/s_scalbnf.S
deleted file mode 100644
index ff7d1ca637..0000000000
--- a/sysdeps/ia64/fpu/s_scalbnf.S
+++ /dev/null
@@ -1,379 +0,0 @@
-//.file "scalbnf.s"
-
-// Copyright (C) 2000, 2001, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
-//
-// History
-//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 scalbnf completely reworked and now standalone version
-//
-// API
-//==============================================================
-// float = scalbnf (float x, int n)
-// input floating point f8 and int n (r33)
-// output floating point f8
-//
-// Returns x* 2**n using an fma and detects overflow
-// and underflow.
-//
-//
-
-#include "libm_support.h"
-
-FR_Big = f6
-FR_NBig = f7
-FR_Floating_X = f8
-FR_Result = f8
-FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
-
-GR_N_Biased = r15
-GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
-GR_Bias = r20
-GR_N_as_int = r21
-
-GR_SAVE_B0 = r32
-GR_SAVE_GP = r33
-GR_SAVE_PFS = r34
-GR_Parameter_X = r35
-GR_Parameter_Y = r36
-GR_Parameter_RESULT = r37
-GR_Tag = r38
-
-.align 32
-.global scalbnf
-
-.section .text
-.proc scalbnf
-.align 32
-
-scalbnf:
-
-//
-// Is x NAN, INF, ZERO, +-?
-// Build the exponent Bias
-//
-{ .mfi
- alloc r32=ar.pfs,1,2,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
-}
-
-//
-// Sign extend input
-// Is N zero?
-// Normalize x
-//
-{ .mfi
- cmp.eq.unc p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- sxt4 GR_N_as_int = r33
-}
-;;
-
-//
-// Normalize x
-// Branch and return special values.
-// Create -35000
-// Create 35000
-//
-{ .mfi
- addl GR_Big = 35000,r0
- nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
-}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
-
-//
-// Build the exponent Bias
-// Return x when N = 0
-//
-{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
-}
-{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
-
-//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
-// Raise Denormal operand flag with compare
-// Main path, create 2**N
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
-};;
-
-//
-// Adjust 2**N if N was very small or very large
-//
-{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch = 0x000000000003007F
-};;
-
-
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch1= 0x000000000001007F
-};;
-
-// Set up necessary status fields
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
-//
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x41
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999
-};;
-
-//
-// Do final operation
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-
-// Check for overflow or underflow.
-// Restore s3
-// Restore s2
-//
-{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
-}
-{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
-
-//
-// Is the result zero?
-//
-{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
-{ .mfi
- addl GR_Tag = 178, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
-
-//
-// Detect masked underflow - Tiny + Inexact Only
-//
-{ .mfi
- nop.m 999
-(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
-
-//
-// Is result bigger the allowed range?
-// Branch out for underflow
-//
-{ .mfb
-(p6) addl GR_Tag = 179, r0
-(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(scalbnf_UNDERFLOW)
-};;
-
-//
-// Branch out for overflow
-//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt L(scalbnf_OVERFLOW)
-(p9) br.cond.spnt L(scalbnf_OVERFLOW)
-};;
-
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
-}
-
-.endp scalbnf
-ASM_SIZE_DIRECTIVE(scalbnf)
-.proc __libm_error_region
-__libm_error_region:
-
-L(scalbnf_OVERFLOW):
-L(scalbnf_UNDERFLOW):
-
-//
-// Get stack address of N
-//
-.prologue
-{ .mfi
- add GR_Parameter_Y=-32,sp
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-//
-// Adjust sp
-//
-{ .mfi
-.fframe 64
- add sp=-64,sp
- nop.f 0
- mov GR_SAVE_GP=gp
-};;
-
-//
-// Store N on stack in correct position
-// Locate the address of x on stack
-//
-{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-};;
-
-//
-// Store x on the stack.
-// Get address for result on stack.
-//
-.body
-{ .mib
- stfs [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0
-}
-{ .mib
- stfs [GR_Parameter_Y] = FR_Result
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
-};;
-
-//
-// Get location of result on stack
-//
-{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
-};;
-
-//
-// Get the new result
-//
-{ .mmi
- ldfs FR_Result = [GR_Parameter_RESULT]
-.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
-};;
-
-//
-// Restore gp, ar.pfs and return
-//
-{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
-};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.type __libm_error_support#,@function
-.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_scalbnl.S b/sysdeps/ia64/fpu/s_scalbnl.S
deleted file mode 100644
index 9e54a2ec0a..0000000000
--- a/sysdeps/ia64/fpu/s_scalbnl.S
+++ /dev/null
@@ -1,379 +0,0 @@
-//.file "scalbnl.s"
-
-// Copyright (C) 2000, 2001, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
-//
-// History
-//==============================================================
-// 2/02/00 Initial version
-// 1/26/01 scalbnl completely reworked and now standalone version
-//
-// API
-//==============================================================
-// double-extended = scalbnl (double-extended x, int n)
-// input floating point f8 and int n (r34)
-// output floating point f8
-//
-// Returns x* 2**n using an fma and detects overflow
-// and underflow.
-//
-//
-
-#include "libm_support.h"
-
-FR_Big = f6
-FR_NBig = f7
-FR_Floating_X = f8
-FR_Result = f8
-FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
-
-GR_N_Biased = r15
-GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
-GR_Bias = r20
-GR_N_as_int = r21
-
-GR_SAVE_B0 = r32
-GR_SAVE_GP = r33
-GR_SAVE_PFS = r34
-GR_Parameter_X = r35
-GR_Parameter_Y = r36
-GR_Parameter_RESULT = r37
-GR_Tag = r38
-
-.align 32
-.global scalbnl
-
-.section .text
-.proc scalbnl
-.align 32
-
-scalbnl:
-
-//
-// Is x NAN, INF, ZERO, +-?
-// Build the exponent Bias
-//
-{ .mfi
- alloc r32=ar.pfs,2,1,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
-}
-
-//
-// Sign extend input
-// Is N zero?
-// Normalize x
-//
-{ .mfi
- cmp.eq.unc p6,p0 = r34,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- sxt4 GR_N_as_int = r34
-}
-;;
-
-//
-// Normalize x
-// Branch and return special values.
-// Create -35000
-// Create 35000
-//
-{ .mfi
- addl GR_Big = 35000,r0
- nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
-}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
-
-//
-// Build the exponent Bias
-// Return x when N = 0
-//
-{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
-}
-{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
-
-//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
-// Raise Denormal operand flag with compare
-// Main path, create 2**N
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
-};;
-
-//
-// Adjust 2**N if N was very small or very large
-//
-{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch = 0x0000000000033FFF
-};;
-
-
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
-}
-{ .mlx
- nop.m 999
-(p0) movl GR_Scratch1= 0x0000000000013FFF
-};;
-
-// Set up necessary status fields
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflows)
-// S3 user supplied status + FZ + TD (Underflows)
-//
-{ .mfi
- nop.m 999
-(p0) fsetc.s3 0x7F,0x41
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p0) fsetc.s2 0x7F,0x42
- nop.i 999
-};;
-
-//
-// Do final operation
-//
-{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-
-// Check for overflow or underflow.
-// Restore s3
-// Restore s2
-//
-{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
-}
-{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
-
-//
-// Is the result zero?
-//
-{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
-{ .mfi
- addl GR_Tag = 174, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
-
-//
-// Detect masked underflow - Tiny + Inexact Only
-//
-{ .mfi
- nop.m 999
-(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
-
-//
-// Is result bigger the allowed range?
-// Branch out for underflow
-//
-{ .mfb
-(p6) addl GR_Tag = 175, r0
-(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt L(scalbnl_UNDERFLOW)
-};;
-
-//
-// Branch out for overflow
-//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt L(scalbnl_OVERFLOW)
-(p9) br.cond.spnt L(scalbnl_OVERFLOW)
-};;
-
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
-}
-
-.endp scalbnl
-ASM_SIZE_DIRECTIVE(scalbnl)
-.proc __libm_error_region
-__libm_error_region:
-
-L(scalbnl_OVERFLOW):
-L(scalbnl_UNDERFLOW):
-
-//
-// Get stack address of N
-//
-.prologue
-{ .mfi
- add GR_Parameter_Y=-32,sp
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
-}
-//
-// Adjust sp
-//
-{ .mfi
-.fframe 64
- add sp=-64,sp
- nop.f 0
- mov GR_SAVE_GP=gp
-};;
-
-//
-// Store N on stack in correct position
-// Locate the address of x on stack
-//
-{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-};;
-
-//
-// Store x on the stack.
-// Get address for result on stack.
-//
-.body
-{ .mib
- stfe [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0
-}
-{ .mib
- stfe [GR_Parameter_Y] = FR_Result
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
-};;
-
-//
-// Get location of result on stack
-//
-{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
-};;
-
-//
-// Get the new result
-//
-{ .mmi
- ldfe FR_Result = [GR_Parameter_RESULT]
-.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
-};;
-
-//
-// Restore gp, ar.pfs and return
-//
-{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
-};;
-
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
-
-.type __libm_error_support#,@function
-.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_significand.S b/sysdeps/ia64/fpu/s_significand.S
index 84141daf4d..720e043e5c 100644
--- a/sysdeps/ia64/fpu/s_significand.S
+++ b/sysdeps/ia64/fpu/s_significand.S
@@ -1,10 +1,10 @@
.file "significand.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,13 +35,15 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 4/04/00 Unwind support added
-// 5/31/00: Fixed bug when x a double-extended denormal
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 05/31/00 Fixed bug when x a double-extended denormal
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -56,18 +58,10 @@
// p6, p7
//
// floating-point registers used:
-// f8, f9, f10
-
-#include "libm_support.h"
-
-.align 32
-.global significand#
+// f8, f9, f10
.section .text
-.proc significand#
-.align 32
-
-significand:
+GLOBAL_LIBM_ENTRY(significand)
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11
@@ -75,19 +69,19 @@ significand:
// f10 gets f8(sign) with f1(exp,significand)
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8,f1
+ fmerge.s f10 = f8,f1
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fnorm f9 = f8
+ fnorm.s0 f9 = f8
nop.i 999 ;;
}
// Test for denormal input
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f8, 0x0b
+ fclass.m.unc p7,p0 = f8, 0x0b
nop.i 999 ;;
}
@@ -97,14 +91,14 @@ significand:
// return sign(f8) exp(f8) significand(f8), normalized.
{ .mfi
nop.m 999
-(p0) fclass.m.unc p0,p6 = f8, 0xe7
+ fclass.m.unc p0,p6 = f8, 0xe7
nop.i 999 ;;
}
{ .mmb
nop.m 999
nop.m 999
-(p7) br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
+(p7) br.cond.spnt SIGNIFICAND_DENORM ;; // Branch if x denormal
}
{ .mfi
@@ -115,29 +109,29 @@ significand:
{ .mfb
nop.m 999
-(p0) fnorm.d f8 = f8
-(p0) br.ret.sptk b0 ;;
+ fnorm.d.s0 f8 = f8
+ br.ret.sptk b0 ;;
}
-L(SIGNIFICAND_DENORM):
+SIGNIFICAND_DENORM:
// Here if x denorm
{ .mfi
nop.m 999
-(p0) fmerge.se f8 = f10,f9
+ fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// Check if fnorm(x) still denormal, means x double-extended denormal
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x0b
+ fclass.m.unc p7,p0 = f9, 0x0b
nop.i 999 ;;
}
// This will be the final result unless x double-extended denormal
{ .mfi
nop.m 999
-(p0) fnorm.d f8 = f8
+ fnorm.d.s0 f8 = f8
nop.i 999 ;;
}
@@ -152,9 +146,8 @@ L(SIGNIFICAND_DENORM):
// Final normalization if x double-extended denorm
{ .mfb
nop.m 999
-(p7) fnorm.d f8 = f8
-(p0) br.ret.sptk b0 ;;
+(p7) fnorm.d.s0 f8 = f8
+ br.ret.sptk b0 ;;
}
-.endp significand
-ASM_SIZE_DIRECTIVE(significand)
+GLOBAL_LIBM_END(significand)
diff --git a/sysdeps/ia64/fpu/s_significandf.S b/sysdeps/ia64/fpu/s_significandf.S
index d8cdc159f6..5c8299b944 100644
--- a/sysdeps/ia64/fpu/s_significandf.S
+++ b/sysdeps/ia64/fpu/s_significandf.S
@@ -1,10 +1,10 @@
.file "significandf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,13 +35,15 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 2/03/00: Modified to improve speed
-// 5/31/00: Fixed bug when x a double-extended denormal
+// 02/02/00 Initial version
+// 02/03/00 Modified to improve speed
+// 05/31/00 Fixed bug when x is a double-extended denormal
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -55,18 +57,10 @@
// p6, p7
//
// floating-point registers used:
-// f8, f9, f10
-
-#include "libm_support.h"
-
-.align 32
-.global significandf#
+// f8, f9, f10
.section .text
-.proc significandf#
-.align 32
-
-significandf:
+GLOBAL_LIBM_ENTRY(significandf)
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11
@@ -74,19 +68,19 @@ significandf:
// f10 gets f8(sign) with f1(exp,significand)
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8,f1
+ fmerge.s f10 = f8,f1
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fnorm f9 = f8
+ fnorm.s0 f9 = f8
nop.i 999 ;;
}
// Test for denormal input
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f8, 0x0b
+ fclass.m.unc p7,p0 = f8, 0x0b
nop.i 999 ;;
}
@@ -96,14 +90,14 @@ significandf:
// return sign(f8) exp(f8) significand(f8), normalized.
{ .mfi
nop.m 999
-(p0) fclass.m.unc p0,p6 = f8, 0xe7
+ fclass.m.unc p0,p6 = f8, 0xe7
nop.i 999 ;;
}
{ .mmb
nop.m 999
nop.m 999
-(p7) br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
+(p7) br.cond.spnt SIGNIFICAND_DENORM ;; // Branch if x denormal
}
{ .mfi
@@ -114,29 +108,29 @@ significandf:
{ .mfb
nop.m 999
-(p0) fnorm.s f8 = f8
-(p0) br.ret.sptk b0 ;;
+ fnorm.s.s0 f8 = f8
+ br.ret.sptk b0 ;;
}
-L(SIGNIFICAND_DENORM):
+SIGNIFICAND_DENORM:
// Here if x denorm
{ .mfi
nop.m 999
-(p0) fmerge.se f8 = f10,f9
+ fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// Check if fnorm(x) still denormal, means x double-extended denormal
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x0b
+ fclass.m.unc p7,p0 = f9, 0x0b
nop.i 999 ;;
}
// This will be the final result unless x double-extended denormal
{ .mfi
nop.m 999
-(p0) fnorm.s f8 = f8
+ fnorm.s.s0 f8 = f8
nop.i 999 ;;
}
@@ -151,9 +145,8 @@ L(SIGNIFICAND_DENORM):
// Final normalization if x double-extended denorm
{ .mfb
nop.m 999
-(p7) fnorm.s f8 = f8
-(p0) br.ret.sptk b0 ;;
+(p7) fnorm.s.s0 f8 = f8
+ br.ret.sptk b0 ;;
}
-.endp significandf
-ASM_SIZE_DIRECTIVE(significandf)
+GLOBAL_LIBM_END(significandf)
diff --git a/sysdeps/ia64/fpu/s_significandl.S b/sysdeps/ia64/fpu/s_significandl.S
index 268d3567d0..f62df4310c 100644
--- a/sysdeps/ia64/fpu/s_significandl.S
+++ b/sysdeps/ia64/fpu/s_significandl.S
@@ -1,10 +1,10 @@
.file "significandl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,13 +35,15 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 2/03/00: Modified to improve speed
-// 5/31/00: Fixed bug when x a double-extended denormal
+// 02/02/00 Initial version
+// 02/03/00 Modified to improve speed
+// 05/31/00 Fixed bug when x is a double-extended denormal
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
@@ -56,18 +58,10 @@
// p6, p7
//
// floating-point registers used:
-// f8, f9, f10
-
-#include "libm_support.h"
-
-.align 32
-.global significandl#
+// f8, f9, f10
.section .text
-.proc significandl#
-.align 32
-
-significandl:
+GLOBAL_LIBM_ENTRY(significandl)
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 1 11
@@ -75,19 +69,19 @@ significandl:
// f10 gets f8(sign) with f1(exp,significand)
{ .mfi
nop.m 999
-(p0) fmerge.s f10 = f8,f1
+ fmerge.s f10 = f8,f1
nop.i 999
}
{ .mfi
nop.m 999
-(p0) fnorm f9 = f8
+ fnorm.s0 f9 = f8
nop.i 999 ;;
}
// Test for denormal input
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f8, 0x0b
+ fclass.m.unc p7,p0 = f8, 0x0b
nop.i 999 ;;
}
@@ -97,14 +91,14 @@ significandl:
// return sign(f8) exp(f8) significand(f8), normalized.
{ .mfi
nop.m 999
-(p0) fclass.m.unc p0,p6 = f8, 0xe7
+ fclass.m.unc p0,p6 = f8, 0xe7
nop.i 999 ;;
}
{ .mmb
nop.m 999
nop.m 999
-(p7) br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
+(p7) br.cond.spnt SIGNIFICAND_DENORM ;; // Branch if x denormal
}
{ .mfi
@@ -115,29 +109,29 @@ significandl:
{ .mfb
nop.m 999
-(p0) fnorm f8 = f8
-(p0) br.ret.sptk b0 ;;
+ fnorm.s0 f8 = f8
+ br.ret.sptk b0 ;;
}
-L(SIGNIFICAND_DENORM):
+SIGNIFICAND_DENORM:
// Here if x denorm
{ .mfi
nop.m 999
-(p0) fmerge.se f8 = f10,f9
+ fmerge.se f8 = f10,f9
nop.i 999 ;;
}
// Check if fnorm(x) still denormal, means x double-extended denormal
{ .mfi
nop.m 999
-(p0) fclass.m.unc p7,p0 = f9, 0x0b
+ fclass.m.unc p7,p0 = f9, 0x0b
nop.i 999 ;;
}
// This will be the final result unless x double-extended denormal
{ .mfi
nop.m 999
-(p0) fnorm f8 = f8
+ fnorm.s0 f8 = f8
nop.i 999 ;;
}
@@ -152,9 +146,8 @@ L(SIGNIFICAND_DENORM):
// Final normalization if x double-extended denorm
{ .mfb
nop.m 999
-(p7) fnorm f8 = f8
-(p0) br.ret.sptk b0 ;;
+(p7) fnorm.s0 f8 = f8
+ br.ret.sptk b0 ;;
}
-.endp significandl
-ASM_SIZE_DIRECTIVE(significandl)
+GLOBAL_LIBM_END(significandl)
diff --git a/sysdeps/ia64/fpu/s_sincos.c b/sysdeps/ia64/fpu/s_sincos.c
index 1ddbc2122a..41254ae60a 100644
--- a/sysdeps/ia64/fpu/s_sincos.c
+++ b/sysdeps/ia64/fpu/s_sincos.c
@@ -1,9 +1 @@
-#include <math.h>
-
-void
-__sincos (double x, double *s, double *c)
-{
- *s = sin (x);
- *c = cos (x);
-}
-weak_alias (__sincos, sincos)
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/s_sincosf.c b/sysdeps/ia64/fpu/s_sincosf.c
index efd0fe3038..41254ae60a 100644
--- a/sysdeps/ia64/fpu/s_sincosf.c
+++ b/sysdeps/ia64/fpu/s_sincosf.c
@@ -1,9 +1 @@
-#include <math.h>
-
-void
-__sincosf (float x, float *s, float *c)
-{
- *s = sinf (x);
- *c = cosf (x);
-}
-weak_alias (__sincosf, sincosf)
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/s_sincosl.c b/sysdeps/ia64/fpu/s_sincosl.c
index a835b772e2..41254ae60a 100644
--- a/sysdeps/ia64/fpu/s_sincosl.c
+++ b/sysdeps/ia64/fpu/s_sincosl.c
@@ -1,9 +1 @@
-#include <math.h>
-
-void
-__sincosl (long double x, long double *s, long double *c)
-{
- *s = sinl (x);
- *c = cosl (x);
-}
-weak_alias (__sincosl, sincosl)
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/s_tan.S b/sysdeps/ia64/fpu/s_tan.S
index 3a497fcf4c..3000f5ee06 100644
--- a/sysdeps/ia64/fpu/s_tan.S
+++ b/sysdeps/ia64/fpu/s_tan.S
@@ -1,10 +1,10 @@
-.file "tan.s"
+.file "tancot.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -32,20 +32,24 @@
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 4/04/00 Unwind support added
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
// 12/27/00 Improved speed
+// 02/21/01 Updated to call tanl
+// 05/30/02 Added cot
+// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
-// double tan( double x);
+// double tan(double x);
+// double cot(double x);
//
// Overview of operation
//==============================================================
@@ -61,11 +65,14 @@
// Nfloat = round_int(tan_W)
//
// tan_r = x - Nfloat * (pi/2)_hi
-// tan_r = tan_r - Nfloat * (pi/2)_lo
+// a) tan_r = tan_r - Nfloat * (pi/2)_lo (for tan)
+// b) tan_r = Nfloat * (pi/2)_lo - tan_r (for cot)
//
// We have two paths: p8, when Nfloat is even and p9. when Nfloat is odd.
-// p8: tan(X) = tan(r)
-// p9: tan(X) = -cot(r)
+// a) for tan: p8: tan(X) = tan(r)
+// p9: tan(X) = -cot(r)
+// b) for cot: p9: cot(X) = cot(r)
+// p8: cot(X) = -tan(r)
//
// Each is evaluated as a series. The p9 path requires 1/r.
//
@@ -75,19 +82,16 @@
// Registers used
//==============================================================
//
-// predicate registers used:
-// p6-10
+// predicate registers used:
+// p6-12
//
-// floating-point registers used:
-// f10-15, f32-105
+// floating-point registers used:
+// f10-15, f32-106
// f8, input
//
// general registers used
-// r14-18, r32-43
+// r14-26, r32-39
//
-
-#include "libm_support.h"
-
// Assembly macros
//==============================================================
TAN_INV_PI_BY_2_2TO64 = f10
@@ -105,28 +109,28 @@ tan_Pi_by_2_lo = f34
tan_P0 = f35
tan_P1 = f36
tan_P2 = f37
-tan_P3 = f38
-tan_P4 = f39
-tan_P5 = f40
+tan_P3 = f38
+tan_P4 = f39
+tan_P5 = f40
tan_P6 = f41
tan_P7 = f42
-tan_P8 = f43
-tan_P9 = f44
-tan_P10 = f45
+tan_P8 = f43
+tan_P9 = f44
+tan_P10 = f45
tan_P11 = f46
-tan_P12 = f47
+tan_P12 = f47
tan_P13 = f48
tan_P14 = f49
tan_P15 = f50
-tan_Q0 = f51
-tan_Q1 = f52
-tan_Q2 = f53
-tan_Q3 = f54
-tan_Q4 = f55
-tan_Q5 = f56
-tan_Q6 = f57
-tan_Q7 = f58
+tan_Q0 = f51
+tan_Q1 = f52
+tan_Q2 = f53
+tan_Q3 = f54
+tan_Q4 = f55
+tan_Q5 = f56
+tan_Q6 = f57
+tan_Q7 = f58
tan_Q8 = f59
tan_Q9 = f60
tan_Q10 = f61
@@ -153,19 +157,19 @@ tan_v10 = f79
tan_v2 = f80
tan_v9 = f81
tan_v1 = f82
-tan_int_Nfloat = f83
-tan_Nfloat = f84
+tan_int_Nfloat = f83
+tan_Nfloat = f84
-tan_NORM_f8 = f85
+tan_NORM_f8 = f85
tan_W = f86
tan_y0 = f87
-tan_d = f88
-tan_y1 = f89
-tan_dsq = f90
-tan_y2 = f91
-tan_d4 = f92
-tan_inv_r = f93
+tan_d = f88
+tan_y1 = f89
+tan_dsq = f90
+tan_y2 = f91
+tan_d4 = f92
+tan_inv_r = f93
tan_z1 = f94
tan_z2 = f95
@@ -180,6 +184,7 @@ tan_z10 = f103
tan_z11 = f104
tan_z12 = f105
+arg_copy = f106
/////////////////////////////////////////////////////////////
@@ -188,37 +193,33 @@ tan_GR_rshf_2to64 = r15
tan_GR_exp_2tom64 = r16
tan_GR_n = r17
tan_GR_rshf = r18
-
-tan_AD = r33
-tan_GR_10009 = r34
-tan_GR_17_ones = r35
-tan_GR_N_odd_even = r36
-tan_GR_N = r37
-tan_signexp = r38
-tan_exp = r39
-tan_ADQ = r40
-
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
-
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+tan_AD = r19
+tan_GR_10009 = r20
+tan_GR_17_ones = r21
+tan_GR_N_odd_even = r22
+tan_GR_N = r23
+tan_signexp = r24
+tan_exp = r25
+tan_ADQ = r26
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+GR_Parameter_Tag = r39
+
+
+RODATA
.align 16
-double_tan_constants:
-ASM_TYPE_DIRECTIVE(double_tan_constants,@object)
-// data8 0xA2F9836E4E44152A, 0x00003FFE // 2/pi
+LOCAL_OBJECT_START(double_tan_constants)
data8 0xC90FDAA22168C234, 0x00003FFF // pi/2 hi
-
- data8 0xBEEA54580DDEA0E1 // P14
+ data8 0xBEEA54580DDEA0E1 // P14
data8 0x3ED3021ACE749A59 // P15
- data8 0xBEF312BD91DC8DA1 // P12
+ data8 0xBEF312BD91DC8DA1 // P12
data8 0x3EFAE9AFC14C5119 // P13
data8 0x3F2F342BF411E769 // P8
data8 0x3F1A60FC9F3B0227 // P9
@@ -232,10 +233,9 @@ ASM_TYPE_DIRECTIVE(double_tan_constants,@object)
data8 0x3FC11111111111C2 // P1
data8 0x3FABA1BA1BA0E850 // P2
data8 0x3F9664F4886725A7 // P3
-ASM_SIZE_DIRECTIVE(double_tan_constants)
+LOCAL_OBJECT_END(double_tan_constants)
-double_Q_tan_constants:
-ASM_TYPE_DIRECTIVE(double_Q_tan_constants,@object)
+LOCAL_OBJECT_START(double_Q_tan_constants)
data8 0xC4C6628B80DC1CD1, 0x00003FBF // pi/2 lo
data8 0x3E223A73BA576E48 // Q8
data8 0x3DF54AD8D1F2CA43 // Q9
@@ -248,35 +248,19 @@ ASM_TYPE_DIRECTIVE(double_Q_tan_constants,@object)
data8 0x3F61566ABBFFB489 // Q2
data8 0x3F2BBD77945C1733 // Q3
data8 0x3D927FB33E2B0E04 // Q10
-ASM_SIZE_DIRECTIVE(double_Q_tan_constants)
+LOCAL_OBJECT_END(double_Q_tan_constants)
-
-.align 32
-.global tan#
-#ifdef _LIBC
-.global __tan#
-#endif
+.section .text
////////////////////////////////////////////////////////
-
-
-.section .text
-.proc tan#
-#ifdef _LIBC
-.proc __tan#
-#endif
-.align 32
-tan:
-#ifdef _LIBC
-__tan:
-#endif
+LOCAL_LIBM_ENTRY(cot)
// The initial fnorm will take any unmasked faults and
// normalize any single/double unorms
{ .mlx
- alloc r32=ar.pfs,1,11,0,0
+ cmp.eq p12, p11 = r0, r0 // set p12=1, p11=0 for cot
movl tan_GR_sig_inv_pi_by_2 = 0xA2F9836E4E44152A // significand of 2/pi
}
{ .mlx
@@ -285,18 +269,47 @@ __tan:
}
;;
-{ .mfi
- ld8 tan_AD = [tan_AD]
- fnorm tan_NORM_f8 = f8
+{ .mlx
mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
+ movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
}
+{ .mfb
+ ld8 tan_AD = [tan_AD]
+ fnorm.s0 tan_NORM_f8 = f8
+ br.cond.sptk COMMON_PATH
+}
+;;
+
+LOCAL_LIBM_END(cot)
+
+GLOBAL_IEEE754_ENTRY(tan)
+// The initial fnorm will take any unmasked faults and
+// normalize any single/double unorms
+
{ .mlx
- nop.m 999
+ cmp.eq p11, p12 = r0, r0 // set p11=1, p12=0 for tan
+ movl tan_GR_sig_inv_pi_by_2 = 0xA2F9836E4E44152A // significand of 2/pi
+}
+{ .mlx
+ addl tan_AD = @ltoff(double_tan_constants), gp
+ movl tan_GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+63+1)
+}
+;;
+
+{ .mlx
+ mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
}
+{ .mfi
+ ld8 tan_AD = [tan_AD]
+ fnorm.s0 tan_NORM_f8 = f8
+ nop.i 0
+}
;;
+// Common path for both tan and cot
+COMMON_PATH:
// Form two constants we need
// 2/pi * 2^1 * 2^63, scaled by 2^64 since we just loaded the significand
// 1.1000...000 * 2^(63+63+1) to right shift int(W) into the significand
@@ -313,7 +326,7 @@ __tan:
{ .mmf
setf.exp TAN_2TOM64 = tan_GR_exp_2tom64
adds tan_ADQ = double_Q_tan_constants - double_tan_constants, tan_AD
- fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
+(p11) fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0 (tan)
}
;;
@@ -323,79 +336,79 @@ __tan:
// 1.1000...000 * 2^63, the right shift constant
{ .mmf
setf.d TAN_RSHF = tan_GR_rshf
- ldfe tan_Pi_by_2_hi = [tan_AD],16
+ ldfe tan_Pi_by_2_hi = [tan_AD],16
fclass.m.unc p7,p0 = f8, 0x23 // Test for x=inf
}
;;
{ .mfb
- ldfe tan_Pi_by_2_lo = [tan_ADQ],16
+ ldfe tan_Pi_by_2_lo = [tan_ADQ],16
fclass.m.unc p8,p0 = f8, 0xc3 // Test for x=nan
-(p6) br.ret.spnt b0 ;; // Exit for x=0
+(p6) br.ret.spnt b0 ;; // Exit for x=0 (tan only)
}
{ .mfi
- ldfpd tan_P14,tan_P15 = [tan_AD],16
+ ldfpd tan_P14,tan_P15 = [tan_AD],16
(p7) frcpa.s0 f8,p9=f0,f0 // Set qnan indef if x=inf
mov tan_GR_10009 = 0x10009
}
{ .mib
- ldfpd tan_Q8,tan_Q9 = [tan_ADQ],16
+ ldfpd tan_Q8,tan_Q9 = [tan_ADQ],16
nop.i 999
(p7) br.ret.spnt b0 ;; // Exit for x=inf
}
{ .mfi
- ldfpd tan_P12,tan_P13 = [tan_AD],16
-(p8) fma.d f8=f8,f1,f8 // Set qnan if x=nan
+ ldfpd tan_P12,tan_P13 = [tan_AD],16
+(p12) fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0 (cot)
nop.i 999
}
-{ .mib
- ldfpd tan_Q4,tan_Q5 = [tan_ADQ],16
- nop.i 999
+{ .mfb
+ ldfpd tan_Q4,tan_Q5 = [tan_ADQ],16
+(p8) fma.d.s0 f8=f8,f1,f8 // Set qnan if x=nan
(p8) br.ret.spnt b0 ;; // Exit for x=nan
}
-{ .mmi
- getf.exp tan_signexp = tan_NORM_f8
- ldfpd tan_P8,tan_P9 = [tan_AD],16
- nop.i 999 ;;
+{ .mmf
+ getf.exp tan_signexp = tan_NORM_f8
+ ldfpd tan_P8,tan_P9 = [tan_AD],16
+ fmerge.s arg_copy = f8, f8 ;; // Save input for error call
}
-// Multiply x by scaled 2/pi and add large const to shift integer part of W to
+// Multiply x by scaled 2/pi and add large const to shift integer part of W to
// rightmost bits of significand
-{ .mfi
+{ .mmf
+ alloc r32=ar.pfs,0,4,4,0
ldfpd tan_Q6,tan_Q7 = [tan_ADQ],16
fma.s1 TAN_W_2TO64_RSH = tan_NORM_f8,TAN_INV_PI_BY_2_2TO64,TAN_RSHF_2TO64
- nop.i 999 ;;
-}
+};;
-{ .mmi
- ldfpd tan_P10,tan_P11 = [tan_AD],16
- nop.m 999
- and tan_exp = tan_GR_17_ones, tan_signexp ;;
+{ .mmf
+ ldfpd tan_P10,tan_P11 = [tan_AD],16
+ and tan_exp = tan_GR_17_ones, tan_signexp
+(p6) frcpa.s0 f8, p0 = f1, f8 ;; // cot(+-0) = +-Inf
}
// p7 is true if we must call DBX TAN
// p7 is true if f8 exp is > 0x10009 (which includes all ones
// NAN or inf)
-{ .mmi
- ldfpd tan_Q0,tan_Q1 = [tan_ADQ],16
- cmp.ge.unc p7,p0 = tan_exp,tan_GR_10009
- nop.i 999 ;;
+{ .mmb
+ ldfpd tan_Q0,tan_Q1 = [tan_ADQ],16
+ cmp.ge.unc p7,p0 = tan_exp,tan_GR_10009
+(p7) br.cond.spnt TAN_DBX ;;
}
{ .mmb
- ldfpd tan_P4,tan_P5 = [tan_AD],16
- nop.m 999
-(p7) br.cond.spnt L(TAN_DBX) ;;
+ ldfpd tan_P4,tan_P5 = [tan_AD],16
+(p6) mov GR_Parameter_Tag = 226 // (cot)
+(p6) br.cond.spnt __libm_error_region ;; // call error support if cot(+-0)
}
{ .mmi
- ldfpd tan_Q2,tan_Q3 = [tan_ADQ],16
+ ldfpd tan_Q2,tan_Q3 = [tan_ADQ],16
nop.m 999
nop.i 999 ;;
}
@@ -404,8 +417,8 @@ __tan:
// TAN_NFLOAT = Round_Int_Nearest(tan_W)
{ .mfi
- ldfpd tan_P6,tan_P7 = [tan_AD],16
- fms.s1 TAN_NFLOAT = TAN_W_2TO64_RSH,TAN_2TOM64,TAN_RSHF
+ ldfpd tan_P6,tan_P7 = [tan_AD],16
+ fms.s1 TAN_NFLOAT = TAN_W_2TO64_RSH,TAN_2TOM64,TAN_RSHF
nop.i 999 ;;
}
@@ -418,22 +431,22 @@ __tan:
{ .mfi
- ldfpd tan_P0,tan_P1 = [tan_AD],16
+ ldfpd tan_P0,tan_P1 = [tan_AD],16
nop.f 999
nop.i 999 ;;
}
-{ .mfi
+{ .mmi
getf.sig tan_GR_n = TAN_W_2TO64_RSH
- nop.f 999
+ ldfpd tan_P2,tan_P3 = [tan_AD]
nop.i 999 ;;
}
// tan_r = -tan_Nfloat * tan_Pi_by_2_hi + x
{ .mfi
- ldfpd tan_P2,tan_P3 = [tan_AD]
- fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_hi, tan_NORM_f8
+(p12) add tan_GR_n = 0x1, tan_GR_n // N = N + 1 (for cot)
+ fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_hi, tan_NORM_f8
nop.i 999 ;;
}
@@ -441,42 +454,49 @@ __tan:
// p8 ==> even
// p9 ==> odd
{ .mmi
- and tan_GR_N_odd_even = 0x1, tan_GR_n ;;
+ and tan_GR_N_odd_even = 0x1, tan_GR_n ;;
nop.m 999
cmp.eq.unc p8,p9 = tan_GR_N_odd_even, r0 ;;
}
-// tan_r = tan_r -tan_Nfloat * tan_Pi_by_2_lo
+.pred.rel "mutex", p11, p12
+// tan_r = tan_r -tan_Nfloat * tan_Pi_by_2_lo (tan)
{ .mfi
nop.m 999
- fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_lo, tan_r
+(p11) fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_lo, tan_r
+ nop.i 999
+}
+// tan_r = -(tan_r -tan_Nfloat * tan_Pi_by_2_lo) (cot)
+{ .mfi
+ nop.m 999
+(p12) fms.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_lo, tan_r
nop.i 999 ;;
}
{ .mfi
nop.m 999
- fma.s1 tan_rsq = tan_r, tan_r, f0
+ fma.s1 tan_rsq = tan_r, tan_r, f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p9) frcpa.s1 tan_y0, p10 = f1,tan_r
+(p9) frcpa.s1 tan_y0, p0 = f1,tan_r
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v18 = tan_rsq, tan_P15, tan_P14
+(p8) fma.s1 tan_v18 = tan_rsq, tan_P15, tan_P14
nop.i 999
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v4 = tan_rsq, tan_P1, tan_P0
+(p8) fma.s1 tan_v4 = tan_rsq, tan_P1, tan_P0
nop.i 999 ;;
}
@@ -484,12 +504,12 @@ __tan:
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v16 = tan_rsq, tan_P13, tan_P12
- nop.i 999
+(p8) fma.s1 tan_v16 = tan_rsq, tan_P13, tan_P12
+ nop.i 999
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v17 = tan_rsq, tan_rsq, f0
+(p8) fma.s1 tan_v17 = tan_rsq, tan_rsq, f0
nop.i 999 ;;
}
@@ -497,12 +517,12 @@ __tan:
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v12 = tan_rsq, tan_P9, tan_P8
- nop.i 999
+(p8) fma.s1 tan_v12 = tan_rsq, tan_P9, tan_P8
+ nop.i 999
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v13 = tan_rsq, tan_P11, tan_P10
+(p8) fma.s1 tan_v13 = tan_rsq, tan_P11, tan_P10
nop.i 999 ;;
}
@@ -510,12 +530,12 @@ __tan:
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v7 = tan_rsq, tan_P5, tan_P4
- nop.i 999
+(p8) fma.s1 tan_v7 = tan_rsq, tan_P5, tan_P4
+ nop.i 999
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v8 = tan_rsq, tan_P7, tan_P6
+(p8) fma.s1 tan_v8 = tan_rsq, tan_P7, tan_P6
nop.i 999 ;;
}
@@ -523,12 +543,12 @@ __tan:
{ .mfi
nop.m 999
-(p9) fnma.s1 tan_d = tan_r, tan_y0, f1
- nop.i 999
+(p9) fnma.s1 tan_d = tan_r, tan_y0, f1
+ nop.i 999
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v5 = tan_rsq, tan_P3, tan_P2
+(p8) fma.s1 tan_v5 = tan_rsq, tan_P3, tan_P2
nop.i 999 ;;
}
@@ -536,36 +556,36 @@ __tan:
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z11 = tan_rsq, tan_Q9, tan_Q8
+(p9) fma.s1 tan_z11 = tan_rsq, tan_Q9, tan_Q8
nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z12 = tan_rsq, tan_rsq, f0
+(p9) fma.s1 tan_z12 = tan_rsq, tan_rsq, f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v15 = tan_v17, tan_v18, tan_v16
- nop.i 999
+(p8) fma.s1 tan_v15 = tan_v17, tan_v18, tan_v16
+ nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z7 = tan_rsq, tan_Q5, tan_Q4
+(p9) fma.s1 tan_z7 = tan_rsq, tan_Q5, tan_Q4
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v11 = tan_v17, tan_v13, tan_v12
+(p8) fma.s1 tan_v11 = tan_v17, tan_v13, tan_v12
nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z8 = tan_rsq, tan_Q7, tan_Q6
+(p9) fma.s1 tan_z8 = tan_rsq, tan_Q7, tan_Q6
nop.i 999 ;;
}
@@ -573,13 +593,13 @@ __tan:
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v14 = tan_v17, tan_v17, f0
- nop.i 999
+(p8) fma.s1 tan_v14 = tan_v17, tan_v17, f0
+ nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z3 = tan_rsq, tan_Q1, tan_Q0
- nop.i 999 ;;
+(p9) fma.s1 tan_z3 = tan_rsq, tan_Q1, tan_Q0
+ nop.i 999 ;;
}
@@ -587,12 +607,12 @@ __tan:
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v3 = tan_v17, tan_v5, tan_v4
+(p8) fma.s1 tan_v3 = tan_v17, tan_v5, tan_v4
nop.i 999
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v6 = tan_v17, tan_v8, tan_v7
+(p8) fma.s1 tan_v6 = tan_v17, tan_v8, tan_v7
nop.i 999 ;;
}
@@ -600,89 +620,89 @@ __tan:
{ .mfi
nop.m 999
-(p9) fma.s1 tan_y1 = tan_y0, tan_d, tan_y0
- nop.i 999
+(p9) fma.s1 tan_y1 = tan_y0, tan_d, tan_y0
+ nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_dsq = tan_d, tan_d, f0
- nop.i 999 ;;
+(p9) fma.s1 tan_dsq = tan_d, tan_d, f0
+ nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z10 = tan_z12, tan_Q10, tan_z11
- nop.i 999
+(p9) fma.s1 tan_z10 = tan_z12, tan_Q10, tan_z11
+ nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z9 = tan_z12, tan_z12,f0
+(p9) fma.s1 tan_z9 = tan_z12, tan_z12,f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z4 = tan_rsq, tan_Q3, tan_Q2
- nop.i 999
+(p9) fma.s1 tan_z4 = tan_rsq, tan_Q3, tan_Q2
+ nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z6 = tan_z12, tan_z8, tan_z7
- nop.i 999 ;;
+(p9) fma.s1 tan_z6 = tan_z12, tan_z8, tan_z7
+ nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v10 = tan_v14, tan_v15, tan_v11
- nop.i 999 ;;
+(p8) fma.s1 tan_v10 = tan_v14, tan_v15, tan_v11
+ nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_y2 = tan_y1, tan_d, tan_y0
- nop.i 999
+(p9) fma.s1 tan_y2 = tan_y1, tan_d, tan_y0
+ nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_d4 = tan_dsq, tan_dsq, tan_d
+(p9) fma.s1 tan_d4 = tan_dsq, tan_dsq, tan_d
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v2 = tan_v14, tan_v6, tan_v3
+(p8) fma.s1 tan_v2 = tan_v14, tan_v6, tan_v3
nop.i 999
}
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v9 = tan_v14, tan_v14, f0
+(p8) fma.s1 tan_v9 = tan_v14, tan_v14, f0
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z2 = tan_z12, tan_z4, tan_z3
- nop.i 999
+(p9) fma.s1 tan_z2 = tan_z12, tan_z4, tan_z3
+ nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z5 = tan_z9, tan_z10, tan_z6
+(p9) fma.s1 tan_z5 = tan_z9, tan_z10, tan_z6
nop.i 999 ;;
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_inv_r = tan_d4, tan_y2, tan_y0
- nop.i 999
+(p9) fma.s1 tan_inv_r = tan_d4, tan_y2, tan_y0
+ nop.i 999
}
{ .mfi
nop.m 999
@@ -694,12 +714,12 @@ __tan:
{ .mfi
nop.m 999
-(p8) fma.s1 tan_v1 = tan_v9, tan_v10, tan_v2
- nop.i 999
+(p8) fma.s1 tan_v1 = tan_v9, tan_v10, tan_v2
+ nop.i 999
}
{ .mfi
nop.m 999
-(p9) fma.s1 tan_z1 = tan_z9, tan_z5, tan_z2
+(p9) fma.s1 tan_z1 = tan_z9, tan_z5, tan_z2
nop.i 999 ;;
}
@@ -707,64 +727,150 @@ __tan:
{ .mfi
nop.m 999
-(p8) fma.d.s0 f8 = tan_v1, tan_rcube, tan_r
- nop.i 999
+(p8) fma.d.s0 f8 = tan_v1, tan_rcube, tan_r
+ nop.i 999
}
{ .mfb
nop.m 999
-(p9) fms.d.s0 f8 = tan_r, tan_z1, tan_inv_r
- br.ret.sptk b0 ;;
+(p9) fms.d.s0 f8 = tan_r, tan_z1, tan_inv_r
+ br.ret.sptk b0 ;;
}
-.endp tan#
-ASM_SIZE_DIRECTIVE(tan)
-
+GLOBAL_IEEE754_END(tan)
-.proc __libm_callout
-__libm_callout:
-L(TAN_DBX):
+LOCAL_LIBM_ENTRY(__libm_callout)
+TAN_DBX:
.prologue
{ .mfi
- nop.m 0
- fmerge.s f9 = f0,f0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ nop.m 0
+ fmerge.s f9 = f0,f0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
}
;;
{ .mfi
- mov GR_SAVE_GP=gp
- nop.f 0
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_GP=gp
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
}
.body
-{ .mfb
+{ .mmb
nop.m 999
- nop.f 999
- br.call.sptk.many b0=__libm_tan# ;;
+ nop.m 999
+(p11) br.cond.sptk.many call_tanl ;;
}
+// Here if we should call cotl
+{ .mmb
+ nop.m 999
+ nop.m 999
+ br.call.sptk.many b0=__libm_cotl# ;;
+}
{ .mfi
- mov gp = GR_SAVE_GP
- fnorm.d f8 = f8
- mov b0 = GR_SAVE_B0
+ mov gp = GR_SAVE_GP
+ fnorm.d.s0 f8 = f8
+ mov b0 = GR_SAVE_B0
}
;;
+{ .mib
+ nop.m 999
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+;;
+}
+
+// Here if we should call tanl
+call_tanl:
+{ .mmb
+ nop.m 999
+ nop.m 999
+ br.call.sptk.many b0=__libm_tanl# ;;
+}
+
+{ .mfi
+ mov gp = GR_SAVE_GP
+ fnorm.d.s0 f8 = f8
+ mov b0 = GR_SAVE_B0
+}
+;;
{ .mib
- nop.m 999
+ nop.m 999
mov ar.pfs = GR_SAVE_PFS
br.ret.sptk b0
;;
}
+LOCAL_LIBM_END(__libm_callout)
+
+.type __libm_tanl#,@function
+.global __libm_tanl#
+.type __libm_cotl#,@function
+.global __libm_cotl#
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+// (2)
+{ .mmi
+ stfd [GR_Parameter_Y] = f1,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfd [GR_Parameter_X] = arg_copy // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+// (4)
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
-.endp __libm_callout
-ASM_SIZE_DIRECTIVE(__libm_callout)
+.type __libm_error_support#,@function
+.global __libm_error_support#
-.type __libm_tan#,@function
-.global __libm_tan#
diff --git a/sysdeps/ia64/fpu/s_tanf.S b/sysdeps/ia64/fpu/s_tanf.S
index a84009e2fe..48f82345f9 100644
--- a/sysdeps/ia64/fpu/s_tanf.S
+++ b/sysdeps/ia64/fpu/s_tanf.S
@@ -1,10 +1,10 @@
-.file "tanf.s"
+.file "tancotf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -32,739 +32,658 @@
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 2/02/00: Initial version
-// 4/04/00 Unwind support added
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
// 12/27/00 Improved speed
+// 02/21/01 Updated to call tanl
+// 05/30/02 Improved speed, added cotf.
+// 11/25/02 Added explicit completer on fnorm
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/17/03 Eliminated redundant stop bits
//
-// API
+// APIs
//==============================================================
-// float tan( float x);
+// float tanf(float)
+// float cotf(float)
//
-// Overview of operation
+// Algorithm Description for tanf
//==============================================================
-// If the input value in radians is |x| >= 1.xxxxx 2^10 call the
-// older slower version.
+// The tanf function computes the principal value of the tangent of x,
+// where x is the argument in radians.
//
-// The new algorithm is used when |x| <= 1.xxxxx 2^9.
+// There are 5 paths:
+// 1. x = +/-0.0
+// Return tanf(x) = +/-0.0
//
-// Represent the input X as Nfloat * pi/2 + r
-// where r can be negative and |r| <= pi/4
+// 2. x = [S,Q]NaN
+// Return tanf(x) = QNaN
//
-// tan_W = x * 2/pi
-// Nfloat = round_int(tan_W)
+// 3. x = +/-Inf
+// Return tanf(x) = QNaN
//
-// tan_r = x - Nfloat * (pi/2)_hi
-// tan_r = tan_r - Nfloat * (pi/2)_lo
+// 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
+// Return tanf(x) = P19(r) = A1*r + A3*r^3 + A5*r^5 + ... + A19*r^19 =
+// = r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = r*P9(t), where t = r^2
//
-// We have two paths: p8, when Nfloat is even and p9. when Nfloat is odd.
-// p8: tan(X) = tan(r)
-// p9: tan(X) = -cot(r)
+// 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
+// Return tanf(x) = -1/r + P11(r) = -1/r + B1*r + B3*r^3 + ... + B11*r^11 =
+// = -1/r + r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = -1/r + r*P5(t),
+// where t = r^2
//
-// Each is evaluated as a series. The p9 path requires 1/r.
+// Algorithm Description for cotf
+//==============================================================
+// The cotf function computes the principal value of the cotangent of x,
+// where x is the argument in radians.
//
-// The coefficients used in the series are stored in a table as
-// are the pi constants.
+// There are 5 paths:
+// 1. x = +/-0.0
+// Return cotf(x) = +/-Inf and error handling is called
//
-// Registers used
-//==============================================================
+// 2. x = [S,Q]NaN
+// Return cotf(x) = QNaN
//
-// predicate registers used:
-// p6-10
+// 3. x = +/-Inf
+// Return cotf(x) = QNaN
//
-// floating-point registers used:
-// f10-15, f32-105
+// 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
+// Return cotf(x) = P19(-r) = A1*(-r) + A3*(-r^3) + ... + A19*(-r^19) =
+// = -r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = -r*P9(t), where t = r^2
+//
+// 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
+// Return cotf(x) = 1/r + P11(-r) = 1/r + B1*(-r) + ... + B11*(-r^11) =
+// = 1/r - r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = 1/r - r*P5(t),
+// where t = r^2
+//
+// We set p10 and clear p11 if computing tanf, vice versa for cotf.
+//
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
// f8, input
+// f32 -> f80
//
-// general registers used
-// r14-18, r32-43
+// General registers used:
+// r14 -> r23, r32 -> r39
+//
+// Predicate registers used:
+// p6 -> p13
//
-
-#include "libm_support.h"
-
// Assembly macros
//==============================================================
-TAN_INV_PI_BY_2_2TO64 = f10
-TAN_RSHF_2TO64 = f11
-TAN_2TOM64 = f12
-TAN_RSHF = f13
-TAN_W_2TO64_RSH = f14
-TAN_NFLOAT = f15
-
-tan_Inv_Pi_by_2 = f32
-tan_Pi_by_2_hi = f33
-tan_Pi_by_2_lo = f34
-
-
-tan_P0 = f35
-tan_P1 = f36
-tan_P2 = f37
-tan_P3 = f38
-tan_P4 = f39
-tan_P5 = f40
-tan_P6 = f41
-tan_P7 = f42
-tan_P8 = f43
-tan_P9 = f44
-tan_P10 = f45
-tan_P11 = f46
-tan_P12 = f47
-tan_P13 = f48
-tan_P14 = f49
-tan_P15 = f50
-
-tan_Q0 = f51
-tan_Q1 = f52
-tan_Q2 = f53
-tan_Q3 = f54
-tan_Q4 = f55
-tan_Q5 = f56
-tan_Q6 = f57
-tan_Q7 = f58
-tan_Q8 = f59
-tan_Q9 = f60
-tan_Q10 = f61
-
-tan_r = f62
-tan_rsq = f63
-tan_rcube = f64
-
-tan_v18 = f65
-tan_v16 = f66
-tan_v17 = f67
-tan_v12 = f68
-tan_v13 = f69
-tan_v7 = f70
-tan_v8 = f71
-tan_v4 = f72
-tan_v5 = f73
-tan_v15 = f74
-tan_v11 = f75
-tan_v14 = f76
-tan_v3 = f77
-tan_v6 = f78
-tan_v10 = f79
-tan_v2 = f80
-tan_v9 = f81
-tan_v1 = f82
-tan_int_Nfloat = f83
-tan_Nfloat = f84
-
-tan_NORM_f8 = f85
-tan_W = f86
-
-tan_y0 = f87
-tan_d = f88
-tan_y1 = f89
-tan_dsq = f90
-tan_y2 = f91
-tan_d4 = f92
-tan_inv_r = f93
-
-tan_z1 = f94
-tan_z2 = f95
-tan_z3 = f96
-tan_z4 = f97
-tan_z5 = f98
-tan_z6 = f99
-tan_z7 = f100
-tan_z8 = f101
-tan_z9 = f102
-tan_z10 = f103
-tan_z11 = f104
-tan_z12 = f105
-
-
-/////////////////////////////////////////////////////////////
-
-tan_GR_sig_inv_pi_by_2 = r14
-tan_GR_rshf_2to64 = r15
-tan_GR_exp_2tom64 = r16
-tan_GR_n = r17
-tan_GR_rshf = r18
-
-tan_AD = r33
-tan_GR_10009 = r34
-tan_GR_17_ones = r35
-tan_GR_N_odd_even = r36
-tan_GR_N = r37
-tan_signexp = r38
-tan_exp = r39
-tan_ADQ = r40
-
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
-
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
+// integer registers
+rExp = r14
+rSignMask = r15
+rRshf = r16
+rScFctrExp = r17
+rIntN = r18
+rSigRcpPiby2 = r19
+rScRshf = r20
+rCoeffA = r21
+rCoeffB = r22
+rExpCut = r23
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+GR_Parameter_Tag = r39
+
+//==============================================================
+// floating point registers
+fScRcpPiby2 = f32
+fScRshf = f33
+fNormArg = f34
+fScFctr = f35
+fRshf = f36
+fShiftedN = f37
+fN = f38
+fR = f39
+fA01 = f40
+fA03 = f41
+fA05 = f42
+fA07 = f43
+fA09 = f44
+fA11 = f45
+fA13 = f46
+fA15 = f47
+fA17 = f48
+fA19 = f49
+fB01 = f50
+fB03 = f51
+fB05 = f52
+fB07 = f53
+fB09 = f54
+fB11 = f55
+fA03_01 = f56
+fA07_05 = f57
+fA11_09 = f58
+fA15_13 = f59
+fA19_17 = f60
+fA11_05 = f61
+fA19_13 = f62
+fA19_05 = f63
+fRbyA03_01 = f64
+fB03_01 = f65
+fB07_05 = f66
+fB11_09 = f67
+fB11_05 = f68
+fRbyB03_01 = f69
+fRbyB11_01 = f70
+fRp2 = f71
+fRp4 = f72
+fRp8 = f73
+fRp5 = f74
+fY0 = f75
+fY1 = f76
+fD = f77
+fDp2 = f78
+fInvR = f79
+fPiby2 = f80
+//==============================================================
-.align 16
-double_tan_constants:
-ASM_TYPE_DIRECTIVE(double_tan_constants,@object)
-// data8 0xA2F9836E4E44152A, 0x00003FFE // 2/pi
- data8 0xC90FDAA22168C234, 0x00003FFF // pi/2 hi
-
- data8 0xBEEA54580DDEA0E1 // P14
- data8 0x3ED3021ACE749A59 // P15
- data8 0xBEF312BD91DC8DA1 // P12
- data8 0x3EFAE9AFC14C5119 // P13
- data8 0x3F2F342BF411E769 // P8
- data8 0x3F1A60FC9F3B0227 // P9
- data8 0x3EFF246E78E5E45B // P10
- data8 0x3F01D9D2E782875C // P11
- data8 0x3F8226E34C4499B6 // P4
- data8 0x3F6D6D3F12C236AC // P5
- data8 0x3F57DA1146DCFD8B // P6
- data8 0x3F43576410FE3D75 // P7
- data8 0x3FD5555555555555 // P0
- data8 0x3FC11111111111C2 // P1
- data8 0x3FABA1BA1BA0E850 // P2
- data8 0x3F9664F4886725A7 // P3
-ASM_SIZE_DIRECTIVE(double_tan_constants)
-
-double_Q_tan_constants:
-ASM_TYPE_DIRECTIVE(double_Q_tan_constants,@object)
- data8 0xC4C6628B80DC1CD1, 0x00003FBF // pi/2 lo
- data8 0x3E223A73BA576E48 // Q8
- data8 0x3DF54AD8D1F2CA43 // Q9
- data8 0x3EF66A8EE529A6AA // Q4
- data8 0x3EC2281050410EE6 // Q5
- data8 0x3E8D6BB992CC3CF5 // Q6
- data8 0x3E57F88DE34832E4 // Q7
- data8 0x3FD5555555555555 // Q0
- data8 0x3F96C16C16C16DB8 // Q1
- data8 0x3F61566ABBFFB489 // Q2
- data8 0x3F2BBD77945C1733 // Q3
- data8 0x3D927FB33E2B0E04 // Q10
-ASM_SIZE_DIRECTIVE(double_Q_tan_constants)
-
-
-
-.align 32
-.global tanf#
-#ifdef _LIBC
-.global __tanf#
-#endif
-
-////////////////////////////////////////////////////////
+RODATA
+.align 16
+LOCAL_OBJECT_START(coeff_A)
+data8 0x3FF0000000000000 // A1 = 1.00000000000000000000e+00
+data8 0x3FD5555556BCE758 // A3 = 3.33333334641442641606e-01
+data8 0x3FC111105C2DAE48 // A5 = 1.33333249100689099175e-01
+data8 0x3FABA1F876341060 // A7 = 5.39701122561673229739e-02
+data8 0x3F965FB86D12A38D // A9 = 2.18495194027670719750e-02
+data8 0x3F8265F62415F9D6 // A11 = 8.98353860497717439465e-03
+data8 0x3F69E3AE64CCF58D // A13 = 3.16032468108912746342e-03
+data8 0x3F63920D09D0E6F6 // A15 = 2.38897844840557235331e-03
+LOCAL_OBJECT_END(coeff_A)
+
+LOCAL_OBJECT_START(coeff_B)
+data8 0xC90FDAA22168C235, 0x3FFF // pi/2
+data8 0x3FD55555555358DB // B1 = 3.33333333326107426583e-01
+data8 0x3F96C16C252F643F // B3 = 2.22222230621336129239e-02
+data8 0x3F61566243AB3C60 // B5 = 2.11638633968606896785e-03
+data8 0x3F2BC1169BD4438B // B7 = 2.11748132564551094391e-04
+data8 0x3EF611B4CEA056A1 // B9 = 2.10467959860990200942e-05
+data8 0x3EC600F9E32194BF // B11 = 2.62305891234274186608e-06
+data8 0xBF42BA7BCC177616 // A17 =-5.71546981685324877205e-04
+data8 0x3F4F2614BC6D3BB8 // A19 = 9.50584530849832782542e-04
+LOCAL_OBJECT_END(coeff_B)
.section .text
-.proc tanf#
-#ifdef _LIBC
-.proc __tanf#
-#endif
-.align 32
-tanf:
-#ifdef _LIBC
-__tanf:
-#endif
-// The initial fnorm will take any unmasked faults and
-// normalize any single/double unorms
+
+LOCAL_LIBM_ENTRY(cotf)
{ .mlx
- alloc r32=ar.pfs,1,11,0,0
- movl tan_GR_sig_inv_pi_by_2 = 0xA2F9836E4E44152A // significand of 2/pi
+ getf.exp rExp = f8 // ***** Get 2^17 * s + E
+ movl rSigRcpPiby2= 0xA2F9836E4E44152A // significand of 2/Pi
}
{ .mlx
- addl tan_AD = @ltoff(double_tan_constants), gp
- movl tan_GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+63+1)
+ addl rCoeffA = @ltoff(coeff_A), gp
+ movl rScRshf = 0x47e8000000000000 // 1.5*2^(63+63+1)
}
;;
{ .mfi
- ld8 tan_AD = [tan_AD]
- fnorm tan_NORM_f8 = f8
- mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
+ alloc r32 = ar.pfs, 0, 4, 4, 0
+ fclass.m p9, p0 = f8, 0xc3 // Test for x=nan
+ cmp.eq p11, p10 = r0, r0 // if p11=1 we compute cotf
}
-{ .mlx
- nop.m 999
- movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+{ .mib
+ ld8 rCoeffA = [rCoeffA]
+ mov rExpCut = 0x10009 // cutoff for exponent
+ br.cond.sptk Common_Path
}
;;
+LOCAL_LIBM_END(cotf)
-// Form two constants we need
-// 2/pi * 2^1 * 2^63, scaled by 2^64 since we just loaded the significand
-// 1.1000...000 * 2^(63+63+1) to right shift int(W) into the significand
-{ .mmi
- setf.sig TAN_INV_PI_BY_2_2TO64 = tan_GR_sig_inv_pi_by_2
- setf.d TAN_RSHF_2TO64 = tan_GR_rshf_2to64
- mov tan_GR_17_ones = 0x1ffff ;;
-}
-
+GLOBAL_IEEE754_ENTRY(tanf)
-// Form another constant
-// 2^-64 for scaling Nfloat
-// 1.1000...000 * 2^63, the right shift constant
-{ .mmf
- setf.exp TAN_2TOM64 = tan_GR_exp_2tom64
- adds tan_ADQ = double_Q_tan_constants - double_tan_constants, tan_AD
- fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
+{ .mlx
+ getf.exp rExp = f8 // ***** Get 2^17 * s + E
+ movl rSigRcpPiby2= 0xA2F9836E4E44152A // significand of 2/Pi
}
-;;
-
-
-// Form another constant
-// 2^-64 for scaling Nfloat
-// 1.1000...000 * 2^63, the right shift constant
-{ .mmf
- setf.d TAN_RSHF = tan_GR_rshf
- ldfe tan_Pi_by_2_hi = [tan_AD],16
- fclass.m.unc p7,p0 = f8, 0x23 // Test for x=inf
+{ .mlx
+ addl rCoeffA = @ltoff(coeff_A), gp
+ movl rScRshf = 0x47e8000000000000 // 1.5*2^(63+63+1)
}
;;
-{ .mfb
- ldfe tan_Pi_by_2_lo = [tan_ADQ],16
- fclass.m.unc p8,p0 = f8, 0xc3 // Test for x=nan
-(p6) br.ret.spnt b0 ;; // Exit for x=0
-}
-
{ .mfi
- ldfpd tan_P14,tan_P15 = [tan_AD],16
-(p7) frcpa.s0 f8,p9=f0,f0 // Set qnan indef if x=inf
- mov tan_GR_10009 = 0x10009
+ alloc r32 = ar.pfs, 0, 4, 4, 0
+ fclass.m p9, p0 = f8, 0xc3 // Test for x=nan
+ cmp.eq p10, p11 = r0, r0 // if p10=1 we compute tanf
}
{ .mib
- ldfpd tan_Q8,tan_Q9 = [tan_ADQ],16
- nop.i 999
-(p7) br.ret.spnt b0 ;; // Exit for x=inf
+ ld8 rCoeffA = [rCoeffA]
+ mov rExpCut = 0x10009 // cutoff for exponent
+ nop.b 0
}
+;;
+// Below is the common path for both tanf and cotf
+Common_Path:
{ .mfi
- ldfpd tan_P12,tan_P13 = [tan_AD],16
-(p8) fma.s f8=f8,f1,f8 // Set qnan if x=nan
- nop.i 999
+ setf.sig fScRcpPiby2 = rSigRcpPiby2 // 2^(63+1)*(2/Pi)
+ fclass.m p8, p0 = f8, 0x23 // Test for x=inf
+ mov rSignMask = 0x1ffff // 17-bit exponent mask (strips sign bit)
}
-{ .mib
- ldfpd tan_Q4,tan_Q5 = [tan_ADQ],16
- nop.i 999
-(p8) br.ret.spnt b0 ;; // Exit for x=nan
+{ .mlx
+ setf.d fScRshf = rScRshf // 1.5*2^(63+63+1)
+ movl rRshf = 0x43e8000000000000 // 1.5 2^63 for right shift
}
+;;
-{ .mmi
- getf.exp tan_signexp = tan_NORM_f8
- ldfpd tan_P8,tan_P9 = [tan_AD],16
- nop.i 999 ;;
+{ .mfi
+ and rSignMask = rSignMask, rExp // clear sign bit
+(p10) fclass.m.unc p7, p0 = f8, 0x07 // Test for x=0 (for tanf)
+ mov rScFctrExp = 0xffff-64 // exp of scaling factor
+}
+{ .mfb
+ adds rCoeffB = coeff_B - coeff_A, rCoeffA
+(p9) fma.s.s0 f8 = f8, f1, f8 // Set qnan if x=nan
+(p9) br.ret.spnt b0 // Exit for x=nan
}
+;;
-// Multiply x by scaled 2/pi and add large const to shift integer part of W to
-// rightmost bits of significand
{ .mfi
- ldfpd tan_Q6,tan_Q7 = [tan_ADQ],16
- fma.s1 TAN_W_2TO64_RSH = tan_NORM_f8,TAN_INV_PI_BY_2_2TO64,TAN_RSHF_2TO64
- nop.i 999 ;;
+ cmp.ge p6, p0 = rSignMask, rExpCut // p6 = (E >= 0x10009)
+(p8) frcpa.s0 f8, p0 = f0, f0 // Set qnan indef if x=inf
+ mov GR_Parameter_Tag = 227 // (cotf)
}
-
-{ .mmi
- ldfpd tan_P10,tan_P11 = [tan_AD],16
- nop.m 999
- and tan_exp = tan_GR_17_ones, tan_signexp ;;
+{ .mbb
+ ldfe fPiby2 = [rCoeffB], 16
+(p8) br.ret.spnt b0 // Exit for x=inf
+(p6) br.cond.spnt Huge_Argument // Branch if |x|>=2^10
}
+;;
+{ .mfi
+ nop.m 0
+(p11) fclass.m.unc p6, p0 = f8, 0x07 // Test for x=0 (for cotf)
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fnorm.s0 fNormArg = f8
+(p7) br.ret.spnt b0 // Exit for x=0 (for tanf)
+}
+;;
-// p7 is true if we must call DBX TAN
-// p7 is true if f8 exp is > 0x10009 (which includes all ones
-// NAN or inf)
-{ .mmi
- ldfpd tan_Q0,tan_Q1 = [tan_ADQ],16
- cmp.ge.unc p7,p0 = tan_exp,tan_GR_10009
- nop.i 999 ;;
+{ .mmf
+ ldfpd fA01, fA03 = [rCoeffA], 16
+ ldfpd fB01, fB03 = [rCoeffB], 16
+ fmerge.s f10 = f8, f8 // Save input for error call
}
+;;
+{ .mmf
+ setf.exp fScFctr = rScFctrExp // get as real
+ setf.d fRshf = rRshf // get right shifter as real
+(p6) frcpa.s0 f8, p0 = f1, f8 // cotf(+-0) = +-Inf
+}
+;;
{ .mmb
- ldfpd tan_P4,tan_P5 = [tan_AD],16
- nop.m 999
-(p7) br.cond.spnt L(TAN_DBX) ;;
+ ldfpd fA05, fA07 = [rCoeffA], 16
+ ldfpd fB05, fB07 = [rCoeffB], 16
+(p6) br.cond.spnt __libm_error_region // call error support if cotf(+-0)
}
-
+;;
{ .mmi
- ldfpd tan_Q2,tan_Q3 = [tan_ADQ],16
- nop.m 999
- nop.i 999 ;;
-}
-
-
-
-// TAN_NFLOAT = Round_Int_Nearest(tan_W)
-{ .mfi
- ldfpd tan_P6,tan_P7 = [tan_AD],16
- fms.s1 TAN_NFLOAT = TAN_W_2TO64_RSH,TAN_2TOM64,TAN_RSHF
- nop.i 999 ;;
+ ldfpd fA09, fA11 = [rCoeffA], 16
+ ldfpd fB09, fB11 = [rCoeffB], 16
+ nop.i 0
}
-
+;;
{ .mfi
- ldfd tan_Q10 = [tan_ADQ]
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fShiftedN = fNormArg,fScRcpPiby2,fScRshf // x*2^64*(2/Pi)+ScRshf
+ nop.i 0
}
-
+;;
{ .mfi
- ldfpd tan_P0,tan_P1 = [tan_AD],16
- nop.f 999
- nop.i 999 ;;
+ nop.m 0
+ fms.s1 fN = fShiftedN, fScFctr, fRshf // N = Y*2^(-64) - Rshf
+ nop.i 0
}
+;;
-
+.pred.rel "mutex", p10, p11
{ .mfi
- getf.sig tan_GR_n = TAN_W_2TO64_RSH
- nop.f 999
- nop.i 999 ;;
+ getf.sig rIntN = fShiftedN // get N as integer
+(p10) fnma.s1 fR = fN, fPiby2, fNormArg // R = x - (Pi/2)*N (tanf)
+ nop.i 0
}
-
-// tan_r = -tan_Nfloat * tan_Pi_by_2_hi + x
{ .mfi
- ldfpd tan_P2,tan_P3 = [tan_AD]
- fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_hi, tan_NORM_f8
- nop.i 999 ;;
+ nop.m 0
+(p11) fms.s1 fR = fN, fPiby2, fNormArg // R = (Pi/2)*N - x (cotf)
+ nop.i 0
}
+;;
-
-// p8 ==> even
-// p9 ==> odd
{ .mmi
- and tan_GR_N_odd_even = 0x1, tan_GR_n ;;
- nop.m 999
- cmp.eq.unc p8,p9 = tan_GR_N_odd_even, r0 ;;
+ ldfpd fA13, fA15 = [rCoeffA], 16
+ ldfpd fA17, fA19 = [rCoeffB], 16
+ nop.i 0
}
+;;
-
-// tan_r = tan_r -tan_Nfloat * tan_Pi_by_2_lo
-{ .mfi
- nop.m 999
- fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_lo, tan_r
- nop.i 999 ;;
-}
-
-
+Return_From_Huges:
{ .mfi
- nop.m 999
- fma.s1 tan_rsq = tan_r, tan_r, f0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fRp2 = fR, fR, f0 // R^2
+(p11) add rIntN = 0x1, rIntN // N = N + 1 (cotf)
}
-
+;;
{ .mfi
- nop.m 999
-(p9) frcpa.s1 tan_y0, p10 = f1,tan_r
- nop.i 999 ;;
+ nop.m 0
+ frcpa.s1 fY0, p0 = f1, fR // Y0 ~ 1/R
+ tbit.z p8, p9 = rIntN, 0 // p8=1 if N is even
}
+;;
-
+// Below are mixed polynomial calculations (mixed for even and odd N)
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v18 = tan_rsq, tan_P15, tan_P14
- nop.i 999
+ nop.m 0
+(p9) fma.s1 fB03_01 = fRp2, fB03, fB01 // R^2*B3 + B1
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v4 = tan_rsq, tan_P1, tan_P0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fRp4 = fRp2, fRp2, f0 // R^4
+ nop.i 0
}
-
-
+;;
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v16 = tan_rsq, tan_P13, tan_P12
- nop.i 999
+ nop.m 0
+(p8) fma.s1 fA15_13 = fRp2, fA15, fA13 // R^2*A15 + A13
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v17 = tan_rsq, tan_rsq, f0
- nop.i 999 ;;
+ nop.m 0
+(p8) fma.s1 fA19_17 = fRp2, fA19, fA17 // R^2*A19 + A17
+ nop.i 0
}
-
-
+;;
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v12 = tan_rsq, tan_P9, tan_P8
- nop.i 999
+ nop.m 0
+(p8) fma.s1 fA07_05 = fRp2, fA07, fA05 // R^2*A7 + A5
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v13 = tan_rsq, tan_P11, tan_P10
- nop.i 999 ;;
+ nop.m 0
+(p8) fma.s1 fA11_09 = fRp2, fA11, fA09 // R^2*A11 + A9
+ nop.i 0
}
-
-
+;;
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v7 = tan_rsq, tan_P5, tan_P4
- nop.i 999
+ nop.m 0
+(p9) fma.s1 fB07_05 = fRp2, fB07, fB05 // R^2*B7 + B5
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v8 = tan_rsq, tan_P7, tan_P6
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fB11_09 = fRp2, fB11, fB09 // R^2*B11 + B9
+ nop.i 0
}
-
-
+;;
{ .mfi
- nop.m 999
-(p9) fnma.s1 tan_d = tan_r, tan_y0, f1
- nop.i 999
+ nop.m 0
+(p9) fnma.s1 fD = fR, fY0, f1 // D = 1 - R*Y0
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v5 = tan_rsq, tan_P3, tan_P2
- nop.i 999 ;;
+ nop.m 0
+(p8) fma.s1 fA03_01 = fRp2, fA03, fA01 // R^2*A3 + A1
+ nop.i 0
}
-
-
+;;
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z11 = tan_rsq, tan_Q9, tan_Q8
- nop.i 999
+ nop.m 0
+ fma.s1 fRp8 = fRp4, fRp4, f0 // R^8
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z12 = tan_rsq, tan_rsq, f0
- nop.i 999 ;;
+ nop.m 0
+ fma.s1 fRp5 = fR, fRp4, f0 // R^5
+ nop.i 0
}
-
+;;
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v15 = tan_v17, tan_v18, tan_v16
- nop.i 999
+ nop.m 0
+(p8) fma.s1 fA11_05 = fRp4, fA11_09, fA07_05 // R^4*(R^2*A11 + A9) + ...
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z7 = tan_rsq, tan_Q5, tan_Q4
- nop.i 999 ;;
+ nop.m 0
+(p8) fma.s1 fA19_13 = fRp4, fA19_17, fA15_13 // R^4*(R^2*A19 + A17) + ..
+ nop.i 0
}
-
+;;
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v11 = tan_v17, tan_v13, tan_v12
- nop.i 999
+ nop.m 0
+(p9) fma.s1 fB11_05 = fRp4, fB11_09, fB07_05 // R^4*(R^2*B11 + B9) + ...
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z8 = tan_rsq, tan_Q7, tan_Q6
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fRbyB03_01 = fR, fB03_01, f0 // R*(R^2*B3 + B1)
+ nop.i 0
}
-
-
+;;
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v14 = tan_v17, tan_v17, f0
- nop.i 999
+ nop.m 0
+(p9) fma.s1 fY1 = fY0, fD, fY0 // Y1 = Y0*D + Y0
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z3 = tan_rsq, tan_Q1, tan_Q0
- nop.i 999 ;;
+ nop.m 0
+(p9) fma.s1 fDp2 = fD, fD, f0 // D^2
+ nop.i 0
}
-
-
-
+;;
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v3 = tan_v17, tan_v5, tan_v4
- nop.i 999
+ nop.m 0
+ // R^8*(R^6*A19 + R^4*A17 + R^2*A15 + A13) + R^6*A11 + R^4*A9 + R^2*A7 + A5
+(p8) fma.d.s1 fA19_05 = fRp8, fA19_13, fA11_05
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v6 = tan_v17, tan_v8, tan_v7
- nop.i 999 ;;
+ nop.m 0
+(p8) fma.d.s1 fRbyA03_01 = fR, fA03_01, f0 // R*(R^2*A3 + A1)
+ nop.i 0
}
-
-
+;;
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_y1 = tan_y0, tan_d, tan_y0
- nop.i 999
+ nop.m 0
+(p9) fma.d.s1 fInvR = fY1, fDp2, fY1 // 1/R = Y1*D^2 + Y1
+ nop.i 0
}
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_dsq = tan_d, tan_d, f0
- nop.i 999 ;;
+ nop.m 0
+ // R^5*(R^6*B11 + R^4*B9 + R^2*B7 + B5) + R^3*B3 + R*B1
+(p9) fma.d.s1 fRbyB11_01 = fRp5, fB11_05, fRbyB03_01
+ nop.i 0
}
+;;
-
+.pred.rel "mutex", p8, p9
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z10 = tan_z12, tan_Q10, tan_z11
- nop.i 999
+ nop.m 0
+ // Result = R^5*(R^14*A19 + R^12*A17 + R^10*A15 + ...) + R^3*A3 + R*A1
+(p8) fma.s.s0 f8 = fRp5, fA19_05, fRbyA03_01
+ nop.i 0
}
-{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z9 = tan_z12, tan_z12,f0
- nop.i 999 ;;
+{ .mfb
+ nop.m 0
+ // Result = -1/R + R^11*B11 + R^9*B9 + R^7*B7 + R^5*B5 + R^3*B3 + R*B1
+(p9) fnma.s.s0 f8 = f1, fInvR, fRbyB11_01
+ br.ret.sptk b0 // exit for main path
}
+;;
+GLOBAL_IEEE754_END(tanf)
+
+LOCAL_LIBM_ENTRY(__libm_callout)
+Huge_Argument:
+.prologue
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z4 = tan_rsq, tan_Q3, tan_Q2
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z6 = tan_z12, tan_z8, tan_z7
- nop.i 999 ;;
+ nop.m 0
+ fmerge.s f9 = f0,f0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
}
-
-
+;;
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v10 = tan_v14, tan_v15, tan_v11
- nop.i 999 ;;
+ mov GR_SAVE_GP=gp
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
}
-
-
-{ .mfi
+.body
+{ .mmb
nop.m 999
-(p9) fma.s1 tan_y2 = tan_y1, tan_d, tan_y0
- nop.i 999
-}
-{ .mfi
nop.m 999
-(p9) fma.s1 tan_d4 = tan_dsq, tan_dsq, tan_d
- nop.i 999 ;;
+(p10) br.cond.sptk.many call_tanl ;;
}
-
-{ .mfi
+// Here if we should call cotl (p10=0, p11=1)
+{ .mmb
nop.m 999
-(p8) fma.s1 tan_v2 = tan_v14, tan_v6, tan_v3
- nop.i 999
-}
-{ .mfi
nop.m 999
-(p8) fma.s1 tan_v9 = tan_v14, tan_v14, f0
- nop.i 999 ;;
+ br.call.sptk.many b0=__libm_cotl# ;;
}
-
{ .mfi
- nop.m 999
-(p9) fma.s1 tan_z2 = tan_z12, tan_z4, tan_z3
- nop.i 999
+ mov gp = GR_SAVE_GP
+ fnorm.s.s0 f8 = f8
+ mov b0 = GR_SAVE_B0
}
-{ .mfi
+;;
+
+{ .mib
nop.m 999
-(p9) fma.s1 tan_z5 = tan_z9, tan_z10, tan_z6
- nop.i 999 ;;
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+;;
}
-
-{ .mfi
+// Here if we should call tanl (p10=1, p11=0)
+call_tanl:
+{ .mmb
nop.m 999
-(p9) fma.s1 tan_inv_r = tan_d4, tan_y2, tan_y0
- nop.i 999
-}
-{ .mfi
nop.m 999
-(p8) fma.s1 tan_rcube = tan_rsq, tan_r, f0
- nop.i 999 ;;
+ br.call.sptk.many b0=__libm_tanl# ;;
}
-
-
{ .mfi
- nop.m 999
-(p8) fma.s1 tan_v1 = tan_v9, tan_v10, tan_v2
- nop.i 999
+ mov gp = GR_SAVE_GP
+ fnorm.s.s0 f8 = f8
+ mov b0 = GR_SAVE_B0
}
-{ .mfi
+;;
+
+{ .mib
nop.m 999
-(p9) fma.s1 tan_z1 = tan_z9, tan_z5, tan_z2
- nop.i 999 ;;
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+;;
}
+LOCAL_LIBM_END(__libm_callout)
-
-{ .mfi
- nop.m 999
-(p8) fma.s.s0 f8 = tan_v1, tan_rcube, tan_r
- nop.i 999
-}
-{ .mfb
- nop.m 999
-(p9) fms.s.s0 f8 = tan_r, tan_z1, tan_inv_r
- br.ret.sptk b0 ;;
-}
-.endp tanf#
-ASM_SIZE_DIRECTIVE(tanf#)
+.type __libm_tanl#,@function
+.global __libm_tanl#
+.type __libm_cotl#,@function
+.global __libm_cotl#
-.proc __libm_callout
-__libm_callout:
-L(TAN_DBX):
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
+// (1)
{ .mfi
- nop.m 0
- fmerge.s f9 = f0,f0
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
-;;
-
{ .mfi
- mov GR_SAVE_GP=gp
- nop.f 0
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+// (2)
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
-}
+ mov GR_SAVE_B0=b0 // Save b0
+};;
.body
-{ .mfb
- nop.m 999
- nop.f 999
- br.call.sptk.many b0=__libm_tan# ;;
-}
-
-
-{ .mfi
- mov gp = GR_SAVE_GP
- fnorm.s f8 = f8
- mov b0 = GR_SAVE_B0
+// (3)
+{ .mib
+ stfs [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
-;;
-
+{ .mib
+ stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+// (4)
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
{ .mib
- nop.m 999
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
-;;
-}
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+LOCAL_LIBM_END(__libm_error_region)
-.endp __libm_callout
-ASM_SIZE_DIRECTIVE(__libm_callout)
+.type __libm_error_support#,@function
+.global __libm_error_support#
-.type __libm_tan#,@function
-.global __libm_tan#
diff --git a/sysdeps/ia64/fpu/s_tanl.S b/sysdeps/ia64/fpu/s_tanl.S
index e13e6c6cbd..345a059c5f 100644
--- a/sysdeps/ia64/fpu/s_tanl.S
+++ b/sysdeps/ia64/fpu/s_tanl.S
@@ -1,10 +1,10 @@
-.file "tanl.s"
+.file "tancotl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,50 +35,77 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// *********************************************************************
+//*********************************************************************
//
// History:
//
-// 2/02/2000 (hand-optimized)
-// 4/04/00 Unwind support added
+// 02/02/00 (hand-optimized)
+// 04/04/00 Unwind support added
// 12/28/00 Fixed false invalid flags
+// 02/06/02 Improved speed
+// 05/07/02 Changed interface to __libm_pi_by_2_reduce
+// 05/30/02 Added cotl
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
+// 05/15/03 Reformatted data tables
//
-// *********************************************************************
+//*********************************************************************
//
-// Function: tanl(x) = tangent(x), for double-extended precision x values
+// Functions: tanl(x) = tangent(x), for double-extended precision x values
+// cotl(x) = cotangent(x), for double-extended precision x values
//
-// *********************************************************************
+//*********************************************************************
//
// Resources Used:
//
// Floating-Point Registers: f8 (Input and Return Value)
// f9-f15
-// f32-f112
+// f32-f121
//
// General Purpose Registers:
-// r32-r48
-// r49-r50 (Used to pass arguments to pi_by_2 reduce routine)
+// r14-r26,r32-r57
//
// Predicate Registers: p6-p15
//
-// *********************************************************************
+//*********************************************************************
//
-// IEEE Special Conditions:
+// IEEE Special Conditions for tanl:
//
// Denormal fault raised on denormal inputs
// Overflow exceptions do not occur
-// Underflow exceptions raised when appropriate for tan
+// Underflow exceptions raised when appropriate for tan
// (No specialized error handling for this routine)
// Inexact raised when appropriate by algorithm
//
-// tan(SNaN) = QNaN
-// tan(QNaN) = QNaN
-// tan(inf) = QNaN
-// tan(+/-0) = +/-0
+// tanl(SNaN) = QNaN
+// tanl(QNaN) = QNaN
+// tanl(inf) = QNaN
+// tanl(+/-0) = +/-0
+//
+//*********************************************************************
+//
+// IEEE Special Conditions for cotl:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions occur at zero and near zero
+// Underflow exceptions do not occur
+// Inexact raised when appropriate by algorithm
+//
+// cotl(SNaN) = QNaN
+// cotl(QNaN) = QNaN
+// cotl(inf) = QNaN
+// cotl(+/-0) = +/-Inf and error handling is called
+//
+//*********************************************************************
//
-// *********************************************************************
+// Below are mathematical and algorithmic descriptions for tanl.
+// For cotl we use the identity cot(x) = -tan(x + Pi/2).
+// So, to compute cot(x) we just need to increment N (N = N + 1)
+// and invert the sign of the computed result.
+//
+//*********************************************************************
//
// Mathematical Description
//
@@ -106,13 +133,13 @@
// -------
//
// tan(r + c) = r + c + r^3/3 ...accurately
-// -cot(r + c) = -1/(r+c) + r/3 ...accurately
+// -cot(r + c) = -1/(r+c) + r/3 ...accurately
//
// Case 4:
// -------
//
// tan(r + c) = r + c + r^3/3 + 2r^5/15 ...accurately
-// -cot(r + c) = -1/(r+c) + r/3 + r^3/45 ...accurately
+// -cot(r + c) = -1/(r+c) + r/3 + r^3/45 ...accurately
//
//
// The only cases left are Cases 1 and 3 of the argument reduction
@@ -143,13 +170,13 @@
// Since Arg = N pi/4 + r + c accurately, we have
//
// tan(Arg) = tan(r+c) for N even,
-// = -cot(r+c) otherwise.
+// = -cot(r+c) otherwise.
//
// Here for this case, both tan(r) and -cot(r) can be approximated
// by simple polynomials:
//
// tan(r) = r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
-// -cot(r) = -1/r + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
+// -cot(r) = -1/r + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
//
// accurately. Since |r| is relatively small, tan(r+c) and
// -cot(r+c) can be accurately approximated by replacing r with
@@ -178,21 +205,21 @@
// The required calculation is either
//
// tan(r + c) = tan(r) + correction, or
-// -cot(r + c) = -cot(r) + correction.
+// -cot(r + c) = -cot(r) + correction.
//
// Specifically,
//
// tan(r + c) = tan(r) + c tan'(r) + O(c^2)
-// = tan(r) + c sec^2(r) + O(c^2)
-// = tan(r) + c SEC_sq ...accurately
+// = tan(r) + c sec^2(r) + O(c^2)
+// = tan(r) + c SEC_sq ...accurately
// as long as SEC_sq approximates sec^2(r)
// to, say, 5 bits or so.
//
// Similarly,
//
-// -cot(r + c) = -cot(r) - c cot'(r) + O(c^2)
-// = -cot(r) + c csc^2(r) + O(c^2)
-// = -cot(r) + c CSC_sq ...accurately
+// -cot(r + c) = -cot(r) - c cot'(r) + O(c^2)
+// = -cot(r) + c csc^2(r) + O(c^2)
+// = -cot(r) + c CSC_sq ...accurately
// as long as CSC_sq approximates csc^2(r)
// to, say, 5 bits or so.
//
@@ -208,14 +235,14 @@
// where
//
// B = 2^k * 1.b_1 b_2 ... b_5 1
-// x = |r| - B
+// x = |r| - B
//
// Now,
// tan(B) + tan(x)
// tan( B + x ) = ------------------------
// 1 - tan(B)*tan(x)
//
-// / \
+// / \
// | tan(B) + tan(x) |
// = tan(B) + | ------------------------ - tan(B) |
@@ -248,7 +275,7 @@
// cot( B + x ) = ------------------------
// tan(B) + tan(x)
//
-// / \
+// / \
// | 1 - tan(B)*tan(x) |
// = cot(B) + | ----------------------- - cot(B) |
@@ -273,7 +300,7 @@
// Arg = N * pi/2 + r + c ...accurately
//
// tan(Arg) = tan(r) + correction if N is even;
-// = -cot(r) + correction otherwise.
+// = -cot(r) + correction otherwise.
//
// For Cases 2 and 4,
//
@@ -292,8 +319,8 @@
// tan(Arg) = r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
// + c*(1 + r^2) N even
//
-// = -1/(r+c) + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
-// + Q1_1*c N odd
+// = -1/(r+c) + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
+// + Q1_1*c N odd
//
// Case normal_r: 2^(-2) <= |r| <= pi/4
//
@@ -304,15 +331,15 @@
//
// tan(Arg) = tan(r) + c*sec^2(r)
// = tan( sgn_r * (B+x) ) + c * sec^2(|r|)
-// = sgn_r * ( tan(B+x) + sgn_r*c*sec^2(|r|) )
-// = sgn_r * ( tan(B+x) + sgn_r*c*sec^2(B) )
+// = sgn_r * ( tan(B+x) + sgn_r*c*sec^2(|r|) )
+// = sgn_r * ( tan(B+x) + sgn_r*c*sec^2(B) )
//
// since B approximates |r| to 2^(-6) in relative accuracy.
//
// / (1/[sin(B)*cos(B)]) * tan(x)
// tan(Arg) = sgn_r * | tan(B) + --------------------------------
// \ cot(B) - tan(x)
-// \
+// \
// + CORR |
// /
@@ -324,15 +351,15 @@
//
// tan(Arg) = -cot(r) + c*csc^2(r)
// = -cot( sgn_r * (B+x) ) + c * csc^2(|r|)
-// = sgn_r * ( -cot(B+x) + sgn_r*c*csc^2(|r|) )
-// = sgn_r * ( -cot(B+x) + sgn_r*c*csc^2(B) )
+// = sgn_r * ( -cot(B+x) + sgn_r*c*csc^2(|r|) )
+// = sgn_r * ( -cot(B+x) + sgn_r*c*csc^2(B) )
//
// since B approximates |r| to 2^(-6) in relative accuracy.
//
// / (1/[sin(B)*cos(B)]) * tan(x)
// tan(Arg) = sgn_r * | -cot(B) + --------------------------------
// \ tan(B) + tan(x)
-// \
+// \
// + CORR |
// /
@@ -356,8 +383,8 @@
// For N even,
//
// rsq := r * r
-// Result := c + r * rsq * P1_1
-// Result := r + Result ...in user-defined rounding
+// Poly := c + r * rsq * P1_1
+// Result := r + Poly ...in user-defined rounding
//
// For N odd,
// S_hi := -frcpa(r) ...8 bits
@@ -375,8 +402,8 @@
// For N even,
//
// rsq := r * r
-// Result := c + r * rsq * (P1_1 + rsq * P1_2)
-// Result := r + Result ...in user-defined rounding
+// Poly := c + r * rsq * (P1_1 + rsq * P1_2)
+// Result := r + Poly ...in user-defined rounding
//
// For N odd,
// S_hi := -frcpa(r) ...8 bits
@@ -414,8 +441,8 @@
// Poly2 := P1_4 + rsq*(P1_5 + rsq*(P1_6 + ... rsq*P1_9))
// CORR := c * ( 1 + rsq )
// Poly := Poly1 + r_to_the_8*Poly2
-// Result := r*Poly + CORR
-// Result := r + Result ...in user-defined rounding
+// Poly := r*Poly + CORR
+// Result := r + Poly ...in user-defined rounding
// ...note that Poly1 and r_to_the_8 can be computed in parallel
// ...with Poly2 (Poly1 is intentionally set to be much
// ...shorter than Poly2 so that r_to_the_8 and CORR can be hidden)
@@ -434,8 +461,8 @@
// rsq := r*r
// P := Q1_1 + rsq*(Q1_2 + rsq*(Q1_3 + ... + rsq*Q1_7))
//
-// Result := r*P + S_lo
-// Result := S_hi + Result ...in user-defined rounding
+// Poly := r*P + S_lo
+// Result := S_hi + Poly ...in user-defined rounding
//
//
// Algorithm for the case of normal_r
@@ -454,7 +481,7 @@
// / (1/[sin(B)*cos(B)]) * tan(x)
// sgn_r * | tan(B) + -------------------------------- +
// \ cot(B) - tan(x)
-// \
+// \
// CORR |
// /
@@ -463,7 +490,7 @@
// calculated beforehand and stored in a table. Specifically,
// the table values are
//
-// tan(B) as T_hi + T_lo;
+// tan(B) as T_hi + T_lo;
// cot(B) as C_hi + C_lo;
// 1/[sin(B)*cos(B)] as SC_inv
//
@@ -559,7 +586,7 @@
// / (1/[sin(B)*cos(B)]) * tan(x)
// sgn_r * | -cot(B) + -------------------------------- +
// \ tan(B) + tan(x)
-// \
+// \
// CORR |
// /
@@ -568,7 +595,7 @@
// calculated beforehand and stored in a table. Specifically,
// the table values are
//
-// tan(B) as T_hi + T_lo;
+// tan(B) as T_hi + T_lo;
// cot(B) as C_hi + C_lo;
// 1/[sin(B)*cos(B)] as SC_inv
//
@@ -675,254 +702,382 @@
//
//
-#include "libm_support.h"
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-.align 128
-
-TANL_BASE_CONSTANTS:
-ASM_TYPE_DIRECTIVE(TANL_BASE_CONSTANTS,@object)
-data4 0x4B800000, 0xCB800000, 0x38800000, 0xB8800000 // two**24, -two**24
- // two**-14, -two**-14
-data4 0x4E44152A, 0xA2F9836E, 0x00003FFE, 0x00000000 // two_by_pi
-data4 0xCE81B9F1, 0xC84D32B0, 0x00004016, 0x00000000 // P_0
-data4 0x2168C235, 0xC90FDAA2, 0x00003FFF, 0x00000000 // P_1
-data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD, 0x00000000 // P_2
-data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C, 0x00000000 // P_3
-data4 0x5F000000, 0xDF000000, 0x00000000, 0x00000000 // two_to_63, -two_to_63
-data4 0x6EC6B45A, 0xA397E504, 0x00003FE7, 0x00000000 // Inv_P_0
-data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF, 0x00000000 // d_1
-data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C, 0x00000000 // d_2
-data4 0x2168C234, 0xC90FDAA2, 0x00003FFE, 0x00000000 // PI_BY_4
-data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE, 0x00000000 // MPI_BY_4
-data4 0x3E800000, 0xBE800000, 0x00000000, 0x00000000 // two**-2, -two**-2
-data4 0x2F000000, 0xAF000000, 0x00000000, 0x00000000 // two**-33, -two**-33
-data4 0xAAAAAABD, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // P1_1
-data4 0x88882E6A, 0x88888888, 0x00003FFC, 0x00000000 // P1_2
-data4 0x0F0177B6, 0xDD0DD0DD, 0x00003FFA, 0x00000000 // P1_3
-data4 0x646B8C6D, 0xB327A440, 0x00003FF9, 0x00000000 // P1_4
-data4 0x1D5F7D20, 0x91371B25, 0x00003FF8, 0x00000000 // P1_5
-data4 0x61C67914, 0xEB69A5F1, 0x00003FF6, 0x00000000 // P1_6
-data4 0x019318D2, 0xBEDD37BE, 0x00003FF5, 0x00000000 // P1_7
-data4 0x3C794015, 0x9979B146, 0x00003FF4, 0x00000000 // P1_8
-data4 0x8C6EB58A, 0x8EBD21A3, 0x00003FF3, 0x00000000 // P1_9
-data4 0xAAAAAAB4, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // Q1_1
-data4 0x0B5FC93E, 0xB60B60B6, 0x00003FF9, 0x00000000 // Q1_2
-data4 0x0C9BBFBF, 0x8AB355E0, 0x00003FF6, 0x00000000 // Q1_3
-data4 0xCBEE3D4C, 0xDDEBBC89, 0x00003FF2, 0x00000000 // Q1_4
-data4 0x5F80BBB6, 0xB3548A68, 0x00003FEF, 0x00000000 // Q1_5
-data4 0x4CED5BF1, 0x91362560, 0x00003FEC, 0x00000000 // Q1_6
-data4 0x8EE92A83, 0xF189D95A, 0x00003FE8, 0x00000000 // Q1_7
-data4 0xAAAB362F, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // P2_1
-data4 0xE97A6097, 0x88888886, 0x00003FFC, 0x00000000 // P2_2
-data4 0x25E716A1, 0xDD108EE0, 0x00003FFA, 0x00000000 // P2_3
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(TANL_BASE_CONSTANTS)
+
+tanl_table_1:
+data8 0xA2F9836E4E44152A, 0x00003FFE // two_by_pi
+data8 0xC84D32B0CE81B9F1, 0x00004016 // P_0
+data8 0xC90FDAA22168C235, 0x00003FFF // P_1
+data8 0xECE675D1FC8F8CBB, 0x0000BFBD // P_2
+data8 0xB7ED8FBBACC19C60, 0x0000BF7C // P_3
+LOCAL_OBJECT_END(TANL_BASE_CONSTANTS)
+
+LOCAL_OBJECT_START(tanl_table_2)
+data8 0xC90FDAA22168C234, 0x00003FFE // PI_BY_4
+data8 0xA397E5046EC6B45A, 0x00003FE7 // Inv_P_0
+data8 0x8D848E89DBD171A1, 0x0000BFBF // d_1
+data8 0xD5394C3618A66F8E, 0x0000BF7C // d_2
+data4 0x3E800000 // two**-2
+data4 0xBE800000 // -two**-2
+data4 0x00000000 // pad
+data4 0x00000000 // pad
+LOCAL_OBJECT_END(tanl_table_2)
+
+LOCAL_OBJECT_START(tanl_table_p1)
+data8 0xAAAAAAAAAAAAAABD, 0x00003FFD // P1_1
+data8 0x8888888888882E6A, 0x00003FFC // P1_2
+data8 0xDD0DD0DD0F0177B6, 0x00003FFA // P1_3
+data8 0xB327A440646B8C6D, 0x00003FF9 // P1_4
+data8 0x91371B251D5F7D20, 0x00003FF8 // P1_5
+data8 0xEB69A5F161C67914, 0x00003FF6 // P1_6
+data8 0xBEDD37BE019318D2, 0x00003FF5 // P1_7
+data8 0x9979B1463C794015, 0x00003FF4 // P1_8
+data8 0x8EBD21A38C6EB58A, 0x00003FF3 // P1_9
+LOCAL_OBJECT_END(tanl_table_p1)
+
+LOCAL_OBJECT_START(tanl_table_q1)
+data8 0xAAAAAAAAAAAAAAB4, 0x00003FFD // Q1_1
+data8 0xB60B60B60B5FC93E, 0x00003FF9 // Q1_2
+data8 0x8AB355E00C9BBFBF, 0x00003FF6 // Q1_3
+data8 0xDDEBBC89CBEE3D4C, 0x00003FF2 // Q1_4
+data8 0xB3548A685F80BBB6, 0x00003FEF // Q1_5
+data8 0x913625604CED5BF1, 0x00003FEC // Q1_6
+data8 0xF189D95A8EE92A83, 0x00003FE8 // Q1_7
+LOCAL_OBJECT_END(tanl_table_q1)
+
+LOCAL_OBJECT_START(tanl_table_p2)
+data8 0xAAAAAAAAAAAB362F, 0x00003FFD // P2_1
+data8 0x88888886E97A6097, 0x00003FFC // P2_2
+data8 0xDD108EE025E716A1, 0x00003FFA // P2_3
+LOCAL_OBJECT_END(tanl_table_p2)
+
+LOCAL_OBJECT_START(tanl_table_tm2)
//
// Entries T_hi double-precision memory format
// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
// Entries T_lo single-precision memory format
// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
//
-data4 0x62400794, 0x3FD09BC3, 0x23A05C32, 0x00000000
-data4 0xDFFBC074, 0x3FD124A9, 0x240078B2, 0x00000000
-data4 0x5BD4920F, 0x3FD1AE23, 0x23826B8E, 0x00000000
-data4 0x15E2701D, 0x3FD23835, 0x22D31154, 0x00000000
-data4 0x63739C2D, 0x3FD2C2E4, 0x2265C9E2, 0x00000000
-data4 0xAFEEA48B, 0x3FD34E36, 0x245C05EB, 0x00000000
-data4 0x7DBB35D1, 0x3FD3DA31, 0x24749F2D, 0x00000000
-data4 0x67321619, 0x3FD466DA, 0x2462CECE, 0x00000000
-data4 0x1F94A4D5, 0x3FD4F437, 0x246D0DF1, 0x00000000
-data4 0x740C3E6D, 0x3FD5824D, 0x240A85B5, 0x00000000
-data4 0x4CB1E73D, 0x3FD61123, 0x23F96E33, 0x00000000
-data4 0xAD9EA64B, 0x3FD6A0BE, 0x247C5393, 0x00000000
-data4 0xB804FD01, 0x3FD73125, 0x241F3B29, 0x00000000
-data4 0xAB53EE83, 0x3FD7C25E, 0x2479989B, 0x00000000
-data4 0xE6640EED, 0x3FD8546F, 0x23B343BC, 0x00000000
-data4 0xE8AF1892, 0x3FD8E75F, 0x241454D1, 0x00000000
-data4 0x53928BDA, 0x3FD97B35, 0x238613D9, 0x00000000
-data4 0xEB9DE4DE, 0x3FDA0FF6, 0x22859FA7, 0x00000000
-data4 0x99ECF92D, 0x3FDAA5AB, 0x237A6D06, 0x00000000
-data4 0x6D8F1796, 0x3FDB3C5A, 0x23952F6C, 0x00000000
-data4 0x9CFB8BE4, 0x3FDBD40A, 0x2280FC95, 0x00000000
-data4 0x87943100, 0x3FDC6CC3, 0x245D2EC0, 0x00000000
-data4 0xB736C500, 0x3FDD068C, 0x23C4AD7D, 0x00000000
-data4 0xE1DDBC31, 0x3FDDA16D, 0x23D076E6, 0x00000000
-data4 0xEB515A93, 0x3FDE3D6E, 0x244809A6, 0x00000000
-data4 0xE6E9E5F1, 0x3FDEDA97, 0x220856C8, 0x00000000
-data4 0x1963CE69, 0x3FDF78F1, 0x244BE993, 0x00000000
-data4 0x7D635BCE, 0x3FE00C41, 0x23D21799, 0x00000000
-data4 0x1C302CD3, 0x3FE05CAB, 0x248A1B1D, 0x00000000
-data4 0xDB6A1FA0, 0x3FE0ADB9, 0x23D53E33, 0x00000000
-data4 0x4A20BA81, 0x3FE0FF72, 0x24DB9ED5, 0x00000000
-data4 0x153FA6F5, 0x3FE151D9, 0x24E9E451, 0x00000000
+data8 0x3FD09BC362400794
+data4 0x23A05C32, 0x00000000
+data8 0x3FD124A9DFFBC074
+data4 0x240078B2, 0x00000000
+data8 0x3FD1AE235BD4920F
+data4 0x23826B8E, 0x00000000
+data8 0x3FD2383515E2701D
+data4 0x22D31154, 0x00000000
+data8 0x3FD2C2E463739C2D
+data4 0x2265C9E2, 0x00000000
+data8 0x3FD34E36AFEEA48B
+data4 0x245C05EB, 0x00000000
+data8 0x3FD3DA317DBB35D1
+data4 0x24749F2D, 0x00000000
+data8 0x3FD466DA67321619
+data4 0x2462CECE, 0x00000000
+data8 0x3FD4F4371F94A4D5
+data4 0x246D0DF1, 0x00000000
+data8 0x3FD5824D740C3E6D
+data4 0x240A85B5, 0x00000000
+data8 0x3FD611234CB1E73D
+data4 0x23F96E33, 0x00000000
+data8 0x3FD6A0BEAD9EA64B
+data4 0x247C5393, 0x00000000
+data8 0x3FD73125B804FD01
+data4 0x241F3B29, 0x00000000
+data8 0x3FD7C25EAB53EE83
+data4 0x2479989B, 0x00000000
+data8 0x3FD8546FE6640EED
+data4 0x23B343BC, 0x00000000
+data8 0x3FD8E75FE8AF1892
+data4 0x241454D1, 0x00000000
+data8 0x3FD97B3553928BDA
+data4 0x238613D9, 0x00000000
+data8 0x3FDA0FF6EB9DE4DE
+data4 0x22859FA7, 0x00000000
+data8 0x3FDAA5AB99ECF92D
+data4 0x237A6D06, 0x00000000
+data8 0x3FDB3C5A6D8F1796
+data4 0x23952F6C, 0x00000000
+data8 0x3FDBD40A9CFB8BE4
+data4 0x2280FC95, 0x00000000
+data8 0x3FDC6CC387943100
+data4 0x245D2EC0, 0x00000000
+data8 0x3FDD068CB736C500
+data4 0x23C4AD7D, 0x00000000
+data8 0x3FDDA16DE1DDBC31
+data4 0x23D076E6, 0x00000000
+data8 0x3FDE3D6EEB515A93
+data4 0x244809A6, 0x00000000
+data8 0x3FDEDA97E6E9E5F1
+data4 0x220856C8, 0x00000000
+data8 0x3FDF78F11963CE69
+data4 0x244BE993, 0x00000000
+data8 0x3FE00C417D635BCE
+data4 0x23D21799, 0x00000000
+data8 0x3FE05CAB1C302CD3
+data4 0x248A1B1D, 0x00000000
+data8 0x3FE0ADB9DB6A1FA0
+data4 0x23D53E33, 0x00000000
+data8 0x3FE0FF724A20BA81
+data4 0x24DB9ED5, 0x00000000
+data8 0x3FE151D9153FA6F5
+data4 0x24E9E451, 0x00000000
+LOCAL_OBJECT_END(tanl_table_tm2)
+
+LOCAL_OBJECT_START(tanl_table_tm1)
//
// Entries T_hi double-precision memory format
// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
// Entries T_lo single-precision memory format
// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
//
-data4 0xBA1BE39E, 0x3FE1CEC4, 0x24B60F9E, 0x00000000
-data4 0x5ABD9B2D, 0x3FE277E4, 0x248C2474, 0x00000000
-data4 0x0272B110, 0x3FE32418, 0x247B8311, 0x00000000
-data4 0x890E2DF0, 0x3FE3D38B, 0x24C55751, 0x00000000
-data4 0x46236871, 0x3FE4866D, 0x24E5BC34, 0x00000000
-data4 0x45E044B0, 0x3FE53CEE, 0x24001BA4, 0x00000000
-data4 0x82EC06E4, 0x3FE5F742, 0x24B973DC, 0x00000000
-data4 0x25DF43F9, 0x3FE6B5A1, 0x24895440, 0x00000000
-data4 0xCAFD348C, 0x3FE77844, 0x240021CA, 0x00000000
-data4 0xCEED6B92, 0x3FE83F6B, 0x24C45372, 0x00000000
-data4 0xA34F3665, 0x3FE90B58, 0x240DAD33, 0x00000000
-data4 0x2C1E56B4, 0x3FE9DC52, 0x24F846CE, 0x00000000
-data4 0x27041578, 0x3FEAB2A4, 0x2323FB6E, 0x00000000
-data4 0x9DD8C373, 0x3FEB8E9F, 0x24B3090B, 0x00000000
-data4 0x65C9AA7B, 0x3FEC709B, 0x2449F611, 0x00000000
-data4 0xACCF8435, 0x3FED58F4, 0x23616A7E, 0x00000000
-data4 0x97635082, 0x3FEE480F, 0x24C2FEAE, 0x00000000
-data4 0xF0ACC544, 0x3FEF3E57, 0x242CE964, 0x00000000
-data4 0xF7E06E4B, 0x3FF01E20, 0x2480D3EE, 0x00000000
-data4 0x8A798A69, 0x3FF0A125, 0x24DB8967, 0x00000000
+data8 0x3FE1CEC4BA1BE39E
+data4 0x24B60F9E, 0x00000000
+data8 0x3FE277E45ABD9B2D
+data4 0x248C2474, 0x00000000
+data8 0x3FE324180272B110
+data4 0x247B8311, 0x00000000
+data8 0x3FE3D38B890E2DF0
+data4 0x24C55751, 0x00000000
+data8 0x3FE4866D46236871
+data4 0x24E5BC34, 0x00000000
+data8 0x3FE53CEE45E044B0
+data4 0x24001BA4, 0x00000000
+data8 0x3FE5F74282EC06E4
+data4 0x24B973DC, 0x00000000
+data8 0x3FE6B5A125DF43F9
+data4 0x24895440, 0x00000000
+data8 0x3FE77844CAFD348C
+data4 0x240021CA, 0x00000000
+data8 0x3FE83F6BCEED6B92
+data4 0x24C45372, 0x00000000
+data8 0x3FE90B58A34F3665
+data4 0x240DAD33, 0x00000000
+data8 0x3FE9DC522C1E56B4
+data4 0x24F846CE, 0x00000000
+data8 0x3FEAB2A427041578
+data4 0x2323FB6E, 0x00000000
+data8 0x3FEB8E9F9DD8C373
+data4 0x24B3090B, 0x00000000
+data8 0x3FEC709B65C9AA7B
+data4 0x2449F611, 0x00000000
+data8 0x3FED58F4ACCF8435
+data4 0x23616A7E, 0x00000000
+data8 0x3FEE480F97635082
+data4 0x24C2FEAE, 0x00000000
+data8 0x3FEF3E57F0ACC544
+data4 0x242CE964, 0x00000000
+data8 0x3FF01E20F7E06E4B
+data4 0x2480D3EE, 0x00000000
+data8 0x3FF0A1258A798A69
+data4 0x24DB8967, 0x00000000
+LOCAL_OBJECT_END(tanl_table_tm1)
+
+LOCAL_OBJECT_START(tanl_table_cm2)
//
// Entries C_hi double-precision memory format
// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
// Entries C_lo single-precision memory format
// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
//
-data4 0xE63EFBD0, 0x400ED3E2, 0x259D94D4, 0x00000000
-data4 0xC515DAB5, 0x400DDDB4, 0x245F0537, 0x00000000
-data4 0xBE19A79F, 0x400CF57A, 0x25D4EA9F, 0x00000000
-data4 0xD15298ED, 0x400C1A06, 0x24AE40A0, 0x00000000
-data4 0x164B2708, 0x400B4A4C, 0x25A5AAB6, 0x00000000
-data4 0x5285B068, 0x400A855A, 0x25524F18, 0x00000000
-data4 0x3FFA549F, 0x4009CA5A, 0x24C999C0, 0x00000000
-data4 0x646AF623, 0x4009188A, 0x254FD801, 0x00000000
-data4 0x6084D0E7, 0x40086F3C, 0x2560F5FD, 0x00000000
-data4 0xA29A76EE, 0x4007CDD2, 0x255B9D19, 0x00000000
-data4 0x6C8ECA95, 0x400733BE, 0x25CB021B, 0x00000000
-data4 0x1F8DDC52, 0x4006A07E, 0x24AB4722, 0x00000000
-data4 0xC298AD58, 0x4006139B, 0x252764E2, 0x00000000
-data4 0xBAD7164B, 0x40058CAB, 0x24DAF5DB, 0x00000000
-data4 0xAE31A5D3, 0x40050B4B, 0x25EA20F4, 0x00000000
-data4 0x89F85A8A, 0x40048F21, 0x2583A3E8, 0x00000000
-data4 0xA862380D, 0x400417DA, 0x25DCC4CC, 0x00000000
-data4 0x1088FCFE, 0x4003A52B, 0x2430A492, 0x00000000
-data4 0xCD3527D5, 0x400336CC, 0x255F77CF, 0x00000000
-data4 0x5760766D, 0x4002CC7F, 0x25DA0BDA, 0x00000000
-data4 0x11CE02E3, 0x40026607, 0x256FF4A2, 0x00000000
-data4 0xD37BBE04, 0x4002032C, 0x25208AED, 0x00000000
-data4 0x7F050775, 0x4001A3BD, 0x24B72DD6, 0x00000000
-data4 0xA554848A, 0x40014789, 0x24AB4DAA, 0x00000000
-data4 0x323E81B7, 0x4000EE65, 0x2584C440, 0x00000000
-data4 0x21CF1293, 0x40009827, 0x25C9428D, 0x00000000
-data4 0x3D415EEB, 0x400044A9, 0x25DC8482, 0x00000000
-data4 0xBD72C577, 0x3FFFE78F, 0x257F5070, 0x00000000
-data4 0x75EFD28E, 0x3FFF4AC3, 0x23EBBF7A, 0x00000000
-data4 0x60B52DDE, 0x3FFEB2AF, 0x22EECA07, 0x00000000
-data4 0x35204180, 0x3FFE1F19, 0x24191079, 0x00000000
-data4 0x54F7E60A, 0x3FFD8FCA, 0x248D3058, 0x00000000
+data8 0x400ED3E2E63EFBD0
+data4 0x259D94D4, 0x00000000
+data8 0x400DDDB4C515DAB5
+data4 0x245F0537, 0x00000000
+data8 0x400CF57ABE19A79F
+data4 0x25D4EA9F, 0x00000000
+data8 0x400C1A06D15298ED
+data4 0x24AE40A0, 0x00000000
+data8 0x400B4A4C164B2708
+data4 0x25A5AAB6, 0x00000000
+data8 0x400A855A5285B068
+data4 0x25524F18, 0x00000000
+data8 0x4009CA5A3FFA549F
+data4 0x24C999C0, 0x00000000
+data8 0x4009188A646AF623
+data4 0x254FD801, 0x00000000
+data8 0x40086F3C6084D0E7
+data4 0x2560F5FD, 0x00000000
+data8 0x4007CDD2A29A76EE
+data4 0x255B9D19, 0x00000000
+data8 0x400733BE6C8ECA95
+data4 0x25CB021B, 0x00000000
+data8 0x4006A07E1F8DDC52
+data4 0x24AB4722, 0x00000000
+data8 0x4006139BC298AD58
+data4 0x252764E2, 0x00000000
+data8 0x40058CABBAD7164B
+data4 0x24DAF5DB, 0x00000000
+data8 0x40050B4BAE31A5D3
+data4 0x25EA20F4, 0x00000000
+data8 0x40048F2189F85A8A
+data4 0x2583A3E8, 0x00000000
+data8 0x400417DAA862380D
+data4 0x25DCC4CC, 0x00000000
+data8 0x4003A52B1088FCFE
+data4 0x2430A492, 0x00000000
+data8 0x400336CCCD3527D5
+data4 0x255F77CF, 0x00000000
+data8 0x4002CC7F5760766D
+data4 0x25DA0BDA, 0x00000000
+data8 0x4002660711CE02E3
+data4 0x256FF4A2, 0x00000000
+data8 0x4002032CD37BBE04
+data4 0x25208AED, 0x00000000
+data8 0x4001A3BD7F050775
+data4 0x24B72DD6, 0x00000000
+data8 0x40014789A554848A
+data4 0x24AB4DAA, 0x00000000
+data8 0x4000EE65323E81B7
+data4 0x2584C440, 0x00000000
+data8 0x4000982721CF1293
+data4 0x25C9428D, 0x00000000
+data8 0x400044A93D415EEB
+data4 0x25DC8482, 0x00000000
+data8 0x3FFFE78FBD72C577
+data4 0x257F5070, 0x00000000
+data8 0x3FFF4AC375EFD28E
+data4 0x23EBBF7A, 0x00000000
+data8 0x3FFEB2AF60B52DDE
+data4 0x22EECA07, 0x00000000
+data8 0x3FFE1F1935204180
+data4 0x24191079, 0x00000000
+data8 0x3FFD8FCA54F7E60A
+data4 0x248D3058, 0x00000000
+LOCAL_OBJECT_END(tanl_table_cm2)
+
+LOCAL_OBJECT_START(tanl_table_cm1)
//
// Entries C_hi double-precision memory format
// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
// Entries C_lo single-precision memory format
// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
//
-data4 0x79F6FADE, 0x3FFCC06A, 0x239C7886, 0x00000000
-data4 0x891662A6, 0x3FFBB91F, 0x250BD191, 0x00000000
-data4 0x529F155D, 0x3FFABFB6, 0x256CC3E6, 0x00000000
-data4 0x2E964AE9, 0x3FF9D300, 0x250843E3, 0x00000000
-data4 0x89DCB383, 0x3FF8F1EF, 0x2277C87E, 0x00000000
-data4 0x7C87DBD6, 0x3FF81B93, 0x256DA6CF, 0x00000000
-data4 0x1042EDE4, 0x3FF74F14, 0x2573D28A, 0x00000000
-data4 0x1784B360, 0x3FF68BAF, 0x242E489A, 0x00000000
-data4 0x7C923C4C, 0x3FF5D0B5, 0x2532D940, 0x00000000
-data4 0xF418EF20, 0x3FF51D88, 0x253C7DD6, 0x00000000
-data4 0x02F88DAE, 0x3FF4719A, 0x23DB59BF, 0x00000000
-data4 0x49DA0788, 0x3FF3CC66, 0x252B4756, 0x00000000
-data4 0x0B980DB8, 0x3FF32D77, 0x23FE585F, 0x00000000
-data4 0xE56C987A, 0x3FF2945F, 0x25378A63, 0x00000000
-data4 0xB16523F6, 0x3FF200BD, 0x247BB2E0, 0x00000000
-data4 0x8CE27778, 0x3FF17235, 0x24446538, 0x00000000
-data4 0xFDEFE692, 0x3FF0E873, 0x2514638F, 0x00000000
-data4 0x33154062, 0x3FF0632C, 0x24A7FC27, 0x00000000
-data4 0xB3EF115F, 0x3FEFC42E, 0x248FD0FE, 0x00000000
-data4 0x135D26F6, 0x3FEEC9E8, 0x2385C719, 0x00000000
+data8 0x3FFCC06A79F6FADE
+data4 0x239C7886, 0x00000000
+data8 0x3FFBB91F891662A6
+data4 0x250BD191, 0x00000000
+data8 0x3FFABFB6529F155D
+data4 0x256CC3E6, 0x00000000
+data8 0x3FF9D3002E964AE9
+data4 0x250843E3, 0x00000000
+data8 0x3FF8F1EF89DCB383
+data4 0x2277C87E, 0x00000000
+data8 0x3FF81B937C87DBD6
+data4 0x256DA6CF, 0x00000000
+data8 0x3FF74F141042EDE4
+data4 0x2573D28A, 0x00000000
+data8 0x3FF68BAF1784B360
+data4 0x242E489A, 0x00000000
+data8 0x3FF5D0B57C923C4C
+data4 0x2532D940, 0x00000000
+data8 0x3FF51D88F418EF20
+data4 0x253C7DD6, 0x00000000
+data8 0x3FF4719A02F88DAE
+data4 0x23DB59BF, 0x00000000
+data8 0x3FF3CC6649DA0788
+data4 0x252B4756, 0x00000000
+data8 0x3FF32D770B980DB8
+data4 0x23FE585F, 0x00000000
+data8 0x3FF2945FE56C987A
+data4 0x25378A63, 0x00000000
+data8 0x3FF200BDB16523F6
+data4 0x247BB2E0, 0x00000000
+data8 0x3FF172358CE27778
+data4 0x24446538, 0x00000000
+data8 0x3FF0E873FDEFE692
+data4 0x2514638F, 0x00000000
+data8 0x3FF0632C33154062
+data4 0x24A7FC27, 0x00000000
+data8 0x3FEFC42EB3EF115F
+data4 0x248FD0FE, 0x00000000
+data8 0x3FEEC9E8135D26F6
+data4 0x2385C719, 0x00000000
+LOCAL_OBJECT_END(tanl_table_cm1)
+
+LOCAL_OBJECT_START(tanl_table_scim2)
//
// Entries SC_inv in Swapped IEEE format (extended)
// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
//
-data4 0x1BF30C9E, 0x839D6D4A, 0x00004001, 0x00000000
-data4 0x554B0EB0, 0x80092804, 0x00004001, 0x00000000
-data4 0xA1CF0DE9, 0xF959F94C, 0x00004000, 0x00000000
-data4 0x77378677, 0xF3086BA0, 0x00004000, 0x00000000
-data4 0xCCD4723C, 0xED154515, 0x00004000, 0x00000000
-data4 0x1C27CF25, 0xE7790944, 0x00004000, 0x00000000
-data4 0x8DDACB88, 0xE22D037D, 0x00004000, 0x00000000
-data4 0x89C73522, 0xDD2B2D8A, 0x00004000, 0x00000000
-data4 0xBB2C1171, 0xD86E1A23, 0x00004000, 0x00000000
-data4 0xDFF5E0F9, 0xD3F0E288, 0x00004000, 0x00000000
-data4 0x283BEBD5, 0xCFAF16B1, 0x00004000, 0x00000000
-data4 0x0D88DD53, 0xCBA4AFAA, 0x00004000, 0x00000000
-data4 0xCA67C43D, 0xC7CE03CC, 0x00004000, 0x00000000
-data4 0x0CA0DDB0, 0xC427BC82, 0x00004000, 0x00000000
-data4 0xF13D8CAB, 0xC0AECD57, 0x00004000, 0x00000000
-data4 0x71ECE6B1, 0xBD606C38, 0x00004000, 0x00000000
-data4 0xA44C4929, 0xBA3A0A96, 0x00004000, 0x00000000
-data4 0xE5CCCEC1, 0xB7394F6F, 0x00004000, 0x00000000
-data4 0x9637D8BC, 0xB45C1203, 0x00004000, 0x00000000
-data4 0x92CB051B, 0xB1A05528, 0x00004000, 0x00000000
-data4 0x6BA2FFD0, 0xAF04432B, 0x00004000, 0x00000000
-data4 0x7221235F, 0xAC862A23, 0x00004000, 0x00000000
-data4 0x5F00A9D1, 0xAA2478AF, 0x00004000, 0x00000000
-data4 0x81E082BF, 0xA7DDBB0C, 0x00004000, 0x00000000
-data4 0x45684FEE, 0xA5B0987D, 0x00004000, 0x00000000
-data4 0x627A8F53, 0xA39BD0F5, 0x00004000, 0x00000000
-data4 0x6EC5C8B0, 0xA19E3B03, 0x00004000, 0x00000000
-data4 0x91CD7C66, 0x9FB6C1F0, 0x00004000, 0x00000000
-data4 0x1FA3DF8A, 0x9DE46410, 0x00004000, 0x00000000
-data4 0xA8F6B888, 0x9C263139, 0x00004000, 0x00000000
-data4 0xC27B0450, 0x9A7B4968, 0x00004000, 0x00000000
-data4 0x5EE614EE, 0x98E2DB7E, 0x00004000, 0x00000000
+data8 0x839D6D4A1BF30C9E, 0x00004001
+data8 0x80092804554B0EB0, 0x00004001
+data8 0xF959F94CA1CF0DE9, 0x00004000
+data8 0xF3086BA077378677, 0x00004000
+data8 0xED154515CCD4723C, 0x00004000
+data8 0xE77909441C27CF25, 0x00004000
+data8 0xE22D037D8DDACB88, 0x00004000
+data8 0xDD2B2D8A89C73522, 0x00004000
+data8 0xD86E1A23BB2C1171, 0x00004000
+data8 0xD3F0E288DFF5E0F9, 0x00004000
+data8 0xCFAF16B1283BEBD5, 0x00004000
+data8 0xCBA4AFAA0D88DD53, 0x00004000
+data8 0xC7CE03CCCA67C43D, 0x00004000
+data8 0xC427BC820CA0DDB0, 0x00004000
+data8 0xC0AECD57F13D8CAB, 0x00004000
+data8 0xBD606C3871ECE6B1, 0x00004000
+data8 0xBA3A0A96A44C4929, 0x00004000
+data8 0xB7394F6FE5CCCEC1, 0x00004000
+data8 0xB45C12039637D8BC, 0x00004000
+data8 0xB1A0552892CB051B, 0x00004000
+data8 0xAF04432B6BA2FFD0, 0x00004000
+data8 0xAC862A237221235F, 0x00004000
+data8 0xAA2478AF5F00A9D1, 0x00004000
+data8 0xA7DDBB0C81E082BF, 0x00004000
+data8 0xA5B0987D45684FEE, 0x00004000
+data8 0xA39BD0F5627A8F53, 0x00004000
+data8 0xA19E3B036EC5C8B0, 0x00004000
+data8 0x9FB6C1F091CD7C66, 0x00004000
+data8 0x9DE464101FA3DF8A, 0x00004000
+data8 0x9C263139A8F6B888, 0x00004000
+data8 0x9A7B4968C27B0450, 0x00004000
+data8 0x98E2DB7E5EE614EE, 0x00004000
+LOCAL_OBJECT_END(tanl_table_scim2)
+
+LOCAL_OBJECT_START(tanl_table_scim1)
//
// Entries SC_inv in Swapped IEEE format (extended)
// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
//
-data4 0x13B2B5BA, 0x969F335C, 0x00004000, 0x00000000
-data4 0xD4C0F548, 0x93D446D9, 0x00004000, 0x00000000
-data4 0x61B798AF, 0x9147094F, 0x00004000, 0x00000000
-data4 0x758787AC, 0x8EF317CC, 0x00004000, 0x00000000
-data4 0xB99EEFDB, 0x8CD498B3, 0x00004000, 0x00000000
-data4 0xDFF8BC37, 0x8AE82A7D, 0x00004000, 0x00000000
-data4 0xE3C55D42, 0x892AD546, 0x00004000, 0x00000000
-data4 0xD15573C1, 0x8799FEA9, 0x00004000, 0x00000000
-data4 0x435A4B4C, 0x86335F88, 0x00004000, 0x00000000
-data4 0x3E93A87B, 0x84F4FB6E, 0x00004000, 0x00000000
-data4 0x80A382FB, 0x83DD1952, 0x00004000, 0x00000000
-data4 0xA4CB8C9E, 0x82EA3D7F, 0x00004000, 0x00000000
-data4 0x6861D0A8, 0x821B247C, 0x00004000, 0x00000000
-data4 0x63E8D244, 0x816EBED1, 0x00004000, 0x00000000
-data4 0x27E4CFC6, 0x80E42D91, 0x00004000, 0x00000000
-data4 0x28E64AFD, 0x807ABF8D, 0x00004000, 0x00000000
-data4 0x863B4FD8, 0x8031EF26, 0x00004000, 0x00000000
-data4 0xAE8C11FD, 0x800960AD, 0x00004000, 0x00000000
-data4 0x5FDBEC21, 0x8000E147, 0x00004000, 0x00000000
-data4 0xA07791FA, 0x80186650, 0x00004000, 0x00000000
-ASM_SIZE_DIRECTIVE(TANL_BASE_CONSTANTS)
-
-Arg = f8
+data8 0x969F335C13B2B5BA, 0x00004000
+data8 0x93D446D9D4C0F548, 0x00004000
+data8 0x9147094F61B798AF, 0x00004000
+data8 0x8EF317CC758787AC, 0x00004000
+data8 0x8CD498B3B99EEFDB, 0x00004000
+data8 0x8AE82A7DDFF8BC37, 0x00004000
+data8 0x892AD546E3C55D42, 0x00004000
+data8 0x8799FEA9D15573C1, 0x00004000
+data8 0x86335F88435A4B4C, 0x00004000
+data8 0x84F4FB6E3E93A87B, 0x00004000
+data8 0x83DD195280A382FB, 0x00004000
+data8 0x82EA3D7FA4CB8C9E, 0x00004000
+data8 0x821B247C6861D0A8, 0x00004000
+data8 0x816EBED163E8D244, 0x00004000
+data8 0x80E42D9127E4CFC6, 0x00004000
+data8 0x807ABF8D28E64AFD, 0x00004000
+data8 0x8031EF26863B4FD8, 0x00004000
+data8 0x800960ADAE8C11FD, 0x00004000
+data8 0x8000E1475FDBEC21, 0x00004000
+data8 0x80186650A07791FA, 0x00004000
+LOCAL_OBJECT_END(tanl_table_scim1)
+
+Arg = f8
+Save_Norm_Arg = f8 // For input to reduction routine
Result = f8
-fp_tmp = f9
+r = f8 // For output from reduction routine
+c = f9 // For output from reduction routine
U_2 = f10
-rsq = f11
+rsq = f11
C_hi = f12
C_lo = f13
T_hi = f14
T_lo = f15
-N_0 = f32
d_1 = f33
-MPI_BY_4 = f34
+N_0 = f34
tail = f35
tanx = f36
Cx = f37
@@ -949,8 +1104,6 @@ P1_7 = f51
P1_8 = f52
P1_9 = f53
-TWO_TO_63 = f54
-NEGTWO_TO_63 = f55
x = f56
xsq = f57
Tx = f58
@@ -966,12 +1119,10 @@ B = f67
SC_inv = f68
Pos_r = f69
N_0_fix = f70
-PI_BY_4 = f71
-NEGTWO_TO_NEG2 = f72
-TWO_TO_24 = f73
+d_2 = f71
+PI_BY_4 = f72
TWO_TO_NEG14 = f74
TWO_TO_NEG33 = f75
-NEGTWO_TO_24 = f76
NEGTWO_TO_NEG14 = f76
NEGTWO_TO_NEG33 = f77
two_by_PI = f78
@@ -982,13 +1133,14 @@ P_2 = f82
P_3 = f83
s_val = f84
w = f85
-c = f86
-r = f87
+B_mask1 = f86
+B_mask2 = f87
+w2 = f88
A = f89
a = f90
t = f91
U_1 = f92
-d_2 = f93
+NEGTWO_TO_NEG2 = f93
TWO_TO_NEG2 = f94
Q1_1 = f95
Q1_2 = f96
@@ -1009,609 +1161,641 @@ V_hiabs = f110
V = f111
Inv_P_0 = f112
+FR_inv_pi_2to63 = f113
+FR_rshf_2to64 = f114
+FR_2tom64 = f115
+FR_rshf = f116
+Norm_Arg = f117
+Abs_Arg = f118
+TWO_TO_NEG65 = f119
+fp_tmp = f120
+mOne = f121
+
+GR_sig_inv_pi = r14
+GR_rshf_2to64 = r15
+GR_exp_2tom64 = r16
+GR_rshf = r17
+GR_exp_2_to_63 = r18
+GR_exp_2_to_24 = r19
+GR_signexp_x = r20
+GR_exp_x = r21
+GR_exp_mask = r22
+GR_exp_2tom14 = r23
+GR_exp_m2tom14 = r24
+GR_exp_2tom33 = r25
+GR_exp_m2tom33 = r26
+
GR_SAVE_B0 = r33
GR_SAVE_GP = r34
GR_SAVE_PFS = r35
-delta1 = r36
+table_base = r36
table_ptr1 = r37
table_ptr2 = r38
-i_0 = r39
-i_1 = r40
-N_fix_gr = r41
-N_inc = r42
-exp_Arg = r43
-exp_r = r44
-sig_r = r45
-lookup = r46
-table_offset = r47
-Create_B = r48
+table_ptr3 = r39
+lookup = r40
+N_fix_gr = r41
+GR_exp_2tom2 = r42
+GR_exp_2tom65 = r43
+exp_r = r44
+sig_r = r45
+bmask1 = r46
+table_offset = r47
+bmask2 = r48
gr_tmp = r49
+cot_flag = r50
+
+GR_SAVE_B0 = r51
+GR_SAVE_PFS = r52
+GR_SAVE_GP = r53
+GR_Parameter_X = r54
+GR_Parameter_Y = r55
+GR_Parameter_RESULT = r56
+GR_Parameter_Tag = r57
+
.section .text
-.global tanl
-.proc tanl
-tanl:
-#ifdef _LIBC
-.global __tanl
-.proc __tanl
-__tanl:
-#endif
-{ .mfi
-alloc r32 = ar.pfs, 0,17,2,0
-(p0) fclass.m.unc p6,p0 = Arg, 0x1E7
- addl gr_tmp = -1,r0
-}
-{ .mfi
- nop.m 0
-(p0) fclass.nm.unc p7,p0 = Arg, 0x1FF
- nop.i 0
+.global __libm_tanl#
+.global __libm_cotl#
+
+.proc __libm_cotl#
+__libm_cotl:
+.endp __libm_cotl#
+LOCAL_LIBM_ENTRY(cotl)
+
+{ .mlx
+ alloc r32 = ar.pfs, 0,22,4,0
+ movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
+}
+{ .mlx
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
+}
+;;
+
+// Check for NatVals, Infs, NaNs, and Zeros
+{ .mfi
+ getf.exp GR_signexp_x = Arg // Get sign and exponent of x
+ fclass.m p6,p0 = Arg, 0x1E7 // Test for natval, nan, inf, zero
+ mov cot_flag = 0x1
+}
+{ .mfb
+ addl table_base = @ltoff(TANL_BASE_CONSTANTS), gp // Pointer to table ptr
+ fnorm.s1 Norm_Arg = Arg // Normalize x
+ br.cond.sptk COMMON_PATH
};;
+LOCAL_LIBM_END(cotl)
+
+.proc __libm_tanl#
+__libm_tanl:
+.endp __libm_tanl#
+GLOBAL_IEEE754_ENTRY(tanl)
+
+{ .mlx
+ alloc r32 = ar.pfs, 0,22,4,0
+ movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
+}
+{ .mlx
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
+}
+;;
+
+// Check for NatVals, Infs, NaNs, and Zeros
{ .mfi
-(p0) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
- nop.f 999
+ getf.exp GR_signexp_x = Arg // Get sign and exponent of x
+ fclass.m p6,p0 = Arg, 0x1E7 // Test for natval, nan, inf, zero
+ mov cot_flag = 0x0
+}
+{ .mfi
+ addl table_base = @ltoff(TANL_BASE_CONSTANTS), gp // Pointer to table ptr
+ fnorm.s1 Norm_Arg = Arg // Normalize x
nop.i 0
+};;
+
+// Common path for both tanl and cotl
+COMMON_PATH:
+{ .mfi
+ setf.sig FR_inv_pi_2to63 = GR_sig_inv_pi // Form 1/pi * 2^63
+ fclass.m p9, p0 = Arg, 0x0b // Test x denormal
+ mov GR_exp_2tom64 = 0xffff - 64 // Scaling constant to compute N
+}
+{ .mlx
+ setf.d FR_rshf_2to64 = GR_rshf_2to64 // Form const 1.1000 * 2^(63+64)
+ movl GR_rshf = 0x43e8000000000000 // Form const 1.1000 * 2^63
}
;;
-{ .mmi
-(p0) ld8 table_ptr1 = [table_ptr1]
- setf.sig fp_tmp = gr_tmp // Make a constant so fmpy produces inexact
- nop.i 999
+
+// Check for everything - if false, then must be pseudo-zero or pseudo-nan.
+// Branch out to deal with special values.
+{ .mfi
+ addl gr_tmp = -1,r0
+ fclass.nm p7,p0 = Arg, 0x1FF // Test x unsupported
+ mov GR_exp_2_to_63 = 0xffff + 63 // Exponent of 2^63
+}
+{ .mfb
+ ld8 table_base = [table_base] // Get pointer to constant table
+ fms.s1 mOne = f0, f0, f1
+(p6) br.cond.spnt TANL_SPECIAL // Branch if x natval, nan, inf, zero
}
;;
-//
-// Check for NatVals, Infs , NaNs, and Zeros
-// Check for everything - if false, then must be pseudo-zero
-// or pseudo-nan.
-// Local table pointer
-//
-{ .mbb
-(p0) add table_ptr2 = 96, table_ptr1
-(p6) br.cond.spnt L(TANL_SPECIAL)
-(p7) br.cond.spnt L(TANL_SPECIAL) ;;
+{ .mmb
+ setf.sig fp_tmp = gr_tmp // Make a constant so fmpy produces inexact
+ mov GR_exp_2_to_24 = 0xffff + 24 // Exponent of 2^24
+(p9) br.cond.spnt TANL_DENORMAL // Branch if x denormal
}
+;;
+
+TANL_COMMON:
+// Return to here if x denormal
//
-// Point to Inv_P_0
-// Branch out to deal with unsupporteds and special values.
-//
-{ .mmf
-(p0) ldfs TWO_TO_24 = [table_ptr1],4
-(p0) ldfs TWO_TO_63 = [table_ptr2],4
-//
-// Load -2**24, load -2**63.
-//
-(p0) fcmp.eq.s0 p0, p6 = Arg, f1 ;;
-}
+// Do fcmp to generate Denormal exception
+// - can't do FNORM (will generate Underflow when U is unmasked!)
+// Branch out to deal with unsupported values.
{ .mfi
-(p0) ldfs NEGTWO_TO_63 = [table_ptr2],12
-(p0) fnorm.s1 Arg = Arg
- nop.i 999
+ setf.exp FR_2tom64 = GR_exp_2tom64 // Form 2^-64 for scaling N_float
+ fcmp.eq.s0 p0, p6 = Arg, f1 // Dummy to flag denormals
+ add table_ptr1 = 0, table_base // Point to tanl_table_1
}
-//
-// Load 2**24, Load 2**63.
-//
-{ .mmi
-(p0) ldfs NEGTWO_TO_24 = [table_ptr1],12 ;;
-//
-// Do fcmp to generate Denormal exception
-// - can't do FNORM (will generate Underflow when U is unmasked!)
-// Normalize input argument.
-//
-(p0) ldfe two_by_PI = [table_ptr1],16
- nop.i 999
+{ .mib
+ setf.d FR_rshf = GR_rshf // Form right shift const 1.1000 * 2^63
+ add table_ptr2 = 80, table_base // Point to tanl_table_2
+(p7) br.cond.spnt TANL_UNSUPPORTED // Branch if x unsupported type
}
-{ .mmi
-(p0) ldfe Inv_P_0 = [table_ptr2],16 ;;
-(p0) ldfe d_1 = [table_ptr2],16
- nop.i 999
+;;
+
+{ .mfi
+ and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x
+ fmpy.s1 Save_Norm_Arg = Norm_Arg, f1 // Save x if large arg reduction
+ dep.z bmask1 = 0x7c, 56, 8 // Form mask to get 5 msb of r
+ // bmask1 = 0x7c00000000000000
}
+;;
+
//
// Decide about the paths to take:
-// PR_1 and PR_3 set if -2**24 < Arg < 2**24 - CASE 1 OR 2
-// OTHERWISE - CASE 3 OR 4
-// Load inverse of P_0 .
-// Set PR_6 if Arg <= -2**63
-// Are there any Infs, NaNs, or zeros?
+// Set PR_6 if |Arg| >= 2**63
+// Set PR_9 if |Arg| < 2**24 - CASE 1 OR 2
+// OTHERWISE Set PR_8 - CASE 3 OR 4
//
-{ .mmi
-(p0) ldfe P_0 = [table_ptr1],16 ;;
-(p0) ldfe d_2 = [table_ptr2],16
- nop.i 999
+// Branch out if the magnitude of the input argument is >= 2^63
+// - do this branch before the next.
+{ .mfi
+ ldfe two_by_PI = [table_ptr1],16 // Load 2/pi
+ nop.f 999
+ dep.z bmask2 = 0x41, 57, 7 // Form mask to OR to produce B
+ // bmask2 = 0x8200000000000000
}
-//
-// Set PR_8 if Arg <= -2**24
-// Set PR_6 if Arg >= 2**63
-//
-{ .mmi
-(p0) ldfe P_1 = [table_ptr1],16 ;;
-(p0) ldfe PI_BY_4 = [table_ptr2],16
- nop.i 999
+{ .mib
+ ldfe PI_BY_4 = [table_ptr2],16 // Load pi/4
+ cmp.ge p6,p0 = GR_exp_x, GR_exp_2_to_63 // Is |x| >= 2^63
+(p6) br.cond.spnt TANL_ARG_TOO_LARGE // Branch if |x| >= 2^63
}
-//
-// Set PR_8 if Arg >= 2**24
-//
+;;
+
{ .mmi
-(p0) ldfe P_2 = [table_ptr1],16 ;;
-(p0) ldfe MPI_BY_4 = [table_ptr2],16
- nop.i 999
-}
-//
-// Load P_2 and PI_BY_4
-//
-{ .mfi
-(p0) ldfe P_3 = [table_ptr1],16
- nop.f 999
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fcmp.le.unc.s1 p6,p7 = Arg,NEGTWO_TO_63
- nop.i 999
+ ldfe P_0 = [table_ptr1],16 // Load P_0
+ ldfe Inv_P_0 = [table_ptr2],16 // Load Inv_P_0
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p0) fcmp.le.unc.s1 p8,p9 = Arg,NEGTWO_TO_24
- nop.i 999 ;;
+ ldfe P_1 = [table_ptr1],16 // Load P_1
+ fmerge.s Abs_Arg = f0, Norm_Arg // Get |x|
+ mov GR_exp_m2tom33 = 0x2ffff - 33 // Form signexp of -2^-33
}
{ .mfi
- nop.m 999
-(p7) fcmp.ge.s1 p6,p0 = Arg,TWO_TO_63
- nop.i 999
+ ldfe d_1 = [table_ptr2],16 // Load d_1 for 2^24 <= |x| < 2^63
+ nop.f 999
+ mov GR_exp_2tom33 = 0xffff - 33 // Form signexp of 2^-33
}
-{ .mfi
- nop.m 999
-(p9) fcmp.ge.s1 p8,p0 = Arg,TWO_TO_24
- nop.i 999 ;;
+;;
+
+{ .mmi
+ ldfe P_2 = [table_ptr1],16 // Load P_2
+ ldfe d_2 = [table_ptr2],16 // Load d_2 for 2^24 <= |x| < 2^63
+ cmp.ge p8,p0 = GR_exp_x, GR_exp_2_to_24 // Is |x| >= 2^24
}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Load P_3 and -PI_BY_4
-//
-(p6) br.cond.spnt L(TANL_ARG_TOO_LARGE) ;;
+;;
+
+// Use special scaling to right shift so N=Arg * 2/pi is in rightmost bits
+// Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
+{ .mfb
+ ldfe P_3 = [table_ptr1],16 // Load P_3
+ fma.s1 N_fix = Norm_Arg, FR_inv_pi_2to63, FR_rshf_2to64
+(p8) br.cond.spnt TANL_LARGER_ARG // Branch if 2^24 <= |x| < 2^63
}
-{ .mib
- nop.m 999
- nop.i 999
-//
-// Load 2**(-2).
-// Load -2**(-2).
-// Branch out if we have a special argument.
-// Branch out if the magnitude of the input argument is too large
-// - do this branch before the next.
+;;
+
+// Here if 0 < |x| < 2^24
+// ARGUMENT REDUCTION CODE - CASE 1 and 2
//
-(p8) br.cond.spnt L(TANL_LARGER_ARG) ;;
+{ .mmf
+ setf.exp TWO_TO_NEG33 = GR_exp_2tom33 // Form 2^-33
+ setf.exp NEGTWO_TO_NEG33 = GR_exp_m2tom33 // Form -2^-33
+ fmerge.s r = Norm_Arg,Norm_Arg // Assume r=x, ok if |x| < pi/4
}
+;;
+
//
-// Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
+// If |Arg| < pi/4, set PR_8, else pi/4 <=|Arg| < 2^24 - set PR_9.
//
+// Case 2: Convert integer N_fix back to normalized floating-point value.
{ .mfi
-(p0) ldfs TWO_TO_NEG2 = [table_ptr2],4
-// ARGUMENT REDUCTION CODE - CASE 1 and 2
-// Load 2**(-2).
-// Load -2**(-2).
-(p0) fmpy.s1 N = Arg,two_by_PI
- nop.i 999 ;;
+ getf.sig sig_r = Norm_Arg // Get sig_r if 1/4 <= |x| < pi/4
+ fcmp.lt.s1 p8,p9= Abs_Arg,PI_BY_4 // Test |x| < pi/4
+ mov GR_exp_2tom2 = 0xffff - 2 // Form signexp of 2^-2
}
{ .mfi
-(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr2],12
-//
-// N = Arg * 2/pi
-//
-(p0) fcmp.lt.unc.s1 p8,p9= Arg,PI_BY_4
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-//
-// if Arg < pi/4, set PR_8.
-//
-(p8) fcmp.gt.s1 p8,p9= Arg,MPI_BY_4
- nop.i 999 ;;
+ ldfps TWO_TO_NEG2, NEGTWO_TO_NEG2 = [table_ptr2] // Load 2^-2, -2^-2
+ fms.s1 N = N_fix, FR_2tom64, FR_rshf // Use scaling to get N floated
+ mov N_fix_gr = r0 // Assume N=0, ok if |x| < pi/4
}
+;;
+
//
// Case 1: Is |r| < 2**(-2).
// Arg is the same as r in this case.
// r = Arg
// c = 0
//
+// Case 2: Place integer part of N in GP register.
{ .mfi
-(p8) mov N_fix_gr = r0
-//
-// if Arg > -pi/4, reset PR_8.
-// Select the case when |Arg| < pi/4 - set PR[8] = true.
-// Else Select the case when |Arg| >= pi/4 - set PR[9] = true.
-//
-(p0) fcvt.fx.s1 N_fix = N
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-//
-// Grab the integer part of N .
-//
-(p8) mov r = Arg
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p8) mov c = f0
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p8) fcmp.lt.unc.s1 p10, p11 = Arg, TWO_TO_NEG2
- nop.i 999 ;;
+(p9) getf.sig N_fix_gr = N_fix
+ fmerge.s c = f0, f0 // Assume c=0, ok if |x| < pi/4
+ cmp.lt p10, p0 = GR_exp_x, GR_exp_2tom2 // Test if |x| < 1/4
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fcmp.gt.s1 p10,p0 = Arg, NEGTWO_TO_NEG2
- nop.i 999 ;;
+ setf.sig B_mask1 = bmask1 // Form mask to get 5 msb of r
+ nop.f 999
+ mov exp_r = GR_exp_x // Get exp_r if 1/4 <= |x| < pi/4
}
-{ .mfi
- nop.m 999
-//
-// Case 2: Place integer part of N in GP register.
-//
-(p9) fcvt.xf N = N_fix
- nop.i 999 ;;
-}
-{ .mib
-(p9) getf.sig N_fix_gr = N_fix
- nop.i 999
-//
-// Case 2: Convert integer N_fix back to normalized floating-point value.
-//
-(p10) br.cond.spnt L(TANL_SMALL_R) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p8) br.cond.sptk L(TANL_NORMAL_R) ;;
+{ .mbb
+ setf.sig B_mask2 = bmask2 // Form mask to form B from r
+(p10) br.cond.spnt TANL_SMALL_R // Branch if 0 < |x| < 1/4
+(p8) br.cond.spnt TANL_NORMAL_R // Branch if 1/4 <= |x| < pi/4
}
+;;
+
+// Here if pi/4 <= |x| < 2^24
//
// Case 1: PR_3 is only affected when PR_1 is set.
//
-{ .mmi
-(p9) ldfs TWO_TO_NEG33 = [table_ptr2], 4 ;;
//
-// Case 2: Load 2**(-33).
+// Case 2: w = N * P_2
+// Case 2: s_val = -N * P_1 + Arg
//
-(p9) ldfs NEGTWO_TO_NEG33 = [table_ptr2], 4
- nop.i 999 ;;
+
+{ .mfi
+ nop.m 999
+ fnma.s1 s_val = N, P_1, Norm_Arg
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Case 2: Load -2**(-33).
-//
-(p9) fnma.s1 s_val = N, P_1, Arg
- nop.i 999
+ nop.m 999
+ fmpy.s1 w = N, P_2 // w = N * P_2 for |s| >= 2^-33
+ nop.i 999
}
+;;
+
+// Case 2_reduce: w = N * P_3 (change sign)
{ .mfi
- nop.m 999
-(p9) fmpy.s1 w = N, P_2
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 w2 = N, P_3 // w = N * P_3 for |s| < 2^-33
+ nop.i 999
}
+;;
+
+// Case 1_reduce: r = s + w (change sign)
{ .mfi
- nop.m 999
-//
-// Case 2: w = N * P_2
-// Case 2: s_val = -N * P_1 + Arg
-//
-(p0) fcmp.lt.unc.s1 p9,p8 = s_val, TWO_TO_NEG33
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 r = s_val, w // r = s_val - w for |s| >= 2^-33
+ nop.i 999
}
+;;
+
+// Case 2_reduce: U_1 = N * P_2 + w
{ .mfi
- nop.m 999
+ nop.m 999
+ fma.s1 U_1 = N, P_2, w2 // U_1 = N * P_2 + w for |s| < 2^-33
+ nop.i 999
+}
+;;
+
//
// Decide between case_1 and case_2 reduce:
+// Case 1_reduce: |s| >= 2**(-33)
+// Case 2_reduce: |s| < 2**(-33)
//
-(p9) fcmp.gt.s1 p9, p8 = s_val, NEGTWO_TO_NEG33
- nop.i 999 ;;
+{ .mfi
+ nop.m 999
+ fcmp.lt.s1 p9, p8 = s_val, TWO_TO_NEG33
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
-// Case 1_reduce: s <= -2**(-33) or s >= 2**(-33)
-// Case 2_reduce: -2**(-33) < s < 2**(-33)
-//
-(p8) fsub.s1 r = s_val, w
- nop.i 999
+ nop.m 999
+(p9) fcmp.gt.s1 p9, p8 = s_val, NEGTWO_TO_NEG33
+ nop.i 999
}
+;;
+
+// Case 1_reduce: c = s - r
{ .mfi
- nop.m 999
-(p9) fmpy.s1 w = N, P_3
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 c = s_val, r // c = s_val - r for |s| >= 2^-33
+ nop.i 999
}
+;;
+
+// Case 2_reduce: r is complete here - continue to calculate c .
+// r = s - U_1
{ .mfi
- nop.m 999
-(p9) fma.s1 U_1 = N, P_2, w
- nop.i 999
+ nop.m 999
+(p9) fsub.s1 r = s_val, U_1
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
+(p9) fms.s1 U_2 = N, P_2, U_1
+ nop.i 999
+}
+;;
+
//
// Case 1_reduce: Is |r| < 2**(-2), if so set PR_10
-// else set PR_11.
+// else set PR_13.
//
-(p8) fsub.s1 c = s_val, r
- nop.i 999 ;;
-}
+
{ .mfi
- nop.m 999
-//
-// Case 1_reduce: r = s + w (change sign)
-// Case 2_reduce: w = N * P_3 (change sign)
-//
-(p8) fcmp.lt.unc.s1 p10, p11 = r, TWO_TO_NEG2
- nop.i 999 ;;
+ nop.m 999
+ fand B = B_mask1, r
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p10) fcmp.gt.s1 p10, p11 = r, NEGTWO_TO_NEG2
- nop.i 999 ;;
+ nop.m 999
+(p8) fcmp.lt.unc.s1 p10, p13 = r, TWO_TO_NEG2
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p9) fsub.s1 r = s_val, U_1
- nop.i 999
+(p8) getf.sig sig_r = r // Get signif of r if |s| >= 2^-33
+ nop.f 999
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-//
+(p8) getf.exp exp_r = r // Extract signexp of r if |s| >= 2^-33
+(p10) fcmp.gt.s1 p10, p13 = r, NEGTWO_TO_NEG2
+ nop.i 999
+}
+;;
+
// Case 1_reduce: c is complete here.
+// Case 1: Branch to SMALL_R or NORMAL_R.
// c = c + w (w has not been negated.)
-// Case 2_reduce: r is complete here - continue to calculate c .
-// r = s - U_1
-//
-(p9) fms.s1 U_2 = N, P_2, U_1
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
+ nop.m 999
+(p8) fsub.s1 c = c, w // c = c - w for |s| >= 2^-33
+ nop.i 999
+}
+{ .mbb
+ nop.m 999
+(p10) br.cond.spnt TANL_SMALL_R // Branch if pi/4 < |x| < 2^24 and |r|<1/4
+(p13) br.cond.sptk TANL_NORMAL_R_A // Branch if pi/4 < |x| < 2^24 and |r|>=1/4
+}
+;;
+
+
+// Here if pi/4 < |x| < 2^24 and |s| < 2^-33
//
-// Case 1_reduce: c = s - r
-// Case 2_reduce: U_1 = N * P_2 + w
+// Is i_1 = lsb of N_fix_gr even or odd?
+// if i_1 == 0, set p11, else set p12.
//
-(p8) fsub.s1 c = c, w
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p9) fsub.s1 s_val = s_val, r
- nop.i 999
+ nop.m 999
+ fsub.s1 s_val = s_val, r
+ add N_fix_gr = N_fix_gr, cot_flag // N = N + 1 (for cotl)
}
-{ .mfb
- nop.m 999
+{ .mfi
+ nop.m 999
//
// Case 2_reduce:
// U_2 = N * P_2 - U_1
// Not needed until later.
//
-(p9) fadd.s1 U_2 = U_2, w
+ fadd.s1 U_2 = U_2, w2
//
// Case 2_reduce:
// s = s - r
// U_2 = U_2 + w
//
-(p10) br.cond.spnt L(TANL_SMALL_R) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p11) br.cond.sptk L(TANL_NORMAL_R) ;;
+ nop.i 999
}
-{ .mii
- nop.m 999
+;;
+
//
// Case 2_reduce:
// c = c - U_2
// c is complete here
// Argument reduction ends here.
//
-(p9) extr.u i_1 = N_fix_gr, 0, 1 ;;
-(p9) cmp.eq.unc p11, p12 = 0x0000,i_1 ;;
-}
-{ .mfi
- nop.m 999
-//
-// Is i_1 even or odd?
-// if i_1 == 0, set p11, else set p12.
-//
-(p11) fmpy.s1 rsq = r, r
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-(p12) frcpa.s1 S_hi,p0 = f1, r
- nop.i 999
+ nop.m 999
+ fmpy.s1 rsq = r, r
+ tbit.z p11, p12 = N_fix_gr, 0 ;; // Set p11 if N even, p12 if odd
}
-
-
-//
-// Case 1: Branch to SMALL_R or NORMAL_R.
-// Case 1 is done now.
-//
-
{ .mfi
-(p9) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
-(p9) fsub.s1 c = s_val, U_1
- nop.i 999 ;;
+ nop.m 999
+(p12) frcpa.s1 S_hi,p0 = f1, r
+ nop.i 999
}
-;;
-
-{ .mmi
-(p9) ld8 table_ptr1 = [table_ptr1]
+{ .mfi
nop.m 999
+ fsub.s1 c = s_val, U_1
nop.i 999
}
;;
-
{ .mmi
-(p9) add table_ptr1 = 224, table_ptr1 ;;
-(p9) ldfe P1_1 = [table_ptr1],144
- nop.i 999 ;;
+ add table_ptr1 = 160, table_base ;; // Point to tanl_table_p1
+ ldfe P1_1 = [table_ptr1],144
+ nop.i 999 ;;
}
//
-// Get [i_1] - lsb of N_fix_gr .
// Load P1_1 and point to Q1_1 .
//
{ .mfi
-(p9) ldfe Q1_1 = [table_ptr1] , 0
+ ldfe Q1_1 = [table_ptr1]
//
// N even: rsq = r * Z
// N odd: S_hi = frcpa(r)
//
-(p12) fmerge.ns S_hi = S_hi, S_hi
- nop.i 999
+(p12) fmerge.ns S_hi = S_hi, S_hi
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// Case 2_reduce:
// c = s - U_1
//
-(p9) fsub.s1 c = c, U_2
- nop.i 999 ;;
+(p9) fsub.s1 c = c, U_2
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: Change sign of S_hi
//
-(p11) fmpy.s1 rsq = rsq, P1_1
- nop.i 999 ;;
+(p11) fmpy.s1 rsq = rsq, P1_1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999 ;;
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: rsq = rsq * P1_1
// N odd: poly1 = 1.0 + S_hi * r 16 bits partial account for necessary
//
-(p11) fma.s1 Result = r, rsq, c
- nop.i 999 ;;
+(p11) fma.s1 Poly = r, rsq, c
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N even: Result = c + r * rsq
+// N even: Poly = c + r * rsq
// N odd: S_hi = S_hi + S_hi*poly1 16 bits account for necessary
//
-(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
+(p12) fma.s1 poly1 = S_hi, r, f1
+(p11) tbit.z.unc p14, p15 = cot_flag, 0 ;; // p14=1 for tanl; p15=1 for cotl
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N even: Result = Result + r
+// N even: Result = Poly + r
// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
//
-(p11) fadd.s0 Result = r, Result
- nop.i 999 ;;
+(p14) fadd.s0 Result = r, Poly // for tanl
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p15) fms.s0 Result = r, mOne, Poly // for cotl
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999 ;;
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: Result1 = Result + r
// N odd: S_hi = S_hi * poly1 + S_hi 32 bits
//
-(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: poly1 = S_hi * r + 1.0 64 bits partial
//
-(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999 ;;
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: poly1 = S_hi * poly + 1.0 64 bits
//
-(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: poly1 = S_hi * r + 1.0
//
-(p12) fma.s1 poly1 = S_hi, c, poly1
- nop.i 999 ;;
+(p12) fma.s1 poly1 = S_hi, c, poly1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: poly1 = S_hi * c + poly1
//
-(p12) fmpy.s1 S_lo = S_hi, poly1
- nop.i 999 ;;
+(p12) fmpy.s1 S_lo = S_hi, poly1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: S_lo = S_hi * poly1
//
-(p12) fma.s1 S_lo = Q1_1, r, S_lo
- nop.i 999
+(p12) fma.s1 S_lo = Q1_1, r, S_lo
+(p12) tbit.z.unc p14, p15 = cot_flag, 0 // p14=1 for tanl; p15=1 for cotl
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: Result = S_hi + S_lo
//
-(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
- nop.i 999 ;;
+ fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
}
-{ .mfb
- nop.m 999
+{ .mfi
+ nop.m 999
//
// N odd: S_lo = S_lo + Q1_1 * r
//
-(p12) fadd.s0 Result = S_hi, S_lo
-(p0) br.ret.sptk b0 ;;
+(p14) fadd.s0 Result = S_hi, S_lo // for tanl
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p15) fms.s0 Result = S_hi, mOne, S_lo // for cotl
+ br.ret.sptk b0 ;; // Exit for pi/4 <= |x| < 2^24 and |s| < 2^-33
}
-L(TANL_LARGER_ARG):
-
+TANL_LARGER_ARG:
+// Here if 2^24 <= |x| < 2^63
//
// ARGUMENT REDUCTION CODE - CASE 3 and 4
//
-{ .mfi
-(p0) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
-(p0) fmpy.s1 N_0 = Arg, Inv_P_0
- nop.i 999
+{ .mmf
+ mov GR_exp_2tom14 = 0xffff - 14 // Form signexp of 2^-14
+ mov GR_exp_m2tom14 = 0x2ffff - 14 // Form signexp of -2^-14
+ fmpy.s1 N_0 = Norm_Arg, Inv_P_0
}
;;
{ .mmi
-(p0) ld8 table_ptr1 = [table_ptr1]
- nop.m 999
+ setf.exp TWO_TO_NEG14 = GR_exp_2tom14 // Form 2^-14
+ setf.exp NEGTWO_TO_NEG14 = GR_exp_m2tom14// Form -2^-14
nop.i 999
}
;;
@@ -1622,661 +1806,605 @@ L(TANL_LARGER_ARG):
// N_0 = Arg * Inv_P_0
//
{ .mmi
-(p0) add table_ptr1 = 8, table_ptr1 ;;
-//
-// Point to 2*-14
-//
-(p0) ldfs TWO_TO_NEG14 = [table_ptr1], 4
- nop.i 999 ;;
+ add table_ptr2 = 144, table_base ;; // Point to 2^-2
+ ldfps TWO_TO_NEG2, NEGTWO_TO_NEG2 = [table_ptr2]
+ nop.i 999
}
-//
-// Load 2**(-14).
-//
-{ .mmi
-(p0) ldfs NEGTWO_TO_NEG14 = [table_ptr1], 180 ;;
+;;
+
//
// N_0_fix = integer part of N_0 .
-// Adjust table_ptr1 to beginning of table.
//
-(p0) ldfs TWO_TO_NEG2 = [table_ptr1], 4
- nop.i 999 ;;
-}
//
// Make N_0 the integer part.
//
{ .mfi
-(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr1]
-//
-// Load -2**(-14).
-//
-(p0) fcvt.fx.s1 N_0_fix = N_0
- nop.i 999 ;;
+ nop.m 999
+ fcvt.fx.s1 N_0_fix = N_0
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p0) fcvt.xf N_0 = N_0_fix
- nop.i 999 ;;
+ setf.sig B_mask1 = bmask1 // Form mask to get 5 msb of r
+ fcvt.xf N_0 = N_0_fix
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p0) fnma.s1 ArgPrime = N_0, P_0, Arg
- nop.i 999
+ setf.sig B_mask2 = bmask2 // Form mask to form B from r
+ fnma.s1 ArgPrime = N_0, P_0, Norm_Arg
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 w = N_0, d_1
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 w = N_0, d_1
+ nop.i 999 ;;
}
-{ .mfi
- nop.m 999
//
// ArgPrime = -N_0 * P_0 + Arg
// w = N_0 * d_1
//
-(p0) fmpy.s1 N = ArgPrime, two_by_PI
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
//
// N = ArgPrime * 2/pi
//
-(p0) fcvt.fx.s1 N_fix = N
- nop.i 999 ;;
-}
+// fcvt.fx.s1 N_fix = N
+// Use special scaling to right shift so N=ArgPrime * 2/pi is in rightmost bits
+// Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
{ .mfi
- nop.m 999
-//
-// N_fix is the integer part.
-//
-(p0) fcvt.xf N = N_fix
- nop.i 999 ;;
+ nop.m 999
+ fma.s1 N_fix = ArgPrime, FR_inv_pi_2to63, FR_rshf_2to64
+
+ nop.i 999 ;;
}
+// Convert integer N_fix back to normalized floating-point value.
{ .mfi
-(p0) getf.sig N_fix_gr = N_fix
- nop.f 999
- nop.i 999 ;;
+ nop.m 999
+ fms.s1 N = N_fix, FR_2tom64, FR_rshf // Use scaling to get N floated
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
// N is the integer part of the reduced-reduced argument.
// Put the integer in a GP register.
//
-(p0) fnma.s1 s_val = N, P_1, ArgPrime
- nop.i 999
-}
{ .mfi
- nop.m 999
-(p0) fnma.s1 w = N, P_2, w
- nop.i 999 ;;
+ getf.sig N_fix_gr = N_fix
+ nop.f 999
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
// s_val = -N*P_1 + ArgPrime
// w = -N*P_2 + w
//
-(p0) fcmp.lt.unc.s1 p11, p10 = s_val, TWO_TO_NEG14
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p11) fcmp.gt.s1 p11, p10 = s_val, NEGTWO_TO_NEG14
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-//
-// Case 3: r = s_val + w (Z complete)
-// Case 4: U_hi = N_0 * d_1
-//
-(p10) fmpy.s1 V_hi = N, P_2
- nop.i 999
+ nop.m 999
+ fnma.s1 s_val = N, P_1, ArgPrime
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p11) fmpy.s1 U_hi = N_0, d_1
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 w = N, P_2, w
+ nop.i 999
}
-{ .mfi
- nop.m 999
-//
-// Case 3: r = s_val + w (Z complete)
+;;
+
+// Case 4: V_hi = N * P_2
// Case 4: U_hi = N_0 * d_1
-//
-(p11) fmpy.s1 V_hi = N, P_2
- nop.i 999
-}
{ .mfi
- nop.m 999
-(p11) fmpy.s1 U_hi = N_0, d_1
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 V_hi = N, P_2 // V_hi = N * P_2 for |s| < 2^-14
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Decide between case 3 and 4:
-// Case 3: s <= -2**(-14) or s >= 2**(-14)
-// Case 4: -2**(-14) < s < 2**(-14)
-//
-(p10) fadd.s1 r = s_val, w
- nop.i 999
+ nop.m 999
+ fmpy.s1 U_hi = N_0, d_1 // U_hi = N_0 * d_1 for |s| < 2^-14
+ nop.i 999
}
+;;
+
+// Case 3: r = s_val + w (Z complete)
+// Case 4: w = N * P_3
{ .mfi
- nop.m 999
-(p11) fmpy.s1 w = N, P_3
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 r = s_val, w // r = s_val + w for |s| >= 2^-14
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Case 4: We need abs of both U_hi and V_hi - dont
-// worry about switched sign of V_hi .
-//
-(p11) fsub.s1 A = U_hi, V_hi
- nop.i 999
+ nop.m 999
+ fmpy.s1 w2 = N, P_3 // w = N * P_3 for |s| < 2^-14
+ nop.i 999
}
-{ .mfi
- nop.m 999
-//
+;;
+
// Case 4: A = U_hi + V_hi
// Note: Worry about switched sign of V_hi, so subtract instead of add.
-//
-(p11) fnma.s1 V_lo = N, P_2, V_hi
- nop.i 999 ;;
+// Case 4: V_lo = -N * P_2 - V_hi (U_hi is in place of V_hi in writeup)
+// Note: the (-) is still missing for V_hi.
+{ .mfi
+ nop.m 999
+ fsub.s1 A = U_hi, V_hi // A = U_hi - V_hi for |s| < 2^-14
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p11) fms.s1 U_lo = N_0, d_1, U_hi
- nop.i 999 ;;
+ nop.m 999
+ fnma.s1 V_lo = N, P_2, V_hi // V_lo = V_hi - N * P_2 for |s| < 2^-14
+ nop.i 999
}
+;;
+
+// Decide between case 3 and 4:
+// Case 3: |s| >= 2**(-14) Set p10
+// Case 4: |s| < 2**(-14) Set p11
+//
+// Case 4: U_lo = N_0 * d_1 - U_hi
{ .mfi
- nop.m 999
-(p11) fabs V_hiabs = V_hi
- nop.i 999
+ nop.m 999
+ fms.s1 U_lo = N_0, d_1, U_hi // U_lo = N_0*d_1 - U_hi for |s| < 2^-14
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Case 4: V_hi = N * P_2
-// w = N * P_3
-// Note the product does not include the (-) as in the writeup
-// so (-) missing for V_hi and w .
-(p10) fadd.s1 r = s_val, w
- nop.i 999 ;;
+ nop.m 999
+ fcmp.lt.s1 p11, p10 = s_val, TWO_TO_NEG14
+ nop.i 999
}
+;;
+
+// Case 4: We need abs of both U_hi and V_hi - don't
+// worry about switched sign of V_hi.
{ .mfi
- nop.m 999
-//
-// Case 3: c = s_val - r
-// Case 4: U_lo = N_0 * d_1 - U_hi
-//
-(p11) fabs U_hiabs = U_hi
- nop.i 999
+ nop.m 999
+ fabs V_hiabs = V_hi // |V_hi| for |s| < 2^-14
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p11) fmpy.s1 w = N, P_3
- nop.i 999 ;;
+ nop.m 999
+(p11) fcmp.gt.s1 p11, p10 = s_val, NEGTWO_TO_NEG14
+ nop.i 999
}
+;;
+
+// Case 3: c = s_val - r
{ .mfi
- nop.m 999
-//
-// Case 4: Set P_12 if U_hiabs >= V_hiabs
-//
-(p11) fadd.s1 C_hi = s_val, A
- nop.i 999 ;;
+ nop.m 999
+ fabs U_hiabs = U_hi // |U_hi| for |s| < 2^-14
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
+ fsub.s1 c = s_val, r // c = s_val - r for |s| >= 2^-14
+ nop.i 999
+}
+;;
+
+// For Case 3, |s| >= 2^-14, determine if |r| < 1/4
//
// Case 4: C_hi = s_val + A
//
-(p11) fadd.s1 t = U_lo, V_lo
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-//
-// Case 3: Is |r| < 2**(-2), if so set PR_7
-// else set PR_8.
-// Case 3: If PR_7 is set, prepare to branch to Small_R.
-// Case 3: If PR_8 is set, prepare to branch to Normal_R.
-//
-(p10) fsub.s1 c = s_val, r
- nop.i 999 ;;
+ nop.m 999
+(p11) fadd.s1 C_hi = s_val, A // C_hi = s_val + A for |s| < 2^-14
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Case 3: c = (s - r) + w (c complete)
-//
-(p11) fcmp.ge.unc.s1 p12, p13 = U_hiabs, V_hiabs
- nop.i 999
+ nop.m 999
+(p10) fcmp.lt.unc.s1 p14, p15 = r, TWO_TO_NEG2
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fms.s1 w = N_0, d_2, w
- nop.i 999 ;;
+ getf.sig sig_r = r // Get signif of r if |s| >= 2^-14
+ fand B = B_mask1, r
+ nop.i 999
}
+;;
+
+// Case 4: t = U_lo + V_lo
{ .mfi
- nop.m 999
-//
-// Case 4: V_hi = N * P_2
-// w = N * P_3
-// Note the product does not include the (-) as in the writeup
-// so (-) missing for V_hi and w .
-//
-(p10) fcmp.lt.unc.s1 p14, p15 = r, TWO_TO_NEG2
- nop.i 999 ;;
+ getf.exp exp_r = r // Extract signexp of r if |s| >= 2^-14
+(p11) fadd.s1 t = U_lo, V_lo // t = U_lo + V_lo for |s| < 2^-14
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p14) fcmp.gt.s1 p14, p15 = r, NEGTWO_TO_NEG2
- nop.i 999 ;;
+ nop.i 999
}
-{ .mfb
- nop.m 999
+;;
+
+// Case 3: c = (s - r) + w (c complete)
+{ .mfi
+ nop.m 999
+(p10) fadd.s1 c = c, w // c = c + w for |s| >= 2^-14
+ nop.i 999
+}
+{ .mbb
+ nop.m 999
+(p14) br.cond.spnt TANL_SMALL_R // Branch if 2^24 <= |x| < 2^63 and |r|< 1/4
+(p15) br.cond.sptk TANL_NORMAL_R_A // Branch if 2^24 <= |x| < 2^63 and |r|>=1/4
+}
+;;
+
+
+// Here if 2^24 <= |x| < 2^63 and |s| < 2^-14 >>>>>>> Case 4.
//
-// Case 4: V_lo = -N * P_2 - V_hi (U_hi is in place of V_hi in writeup)
-// Note: the (-) is still missing for V_hi .
+// Case 4: Set P_12 if U_hiabs >= V_hiabs
// Case 4: w = w + N_0 * d_2
// Note: the (-) is now incorporated in w .
-//
-(p10) fadd.s1 c = c, w
-//
-// Case 4: t = U_lo + V_lo
-// Note: remember V_lo should be (-), subtract instead of add. NO
-//
-(p14) br.cond.spnt L(TANL_SMALL_R) ;;
-}
-{ .mib
- nop.m 999
- nop.i 999
-(p15) br.cond.spnt L(TANL_NORMAL_R) ;;
-}
{ .mfi
- nop.m 999
-//
-// Case 3: Vector off when |r| < 2**(-2). Recall that PR_3 will be true.
-// The remaining stuff is for Case 4.
-//
-(p12) fsub.s1 a = U_hi, A
-(p11) extr.u i_1 = N_fix_gr, 0, 1 ;;
+ add table_ptr1 = 160, table_base // Point to tanl_table_p1
+ fcmp.ge.unc.s1 p12, p13 = U_hiabs, V_hiabs
+ nop.i 999
}
{ .mfi
- nop.m 999
-//
-// Case 4: C_lo = s_val - C_hi
-//
-(p11) fadd.s1 t = t, w
- nop.i 999
+ nop.m 999
+ fms.s1 w2 = N_0, d_2, w2
+ nop.i 999
}
+;;
+
+// Case 4: C_lo = s_val - C_hi
{ .mfi
- nop.m 999
-(p13) fadd.s1 a = V_hi, A
- nop.i 999 ;;
+ ldfe P1_1 = [table_ptr1], 16 // Load P1_1
+ fsub.s1 C_lo = s_val, C_hi
+ nop.i 999
}
-
-
+;;
//
// Case 4: a = U_hi - A
// a = V_hi - A (do an add to account for missing (-) on V_hi
//
-
{ .mfi
-(p11) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
-(p11) fsub.s1 C_lo = s_val, C_hi
- nop.i 999
+ ldfe P1_2 = [table_ptr1], 128 // Load P1_2
+(p12) fsub.s1 a = U_hi, A
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 a = V_hi, A
+ nop.i 999
}
;;
+// Case 4: t = U_lo + V_lo + w
+{ .mfi
+ ldfe Q1_1 = [table_ptr1], 16 // Load Q1_1
+ fadd.s1 t = t, w2
+ nop.i 999
+}
+;;
-
-//
// Case 4: a = (U_hi - A) + V_hi
// a = (V_hi - A) + U_hi
// In each case account for negative missing form V_hi .
//
-
-
-{ .mmi
-(p11) ld8 table_ptr1 = [table_ptr1]
+{ .mfi
+ ldfe Q1_2 = [table_ptr1], 16 // Load Q1_2
+(p12) fsub.s1 a = a, V_hi
+ nop.i 999
+}
+{ .mfi
nop.m 999
+(p13) fsub.s1 a = U_hi, a
nop.i 999
}
;;
-
//
// Case 4: C_lo = (s_val - C_hi) + A
//
-{ .mmi
-(p11) add table_ptr1 = 224, table_ptr1 ;;
-(p11) ldfe P1_1 = [table_ptr1], 16
- nop.i 999 ;;
-}
{ .mfi
-(p11) ldfe P1_2 = [table_ptr1], 128
-//
-// Case 4: w = U_lo + V_lo + w
-//
-(p12) fsub.s1 a = a, V_hi
- nop.i 999 ;;
-}
-//
-// Case 4: r = C_hi + C_lo
-//
-{ .mfi
-(p11) ldfe Q1_1 = [table_ptr1], 16
-(p11) fadd.s1 C_lo = C_lo, A
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 C_lo = C_lo, A
+ nop.i 999 ;;
}
//
-// Case 4: c = C_hi - r
-// Get [i_1] - lsb of N_fix_gr.
+// Case 4: t = t + a
//
{ .mfi
-(p11) ldfe Q1_2 = [table_ptr1], 16
- nop.f 999
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 t = t, a
+ nop.i 999
}
+;;
+
+// Case 4: C_lo = C_lo + t
+// Case 4: r = C_hi + C_lo
{ .mfi
- nop.m 999
-(p13) fsub.s1 a = U_hi, a
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 C_lo = C_lo, t
+ nop.i 999
}
+;;
+
{ .mfi
- nop.m 999
-(p11) fadd.s1 t = t, a
- nop.i 999 ;;
+ nop.m 999
+ fadd.s1 r = C_hi, C_lo
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
-// Case 4: t = t + a
+// Case 4: c = C_hi - r
//
-(p11) fadd.s1 C_lo = C_lo, t
- nop.i 999 ;;
-}
{ .mfi
- nop.m 999
-//
-// Case 4: C_lo = C_lo + t
-//
-(p11) fadd.s1 r = C_hi, C_lo
- nop.i 999 ;;
+ nop.m 999
+ fsub.s1 c = C_hi, r
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p11) fsub.s1 c = C_hi, r
- nop.i 999
+ nop.m 999
+ fmpy.s1 rsq = r, r
+ add N_fix_gr = N_fix_gr, cot_flag // N = N + 1 (for cotl)
}
-{ .mfi
- nop.m 999
-//
+;;
+
// Case 4: c = c + C_lo finished.
-// Is i_1 even or odd?
-// if i_1 == 0, set PR_4, else set PR_5.
//
-// r and c have been computed.
-// We known whether this is the sine or cosine routine.
-// Make sure ftz mode is set - should be automatic when using wre
-(p0) fmpy.s1 rsq = r, r
- nop.i 999 ;;
-}
+// Is i_1 = lsb of N_fix_gr even or odd?
+// if i_1 == 0, set PR_11, else set PR_12.
+//
{ .mfi
- nop.m 999
-(p11) fadd.s1 c = c , C_lo
-(p11) cmp.eq.unc p11, p12 = 0x0000, i_1 ;;
+ nop.m 999
+ fadd.s1 c = c , C_lo
+ tbit.z p11, p12 = N_fix_gr, 0
}
+;;
+
+// r and c have been computed.
{ .mfi
- nop.m 999
+ nop.m 999
(p12) frcpa.s1 S_hi, p0 = f1, r
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: Change sign of S_hi
//
-(p11) fma.s1 Result = rsq, P1_2, P1_1
- nop.i 999 ;;
+(p11) fma.s1 Poly = rsq, P1_2, P1_1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 P = rsq, Q1_2, Q1_1
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: Result = S_hi + S_lo (User supplied rounding mode for C1)
//
-(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
- nop.i 999 ;;
+ fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: rsq = r * r
// N odd: S_hi = frcpa(r)
//
(p12) fmerge.ns S_hi = S_hi, S_hi
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: rsq = rsq * P1_2 + P1_1
// N odd: poly1 = 1.0 + S_hi * r 16 bits partial account for necessary
//
-(p11) fmpy.s1 Result = rsq, Result
- nop.i 999 ;;
+(p11) fmpy.s1 Poly = rsq, Poly
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly1 = S_hi, r,f1
- nop.i 999
+(p11) tbit.z.unc p14, p15 = cot_flag, 0 // p14=1 for tanl; p15=1 for cotl
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N even: Result = Result * rsq
+// N even: Poly = Poly * rsq
// N odd: S_hi = S_hi + S_hi*poly1 16 bits account for necessary
//
-(p11) fma.s1 Result = r, Result, c
- nop.i 999 ;;
+(p11) fma.s1 Poly = r, Poly, c
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: S_hi = S_hi * poly1 + S_hi 32 bits
//
-(p11) fadd.s0 Result= r, Result
- nop.i 999 ;;
+(p14) fadd.s0 Result = r, Poly // for tanl
+ nop.i 999 ;;
}
+
+.pred.rel "mutex",p15,p12
{ .mfi
- nop.m 999
+ nop.m 999
+(p15) fms.s0 Result = r, mOne, Poly // for cotl
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N even: Result = Result * r + c
+// N even: Poly = Poly * r + c
// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
//
(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N even: Result1 = Result + r (Rounding mode S0)
+// N even: Result = Poly + r (Rounding mode S0)
// N odd: poly1 = S_hi * r + 1.0 64 bits partial
//
(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: poly1 = S_hi * poly + S_hi 64 bits
//
(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: poly1 = S_hi * r + 1.0
//
(p12) fma.s1 poly1 = S_hi, c, poly1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: poly1 = S_hi * c + poly1
//
(p12) fmpy.s1 S_lo = S_hi, poly1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: S_lo = S_hi * poly1
//
(p12) fma.s1 S_lo = P, r, S_lo
- nop.i 999 ;;
+(p12) tbit.z.unc p14, p15 = cot_flag, 0 ;; // p14=1 for tanl; p15=1 for cotl
+}
+
+{ .mfi
+ nop.m 999
+(p14) fadd.s0 Result = S_hi, S_lo // for tanl
+ nop.i 999
}
{ .mfb
- nop.m 999
+ nop.m 999
//
// N odd: S_lo = S_lo + r * P
//
-(p12) fadd.s0 Result = S_hi, S_lo
-(p0) br.ret.sptk b0 ;;
+(p15) fms.s0 Result = S_hi, mOne, S_lo // for cotl
+ br.ret.sptk b0 ;; // Exit for 2^24 <= |x| < 2^63 and |s| < 2^-14
}
-L(TANL_SMALL_R):
-{ .mii
- nop.m 999
-(p0) extr.u i_1 = N_fix_gr, 0, 1 ;;
-(p0) cmp.eq.unc p11, p12 = 0x0000, i_1
-}
+TANL_SMALL_R:
+// Here if |r| < 1/4
+// r and c have been computed.
+// *****************************************************************
+// *****************************************************************
+// *****************************************************************
+// N odd: S_hi = frcpa(r)
+// Get [i_1] - lsb of N_fix_gr. Set p11 if N even, p12 if N odd.
+// N even: rsq = r * r
{ .mfi
- nop.m 999
-(p0) fmpy.s1 rsq = r, r
- nop.i 999 ;;
+ add table_ptr1 = 160, table_base // Point to tanl_table_p1
+ frcpa.s1 S_hi, p0 = f1, r // S_hi for N odd
+ add N_fix_gr = N_fix_gr, cot_flag // N = N + 1 (for cotl)
}
{ .mfi
-(p0) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
-(p12) frcpa.s1 S_hi, p0 = f1, r
- nop.i 999
+ add table_ptr2 = 400, table_base // Point to Q1_7
+ fmpy.s1 rsq = r, r
+ nop.i 999
}
;;
-
{ .mmi
-(p0) ld8 table_ptr1 = [table_ptr1]
- nop.m 999
- nop.i 999
+ ldfe P1_1 = [table_ptr1], 16
+;;
+ ldfe P1_2 = [table_ptr1], 16
+ tbit.z p11, p12 = N_fix_gr, 0
}
;;
-// *****************************************************************
-// *****************************************************************
-// *****************************************************************
-
-{ .mmi
-(p0) add table_ptr1 = 224, table_ptr1 ;;
-(p0) ldfe P1_1 = [table_ptr1], 16
- nop.i 999 ;;
-}
-// r and c have been computed.
-// We known whether this is the sine or cosine routine.
-// Make sure ftz mode is set - should be automatic when using wre
-// |r| < 2**(-2)
{ .mfi
-(p0) ldfe P1_2 = [table_ptr1], 16
-(p11) fmpy.s1 r_to_the_8 = rsq, rsq
- nop.i 999 ;;
+ ldfe P1_3 = [table_ptr1], 96
+ nop.f 999
+ nop.i 999
}
-//
-// Set table_ptr1 to beginning of constant table.
-// Get [i_1] - lsb of N_fix_gr.
-//
+;;
+
{ .mfi
-(p0) ldfe P1_3 = [table_ptr1], 96
-//
-// N even: rsq = r * r
-// N odd: S_hi = frcpa(r)
-//
+(p11) ldfe P1_9 = [table_ptr1], -16
(p12) fmerge.ns S_hi = S_hi, S_hi
- nop.i 999 ;;
+ nop.i 999
}
-//
-// Is i_1 even or odd?
-// if i_1 == 0, set PR_11.
-// if i_1 != 0, set PR_12.
-//
{ .mfi
-(p11) ldfe P1_9 = [table_ptr1], -16
+ nop.m 999
+(p11) fmpy.s1 r_to_the_8 = rsq, rsq
+ nop.i 999
+}
+;;
+
//
// N even: Poly2 = P1_7 + Poly2 * rsq
// N odd: poly2 = Q1_5 + poly2 * rsq
//
+{ .mfi
+(p11) ldfe P1_8 = [table_ptr1], -16
(p11) fadd.s1 CORR = rsq, f1
- nop.i 999 ;;
+ nop.i 999
}
-{ .mmi
-(p11) ldfe P1_8 = [table_ptr1], -16 ;;
+;;
+
//
// N even: Poly1 = P1_2 + P1_3 * rsq
-// N odd: poly1 = 1.0 + S_hi * r
+// N odd: poly1 = 1.0 + S_hi * r
// 16 bits partial account for necessary (-1)
//
+{ .mmi
(p11) ldfe P1_7 = [table_ptr1], -16
- nop.i 999 ;;
+;;
+(p11) ldfe P1_6 = [table_ptr1], -16
+ nop.i 999
}
+;;
+
//
// N even: Poly1 = P1_1 + Poly1 * rsq
// N odd: S_hi = S_hi + S_hi * poly1) 16 bits account for necessary
//
-{ .mfi
-(p11) ldfe P1_6 = [table_ptr1], -16
//
// N even: Poly2 = P1_5 + Poly2 * rsq
// N odd: poly2 = Q1_3 + poly2 * rsq
//
+{ .mfi
+(p11) ldfe P1_5 = [table_ptr1], -16
(p11) fmpy.s1 r_to_the_8 = r_to_the_8, r_to_the_8
- nop.i 999 ;;
+ nop.i 999
}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999
+}
+;;
+
//
// N even: Poly1 = Poly1 * rsq
// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
//
-{ .mfi
-(p11) ldfe P1_5 = [table_ptr1], -16
-(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
-}
//
// N even: CORR = CORR * c
@@ -2290,44 +2418,30 @@ L(TANL_SMALL_R):
{ .mmf
(p11) ldfe P1_4 = [table_ptr1], -16
-(p0) addl table_ptr2 = @ltoff(TANL_BASE_CONSTANTS), gp
-(p11) fmpy.s1 CORR = CORR, c
-}
-;;
-
-
-{ .mmi
-(p0) ld8 table_ptr2 = [table_ptr2]
nop.m 999
- nop.i 999
+(p11) fmpy.s1 CORR = CORR, c
}
;;
-
-{ .mii
-(p0) add table_ptr2 = 464, table_ptr2
- nop.i 999 ;;
- nop.i 999
-}
{ .mfi
- nop.m 999
+ nop.m 999
(p11) fma.s1 Poly1 = P1_3, rsq, P1_2
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
-(p0) ldfe Q1_7 = [table_ptr2], -16
+(p12) ldfe Q1_7 = [table_ptr2], -16
(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
-(p0) ldfe Q1_6 = [table_ptr2], -16
+(p12) ldfe Q1_6 = [table_ptr2], -16
(p11) fma.s1 Poly2 = P1_9, rsq, P1_8
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mmi
-(p0) ldfe Q1_5 = [table_ptr2], -16 ;;
+(p12) ldfe Q1_5 = [table_ptr2], -16 ;;
(p12) ldfe Q1_4 = [table_ptr2], -16
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
(p12) ldfe Q1_3 = [table_ptr2], -16
@@ -2336,735 +2450,795 @@ L(TANL_SMALL_R):
// N odd: poly2 = Q1_6 + Q1_7 * rsq
//
(p11) fma.s1 Poly1 = Poly1, rsq, P1_1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
(p12) ldfe Q1_2 = [table_ptr2], -16
(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
(p12) ldfe Q1_1 = [table_ptr2], -16
(p11) fma.s1 Poly2 = Poly2, rsq, P1_7
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: CORR = rsq + 1
// N even: r_to_the_8 = rsq * rsq
//
(p11) fmpy.s1 Poly1 = Poly1, rsq
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly2 = Q1_7, rsq, Q1_6
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p11) fma.s1 Poly2 = Poly2, rsq, P1_6
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly2 = poly2, rsq, Q1_5
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p11) fma.s1 Poly2= Poly2, rsq, P1_5
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 S_hi = S_hi, poly1, S_hi
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly2 = poly2, rsq, Q1_4
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: r_to_the_8 = r_to_the_8 * r_to_the_8
// N odd: poly1 = S_hi * r + 1.0 64 bits partial
//
(p11) fma.s1 Poly2 = Poly2, rsq, P1_4
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N even: Result = CORR + Poly * r
+// N even: Poly = CORR + Poly * r
// N odd: P = Q1_1 + poly2 * rsq
//
(p12) fma.s1 poly1 = S_hi, r, f1
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly2 = poly2, rsq, Q1_3
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: Poly2 = P1_4 + Poly2 * rsq
// N odd: poly2 = Q1_2 + poly2 * rsq
//
(p11) fma.s1 Poly = Poly2, r_to_the_8, Poly1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly1 = S_hi, c, poly1
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 poly2 = poly2, rsq, Q1_2
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: Poly = Poly1 + Poly2 * r_to_the_8
// N odd: S_hi = S_hi * poly1 + S_hi 64 bits
//
-(p11) fma.s1 Result = Poly, r, CORR
- nop.i 999 ;;
+(p11) fma.s1 Poly = Poly, r, CORR
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
-// N even: Result = r + Result (User supplied rounding mode)
+// N even: Result = r + Poly (User supplied rounding mode)
// N odd: poly1 = S_hi * c + poly1
//
(p12) fmpy.s1 S_lo = S_hi, poly1
- nop.i 999
+(p11) tbit.z.unc p14, p15 = cot_flag, 0 // p14=1 for tanl; p15=1 for cotl
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fma.s1 P = poly2, rsq, Q1_1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: poly1 = S_hi * r + 1.0
//
//
// N odd: S_lo = S_hi * poly1
//
-(p11) fadd.s0 Result = Result, r
- nop.i 999 ;;
+(p14) fadd.s0 Result = Poly, r // for tanl
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
+(p15) fms.s0 Result = Poly, mOne, r // for cotl
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
//
// N odd: S_lo = Q1_1 * c + S_lo
//
(p12) fma.s1 S_lo = Q1_1, c, S_lo
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: Result = S_lo + r * P
//
(p12) fma.s1 Result = P, r, S_lo
- nop.i 999 ;;
+(p12) tbit.z.unc p14, p15 = cot_flag, 0 ;; // p14=1 for tanl; p15=1 for cotl
}
-{ .mfb
- nop.m 999
+
//
// N odd: Result = Result + S_hi (user supplied rounding mode)
//
-(p12) fadd.s0 Result = Result, S_hi
-(p0) br.ret.sptk b0 ;;
+{ .mfi
+ nop.m 999
+(p14) fadd.s0 Result = Result, S_hi // for tanl
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p15) fms.s0 Result = Result, mOne, S_hi // for cotl
+ br.ret.sptk b0 ;; // Exit |r| < 1/4 path
}
-L(TANL_NORMAL_R):
-{ .mfi
-(p0) getf.sig sig_r = r
+TANL_NORMAL_R:
+// Here if 1/4 <= |x| < pi/4 or if |x| >= 2^63 and |r| >= 1/4
// *******************************************************************
// *******************************************************************
// *******************************************************************
//
// r and c have been computed.
-// Make sure ftz mode is set - should be automatic when using wre
-//
//
-// Get [i_1] - lsb of N_fix_gr alone.
-//
-(p0) fmerge.s Pos_r = f1, r
-(p0) extr.u i_1 = N_fix_gr, 0, 1 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fmerge.s sgn_r = r, f1
-(p0) cmp.eq.unc p11, p12 = 0x0000, i_1 ;;
-}
-{ .mfi
- nop.m 999
- nop.f 999
-(p0) extr.u lookup = sig_r, 58, 5
-}
-{ .mlx
- nop.m 999
-(p0) movl Create_B = 0x8200000000000000 ;;
-}
{ .mfi
-(p0) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
- nop.f 999
-(p0) dep Create_B = lookup, Create_B, 58, 5
-}
-;;
-
-
-//
-// Get [i_1] - lsb of N_fix_gr alone.
-// Pos_r = abs (r)
-//
-
-
-{ .mmi
-(p0) ld8 table_ptr1 = [table_ptr1]
nop.m 999
+ fand B = B_mask1, r
nop.i 999
}
;;
-
+TANL_NORMAL_R_A:
+// Enter here if pi/4 <= |x| < 2^63 and |r| >= 1/4
+// Get the 5 bits of r for the lookup. 1.xxxxx ....
{ .mmi
- nop.m 999
-(p0) setf.sig B = Create_B
-//
-// Set table_ptr1 and table_ptr2 to base address of
-// constant table.
-//
-(p0) add table_ptr1 = 480, table_ptr1 ;;
-}
-{ .mmb
- nop.m 999
-//
-// Is i_1 or i_0 == 0 ?
-// Create the constant 1 00000 1000000000000000000000...
-//
-(p0) ldfe P2_1 = [table_ptr1], 16
- nop.b 999
+ add table_ptr1 = 416, table_base // Point to tanl_table_p2
+ mov GR_exp_2tom65 = 0xffff - 65 // Scaling constant for B
+ extr.u lookup = sig_r, 58, 5
}
+;;
+
{ .mmi
- nop.m 999 ;;
-(p0) getf.exp exp_r = Pos_r
- nop.i 999
+ ldfe P2_1 = [table_ptr1], 16
+ setf.exp TWO_TO_NEG65 = GR_exp_2tom65 // 2^-65 for scaling B if exp_r=-2
+ add N_fix_gr = N_fix_gr, cot_flag // N = N + 1 (for cotl)
}
-//
-// Get r's exponent
-// Get r's significand
-//
-{ .mmi
-(p0) ldfe P2_2 = [table_ptr1], 16 ;;
-//
-// Get the 5 bits or r for the lookup. 1.xxxxx ....
-// from sig_r.
-// Grab lsb of exp of B
-//
-(p0) ldfe P2_3 = [table_ptr1], 16
- nop.i 999 ;;
+;;
+
+.pred.rel "mutex",p11,p12
+// B = 2^63 * 1.xxxxx 100...0
+{ .mfi
+ ldfe P2_2 = [table_ptr1], 16
+ for B = B_mask2, B
+ mov table_offset = 512 // Assume table offset is 512
}
-{ .mii
- nop.m 999
-(p0) andcm table_offset = 0x0001, exp_r ;;
-(p0) shl table_offset = table_offset, 9 ;;
+;;
+
+{ .mfi
+ ldfe P2_3 = [table_ptr1], 16
+ fmerge.s Pos_r = f1, r
+ tbit.nz p8,p9 = exp_r, 0
}
-{ .mii
- nop.m 999
-//
-// Deposit 0 00000 1000000000000000000000... on
-// 1 xxxxx yyyyyyyyyyyyyyyyyyyyyy...,
-// getting rid of the ys.
+;;
+
// Is B = 2** -2 or B= 2** -1? If 2**-1, then
// we want an offset of 512 for table addressing.
-//
-(p0) shladd table_offset = lookup, 4, table_offset ;;
-//
-// B = ........ 1xxxxx 1000000000000000000...
-//
-(p0) add table_ptr1 = table_ptr1, table_offset ;;
-}
-{ .mmb
- nop.m 999
-//
-// B = ........ 1xxxxx 1000000000000000000...
-// Convert B so it has the same exponent as Pos_r
-//
-(p0) ldfd T_hi = [table_ptr1], 8
- nop.b 999 ;;
+{ .mii
+ add table_ptr2 = 1296, table_base // Point to tanl_table_cm2
+(p9) shladd table_offset = lookup, 4, table_offset
+(p8) shladd table_offset = lookup, 4, r0
}
+;;
+{ .mmi
+ add table_ptr1 = table_ptr1, table_offset // Point to T_hi
+ add table_ptr2 = table_ptr2, table_offset // Point to C_hi
+ add table_ptr3 = 2128, table_base // Point to tanl_table_scim2
+}
+;;
+{ .mmi
+ ldfd T_hi = [table_ptr1], 8 // Load T_hi
+;;
+ ldfd C_hi = [table_ptr2], 8 // Load C_hi
+ add table_ptr3 = table_ptr3, table_offset // Point to SC_inv
+}
+;;
//
// x = |r| - B
-// Load T_hi.
-// Load C_hi.
//
-
-{ .mmf
-(p0) addl table_ptr2 = @ltoff(TANL_BASE_CONSTANTS), gp
-(p0) ldfs T_lo = [table_ptr1]
-(p0) fmerge.se B = Pos_r, B
+// Convert B so it has the same exponent as Pos_r before subtracting
+{ .mfi
+ ldfs T_lo = [table_ptr1] // Load T_lo
+(p9) fnma.s1 x = B, FR_2tom64, Pos_r
+ nop.i 999
}
-;;
-
-
-{ .mmi
-(p0) ld8 table_ptr2 = [table_ptr2]
+{ .mfi
nop.m 999
+(p8) fnma.s1 x = B, TWO_TO_NEG65, Pos_r
nop.i 999
}
;;
-
-{ .mii
-(p0) add table_ptr2 = 1360, table_ptr2
- nop.i 999 ;;
-(p0) add table_ptr2 = table_ptr2, table_offset ;;
+{ .mfi
+ ldfs C_lo = [table_ptr2] // Load C_lo
+ nop.f 999
+ nop.i 999
}
+;;
+
{ .mfi
-(p0) ldfd C_hi = [table_ptr2], 8
-(p0) fsub.s1 x = Pos_r, B
- nop.i 999 ;;
+ ldfe SC_inv = [table_ptr3] // Load SC_inv
+ fmerge.s sgn_r = r, f1
+ tbit.z p11, p12 = N_fix_gr, 0 // p11 if N even, p12 if odd
+
}
-{ .mii
-(p0) ldfs C_lo = [table_ptr2],255
- nop.i 999 ;;
+;;
+
//
// xsq = x * x
// N even: Tx = T_hi * x
-// Load T_lo.
-// Load C_lo - increment pointer to get SC_inv
-// - cant get all the way, do an add later.
-//
-(p0) add table_ptr2 = 569, table_ptr2 ;;
-}
//
// N even: Tx1 = Tx + 1
// N odd: Cx1 = 1 - Cx
//
+
{ .mfi
-(p0) ldfe SC_inv = [table_ptr2], 0
- nop.f 999
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p0) fmpy.s1 xsq = x, x
- nop.i 999
+ nop.m 999
+ fmpy.s1 xsq = x, x
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p11) fmpy.s1 Tx = T_hi, x
- nop.i 999 ;;
-}
-{ .mfi
- nop.m 999
-(p12) fmpy.s1 Cx = C_hi, x
- nop.i 999 ;;
+ nop.i 999
}
-{ .mfi
- nop.m 999
+;;
+
//
// N odd: Cx = C_hi * x
//
-(p0) fma.s1 P = P2_3, xsq, P2_2
- nop.i 999
-}
{ .mfi
- nop.m 999
+ nop.m 999
+(p12) fmpy.s1 Cx = C_hi, x
+ nop.i 999
+}
+;;
//
// N even and odd: P = P2_3 + P2_2 * xsq
//
+{ .mfi
+ nop.m 999
+ fma.s1 P = P2_3, xsq, P2_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
(p11) fadd.s1 Tx1 = Tx, f1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: D = C_hi - tanx
// N odd: D = T_hi + tanx
//
(p11) fmpy.s1 CORR = SC_inv, T_hi
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 Sx = SC_inv, x
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 Sx = SC_inv, x
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fmpy.s1 CORR = SC_inv, C_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fsub.s1 V_hi = f1, Cx
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p0) fma.s1 P = P, xsq, P2_1
- nop.i 999
+ nop.m 999
+ fma.s1 P = P, xsq, P2_1
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: P = P2_1 + P * xsq
//
(p11) fma.s1 V_hi = Tx, Tx1, f1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: Result = sgn_r * tail + T_hi (user rounding mode for C1)
// N odd: Result = sgn_r * tail + C_hi (user rounding mode for C1)
//
-(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
- nop.i 999 ;;
+ fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 CORR = CORR, c
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 CORR = CORR, c
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fnma.s1 V_hi = Cx,V_hi,f1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: V_hi = Tx * Tx1 + 1
// N odd: Cx1 = 1 - Cx * Cx1
//
-(p0) fmpy.s1 P = P, xsq
- nop.i 999
+ fmpy.s1 P = P, xsq
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: P = P * xsq
//
(p11) fmpy.s1 V_hi = V_hi, T_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: tail = P * tail + V_lo
//
(p11) fmpy.s1 T_hi = sgn_r, T_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
-(p0) fmpy.s1 CORR = CORR, sgn_r
- nop.i 999 ;;
+ nop.m 999
+ fmpy.s1 CORR = CORR, sgn_r
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fmpy.s1 V_hi = V_hi,C_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: V_hi = T_hi * V_hi
// N odd: V_hi = C_hi * V_hi
//
-(p0) fma.s1 tanx = P, x, x
- nop.i 999
+ fma.s1 tanx = P, x, x
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fnmpy.s1 C_hi = sgn_r, C_hi
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: V_lo = 1 - V_hi + C_hi
// N odd: V_lo = 1 - V_hi + T_hi
//
(p11) fadd.s1 CORR = CORR, T_lo
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fsub.s1 CORR = CORR, C_lo
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: tanx = x + x * P
// N even and odd: Sx = SC_inv * x
//
(p11) fsub.s1 D = C_hi, tanx
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fadd.s1 D = T_hi, tanx
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N odd: CORR = SC_inv * C_hi
// N even: CORR = SC_inv * T_hi
//
-(p0) fnma.s1 D = V_hi, D, f1
- nop.i 999 ;;
+ fnma.s1 D = V_hi, D, f1
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: D = 1 - V_hi * D
// N even and odd: CORR = CORR * c
//
-(p0) fma.s1 V_hi = V_hi, D, V_hi
- nop.i 999 ;;
+ fma.s1 V_hi = V_hi, D, V_hi
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: V_hi = V_hi + V_hi * D
// N even and odd: CORR = sgn_r * CORR
//
(p11) fnma.s1 V_lo = V_hi, C_hi, f1
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fnma.s1 V_lo = V_hi, T_hi, f1
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: CORR = CORR + T_lo
// N odd: CORR = CORR - C_lo
//
(p11) fma.s1 V_lo = tanx, V_hi, V_lo
- nop.i 999
+ tbit.nz p15, p0 = cot_flag, 0 // p15=1 if we compute cotl
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fnma.s1 V_lo = tanx, V_hi, V_lo
- nop.i 999 ;;
+ nop.i 999 ;;
}
+
{ .mfi
- nop.m 999
+ nop.m 999
+(p15) fms.s1 T_hi = f0, f0, T_hi // to correct result's sign for cotl
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p15) fms.s1 C_hi = f0, f0, C_hi // to correct result's sign for cotl
+ nop.i 999
+};;
+
+{ .mfi
+ nop.m 999
+(p15) fms.s1 sgn_r = f0, f0, sgn_r // to correct result's sign for cotl
+ nop.i 999
+};;
+
+{ .mfi
+ nop.m 999
//
// N even: V_lo = V_lo + V_hi * tanx
// N odd: V_lo = V_lo - V_hi * tanx
//
(p11) fnma.s1 V_lo = C_lo, V_hi, V_lo
- nop.i 999
+ nop.i 999
}
{ .mfi
- nop.m 999
+ nop.m 999
(p12) fnma.s1 V_lo = T_lo, V_hi, V_lo
- nop.i 999 ;;
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: V_lo = V_lo - V_hi * C_lo
// N odd: V_lo = V_lo - V_hi * T_lo
//
-(p0) fmpy.s1 V_lo = V_hi, V_lo
- nop.i 999 ;;
+ fmpy.s1 V_lo = V_hi, V_lo
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: V_lo = V_lo * V_hi
//
-(p0) fadd.s1 tail = V_hi, V_lo
- nop.i 999 ;;
+ fadd.s1 tail = V_hi, V_lo
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: tail = V_hi + V_lo
//
-(p0) fma.s1 tail = tail, P, V_lo
- nop.i 999 ;;
+ fma.s1 tail = tail, P, V_lo
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even: T_hi = sgn_r * T_hi
// N odd : C_hi = -sgn_r * C_hi
//
-(p0) fma.s1 tail = tail, Sx, CORR
- nop.i 999 ;;
+ fma.s1 tail = tail, Sx, CORR
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: tail = Sx * tail + CORR
//
-(p0) fma.s1 tail = V_hi, Sx, tail
- nop.i 999 ;;
+ fma.s1 tail = V_hi, Sx, tail
+ nop.i 999 ;;
}
{ .mfi
- nop.m 999
+ nop.m 999
//
// N even and odd: tail = Sx * V_hi + tail
//
(p11) fma.s0 Result = sgn_r, tail, T_hi
- nop.i 999
+ nop.i 999
}
{ .mfb
- nop.m 999
+ nop.m 999
(p12) fma.s0 Result = sgn_r, tail, C_hi
-(p0) br.ret.sptk b0 ;;
+ br.ret.sptk b0 ;; // Exit for 1/4 <= |r| < pi/4
}
-L(TANL_SPECIAL):
+TANL_DENORMAL:
+// Here if x denormal
{ .mfb
- nop.m 999
-(p0) fmpy.s0 Arg = Arg, f0
-(p0) br.ret.sptk b0 ;;
+ getf.exp GR_signexp_x = Norm_Arg // Get sign and exponent of x
+ nop.f 999
+ br.cond.sptk TANL_COMMON // Return to common code
}
+;;
+
+
+TANL_SPECIAL:
+TANL_UNSUPPORTED:
//
// Code for NaNs, Unsupporteds, Infs, or +/- zero ?
// Invalid raised for Infs and SNaNs.
//
-.endp tanl
-ASM_SIZE_DIRECTIVE(tanl)
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = f8, f8 // Save input for error call
+ tbit.nz p6, p7 = cot_flag, 0 // p6=1 if we compute cotl
+}
+;;
-// *******************************************************************
-// *******************************************************************
-// *******************************************************************
-//
-// Special Code to handle very large argument case.
-// Call int pi_by_2_reduce(&x,&r,&c)
-// for |arguments| >= 2**63
-// (Arg or x) is in f8
-// Address to save r and c as double
-// *******************************************************************
-// *******************************************************************
-// *******************************************************************
+{ .mfi
+ nop.m 999
+(p6) fclass.m p6, p7 = f8, 0x7 // Test for zero (cotl only)
+ nop.i 999
+}
+;;
+
+.pred.rel "mutex", p6, p7
+{ .mfi
+(p6) mov GR_Parameter_Tag = 225 // (cotl)
+(p6) frcpa.s0 f8, p0 = f1, f8 // cotl(+-0) = +-Inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p7) fmpy.s0 f8 = f8, f0
+(p7) br.ret.sptk b0
+}
+;;
+
+GLOBAL_IEEE754_END(tanl)
-.proc __libm_callout
-__libm_callout:
-L(TANL_ARG_TOO_LARGE):
+LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
+
+// (1)
{ .mfi
- add r50=-32,sp // Parameter: r address
- nop.f 0
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
};;
+
+// (2)
{ .mmi
- stfe [r50] = f0,16 // Clear Parameter r on stack
- add r49 = 16,sp // Parameter x address
+ stfe [GR_Parameter_Y] = f1,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
+ mov GR_SAVE_B0=b0 // Save b0
};;
+
.body
+// (3)
{ .mib
- stfe [r50] = f0,-16 // Clear Parameter c on stack
- nop.i 0
- nop.b 0
+ stfe [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
}
{ .mib
- stfe [r49] = Arg // Store Parameter x on stack
- nop.i 0
-(p0) br.call.sptk b0=__libm_pi_by_2_reduce# ;;
+ stfe [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+// (4)
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
+
+
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
//
-// Load 2^-2
+// Special Code to handle very large argument case.
+// Call int __libm_pi_by_2_reduce(x,r,c) for |arguments| >= 2**63
+// The interface is custom:
+// On input:
+// (Arg or x) is in f8
+// On output:
+// r is in f8
+// c is in f9
+// N is in r8
+// We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We
+// use this to eliminate save/restore of key fp registers in this calling
+// function.
//
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
+
+LOCAL_LIBM_ENTRY(__libm_callout)
+TANL_ARG_TOO_LARGE:
+.prologue
+{ .mfi
+ add table_ptr2 = 144, table_base // Point to 2^-2
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+;;
+
+// Load 2^-2, -2^-2
{ .mmi
-(p0) ldfe Arg =[r49],16
+ ldfps TWO_TO_NEG2, NEGTWO_TO_NEG2 = [table_ptr2]
+ setf.sig B_mask1 = bmask1 // Form mask to get 5 msb of r
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
//
-// Call argument reduction
+// Call argument reduction with x in f8
+// Returns with N in r8, r in f8, c in f9
+// Assumes f71-127 are preserved across the call
//
-(p0) ldfs TWO_TO_NEG2 = [table_ptr2],4
-// Get Arg off stack
-// Get r off stack - hi order part
-// Get c off stack - lo order part
-(p0) mov N_fix_gr = r8 ;;
-}
-{ .mmb
-(p0) ldfe r =[r50],16
-(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr2],4
- nop.b 999 ;;
+{ .mib
+ setf.sig B_mask2 = bmask2 // Form mask to form B from r
+ mov GR_SAVE_GP=gp // Save gp
+ br.call.sptk b0=__libm_pi_by_2_reduce#
}
+;;
+
+//
+// Is |r| < 2**(-2)
+//
{ .mfi
-(p0) ldfe c =[r50],-32
- nop.f 999
- nop.i 999 ;;
+ getf.sig sig_r = r // Extract significand of r
+ fcmp.lt.s1 p6, p0 = r, TWO_TO_NEG2
+ mov gp = GR_SAVE_GP // Restore gp
}
+;;
+
{ .mfi
-.restore sp
- add sp = 64,sp // Restore stack pointer
+ getf.exp exp_r = r // Extract signexp of r
+ nop.f 999
+ mov b0 = GR_SAVE_B0 // Restore return address
+}
+;;
+
//
-// Is |r| < 2**(-2)
+// Get N_fix_gr
//
-(p0) fcmp.lt.unc.s1 p6, p0 = r, TWO_TO_NEG2
-mov b0 = GR_SAVE_B0 // Restore return address
-};;
{ .mfi
- mov gp = GR_SAVE_GP // Restore gp
-(p6) fcmp.gt.unc.s1 p6, p0 = r, NEGTWO_TO_NEG2
- mov ar.pfs = GR_SAVE_PFS // Restore gp
-};;
+ mov N_fix_gr = r8
+(p6) fcmp.gt.unc.s1 p6, p0 = r, NEGTWO_TO_NEG2
+ mov ar.pfs = GR_SAVE_PFS // Restore pfs
+}
+;;
+
{ .mbb
- nop.m 999
-(p6) br.cond.spnt L(TANL_SMALL_R)
-(p0) br.cond.sptk L(TANL_NORMAL_R) ;;
+ nop.m 999
+(p6) br.cond.spnt TANL_SMALL_R // Branch if |r| < 1/4
+ br.cond.sptk TANL_NORMAL_R // Branch if 1/4 <= |r| < pi/4
}
+;;
-.endp __libm_callout
-ASM_SIZE_DIRECTIVE(__libm_callout)
+LOCAL_LIBM_END(__libm_callout)
.type __libm_pi_by_2_reduce#,@function
.global __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/s_trunc.S b/sysdeps/ia64/fpu/s_trunc.S
index 0be91200e3..b9ad03b5a8 100644
--- a/sysdeps/ia64/fpu/s_trunc.S
+++ b/sysdeps/ia64/fpu/s_trunc.S
@@ -1,11 +1,10 @@
.file "trunc.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,33 +20,28 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-.align 32
-.global trunc#
-
-.section .text
-.proc trunc#
-.align 32
-
// History
//==============================================================
-// 7/7/00: Created
+// 07/07/00 Created
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance and reduced code size
//==============================================================
// API
@@ -55,25 +49,28 @@
// double trunc(double x)
//==============================================================
-#include "libm_support.h"
+// general input registers:
+// r14 - r18
-// general input registers:
-TRUNC_GR_FFFF = r14
-TRUNC_GR_signexp = r15
-TRUNC_GR_exponent = r16
-TRUNC_GR_expmask = r17
-TRUNC_GR_bigexp = r18
+rExpBias = r14
+rSignexp = r15
+rExp = r16
+rExpMask = r17
+rBigexp = r18
// floating-point registers:
-// f8, f9, f11, f12
+// f8 - f10
+
+fXtruncInt = f9
+fNormX = f10
-// predicate registers used:
-// p6, p7, p8, p9, p10, p11
+// predicate registers used:
+// p6, p7
// Overview of operation
//==============================================================
// double trunc(double x)
-// Return an integer value (represented as a double) less than or
+// Return an integer value (represented as a double) less than or
// equal to x in magnitude.
// This is x rounded toward zero to an integral value.
//==============================================================
@@ -97,105 +94,73 @@ TRUNC_GR_bigexp = r18
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-trunc:
+.section .text
+GLOBAL_LIBM_ENTRY(trunc)
{ .mfi
- getf.exp TRUNC_GR_signexp = f8
- fcvt.fx.trunc.s1 f9 = f8
- addl TRUNC_GR_bigexp = 0x10033, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fcvt.fx.trunc.s1 fXtruncInt = f8 // Convert to int in significand
+ addl rBigexp = 0x10033, r0 // Set exponent at which x is already an integer
}
{ .mfi
- mov TRUNC_GR_FFFF = 0x0FFFF
- fnorm.d f11 = f8
- mov TRUNC_GR_expmask = 0x1FFFF
-};;
-// get the exponent of x
-// convert x to integer in signficand of f9
-// Normalize x - this will raise invalid on SNaNs, the
-// denormal operand flag - and possibly a spurious U flag
-// get exponent only mask (will exclude sign bit)
+ mov rExpBias = 0x0FFFF // Form exponent bias
+ fnorm.s1 fNormX = f8 // Normalize input
+ mov rExpMask = 0x1FFFF // Form exponent mask
+}
+;;
{ .mfi
nop.m 0
- fclass.m p7,p8 = f8, 0x0b
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
nop.i 0
}
-{ .mfi
- nop.m 0
- fcmp.eq.unc.s1 p9,p0 = f8,f0
- nop.i 0
-};;
-// fclass to set p7 if unnorm
-{ .mmi
- and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
-(p8) cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
-(p8) cmp.ne.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
-};;
-// Get the exponent of x
-// Test if exponent such that result already an integer
-// Test if x < 0
-{ .mmi
-(p9) cmp.eq.andcm p10,p11 = r0, r0
-(p6) cmp.lt.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
- nop.i 0
-};;
-// If -1 < x < 0, set p6, turn off p10 and p11, and set result to -0.0
-{ .mfb
-(p6) cmp.eq.andcm p10,p11 = r0, r0
-(p6) fmerge.s f8 = f8, f0
- nop.b 0
-};;
-// If not a unnorm, set p10 if x already is a big int, nan, or inf?
-// If not a unnorm, set p10 if x already is a big int, nan, or inf?
-.pred.rel "mutex",p10,p11
+;;
+
{ .mfb
nop.m 0
-(p11) fcvt.xf f8 = f9
- nop.b 0
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p7) br.cond.spnt TRUNC_UNORM // Branch if x unorm
}
+;;
+
+TRUNC_COMMON:
+// Return here from TRUNC_UNORM
{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
+}
+;;
+
+{ .mfi
+ cmp.lt p6,p0 = rExp, rExpBias // Is |x| < 1?
+ fcvt.xf f8 = fXtruncInt // Result, assume 1 <= |x| < 2^52
+ cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^52?
+}
+;;
+
+// We must correct result if |x| < 1, or |x| >= 2^52
+.pred.rel "mutex",p6,p7
+{ .mfi
nop.m 0
-(p10) fma.d.s1 f8 = f11,f1,f0
-(p8) br.ret.sptk b0
-};;
-// If not a unnorm and not an big int, nan,or +/-inf convert signficand
-// back to f8.
-// If not a unorm and a big int, nan, or +/-inf, return fnorm'd x
-// If not a unorm, Return
-// If unnorm, get the exponent again - perhaps it wasn't a denorm.
-{ .mfb
-(p7) getf.exp TRUNC_GR_signexp = f11
-(p7) fcvt.fx.trunc.s1 f12 = f11
- nop.b 0
-};;
-{ .mfb
- and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
- fcmp.lt.unc.s1 p9,p0 = f8,f0
- nop.b 0
-};;
-{ .mfb
- cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
- nop.f 0
- nop.b 0
-};;
-// If a unnorm, check to see if value is already a big int.
+(p6) fmerge.s f8 = fNormX, f0 // If |x| < 1, result sgn(x)*0
+ nop.i 0
+}
{ .mfb
- nop.m 0
-(p11) fcvt.xf f8 = f12
- nop.b 0
+ nop.m 0
+(p7) fma.d.s0 f8 = fNormX, f1, f0 // If |x| >= 2^52, result x
+ br.ret.sptk b0 // Exit main path
}
-{ .mfi
- nop.m 0
-(p10) fma.d.s1 f8 = f11,f1,f0
- nop.i 0
-};;
+;;
+
+
+TRUNC_UNORM:
+// Here if x unorm
{ .mfb
- nop.m 0
-(p9) fmerge.ns f8 = f1,f8
- br.ret.sptk b0
-};;
-// If so return it. Otherwise, return (fcvt.xf(fcvt.fx.trunc(x)))
-// Make sure the result is negative if it should be - that is
-// negative(denormal) -> -0.
-.endp trunc
-ASM_SIZE_DIRECTIVE(trunc)
+ getf.exp rSignexp = fNormX // Recompute signexp from normalized x
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk TRUNC_COMMON // Return to main path
+}
+;;
+
+GLOBAL_LIBM_END(trunc)
diff --git a/sysdeps/ia64/fpu/s_truncf.S b/sysdeps/ia64/fpu/s_truncf.S
index 0ac4181209..ff40bc7101 100644
--- a/sysdeps/ia64/fpu/s_truncf.S
+++ b/sysdeps/ia64/fpu/s_truncf.S
@@ -1,11 +1,10 @@
.file "truncf.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,33 +20,28 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-.align 32
-.global truncf#
-
-.section .text
-.proc truncf#
-.align 32
-
// History
//==============================================================
-// 7/7/00: Created
+// 07/07/00 Created
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance and reduced code size
//==============================================================
// API
@@ -55,25 +49,28 @@
// float truncf(float x)
//==============================================================
-#include "libm_support.h"
+// general input registers:
+// r14 - r18
-// general input registers:
-TRUNC_GR_FFFF = r14
-TRUNC_GR_signexp = r15
-TRUNC_GR_exponent = r16
-TRUNC_GR_expmask = r17
-TRUNC_GR_bigexp = r18
+rExpBias = r14
+rSignexp = r15
+rExp = r16
+rExpMask = r17
+rBigexp = r18
// floating-point registers:
-// f8, f9, f11, f12
+// f8 - f10
+
+fXtruncInt = f9
+fNormX = f10
-// predicate registers used:
-// p6, p7, p8, p9, p10, p11
+// predicate registers used:
+// p6, p7
// Overview of operation
//==============================================================
// float truncf(float x)
-// Return an integer value (represented as a float) less than or
+// Return an integer value (represented as a float) less than or
// equal to x in magnitude.
// This is x rounded toward zero to an integral value.
//==============================================================
@@ -97,105 +94,73 @@ TRUNC_GR_bigexp = r18
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-truncf:
+.section .text
+GLOBAL_LIBM_ENTRY(truncf)
{ .mfi
- getf.exp TRUNC_GR_signexp = f8
- fcvt.fx.trunc.s1 f9 = f8
- addl TRUNC_GR_bigexp = 0x10016, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fcvt.fx.trunc.s1 fXtruncInt = f8 // Convert to int in significand
+ addl rBigexp = 0x10016, r0 // Set exponent at which x is integer
}
{ .mfi
- mov TRUNC_GR_FFFF = 0x0FFFF
- fnorm.s f11 = f8
- mov TRUNC_GR_expmask = 0x1FFFF
-};;
-// get the exponent of x
-// convert x to integer in signficand of f9
-// Normalize x - this will raise invalid on SNaNs, the
-// denormal operand flag - and possibly a spurious U flag
-// get exponent only mask (will exclude sign bit)
+ mov rExpBias = 0x0FFFF // Form exponent bias
+ fnorm.s1 fNormX = f8 // Normalize input
+ mov rExpMask = 0x1FFFF // Form exponent mask
+}
+;;
{ .mfi
nop.m 0
- fclass.m p7,p8 = f8, 0x0b
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
nop.i 0
}
-{ .mfi
- nop.m 0
- fcmp.eq.unc.s1 p9,p0 = f8,f0
- nop.i 0
-};;
-// fclass to set p7 if unnorm
-{ .mmi
- and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
-(p8) cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
-(p8) cmp.ne.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
-};;
-// Get the exponent of x
-// Test if exponent such that result already an integer
-// Test if x < 0
-{ .mmi
-(p9) cmp.eq.andcm p10,p11 = r0, r0
-(p6) cmp.lt.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
- nop.i 0
-};;
-// If -1 < x < 0, set p6, turn off p10 and p11, and set result to -0.0
-{ .mfb
-(p6) cmp.eq.andcm p10,p11 = r0, r0
-(p6) fmerge.s f8 = f8, f0
- nop.b 0
-};;
-// If not a unnorm, set p10 if x already is a big int, nan, or inf?
-// If not a unnorm, set p10 if x already is a big int, nan, or inf?
-.pred.rel "mutex",p10,p11
+;;
+
{ .mfb
nop.m 0
-(p11) fcvt.xf f8 = f9
- nop.b 0
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p7) br.cond.spnt TRUNC_UNORM // Branch if x unorm
}
+;;
+
+TRUNC_COMMON:
+// Return here from TRUNC_UNORM
{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
+}
+;;
+
+{ .mfi
+ cmp.lt p6,p0 = rExp, rExpBias // Is |x| < 1?
+ fcvt.xf f8 = fXtruncInt // Result, assume 1 <= |x| < 2^23
+ cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^23?
+}
+;;
+
+// We must correct result if |x| < 1, or |x| >= 2^23
+.pred.rel "mutex",p6,p7
+{ .mfi
nop.m 0
-(p10) fma.s.s1 f8 = f11,f1,f0
-(p8) br.ret.sptk b0
-};;
-// If not a unnorm and not an big int, nan,or +/-inf convert signficand
-// back to f8.
-// If not a unorm and a big int, nan, or +/-inf, return fnorm'd x
-// If not a unorm, Return
-// If unnorm, get the exponent again - perhaps it wasn't a denorm.
-{ .mfb
-(p7) getf.exp TRUNC_GR_signexp = f11
-(p7) fcvt.fx.trunc.s1 f12 = f11
- nop.b 0
-};;
-{ .mfb
- and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
- fcmp.lt.unc.s1 p9,p0 = f8,f0
- nop.b 0
-};;
-{ .mfb
- cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
- nop.f 0
- nop.b 0
-};;
-// If a unnorm, check to see if value is already a big int.
+(p6) fmerge.s f8 = fNormX, f0 // If |x| < 1, result sgn(x)*0
+ nop.i 0
+}
{ .mfb
- nop.m 0
-(p11) fcvt.xf f8 = f12
- nop.b 0
+ nop.m 0
+(p7) fma.s.s0 f8 = fNormX, f1, f0 // If |x| >= 2^23, result x
+ br.ret.sptk b0 // Exit main path
}
-{ .mfi
- nop.m 0
-(p10) fma.s.s1 f8 = f11,f1,f0
- nop.i 0
-};;
+;;
+
+
+TRUNC_UNORM:
+// Here if x unorm
{ .mfb
- nop.m 0
-(p9) fmerge.ns f8 = f1,f8
- br.ret.sptk b0
-};;
-// If so return it. Otherwise, return (fcvt.xf(fcvt.fx.trunc(x)))
-// Make sure the result is negative if it should be - that is
-// negative(denormal) -> -0.
-.endp truncf
-ASM_SIZE_DIRECTIVE(truncf)
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk TRUNC_COMMON // Return to main path
+}
+;;
+
+GLOBAL_LIBM_END(truncf)
diff --git a/sysdeps/ia64/fpu/s_truncl.S b/sysdeps/ia64/fpu/s_truncl.S
index 91bf96ce90..1afa19ba2b 100644
--- a/sysdeps/ia64/fpu/s_truncl.S
+++ b/sysdeps/ia64/fpu/s_truncl.S
@@ -1,11 +1,10 @@
.file "truncl.s"
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
-//
-// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
-// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
-// Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -21,59 +20,57 @@
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-.align 32
-.global truncl#
-
-.section .text
-.proc truncl#
-.align 32
-
// History
//==============================================================
-// 7/7/00: Created
+// 07/07/00 Created
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance and reduced code size
//==============================================================
// API
//==============================================================
-// long double truncl(float x)
+// long double truncl(long double x)
//==============================================================
-#include "libm_support.h"
+// general input registers:
+// r14 - r18
-// general input registers:
-TRUNC_GR_FFFF = r14
-TRUNC_GR_signexp = r15
-TRUNC_GR_exponent = r16
-TRUNC_GR_expmask = r17
-TRUNC_GR_bigexp = r18
+rExpBias = r14
+rSignexp = r15
+rExp = r16
+rExpMask = r17
+rBigexp = r18
// floating-point registers:
-// f8, f9, f11, f12
+// f8 - f10
-// predicate registers used:
-// p6, p7, p8, p9, p10, p11
+fXtruncInt = f9
+fNormX = f10
+
+// predicate registers used:
+// p6, p7
// Overview of operation
//==============================================================
// long double truncl(long double x)
-// Return an integer value (represented as a long double) less than or
+// Return an integer value (represented as a long double) less than or
// equal to x in magnitude.
// This is x rounded toward zero to an integral value.
//==============================================================
@@ -97,105 +94,73 @@ TRUNC_GR_bigexp = r18
// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-truncl:
+.section .text
+GLOBAL_LIBM_ENTRY(truncl)
{ .mfi
- getf.exp TRUNC_GR_signexp = f8
- fcvt.fx.trunc.s1 f9 = f8
- addl TRUNC_GR_bigexp = 0x1003e, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fcvt.fx.trunc.s1 fXtruncInt = f8 // Convert to int in significand
+ addl rBigexp = 0x1003e, r0 // Set exponent at which x is integer
}
{ .mfi
- mov TRUNC_GR_FFFF = 0x0FFFF
- fnorm f11 = f8
- mov TRUNC_GR_expmask = 0x1FFFF
-};;
-// get the exponent of x
-// convert x to integer in signficand of f9
-// Normalize x - this will raise invalid on SNaNs, the
-// denormal operand flag - and possibly a spurious U flag
-// get exponent only mask (will exclude sign bit)
+ mov rExpBias = 0x0FFFF // Form exponent bias
+ fnorm.s1 fNormX = f8 // Normalize input
+ mov rExpMask = 0x1FFFF // Form exponent mask
+}
+;;
{ .mfi
nop.m 0
- fclass.m p7,p8 = f8, 0x0b
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
nop.i 0
}
-{ .mfi
- nop.m 0
- fcmp.eq.unc.s1 p9,p0 = f8,f0
- nop.i 0
-};;
-// fclass to set p7 if unnorm
-{ .mmi
- and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
-(p8) cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
-(p8) cmp.ne.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
-};;
-// Get the exponent of x
-// Test if exponent such that result already an integer
-// Test if x < 0
-{ .mmi
-(p9) cmp.eq.andcm p10,p11 = r0, r0
-(p6) cmp.lt.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
- nop.i 0
-};;
-// If -1 < x < 0, set p6, turn off p10 and p11, and set result to -0.0
-{ .mfb
-(p6) cmp.eq.andcm p10,p11 = r0, r0
-(p6) fmerge.s f8 = f8, f0
- nop.b 0
-};;
-// If not a unnorm, set p10 if x already is a big int, nan, or inf?
-// If not a unnorm, set p10 if x already is a big int, nan, or inf?
-.pred.rel "mutex",p10,p11
+;;
+
{ .mfb
nop.m 0
-(p11) fcvt.xf f8 = f9
- nop.b 0
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+(p7) br.cond.spnt TRUNC_UNORM // Branch if x unorm
}
+;;
+
+TRUNC_COMMON:
+// Return here from TRUNC_UNORM
{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
+}
+;;
+
+{ .mfi
+ cmp.lt p6,p0 = rExp, rExpBias // Is |x| < 1?
+ fcvt.xf f8 = fXtruncInt // Result, assume 1 <= |x| < 2^63
+ cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^63?
+}
+;;
+
+// We must correct result if |x| < 1, or |x| >= 2^63
+.pred.rel "mutex",p6,p7
+{ .mfi
nop.m 0
-(p10) fma.s1 f8 = f11,f1,f0
-(p8) br.ret.sptk b0
-};;
-// If not a unnorm and not an big int, nan,or +/-inf convert signficand
-// back to f8.
-// If not a unorm and a big int, nan, or +/-inf, return fnorm'd x
-// If not a unorm, Return
-// If unnorm, get the exponent again - perhaps it wasn't a denorm.
-{ .mfb
-(p7) getf.exp TRUNC_GR_signexp = f11
-(p7) fcvt.fx.trunc.s1 f12 = f11
- nop.b 0
-};;
-{ .mfb
- and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
- fcmp.lt.unc.s1 p9,p0 = f8,f0
- nop.b 0
-};;
-{ .mfb
- cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
- nop.f 0
- nop.b 0
-};;
-// If a unnorm, check to see if value is already a big int.
+(p6) fmerge.s f8 = fNormX, f0 // If |x| < 1, result sgn(x)*0
+ nop.i 0
+}
{ .mfb
- nop.m 0
-(p11) fcvt.xf f8 = f12
- nop.b 0
+ nop.m 0
+(p7) fma.s0 f8 = fNormX, f1, f0 // If |x| >= 2^63, result x
+ br.ret.sptk b0 // Exit main path
}
-{ .mfi
- nop.m 0
-(p10) fma.s1 f8 = f11,f1,f0
- nop.i 0
-};;
+;;
+
+
+TRUNC_UNORM:
+// Here if x unorm
{ .mfb
- nop.m 0
-(p9) fmerge.ns f8 = f1,f8
- br.ret.sptk b0
-};;
-// If so return it. Otherwise, return (fcvt.xf(fcvt.fx.trunc(x)))
-// Make sure the result is negative if it should be - that is
-// negative(denormal) -> -0.
-.endp truncl
-ASM_SIZE_DIRECTIVE(truncl)
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk TRUNC_COMMON // Return to main path
+}
+;;
+
+GLOBAL_LIBM_END(truncl)
diff --git a/sysdeps/mips/Makefile b/sysdeps/mips/Makefile
index 849785a550..49ad3e1b91 100644
--- a/sysdeps/mips/Makefile
+++ b/sysdeps/mips/Makefile
@@ -6,3 +6,7 @@ endif
ifeq ($(subdir),setjmp)
sysdep_routines += setjmp_aux
endif
+
+ifeq ($(subdir),rt)
+librt-sysdep_routines += rt-sysdep
+endif
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_ceil.S b/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
index d211314bbf..22cf76e54c 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
@@ -26,20 +26,12 @@
TWO52.0:
.long 0x43300000
.long 0
- .type NEGZERO.0,@object
- .size NEGZERO.0,8
-NEGZERO.0:
- .long 0x80000000
- .long 0
.section .rodata.cst8,"aM",@progbits,8
.align 3
.LC0: /* 2**52 */
.long 0x43300000
.long 0
-.LC1: /* -0.0 */
- .long 0x80000000
- .long 0
.section ".text"
ENTRY (__ceil)
@@ -64,27 +56,18 @@ ENTRY (__ceil)
ble- cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
-#ifdef SHARED
- mflr r11
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC1@got(10)
- mtlr r11
- lfd fp1,0(r9)
-#else
- lis r9,.LC1@ha
- lfd fp1,.LC1@l(r9)
-#endif
blr
END (__ceil)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
index 4439dc2338..e7a72186c9 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
@@ -20,26 +20,16 @@
#include <sysdep.h>
.section .rodata
- .align 3
- .type TWO52.0,@object
- .size TWO52.0,8
-TWO52.0:
- .long 0x43300000
- .long 0
- .type NEGZERO.0,@object
- .size NEGZERO.0,8
-NEGZERO.0:
- .long 0x80000000
- .long 0
+ .align 2
+ .type TWO23.0,@object
+ .size TWO23.0,4
+TWO23.0:
+ .long 0x4b000000
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**23 */
- .long 0x41600000
- .long 0
-.LC1: /* -0.0 */
- .long 0x80000000
- .long 0
+ .long 0x4b000000
.section ".text"
ENTRY (__ceilf)
@@ -50,10 +40,10 @@ ENTRY (__ceilf)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
@@ -64,27 +54,18 @@ ENTRY (__ceilf)
ble- cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
-#ifdef SHARED
- mflr r11
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC1@got(10)
- mtlr r11
- lfd fp1,0(r9)
-#else
- lis r9,.LC1@ha
- lfd fp1,.LC1@l(r9)
-#endif
blr
END (__ceilf)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_floor.S b/sysdeps/powerpc/powerpc32/fpu/s_floor.S
index 143f907b18..812ea7ced2 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_floor.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_floor.S
@@ -56,15 +56,16 @@ ENTRY (__floor)
ble- cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- fmr fp1,fp12 /* x must be +0.0 for the 0.0 case. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_floorf.S b/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
index 154bc30ff7..ead41d4657 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
@@ -20,18 +20,16 @@
#include <sysdep.h>
.section .rodata
- .align 3
+ .align 2
.type TWO23.0,@object
- .size TWO23.0,8
+ .size TWO23.0,4
TWO23.0:
- .long 0x41600000
- .long 0
+ .long 0x4b000000
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**23 */
- .long 0x41600000
- .long 0
+ .long 0x4b000000
.section ".text"
ENTRY (__floorf)
@@ -42,10 +40,10 @@ ENTRY (__floorf)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
@@ -56,15 +54,16 @@ ENTRY (__floorf)
ble- cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- fmr fp1,fp12 /* x must be +0.0 for the 0.0 case. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index dee25f204f..fa02dbc59c 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -57,13 +57,14 @@ ENTRY (__rint)
bng- cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
- blr
+ fabs fp1,fp1 /* if (x == 0.0) */
+ blr /* x = 0.0; */
.L4:
bnllr- cr6 /* if (x < 0.0) */
- fsub fp1,fp13,fp1 /* x = TWO52 - x; */
- fsub fp0,fp1,fp13 /* x = - (x - TWO52); */
- fneg fp1,fp0
- blr
+ fsub fp1,fp1,fp13 /* x-= TWO52; */
+ fadd fp1,fp1,fp13 /* x+= TWO52; */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ blr /* x = -0.0; */
END (__rint)
weak_alias (__rint, rint)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index cebf6423af..7825951268 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -21,18 +21,16 @@
.section .rodata
- .align 3
+ .align 2
.type TWO23.0,@object
- .size TWO23.0,8
+ .size TWO23.0,4
TWO23.0:
- .long 0x41600000
- .long 0
+ .long 0x4b000000
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**23 */
- .long 0x41600000
- .long 0
+ .long 0x4b000000
.section ".text"
ENTRY (__rintf)
@@ -42,10 +40,10 @@ ENTRY (__rintf)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
@@ -55,13 +53,14 @@ ENTRY (__rintf)
bng- cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
- blr
+ fabs fp1,fp1 /* if (x == 0.0) */
+ blr /* x = 0.0; */
.L4:
bnllr- cr6 /* if (x < 0.0) */
- fsubs fp1,fp13,fp1 /* x = TWO23 - x; */
- fsubs fp0,fp1,fp13 /* x = - (x - TWO23); */
- fneg fp1,fp0
- blr
+ fsubs fp1,fp1,fp13 /* x-= TWO23; */
+ fadds fp1,fp1,fp13 /* x+= TWO23; */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ blr /* x = -0.0; */
END (__rintf)
weak_alias (__rintf, rintf)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_round.S b/sysdeps/powerpc/powerpc32/fpu/s_round.S
index 13fc74f001..39eab232f6 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_round.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_round.S
@@ -31,11 +31,6 @@ TWO52.0:
POINTFIVE.0:
.long 0x3fe00000
.long 0
- .type NEGZERO.0,@object
- .size NEGZERO.0,8
-NEGZERO.0:
- .long 0x80000000
- .long 0
.section .rodata.cst8,"aM",@progbits,8
.align 3
@@ -45,9 +40,6 @@ NEGZERO.0:
.LC1: /* 0.5 */
.long 0x3fe00000
.long 0
-.LC2: /* -0.0 */
- .long 0x80000000
- .long 0
/* double [fp1] round (double x [fp1])
IEEE 1003.1 round function. IEEE specifies "round to the nearest
@@ -89,7 +81,8 @@ ENTRY (__round)
fadd fp1,fp1,fp10 /* x+= 0.5; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
.L4:
@@ -97,16 +90,10 @@ ENTRY (__round)
bge- cr6,.L9 /* if (x < 0.0) */
fsub fp1,fp9,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
-#ifdef SHARED
- lwz r9,.LC2@got(10)
- lfd fp1,0(r9)
-#else
- lis r9,.LC2@ha
- lfd fp1,.LC2@l(r9)
-#endif
blr
END (__round)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
index ea8aaf3add..a9b42f0170 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -20,34 +20,22 @@
#include <sysdep.h>
.section .rodata
- .align 3
+ .align 2
.type TWO23.0,@object
- .size TWO23.0,8
+ .size TWO23.0,4
TWO23.0:
- .long 0x43300000
- .long 0
+ .long 0x4b000000
.type POINTFIVE.0,@object
- .size POINTFIVE.0,8
+ .size POINTFIVE.0,4
POINTFIVE.0:
- .long 0x3fe00000
- .long 0
- .type NEGZERO.0,@object
- .size NEGZERO.0,8
-NEGZERO.0:
- .long 0x80000000
- .long 0
+ .long 0x3f000000
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**23 */
- .long 0x41600000
- .long 0
+ .long 0x4b000000
.LC1: /* 0.5 */
- .long 0x3fe00000
- .long 0
-.LC2: /* -0.0 */
- .long 0x80000000
- .long 0
+ .long 0x3f000000
/* float [fp1] roundf (float x [fp1])
IEEE 1003.1 round function. IEEE specifies "round to the nearest
@@ -67,10 +55,10 @@ ENTRY (__roundf )
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
@@ -80,16 +68,17 @@ ENTRY (__roundf )
mtfsfi 7,1 /* Set rounding mode toward 0. */
#ifdef SHARED
lwz r9,.LC1@got(10)
- lfd fp10,0(r9)
+ lfs fp10,0(r9)
#else
lis r9,.LC1@ha
- lfd fp10,.LC1@l(r9)
+ lfs fp10,.LC1@l(r9)
#endif
ble- cr6,.L4
fadds fp1,fp1,fp10 /* x+= 0.5; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
.L4:
@@ -97,16 +86,10 @@ ENTRY (__roundf )
bge- cr6,.L9 /* if (x < 0.0) */
fsubs fp1,fp9,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
-#ifdef SHARED
- lwz r9,.LC2@got(10)
- lfd fp1,0(r9)
-#else
- lis r9,.LC2@ha
- lfd fp1,.LC2@l(r9)
-#endif
blr
END (__roundf)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_trunc.S b/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
index a4be651f8c..08acc00cb2 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
@@ -26,20 +26,12 @@
TWO52.0:
.long 0x43300000
.long 0
- .type NEGZERO.0,@object
- .size NEGZERO.0,8
-NEGZERO.0:
- .long 0x80000000
- .long 0
.section .rodata.cst8,"aM",@progbits,8
.align 3
.LC0: /* 2**52 */
.long 0x43300000
.long 0
-.LC1: /* -0.0 */
- .long 0x80000000
- .long 0
/* double [fp1] trunc (double x [fp1])
IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer
@@ -70,23 +62,18 @@ ENTRY (__trunc)
ble- cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous truncing mode. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
-#ifdef SHARED
- lwz r9,.LC1@got(10)
- lfd fp1,0(r9)
-#else
- lis r9,.LC1@ha
- lfd fp1,.LC1@l(r9)
-#endif
blr
END (__trunc)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_truncf.S b/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
index 9a8dae931b..3b6fe731b4 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
@@ -20,26 +20,16 @@
#include <sysdep.h>
.section .rodata
- .align 3
+ .align 2
.type TWO23.0,@object
- .size TWO23.0,8
+ .size TWO23.0,4
TWO23.0:
- .long 0x41600000
- .long 0
- .type NEGZERO.0,@object
- .size NEGZERO.0,8
-NEGZERO.0:
- .long 0x80000000
- .long 0
+ .long 0x4b000000
- .section .rodata.cst8,"aM",@progbits,8
- .align 3
+ .section .rodata.cst4,"aM",@progbits,4
+ .align 2
.LC0: /* 2**23 */
- .long 0x41600000
- .long 0
-.LC1: /* -0.0 */
- .long 0x80000000
- .long 0
+ .long 0x4b000000
/* float [fp1] truncf (float x [fp1])
IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer
@@ -56,10 +46,10 @@ ENTRY (__truncf)
mflr r10
lwz r9,.LC0@got(10)
mtlr r11
- lfd fp13,0(r9)
+ lfs fp13,0(r9)
#else
lis r9,.LC0@ha
- lfd fp13,.LC0@l(r9)
+ lfs fp13,.LC0@l(r9)
#endif
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
@@ -70,23 +60,18 @@ ENTRY (__truncf)
ble- cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous truncing mode. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
-#ifdef SHARED
- lwz r9,.LC1@got(10)
- lfd fp1,0(r9)
-#else
- lis r9,.LC1@ha
- lfd fp1,.LC1@l(r9)
-#endif
blr
END (__truncf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceil.S b/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
index a1bfaa70c2..9809e24d26 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
@@ -22,11 +22,9 @@
.section ".toc","aw"
.LC0: /* 2**52 */
.tc FD_43300000_0[TC],0x4330000000000000
-.LC1: /* -0.0 */
- .tc FD_80000000_0[TC],0x8000000000000000
.section ".text"
-ENTRY (__ceil)
+EALIGN (__ceil, 4, 0)
CALL_MCOUNT 0
mffs fp11 /* Save current FPU rounding mode. */
lfd fp13,.LC0@toc(2)
@@ -39,17 +37,18 @@ ENTRY (__ceil)
ble- cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- lfd fp1,.LC1@toc(2) /* x must be -0.0 for the 0.0 case. */
blr
END (__ceil)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
index 42eb274389..1ccd133b66 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
@@ -21,15 +21,13 @@
.section ".toc","aw"
.LC0: /* 2**23 */
- .tc FD_41600000_0[TC],0x4160000000000000
-.LC1: /* -0.0 */
- .tc FD_80000000_0[TC],0x8000000000000000
+ .tc FD_4b000000_0[TC],0x4b00000000000000
.section ".text"
-ENTRY (__ceilf)
+EALIGN (__ceilf, 4, 0)
CALL_MCOUNT 0
mffs fp11 /* Save current FPU rounding mode. */
- lfd fp13,.LC0@toc(2)
+ lfs fp13,.LC0@toc(2)
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
@@ -39,17 +37,18 @@ ENTRY (__ceilf)
ble- cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- lfd fp1,.LC1@toc(2) /* x must be -0.0 for the 0.0 case. */
blr
END (__ceilf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floor.S b/sysdeps/powerpc/powerpc64/fpu/s_floor.S
index 80cbdc5709..183423c2b3 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_floor.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_floor.S
@@ -24,7 +24,7 @@
.tc FD_43300000_0[TC],0x4330000000000000
.section ".text"
-ENTRY (__floor)
+EALIGN (__floor, 4, 0)
CALL_MCOUNT 0
mffs fp11 /* Save current FPU rounding mode. */
lfd fp13,.LC0@toc(2)
@@ -37,15 +37,16 @@ ENTRY (__floor)
ble- cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- fmr fp1,fp12 /* x must be +0.0 for the 0.0 case. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
index 20cbb15ebd..bcdbf7823d 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
@@ -21,13 +21,13 @@
.section ".toc","aw"
.LC0: /* 2**23 */
- .tc FD_41600000_0[TC],0x4160000000000000
+ .tc FD_4b000000_0[TC],0x4b00000000000000
.section ".text"
-ENTRY (__floorf)
+EALIGN (__floorf, 4, 0)
CALL_MCOUNT 0
mffs fp11 /* Save current FPU rounding mode. */
- lfd fp13,.LC0@toc(2)
+ lfs fp13,.LC0@toc(2)
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
@@ -37,15 +37,16 @@ ENTRY (__floorf)
ble- cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- fmr fp1,fp12 /* x must be +0.0 for the 0.0 case. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
index 79e807269d..0c0e0ba67b 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
@@ -27,7 +27,7 @@
.tc FD_43300000_0[TC],0x4330000000000000
.section ".text"
-ENTRY (__rint)
+EALIGN (__rint, 4, 0)
CALL_MCOUNT 0
lfd fp13,.LC0@toc(2)
fabs fp0,fp1
@@ -38,13 +38,14 @@ ENTRY (__rint)
bng- cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
- blr
+ fabs fp1,fp1 /* if (x == 0.0) */
+ blr /* x = 0.0; */
.L4:
bnllr- cr6 /* if (x < 0.0) */
- fsub fp1,fp13,fp1 /* x = TWO52 - x; */
- fsub fp0,fp1,fp13 /* x = - (x - TWO52); */
- fneg fp1,fp0
- blr
+ fsub fp1,fp1,fp13 /* x-= TWO52; */
+ fadd fp1,fp1,fp13 /* x+= TWO52; */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ blr /* x = -0.0; */
END (__rint)
weak_alias (__rint, rint)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index eb34dd5e77..e4fa9ba2e6 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -21,12 +21,12 @@
.section ".toc","aw"
.LC0: /* 2**23 */
- .tc FD_41600000_0[TC],0x4160000000000000
+ .tc FD_4b000000_0[TC],0x4b00000000000000
.section ".text"
-ENTRY (__rintf)
+EALIGN (__rintf, 4, 0)
CALL_MCOUNT 0
- lfd fp13,.LC0@toc(2)
+ lfs fp13,.LC0@toc(2)
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
@@ -35,13 +35,14 @@ ENTRY (__rintf)
bng- cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
- blr
+ fabs fp1,fp1 /* if (x == 0.0) */
+ blr /* x = 0.0; */
.L4:
bnllr- cr6 /* if (x < 0.0) */
- fsubs fp1,fp13,fp1 /* x = TWO23 - x; */
- fsubs fp0,fp1,fp13 /* x = - (x - TWO23); */
- fneg fp1,fp0
- blr
+ fsubs fp1,fp1,fp13 /* x-= TWO23; */
+ fadds fp1,fp1,fp13 /* x+= TWO23; */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ blr /* x = -0.0; */
END (__rintf)
weak_alias (__rintf, rintf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_round.S b/sysdeps/powerpc/powerpc64/fpu/s_round.S
index c0b6d46fea..b07a7ea15a 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_round.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_round.S
@@ -24,8 +24,6 @@
.tc FD_43300000_0[TC],0x4330000000000000
.LC1: /* 0.5 */
.tc FD_3fe00000_0[TC],0x3fe0000000000000
-.LC2: /* -0.0 */
- .tc FD_80000000_0[TC],0x8000000000000000
.section ".text"
/* double [fp1] round (double x [fp1])
@@ -38,7 +36,7 @@
"Round toward Zero" mode and round by adding +-0.5 before rounding
to the integer value. */
-ENTRY (__round)
+EALIGN (__round, 4, 0)
CALL_MCOUNT 0
mffs fp11 /* Save current FPU rounding mode. */
lfd fp13,.LC0@toc(2)
@@ -53,7 +51,8 @@ ENTRY (__round)
fadd fp1,fp1,fp10 /* x+= 0.5; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
.L4:
@@ -61,10 +60,10 @@ ENTRY (__round)
bge- cr6,.L9 /* if (x < 0.0) */
fsub fp1,fp9,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
- mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- lfd fp1,.LC2@toc(2) /* x must be -0.0 for the 0.0 case. */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
+ mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
END (__round)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
index 23ee4c052b..d2e29fdb8f 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
@@ -21,11 +21,9 @@
.section ".toc","aw"
.LC0: /* 2**23 */
- .tc FD_41600000_0[TC],0x4160000000000000
+ .tc FD_4b000000_0[TC],0x4b00000000000000
.LC1: /* 0.5 */
- .tc FD_3fe00000_0[TC],0x3fe0000000000000
-.LC2: /* -0.0 */
- .tc FD_80000000_0[TC],0x8000000000000000
+ .tc FD_3f000000_0[TC],0x3f00000000000000
.section ".text"
/* float [fp1] roundf (float x [fp1])
@@ -38,22 +36,23 @@
"Round toward Zero" mode and round by adding +-0.5 before rounding
to the integer value. */
-ENTRY (__roundf )
+EALIGN (__roundf, 4, 0)
CALL_MCOUNT 0
mffs fp11 /* Save current FPU rounding mode. */
- lfd fp13,.LC0@toc(2)
+ lfs fp13,.LC0@toc(2)
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
bnllr- cr7
mtfsfi 7,1 /* Set rounding mode toward 0. */
- lfd fp10,.LC1@toc(2)
+ lfs fp10,.LC1@toc(2)
ble- cr6,.L4
fadds fp1,fp1,fp10 /* x+= 0.5; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
.L4:
@@ -61,10 +60,10 @@ ENTRY (__roundf )
bge- cr6,.L9 /* if (x < 0.0) */
fsubs fp1,fp9,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
- mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- lfd fp1,.LC2@toc(2) /* x must be -0.0 for the 0.0 case. */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
+ mtfsf 0x01,fp11 /* restore previous rounding mode. */
blr
END (__roundf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_trunc.S b/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
index 3ddd298525..d69e371b61 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
@@ -22,8 +22,6 @@
.section ".toc","aw"
.LC0: /* 2**52 */
.tc FD_43300000_0[TC],0x4330000000000000
-.LC2: /* -0.0 */
- .tc FD_80000000_0[TC],0x8000000000000000
.section ".text"
/* double [fp1] trunc (double x [fp1])
@@ -33,7 +31,7 @@
We set "round toward Zero" mode and trunc by adding +-2**52 then
subtracting +-2**52. */
-ENTRY (__trunc)
+EALIGN (__trunc, 4, 0)
CALL_MCOUNT 0
mffs fp11 /* Save current FPU rounding mode. */
lfd fp13,.LC0@toc(2)
@@ -46,17 +44,18 @@ ENTRY (__trunc)
ble- cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous truncing mode. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- lfd fp1,.LC2@toc(2) /* x must be -0.0 for the 0.0 case. */
blr
END (__trunc)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
index b38b722a6f..15f53da8ca 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
@@ -21,9 +21,7 @@
.section ".toc","aw"
.LC0: /* 2**23 */
- .tc FD_41600000_0[TC],0x4160000000000000
-.LC2: /* -0.0 */
- .tc FD_80000000_0[TC],0x8000000000000000
+ .tc FD_4b000000_0[TC],0x4b00000000000000
.section ".text"
/* float [fp1] truncf (float x [fp1])
@@ -33,10 +31,10 @@
We set "round toward Zero" mode and trunc by adding +-2**23 then
subtracting +-2**23. */
-ENTRY (__truncf)
+EALIGN (__truncf, 4, 0)
CALL_MCOUNT 0
mffs fp11 /* Save current FPU rounding mode. */
- lfd fp13,.LC0@toc(2)
+ lfs fp13,.LC0@toc(2)
fabs fp0,fp1
fsubs fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
@@ -46,17 +44,18 @@ ENTRY (__truncf)
ble- cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
-.L9:
+ fabs fp1,fp1 /* if (x == 0.0) */
+ /* x = 0.0; */
mtfsf 0x01,fp11 /* restore previous truncing mode. */
blr
.L4:
bge- cr6,.L9 /* if (x < 0.0) */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
- fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
+ fnabs fp1,fp1 /* if (x == 0.0) */
+ /* x = -0.0; */
+.L9:
mtfsf 0x01,fp11 /* restore previous rounding mode. */
- bnelr+ cr5
- lfd fp1,.LC2@toc(2) /* x must be -0.0 for the 0.0 case. */
blr
END (__truncf)
diff --git a/sysdeps/s390/bits/string.h b/sysdeps/s390/bits/string.h
index 7134827eb0..d83df39bb2 100644
--- a/sysdeps/s390/bits/string.h
+++ b/sysdeps/s390/bits/string.h
@@ -51,7 +51,7 @@ strlen (__const char *__str)
"0: srst %0,%1\n"
" jo 0b\n"
: "+&a" (__ptr), "+&a" (__tmp) :
- : "cc", "0" );
+ : "cc", "memory", "0" );
return (size_t) (__ptr - __str);
}
#endif
@@ -105,7 +105,7 @@ strncpy (char *__dest, __const char *__src, size_t __n)
#endif
"4:"
: "+&a" (__ptr), "+&a" (__n) : "a" (__diff)
- : "cc", "0" );
+ : "cc", "memory", "0" );
}
return __ret;
}
@@ -134,7 +134,7 @@ strcat(char *__dest, const char *__src)
"0: mvst %0,%1\n"
" jo 0b"
: "+&a" (__ptr), "+&a" (__src) :
- : "cc", "0" );
+ : "cc", "memory", "0" );
return __ret;
}
#endif
@@ -157,7 +157,7 @@ strncat (char *__dest, __const char *__src, size_t __n)
"0: srst %0,%1\n"
" jo 0b\n"
: "+&a" (__ptr), "+&a" (__tmp) :
- : "cc", "0" );
+ : "cc", "memory", "0" );
__diff = (size_t) (__ptr - __src);
__tmp = (char *) __src;
@@ -175,7 +175,7 @@ strncat (char *__dest, __const char *__src, size_t __n)
" stc 0,1(%2,%0)\n"
"2:"
: "+&a" (__tmp), "+&a" (__n) : "a" (__diff)
- : "cc", "0" );
+ : "cc", "memory", "0" );
}
return __ret;
@@ -200,7 +200,7 @@ memchr (__const void *__str, int __c, size_t __n)
" la %0,0\n"
"1:"
: "+&a" (__ptr), "+&a" (__tmp) : "d" (__c)
- : "cc", "0" );
+ : "cc", "memory", "0" );
return __ptr;
}
#endif
@@ -222,7 +222,7 @@ strcmp (__const char *__s1, __const char *__s2)
" ipm %0\n"
" srl %0,28"
: "=d" (__ret), "+&a" (__p1), "+&a" (__p2) :
- : "cc", "0" );
+ : "cc", "memory", "0" );
__ret = (__ret == 0) ? 0 : (__ret == 1) ? -1 : 1;
return __ret;
}
diff --git a/sysdeps/unix/alarm.c b/sysdeps/unix/alarm.c
index ae77782c54..dafada76b5 100644
--- a/sysdeps/unix/alarm.c
+++ b/sysdeps/unix/alarm.c
@@ -41,7 +41,10 @@ alarm (seconds)
return 0;
retval = old.it_value.tv_sec;
- if (old.it_value.tv_usec)
+ /* Round to the nearest second, but never report zero seconds when
+ the alarm is still set. */
+ if (old.it_value.tv_usec >= 500000
+ || (retval == 0 && old.it_value.tv_usec > 0))
++retval;
return retval;
}
diff --git a/sysdeps/unix/i386/sysdep.S b/sysdeps/unix/i386/sysdep.S
index 6056cbeef2..83ce3eadec 100644
--- a/sysdeps/unix/i386/sysdep.S
+++ b/sysdeps/unix/i386/sysdep.S
@@ -47,7 +47,12 @@ notb:
#endif
#ifndef PIC
# if USE___THREAD
+# ifndef NO_TLS_DIRECT_SEG_REFS
movl %eax, %gs:C_SYMBOL_NAME(errno@NTPOFF)
+# else
+ movl %gs:0, %ecx
+ movl %eax, C_SYMBOL_NAME(errno@NTPOFF)(%ecx)
+# endif
# elif !defined _LIBC_REENTRANT
movl %eax, C_SYMBOL_NAME(errno)
# else
@@ -66,7 +71,12 @@ notb:
/* Pop %ebx value saved before jumping here. */
popl %ebx
+# ifdef NO_TLS_DIRECT_SEG_REFS
+ addl %gs:0, %ecx /* No direct %gs-relative store: add the word at %gs:0 to %ecx first. */
+ movl %eax, (%ecx)
+# else
movl %eax, %gs:0(%ecx)
+# endif
# elif RTLD_PRIVATE_ERRNO
movl %eax, C_SYMBOL_NAME(rtld_errno@GOTOFF)(%ebx)
diff --git a/sysdeps/unix/rewinddir.c b/sysdeps/unix/rewinddir.c
index 9f3724fc6a..cb0f52079a 100644
--- a/sysdeps/unix/rewinddir.c
+++ b/sysdeps/unix/rewinddir.c
@@ -30,6 +30,7 @@ rewinddir (dirp)
{
__libc_lock_lock (dirp->lock);
(void) __lseek (dirp->fd, (off_t) 0, SEEK_SET);
+ dirp->filepos = 0;
dirp->offset = 0;
dirp->size = 0;
__libc_lock_unlock (dirp->lock);
diff --git a/sysdeps/unix/sysv/linux/futimes.c b/sysdeps/unix/sysv/linux/futimes.c
index f43f568ec1..7d79a40aab 100644
--- a/sysdeps/unix/sysv/linux/futimes.c
+++ b/sysdeps/unix/sysv/linux/futimes.c
@@ -23,6 +23,7 @@
#include <utime.h>
#include <sys/time.h>
#include <stdio-common/_itoa.h>
+#include <fcntl.h>
#include "kernel-features.h"
@@ -40,31 +41,58 @@ __futimes (int fd, const struct timeval tvp[2])
char *cp = _itoa_word ((unsigned int) fd, fname + sizeof (fname) - 1, 10, 0);
cp = memcpy (cp - sizeof (selffd) + 1, selffd, sizeof (selffd) - 1);
+ int result;
#ifdef __NR_utimes
- int result = INLINE_SYSCALL (utimes, 2, cp, tvp);
+ result = INLINE_SYSCALL (utimes, 2, cp, tvp);
# ifndef __ASSUME_UTIMES
- if (result != -1 || errno != ENOSYS)
+ if (result == -1 && errno == ENOSYS)
# endif
- return result;
#endif
-
- /* The utimes() syscall does not exist or is not available in the
- used kernel. Use utime(). For this we have to convert to the
- data format utime() expects. */
+ {
+ /* The utimes() syscall does not exist or is not available in the
+ used kernel. Use utime(). For this we have to convert to the
+ data format utime() expects. */
#ifndef __ASSUME_UTIMES
- struct utimbuf buf;
- struct utimbuf *times;
+ struct utimbuf buf;
+ struct utimbuf *times;
- if (tvp != NULL)
- {
- times = &buf;
- buf.actime = tvp[0].tv_sec + (tvp[0].tv_usec + 500000) / 1000000;
- buf.modtime = tvp[1].tv_sec + (tvp[1].tv_usec + 500000) / 1000000;
- }
- else
- times = NULL;
+ if (tvp != NULL)
+ {
+ times = &buf;
+ buf.actime = tvp[0].tv_sec + (tvp[0].tv_usec + 500000) / 1000000;
+ buf.modtime = tvp[1].tv_sec + (tvp[1].tv_usec + 500000) / 1000000;
+ }
+ else
+ times = NULL;
- return INLINE_SYSCALL (utime, 2, cp, times);
+ result = INLINE_SYSCALL (utime, 2, cp, times);
#endif
+ }
+
+ if (result == -1)
+ /* Check for errors that result from failing to find /proc.
+ This means we can't do futimes at all, so return ENOSYS
+ rather than some confusing error. */
+ switch (errno)
+ {
+ case EACCES:
+ if (tvp == NULL) /* Could be a path problem or a file problem. */
+ break;
+ /*FALLTHROUGH*/
+ case ELOOP:
+ case ENAMETOOLONG:
+ case ENOTDIR:
+ __set_errno (ENOSYS);
+ break;
+
+ case ENOENT:
+ /* Validate the file descriptor by letting fcntl set errno to
+ EBADF if it's bogus. Otherwise it's a /proc issue. */
+ if (INLINE_SYSCALL (fcntl, 3, fd, F_GETFD, 0) != -1)
+ __set_errno (ENOSYS);
+ break;
+ }
+
+ return result;
}
weak_alias (__futimes, futimes)
diff --git a/sysdeps/unix/sysv/linux/i386/clone.S b/sysdeps/unix/sysv/linux/i386/clone.S
index acd43dfb0b..c7d31f7a32 100644
--- a/sysdeps/unix/sysv/linux/i386/clone.S
+++ b/sysdeps/unix/sysv/linux/i386/clone.S
@@ -67,7 +67,7 @@ ENTRY (BP_SYM (__clone))
/* Insert the argument onto the new stack. Make sure the new
thread is started with an alignment of (mod 16). */
andl $0xfffffff0, %ecx
- subl $24,%ecx
+ subl $28,%ecx
movl ARG(%esp),%eax /* no negative argument counts */
movl %eax,12(%ecx)
diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h
index af75d4c51a..b91af4007c 100644
--- a/sysdeps/unix/sysv/linux/i386/sysdep.h
+++ b/sysdeps/unix/sysv/linux/i386/sysdep.h
@@ -154,9 +154,17 @@ __i686.get_pc_thunk.reg: \
movl SYSCALL_ERROR_ERRNO@GOTNTPOFF(%ecx), %ecx; \
xorl %edx, %edx; \
subl %eax, %edx; \
- movl %edx, %gs:0(%ecx); \
+ SYSCALL_ERROR_HANDLER_TLS_STORE (%edx, %ecx); \
orl $-1, %eax; \
jmp L(pseudo_end);
+# ifndef NO_TLS_DIRECT_SEG_REFS
+# define SYSCALL_ERROR_HANDLER_TLS_STORE(src, destoff) \
+ movl src, %gs:(destoff)
+# else
+# define SYSCALL_ERROR_HANDLER_TLS_STORE(src, destoff) \
+ addl %gs:0, destoff; \
+ movl src, (destoff)
+# endif
# else
# define SYSCALL_ERROR_HANDLER \
0:pushl %ebx; \
@@ -532,6 +540,29 @@ asm (".L__X'%ebx = 1\n\t"
# define EXTRAVAR_5
#endif
+/* Consistency check for position-independent code.
+   The asm calls __i686.get_pc_thunk.cx (emitted here in a hidden linkonce
+   section; it copies its return address into %ecx), adds
+   $_GLOBAL_OFFSET_TABLE_ to %ecx and subtracts %ebx.  If the difference
+   is not zero it executes ud2.  The expression's value is the difference
+   left in %ecx, i.e. zero whenever execution continues past the check.
+   NOTE(review): presumably used to verify that %ebx still holds the GOT
+   pointer around syscalls -- confirm against the callers. */
+#ifdef __PIC__
+# define check_consistency() \
+ ({ int __res; \
+ __asm__ __volatile__ \
+ ("call __i686.get_pc_thunk.cx;" \
+ "addl $_GLOBAL_OFFSET_TABLE_, %%ecx;" \
+ "subl %%ebx, %%ecx;" \
+ "je 1f;" \
+ "ud2;" \
+ "1:\n" \
+ ".section .gnu.linkonce.t.__i686.get_pc_thunk.cx,\"ax\",@progbits;" \
+ ".globl __i686.get_pc_thunk.cx;" \
+ ".hidden __i686.get_pc_thunk.cx;" \
+ ".type __i686.get_pc_thunk.cx,@function;" \
+ "__i686.get_pc_thunk.cx:" \
+ "movl (%%esp), %%ecx;" \
+ "ret;" \
+ ".previous" \
+ : "=c" (__res)); \
+ __res; })
+#endif
+
#endif /* __ASSEMBLER__ */
#endif /* linux/i386/sysdep.h */
diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
index f499a712c4..88850cfeac 100644
--- a/sysdeps/unix/sysv/linux/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/kernel-features.h
@@ -79,6 +79,11 @@
# define __ASSUME_SIOCGIFNAME 1
#endif
+/* MSG_NOSIGNAL was at least available with Linux 2.2.0. */
+#if __LINUX_KERNEL_VERSION >= 131584
+# define __ASSUME_MSG_NOSIGNAL 1
+#endif
+
/* On x86 another `getrlimit' syscall was added in 2.3.25. */
#if __LINUX_KERNEL_VERSION >= 131865 && defined __i386__
# define __ASSUME_NEW_GETRLIMIT_SYSCALL 1
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
index 6514f442a6..bdb9473429 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
@@ -47,7 +47,7 @@ ENTRY(__novec_setcontext)
* of a procedure call (makecontext), so we don't need to restore
* msr and ctr. We don't restore r13 since it will be used as
* the TLS pointer. */
- lwz r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
+ ld r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
cmpdi r0,0
bne L(nv_do_sigret)
@@ -104,7 +104,7 @@ ENTRY(__novec_setcontext)
ld r4,(SIGCONTEXT_GP_REGS+(PT_R4*8))(r31)
ld r0,(SIGCONTEXT_GP_REGS+(PT_CCR*8))(r31)
ld r5,(SIGCONTEXT_GP_REGS+(PT_R5*8))(r31)
- mfcr r0
+ mtcr r0
ld r6,(SIGCONTEXT_GP_REGS+(PT_R6*8))(r31)
ld r7,(SIGCONTEXT_GP_REGS+(PT_R7*8))(r31)
ld r8,(SIGCONTEXT_GP_REGS+(PT_R8*8))(r31)
@@ -213,7 +213,7 @@ ENTRY(__setcontext)
* of a procedure call (makecontext), so we don't need to restore
* msr and ctr. We don't restore r13 since it will be used as
* the TLS pointer. */
- lwz r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
+ ld r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
cmpdi r0,0
bne L(do_sigret)
@@ -380,11 +380,11 @@ L(has_no_vec):
ld r4,(SIGCONTEXT_GP_REGS+(PT_R4*8))(r31)
ld r0,(SIGCONTEXT_GP_REGS+(PT_CCR*8))(r31)
ld r5,(SIGCONTEXT_GP_REGS+(PT_R5*8))(r31)
- mfcr r0
ld r6,(SIGCONTEXT_GP_REGS+(PT_R6*8))(r31)
ld r7,(SIGCONTEXT_GP_REGS+(PT_R7*8))(r31)
ld r8,(SIGCONTEXT_GP_REGS+(PT_R8*8))(r31)
ld r9,(SIGCONTEXT_GP_REGS+(PT_R9*8))(r31)
+ mtcr r0
ld r10,(SIGCONTEXT_GP_REGS+(PT_R10*8))(r31)
ld r11,(SIGCONTEXT_GP_REGS+(PT_R11*8))(r31)
ld r12,(SIGCONTEXT_GP_REGS+(PT_R12*8))(r31)
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
index f99df951a2..0a4bac58b5 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
@@ -150,7 +150,7 @@ ENTRY(__novec_swapcontext)
* of a procedure call (makecontext), so we don't need to restore
* msr and ctr. We don't restore r13 since it will be used as
* the TLS pointer. */
- lwz r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
+ ld r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
cmpdi r0,0
bne L(nv_do_sigret)
@@ -199,7 +199,7 @@ ENTRY(__novec_swapcontext)
ld r4,(SIGCONTEXT_GP_REGS+(PT_R4*8))(r31)
ld r0,(SIGCONTEXT_GP_REGS+(PT_CCR*8))(r31)
ld r5,(SIGCONTEXT_GP_REGS+(PT_R5*8))(r31)
- mfcr r0
+ mtcr r0
ld r6,(SIGCONTEXT_GP_REGS+(PT_R6*8))(r31)
ld r7,(SIGCONTEXT_GP_REGS+(PT_R7*8))(r31)
ld r8,(SIGCONTEXT_GP_REGS+(PT_R8*8))(r31)
@@ -521,7 +521,7 @@ L(has_no_vec):
* of a procedure call (makecontext), so we don't need to restore
* msr and ctr. We don't restore r13 since it will be used as
* the TLS pointer. */
- lwz r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
+ ld r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
cmpdi r0,0
bne L(do_sigret)
@@ -681,11 +681,11 @@ L(has_no_vec2):
ld r4,(SIGCONTEXT_GP_REGS+(PT_R4*8))(r31)
ld r0,(SIGCONTEXT_GP_REGS+(PT_CCR*8))(r31)
ld r5,(SIGCONTEXT_GP_REGS+(PT_R5*8))(r31)
- mfcr r0
ld r6,(SIGCONTEXT_GP_REGS+(PT_R6*8))(r31)
ld r7,(SIGCONTEXT_GP_REGS+(PT_R7*8))(r31)
ld r8,(SIGCONTEXT_GP_REGS+(PT_R8*8))(r31)
ld r9,(SIGCONTEXT_GP_REGS+(PT_R9*8))(r31)
+ mtcr r0
ld r10,(SIGCONTEXT_GP_REGS+(PT_R10*8))(r31)
ld r11,(SIGCONTEXT_GP_REGS+(PT_R11*8))(r31)
ld r12,(SIGCONTEXT_GP_REGS+(PT_R12*8))(r31)
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index b932f51d15..408438647d 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -280,16 +280,24 @@ _dl_start_user:\n\
# Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\
# argc -> rsi\n\
movq %rdx, %rsi\n\
+ # Save %rsp value in %r13.\n\
+ movq %rsp, %r13\n\
+ # And align stack for the _dl_init_internal call. \n\
+ andq $-16, %rsp\n\
# _dl_loaded -> rdi\n\
movq _rtld_local(%rip), %rdi\n\
# env -> rcx\n\
- leaq 16(%rsp,%rdx,8), %rcx\n\
+ leaq 16(%r13,%rdx,8), %rcx\n\
# argv -> rdx\n\
- leaq 8(%rsp), %rdx\n\
+ leaq 8(%r13), %rdx\n\
+ # Clear %rbp to mark outermost frame obviously even for constructors.\n\
+ xorq %rbp, %rbp\n\
# Call the function to run the initializers.\n\
call _dl_init_internal@PLT\n\
# Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\
leaq _dl_fini(%rip), %rdx\n\
+ # And make sure %rsp points to argc stored on the stack.\n\
+ movq %r13, %rsp\n\
# Jump to the user's entry point.\n\
jmp *%r12\n\
.previous\n\
diff --git a/time/strptime_l.c b/time/strptime_l.c
index df98099f0a..cf0ab7153d 100644
--- a/time/strptime_l.c
+++ b/time/strptime_l.c
@@ -539,10 +539,12 @@ __strptime_internal (rp, fmt, tm, decided, era_cnt LOCALE_PARAM)
}
#endif
if (!match_string (HERE_AM_STR, rp))
- if (match_string (HERE_PM_STR, rp))
- is_pm = 1;
- else
- return NULL;
+ {
+ if (match_string (HERE_PM_STR, rp))
+ is_pm = 1;
+ else
+ return NULL;
+ }
break;
case 'r':
#ifdef _NL_CURRENT
diff --git a/time/tst-strptime.c b/time/tst-strptime.c
index a0b2ebedd2..6356aa0d41 100644
--- a/time/tst-strptime.c
+++ b/time/tst-strptime.c
@@ -42,6 +42,10 @@ static const struct
{ "C", "19990502123412", "%Y%m%d%H%M%S", 0, 121, 4, 2 },
{ "C", "2001 20 Mon", "%Y %U %a", 1, 140, 4, 21 },
{ "C", "2001 21 Mon", "%Y %W %a", 1, 140, 4, 21 },
+ { "ja_JP.EUC-JP", "2000-01-01 08:12:21 AM", "%Y-%m-%d %I:%M:%S %p",
+ 6, 0, 0, 1 },
+ { "en_US.ISO-8859-1", "2000-01-01 08:12:21 PM", "%Y-%m-%d %I:%M:%S %p",
+ 6, 0, 0, 1 },
{ "ja_JP.EUC-JP", "2001 20 \xb7\xee", "%Y %U %a", 1, 140, 4, 21 },
{ "ja_JP.EUC-JP", "2001 21 \xb7\xee", "%Y %W %a", 1, 140, 4, 21 },
};
@@ -73,7 +77,14 @@ test_tm (void)
{
memset (&tm, '\0', sizeof (tm));
- if (*strptime (tm_tests[i].input, tm_tests[i].format, &tm) != '\0')
+ char *ret = strptime (tm_tests[i].input, tm_tests[i].format, &tm);
+ if (ret == NULL)
+ {
+ printf ("strptime returned NULL for `%s'\n", tm_tests[i].input);
+ result = 1;
+ continue;
+ }
+ else if (*ret != '\0')
{
printf ("not all of `%s' read\n", tm_tests[i].input);
result = 1;
@@ -127,7 +138,14 @@ main (int argc, char *argv[])
exit (EXIT_FAILURE);
}
- if (*strptime (day_tests[i].input, day_tests[i].format, &tm) != '\0')
+ char *ret = strptime (day_tests[i].input, day_tests[i].format, &tm);
+ if (ret == NULL)
+ {
+ printf ("strptime returned NULL for `%s'\n", day_tests[i].input);
+ result = 1;
+ continue;
+ }
+ else if (*ret != '\0')
{
printf ("not all of `%s' read\n", day_tests[i].input);
result = 1;
diff --git a/timezone/asia b/timezone/asia
index 3c2c1a1868..32e6e3c45f 100644
--- a/timezone/asia
+++ b/timezone/asia
@@ -1,4 +1,4 @@
-# @(#)asia 7.77
+# @(#)asia 7.78
# This data is by no means authoritative; if you think you know better,
# go ahead and edit the file (and please send any changes to
@@ -639,7 +639,7 @@ Rule Zion 1988 only - Apr 9 0:00 1:00 D
Rule Zion 1988 only - Sep 3 0:00 0 S
# From Ephraim Silverberg <ephraim@cs.huji.ac.il>
-# (1997-03-04, 1998-03-16, 1998-12-28, 2000-01-17 and 2000-07-25):
+# (1997-03-04, 1998-03-16, 1998-12-28, 2000-01-17, 2000-07-25, and 2004-12-22):
# According to the Office of the Secretary General of the Ministry of
# Interior, there is NO set rule for Daylight-Savings/Standard time changes.
@@ -690,13 +690,13 @@ Rule Zion 1995 only - Sep 3 0:00 0 S
# time, Haim Ramon. The official announcement regarding 1996-1998
# (with the dates for 1997-1998 no longer being relevant) can be viewed at:
#
-# ftp://ftp.huji.ac.il/pub/tz/announcements/1996-1998.ramon.ps.gz
+# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/1996-1998.ramon.ps.gz
#
# The dates for 1997-1998 were altered by his successor, Rabbi Eli Suissa.
#
# The official announcements for the years 1997-1999 can be viewed at:
#
-# ftp://ftp.huji.ac.il/pub/tz/announcements/YYYY.ps.gz
+# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/YYYY.ps.gz
#
# where YYYY is the relevant year.
@@ -716,12 +716,12 @@ Rule Zion 1999 only - Sep 3 2:00 0 S
#
# The official announcement for the start date of 2000 can be viewed at:
#
-# ftp://ftp.huji.ac.il/pub/tz/announcements/2000-start.ps.gz
+# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2000-start.ps.gz
#
# The official announcement for the end date of 2000 and the dates
# for the years 2001-2004 can be viewed at:
#
-# ftp://ftp.huji.ac.il/pub/tz/announcements/2000-2004.ps.gz
+# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2000-2004.ps.gz
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
Rule Zion 2000 only - Apr 14 2:00 1:00 D
@@ -735,52 +735,97 @@ Rule Zion 2003 only - Oct 3 1:00 0 S
Rule Zion 2004 only - Apr 7 1:00 1:00 D
Rule Zion 2004 only - Sep 22 1:00 0 S
-# From Paul Eggert (2000-07-25):
-# Here are guesses for rules after 2004.
-# They are probably wrong, but they are more likely than no DST at all.
+# Yesterday, the Knesset Interior Committee passed a proposed (originally
+# in March 2004) change to the Time Setting Law that would make the dates
+# for DST from 2005 and beyond so that DST starts on the night _after_ the
+# first night of the Passover holiday at midnight until midnight of the
+# Saturday night _before_ the fast of Yom Kippur.
+#
+# Those who can read Hebrew can view the proposal at:
+#
+# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2005+.ps
+#
+# The proposal still has to be passed by the Knesset (three readings) for
+# it to become law....
+
+# From Paul Eggert (2004-12-22):
+# For now, guess that the rules proposed on 2004-12-20 will be adopted.
+# This is quite possibly wrong, but it is more likely than no DST at all.
+# I used Ed Reingold's cal-hebrew in GNU Emacs 21.3, along with code
+# written by Ephraim Silverberg, to generate this list.
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
-Rule Zion 2005 max - Apr 1 1:00 1:00 D
-Rule Zion 2005 max - Oct 1 1:00 0 S
+Rule Zion 2005 only - Apr 25 0:00 1:00 D
+Rule Zion 2005 only - Oct 9 0:00 0 S
+Rule Zion 2006 only - Apr 14 0:00 1:00 D
+Rule Zion 2006 only - Oct 1 0:00 0 S
+Rule Zion 2007 only - Apr 4 0:00 1:00 D
+Rule Zion 2007 only - Sep 16 0:00 0 S
+Rule Zion 2008 only - Apr 21 0:00 1:00 D
+Rule Zion 2008 only - Oct 5 0:00 0 S
+Rule Zion 2009 only - Apr 10 0:00 1:00 D
+Rule Zion 2009 only - Sep 27 0:00 0 S
+Rule Zion 2010 only - Mar 31 0:00 1:00 D
+Rule Zion 2010 only - Sep 12 0:00 0 S
+Rule Zion 2011 only - Apr 20 0:00 1:00 D
+Rule Zion 2011 only - Oct 2 0:00 0 S
+Rule Zion 2012 only - Apr 8 0:00 1:00 D
+Rule Zion 2012 only - Sep 23 0:00 0 S
+Rule Zion 2013 only - Mar 27 0:00 1:00 D
+Rule Zion 2013 only - Sep 8 0:00 0 S
+Rule Zion 2014 only - Apr 16 0:00 1:00 D
+Rule Zion 2014 only - Sep 28 0:00 0 S
+Rule Zion 2015 only - Apr 5 0:00 1:00 D
+Rule Zion 2015 only - Sep 20 0:00 0 S
+Rule Zion 2016 only - Apr 24 0:00 1:00 D
+Rule Zion 2016 only - Oct 9 0:00 0 S
+Rule Zion 2017 only - Apr 12 0:00 1:00 D
+Rule Zion 2017 only - Sep 24 0:00 0 S
+Rule Zion 2018 only - Apr 1 0:00 1:00 D
+Rule Zion 2018 only - Sep 16 0:00 0 S
+Rule Zion 2019 only - Apr 21 0:00 1:00 D
+Rule Zion 2019 only - Oct 6 0:00 0 S
+Rule Zion 2020 only - Apr 10 0:00 1:00 D
+Rule Zion 2020 only - Sep 27 0:00 0 S
+Rule Zion 2021 only - Mar 29 0:00 1:00 D
+Rule Zion 2021 only - Sep 12 0:00 0 S
+Rule Zion 2022 only - Apr 17 0:00 1:00 D
+Rule Zion 2022 only - Oct 2 0:00 0 S
+Rule Zion 2023 only - Apr 7 0:00 1:00 D
+Rule Zion 2023 only - Sep 24 0:00 0 S
+Rule Zion 2024 only - Apr 24 0:00 1:00 D
+Rule Zion 2024 only - Oct 6 0:00 0 S
+Rule Zion 2025 only - Apr 14 0:00 1:00 D
+Rule Zion 2025 only - Sep 28 0:00 0 S
+Rule Zion 2026 only - Apr 3 0:00 1:00 D
+Rule Zion 2026 only - Sep 20 0:00 0 S
+Rule Zion 2027 only - Apr 23 0:00 1:00 D
+Rule Zion 2027 only - Oct 10 0:00 0 S
+Rule Zion 2028 only - Apr 12 0:00 1:00 D
+Rule Zion 2028 only - Sep 24 0:00 0 S
+Rule Zion 2029 only - Apr 1 0:00 1:00 D
+Rule Zion 2029 only - Sep 16 0:00 0 S
+Rule Zion 2030 only - Apr 19 0:00 1:00 D
+Rule Zion 2030 only - Oct 6 0:00 0 S
+Rule Zion 2031 only - Apr 9 0:00 1:00 D
+Rule Zion 2031 only - Sep 21 0:00 0 S
+Rule Zion 2032 only - Mar 28 0:00 1:00 D
+Rule Zion 2032 only - Sep 12 0:00 0 S
+Rule Zion 2033 only - Apr 15 0:00 1:00 D
+Rule Zion 2033 only - Oct 2 0:00 0 S
+Rule Zion 2034 only - Apr 5 0:00 1:00 D
+Rule Zion 2034 only - Sep 17 0:00 0 S
+Rule Zion 2035 only - Apr 25 0:00 1:00 D
+Rule Zion 2035 only - Oct 7 0:00 0 S
+Rule Zion 2036 only - Apr 13 0:00 1:00 D
+Rule Zion 2036 only - Sep 28 0:00 0 S
+Rule Zion 2037 only - Apr 1 0:00 1:00 D
+Rule Zion 2037 only - Sep 13 0:00 0 S
# Zone NAME GMTOFF RULES FORMAT [UNTIL]
Zone Asia/Jerusalem 2:20:56 - LMT 1880
2:20:40 - JMT 1918 # Jerusalem Mean Time?
2:00 Zion I%sT
-# From Ephraim Silverberg (2003-03-23):
-#
-# Minister of Interior Poraz has announced that he will respect the law
-# passed in July 2000 (proposed at the time jointly by himself and
-# then-MK David Azulai [Shas]) fixing the dates for 2000-2004. Hence,
-# the dates for 2003 and 2004 remain unchanged....
-#
-# As far as 2005 and beyond, no dates have been set. However, the
-# minister has mentioned that he wishes to propose to move Israel's
-# timezone in 2005 from GMT+2 to GMT+3 and upon that have DST during
-# the summer months (i.e. GMT+4). However, no legislation in this
-# direction is expected until the latter part of 2004 which is a long
-# time off in terms of Israeli politics.
-
-# (2004-09-20):
-# The latest rumour, however, is that in 2005, when the clock changes to
-# Daylight Saving Time (date as yet unknown), the move will be a two-hour leap
-# forward (from UTC+0200 to UTC+0400) and then, in the fall, the clock will
-# move back only an hour to UTC+0300 thus effectively moving Israel's timezone
-# from UTC+0200 to UTC+0300. However, no actual draft has been put before the
-# Knesset (Israel's Parliament) though the intention is to do so this
-# month [2004-09].
-
-# (2004-09-26):
-# Even though the draft law for the above did pass the Ministerial Committee
-# for Legislative Matters three months ago, it was voted down in today's
-# Cabinet meeting. The current suggestion is to keep the current timezone at
-# UTC+0200 but have an extended period of Daylight Saving Time (UTC+0300) from
-# the beginning of Passover holiday in the spring to after the Tabernacle
-# holiday in the fall (i.e. the dates of which are governed by the Hebrew
-# calendar but this means at least 184 days of DST). However, this is only a
-# suggestion that was raised in today's cabinet meeting and has not yet been
-# drafted.
-
###############################################################################
diff --git a/timezone/backward b/timezone/backward
index 6e118c2726..9fbab94d79 100644
--- a/timezone/backward
+++ b/timezone/backward
@@ -1,4 +1,4 @@
-# @(#)backward 7.26
+# @(#)backward 7.27
# This file provides links between current names for time zones
# and their old names. Many names changed in late 1993.
@@ -13,7 +13,7 @@ Link America/Argentina/Jujuy America/Jujuy
Link America/Indiana/Knox America/Knox_IN
Link America/Argentina/Mendoza America/Mendoza
Link America/Rio_Branco America/Porto_Acre
-Link America/Cordoba America/Rosario
+Link America/Argentina/Cordoba America/Rosario
Link America/St_Thomas America/Virgin
Link Asia/Ashgabat Asia/Ashkhabad
Link Asia/Chongqing Asia/Chungking
@@ -34,7 +34,7 @@ Link Australia/Hobart Australia/Tasmania
Link Australia/Melbourne Australia/Victoria
Link Australia/Perth Australia/West
Link Australia/Broken_Hill Australia/Yancowinna
-Link America/Porto_Acre Brazil/Acre
+Link America/Rio_Branco Brazil/Acre
Link America/Noronha Brazil/DeNoronha
Link America/Sao_Paulo Brazil/East
Link America/Manaus Brazil/West
@@ -55,10 +55,10 @@ Link Europe/Dublin Eire
Link Europe/Chisinau Europe/Tiraspol
Link Europe/London GB
Link Europe/London GB-Eire
-Link Etc/GMT+0 GMT+0
-Link Etc/GMT-0 GMT-0
-Link Etc/GMT0 GMT0
-Link Etc/Greenwich Greenwich
+Link Etc/GMT GMT+0
+Link Etc/GMT GMT-0
+Link Etc/GMT GMT0
+Link Etc/GMT Greenwich
Link Asia/Hong_Kong Hongkong
Link Atlantic/Reykjavik Iceland
Link Asia/Tehran Iran
@@ -70,7 +70,7 @@ Link Africa/Tripoli Libya
Link America/Tijuana Mexico/BajaNorte
Link America/Mazatlan Mexico/BajaSur
Link America/Mexico_City Mexico/General
-Link America/Shiprock Navajo
+Link America/Denver Navajo
Link Pacific/Auckland NZ
Link Pacific/Chatham NZ-CHAT
Link Pacific/Pago_Pago Pacific/Samoa
@@ -95,6 +95,6 @@ Link America/Denver US/Mountain
Link America/Los_Angeles US/Pacific
Link Pacific/Pago_Pago US/Samoa
Link Etc/UTC UTC
-Link Etc/Universal Universal
+Link Etc/UTC Universal
Link Europe/Moscow W-SU
-Link Etc/Zulu Zulu
+Link Etc/UTC Zulu
diff --git a/timezone/europe b/timezone/europe
index eeb114f881..2da2df16db 100644
--- a/timezone/europe
+++ b/timezone/europe
@@ -1,4 +1,4 @@
-# @(#)europe 7.88
+# @(#)europe 7.91
# This data is by no means authoritative; if you think you know better,
# go ahead and edit the file (and please send any changes to
@@ -708,7 +708,7 @@ Zone Europe/Sofia 1:33:16 - LMT 1880
# see Serbia and Montenegro
# Cyprus
-# See the `asia' file.
+# Please see the `asia' file for Asia/Nicosia.
# Czech Republic
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
@@ -754,6 +754,11 @@ Zone Atlantic/Faeroe -0:27:04 - LMT 1908 Jan 11 # Torshavn
0:00 - WET 1981
0:00 EU WE%sT
#
+# From Paul Eggert (2004-10-31):
+# During World War II, Germany maintained secret manned weather stations in
+# East Greenland and Franz Josef Land, but we don't know their time zones.
+# My source for this is Wilhelm Dege's book mentioned under Svalbard.
+#
# From Paul Eggert (1996-11-22):
# Greenland joined the EU as part of Denmark, obtained home rule on 1979-05-01,
# and left the EU on 1985-02-01. It therefore should have been using EU
@@ -1055,6 +1060,11 @@ Zone Europe/Berlin 0:53:28 - LMT 1893 Apr
1:00 Germany CE%sT 1980
1:00 EU CE%sT
+# Georgia
+# Please see the "asia" file for Asia/Tbilisi.
+# Herodotus (Histories, IV.45) says Georgia north of the Phasis (now Rioni)
+# is in Europe. Our reference location Tbilisi is in the Asian part.
+
# Gibraltar
# Zone NAME GMTOFF RULES FORMAT [UNTIL]
Zone Europe/Gibraltar -0:21:24 - LMT 1880 Aug 2
@@ -1611,7 +1621,7 @@ Zone Europe/Oslo 0:43:00 - LMT 1895 Jan 1
# From Paul Eggert (2001-05-01):
#
# Actually, Jan Mayen was never occupied by Germany during World War II,
-# so it must have diverged from Oslo time during the war, as Olso was
+# so it must have diverged from Oslo time during the war, as Oslo was
# keeping Berlin time.
#
# <http://home.no.net/janmayen/history.htm> says that the meteorologists
@@ -1628,7 +1638,7 @@ Zone Europe/Oslo 0:43:00 - LMT 1895 Jan 1
# <http://www.svalbard.com/SvalbardFAQ.html> says that the Germans were
# expelled on 1942-05-14. However, small parties of Germans did return,
# and according to Wilhelm Dege's book "War North of 80" (1954)
-# <http://www.utpress.utoronto.ca/publishing/rights/dege_warnorthof80.htm>
+# <http://www.ucalgary.ca/UofC/departments/UP/1-55238/1-55238-110-2.html>
# the German armed forces at the Svalbard weather station code-named
# Haudegen did not surrender to the Allies until September 1945.
#
diff --git a/timezone/leapseconds b/timezone/leapseconds
index 7add3303f2..385ee66ebd 100644
--- a/timezone/leapseconds
+++ b/timezone/leapseconds
@@ -1,4 +1,4 @@
-# @(#)leapseconds 7.17
+# @(#)leapseconds 7.18
# Allowance for leapseconds added to each timezone file.
@@ -45,7 +45,6 @@ Leap 1997 Jun 30 23:59:60 + S
Leap 1998 Dec 31 23:59:60 + S
# INTERNATIONAL EARTH ROTATION AND REFERENCE SYSTEMS SERVICE (IERS)
-#
# SERVICE INTERNATIONAL DE LA ROTATION TERRESTRE ET DES SYSTEMES DE REFERENCE
#
# SERVICE DE LA ROTATION TERRESTRE
@@ -55,9 +54,10 @@ Leap 1998 Dec 31 23:59:60 + S
# FAX : 33 (0) 1 40 51 22 91
# Internet : services.iers@obspm.fr
#
-# Paris, 15 January 2004
+# Paris, 21 July 2004
+#
#
-# Bulletin C 27
+# Bulletin C 28
#
# To authorities responsible
# for the measurement and
@@ -65,7 +65,7 @@ Leap 1998 Dec 31 23:59:60 + S
#
# INFORMATION ON UTC - TAI
#
-# NO positive leap second will be introduced at the end of June 2004.
+# NO positive leap second will be introduced at the end of December 2004.
# The difference between UTC and the International Atomic Time TAI is:
#
# from 1999 January 1, 0h UTC, until further notice : UTC-TAI = -32 s
@@ -78,4 +78,3 @@ Leap 1998 Dec 31 23:59:60 + S
# Daniel GAMBIS
# Director
# Earth Orientation Center of IERS
-# Observatoire de Paris, France
diff --git a/timezone/northamerica b/timezone/northamerica
index 6e755b04b3..f9d2f6a447 100644
--- a/timezone/northamerica
+++ b/timezone/northamerica
@@ -1,4 +1,4 @@
-# @(#)northamerica 7.69
+# @(#)northamerica 7.71
# also includes Central America and the Caribbean
# This data is by no means authoritative; if you think you know better,
@@ -208,6 +208,13 @@ Rule US 1987 max - Apr Sun>=1 2:00 1:00 D
# Pennsylvania, Rhode Island, South Carolina, eastern Tennessee,
# Vermont, Virginia, West Virginia
+# From Dave Cantor (2004-11-02):
+# Early this summer I had the occasion to visit the Mount Washington
+# Observatory weather station atop (of course!) Mount Washington [, NH]....
+# One of the staff members said that the station was on Eastern Standard Time
+# and didn't change their clocks for Daylight Saving ... so that their
+# reports will always have times which are 5 hours behind UTC.
+
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
Rule NYC 1920 only - Mar lastSun 2:00 1:00 D
Rule NYC 1920 only - Oct lastSun 2:00 0 S
@@ -355,10 +362,26 @@ Zone America/Adak 12:13:21 - LMT 1867 Oct 18
-11:00 US B%sT 1983 Oct 30 2:00
-10:00 US AH%sT 1983 Nov 30
-10:00 US HA%sT
+# The following switches don't quite make our 1970 cutoff.
+#
# Shanks writes that part of southwest Alaska (e.g. Aniak)
# switched from -11:00 to -10:00 on 1968-09-22 at 02:00,
# and another part (e.g. Akiak) made the same switch five weeks later.
-# These switches don't quite make our 1970 cutoff.
+#
+# From David Flater (2004-11-09):
+# In e-mail, 2004-11-02, Ray Hudson, historian/liaison to the Unalaska
+# Historic Preservation Commission, provided this information, which
+# suggests that Unalaska deviated from statutory time from early 1967
+# possibly until 1983:
+#
+# Minutes of the Unalaska City Council Meeting, January 10, 1967:
+# "Except for St. Paul and Akutan, Unalaska is the only important
+# location not on Alaska Standard Time. The following resolution was
+# made by William Robinson and seconded by Henry Swanson: Be it
+# resolved that the City of Unalaska hereby goes to Alaska Standard
+# Time as of midnight Friday, January 13, 1967 (1 A.M. Saturday,
+# January 14, Alaska Standard Time.) This resolution was passed with
+# three votes for and one against."
# Hawaii
#
@@ -1731,6 +1754,15 @@ Zone America/Costa_Rica -5:36:20 - LMT 1890 # San Jose
# to DST--and one more hour on 1999-04-04--when the announcers will have
# returned to Baltimore, which switches on that date.)
+# From Evert van der Veer via Steffen Thorsen (2004-10-28):
+# Cuba is not going back to standard time this year.
+# From Paul Eggert (2004-10-28):
+# http://www.granma.cu/ingles/2004/septiembre/juev30/41medid-i.html
+# says that it's due to a problem at the Antonio Guiteras
+# thermoelectric plant, and says "This October there will be no return
+# to normal hours (after daylight saving time)".
+# For now, let's assume that it's a one-year temporary measure.
+
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
Rule Cuba 1928 only - Jun 10 0:00 1:00 D
Rule Cuba 1928 only - Oct 10 0:00 0 S
@@ -1759,8 +1791,9 @@ Rule Cuba 1991 1995 - Oct Sun>=8 0:00s 0 S
Rule Cuba 1996 only - Oct 6 0:00s 0 S
Rule Cuba 1997 only - Oct 12 0:00s 0 S
Rule Cuba 1998 1999 - Mar lastSun 0:00s 1:00 D
-Rule Cuba 1998 max - Oct lastSun 0:00s 0 S
+Rule Cuba 1998 2003 - Oct lastSun 0:00s 0 S
Rule Cuba 2000 max - Apr Sun>=1 0:00s 1:00 D
+Rule Cuba 2005 max - Oct lastSun 0:00s 0 S
# Zone NAME GMTOFF RULES FORMAT [UNTIL]
Zone America/Havana -5:29:28 - LMT 1890
diff --git a/timezone/private.h b/timezone/private.h
index 57663052f0..5de2f7dfe4 100644
--- a/timezone/private.h
+++ b/timezone/private.h
@@ -21,7 +21,7 @@
#ifndef lint
#ifndef NOID
-static char privatehid[] = "@(#)private.h 7.54";
+static char privatehid[] = "@(#)private.h 7.55";
#endif /* !defined NOID */
#endif /* !defined lint */
@@ -208,6 +208,7 @@ extern char * asctime_r();
/*
** Private function declarations.
*/
+
char * icalloc P((int nelem, int elsize));
char * icatalloc P((char * old, const char * new));
char * icpyalloc P((const char * string));
@@ -217,7 +218,6 @@ void icfree P((char * pointer));
void ifree P((char * pointer));
char * scheck P((const char *string, const char *format));
-
/*
** Finally, some convenience items.
*/
@@ -238,6 +238,15 @@ char * scheck P((const char *string, const char *format));
#define TYPE_SIGNED(type) (((type) -1) < 0)
#endif /* !defined TYPE_SIGNED */
+/*
+** Since the definition of TYPE_INTEGRAL contains floating point numbers,
+** it cannot be used in preprocessor directives.
+*/
+
+#ifndef TYPE_INTEGRAL
+#define TYPE_INTEGRAL(type) (((type) 0.5) != 0.5)
+#endif /* !defined TYPE_INTEGRAL */
+
#ifndef INT_STRLEN_MAXIMUM
/*
** 302 / 1000 is log10(2.0) rounded up.
diff --git a/timezone/southamerica b/timezone/southamerica
index 912491049b..2e9faf4934 100644
--- a/timezone/southamerica
+++ b/timezone/southamerica
@@ -1,4 +1,4 @@
-# @(#)southamerica 7.54
+# @(#)southamerica 7.57
# This data is by no means authoritative; if you think you know better,
# go ahead and edit the file (and please send any changes to
@@ -671,7 +671,7 @@ Zone America/Campo_Grande -3:38:28 - LMT 1914
# Mato Grosso (MT)
Zone America/Cuiaba -3:44:20 - LMT 1914
-4:00 Brazil AM%sT 2003 Sep 24
- -4:00 - AMT 2004 Oct 4
+ -4:00 - AMT 2004 Oct 1
-4:00 Brazil AM%sT
#
# west Para (PA), Rondonia (RO)
@@ -942,9 +942,16 @@ Rule Para 1998 2001 - Mar Sun>=1 0:00 0 -
# A decree was issued in Paraguay (no. 16350) on 2002-02-26 that changed the
# dst method to be from the first Sunday in September to the first Sunday in
# April.
-Rule Para 2002 max - Apr Sun>=1 0:00 0 -
-Rule Para 2002 max - Sep Sun>=1 0:00 1:00 S
-
+Rule Para 2002 2004 - Apr Sun>=1 0:00 0 -
+Rule Para 2002 2003 - Sep Sun>=1 0:00 1:00 S
+#
+# From Jesper Norgaard Welen (2005-01-02):
+# There are several sources that claim that Paraguay made
+# a timezone rule change in autumn 2004.
+# From Steffen Thorsen (2005-01-05):
+# Decree 1,867 (2004-03-05) <http://www.labor.com.py/noticias.asp?id=27>
+Rule Para 2004 max - Oct Sun>=15 0:00 1:00 S
+Rule Para 2005 max - Mar Sun>=8 0:00 0 -
# Zone NAME GMTOFF RULES FORMAT [UNTIL]
Zone America/Asuncion -3:50:40 - LMT 1890
diff --git a/timezone/tzfile.h b/timezone/tzfile.h
index 0921c3c339..0e9966a950 100644
--- a/timezone/tzfile.h
+++ b/timezone/tzfile.h
@@ -21,7 +21,7 @@
#ifndef lint
#ifndef NOID
-static char tzfilehid[] = "@(#)tzfile.h 7.14";
+static char tzfilehid[] = "@(#)tzfile.h 7.16";
#endif /* !defined NOID */
#endif /* !defined lint */
@@ -156,12 +156,21 @@ struct tzhead {
#define EPOCH_YEAR 1970
#define EPOCH_WDAY TM_THURSDAY
+#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
+
/*
-** Accurate only for the past couple of centuries;
-** that will probably do.
+** Since everything in isleap is modulo 400 (or a factor of 400), we know that
+** isleap(y) == isleap(y % 400)
+** and so
+** isleap(a + b) == isleap((a + b) % 400)
+** or
+** isleap(a + b) == isleap(a % 400 + b % 400)
+** This is true even if % means modulo rather than Fortran remainder
+** (which is allowed by C89 but not C99).
+** We use this to avoid addition overflow problems.
*/
-#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
+#define isleap_sum(a, b) isleap((a) % 400 + (b) % 400)
#ifndef USG
diff --git a/timezone/zdump.c b/timezone/zdump.c
index 20bb916822..bd7132698f 100644
--- a/timezone/zdump.c
+++ b/timezone/zdump.c
@@ -1,4 +1,4 @@
-static char elsieid[] = "@(#)zdump.c 7.40";
+static char elsieid[] = "@(#)zdump.c 7.61";
/*
** This code has been made independent of the rest of the time
@@ -11,6 +11,15 @@ static char elsieid[] = "@(#)zdump.c 7.40";
#include "sys/types.h" /* for time_t */
#include "time.h" /* for struct tm */
#include "stdlib.h" /* for exit, malloc, atoi */
+#include "float.h" /* for FLT_MAX and DBL_MAX */
+
+#ifndef ZDUMP_LO_YEAR
+#define ZDUMP_LO_YEAR (-500)
+#endif /* !defined ZDUMP_LO_YEAR */
+
+#ifndef ZDUMP_HI_YEAR
+#define ZDUMP_HI_YEAR 2500
+#endif /* !defined ZDUMP_HI_YEAR */
#ifndef MAX_STRING_LENGTH
#define MAX_STRING_LENGTH 1024
@@ -61,9 +70,20 @@ static char elsieid[] = "@(#)zdump.c 7.40";
#endif /* !defined DAYSPERNYEAR */
#ifndef isleap
-#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0)
+#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
#endif /* !defined isleap */
+#ifndef isleap_sum
+/*
+** See tzfile.h for details on isleap_sum.
+*/
+#define isleap_sum(a, b) isleap((a) % 400 + (b) % 400)
+#endif /* !defined isleap_sum */
+
+#define SECSPERDAY ((long) SECSPERHOUR * HOURSPERDAY)
+#define SECSPERNYEAR (SECSPERDAY * DAYSPERNYEAR)
+#define SECSPERLYEAR (SECSPERNYEAR + SECSPERDAY)
+
#if HAVE_GETTEXT
#include "locale.h" /* for setlocale */
#include "libintl.h"
@@ -115,18 +135,60 @@ static char elsieid[] = "@(#)zdump.c 7.40";
extern char ** environ;
extern int getopt P((int argc, char * const argv[],
- const char * options));
+ const char * options));
extern char * optarg;
extern int optind;
extern char * tzname[2];
+static time_t absolute_min_time;
+static time_t absolute_max_time;
+static size_t longest;
+static char * progname;
+
static char * abbr P((struct tm * tmp));
static long delta P((struct tm * newp, struct tm * oldp));
+static void dumptime P((const struct tm * tmp));
static time_t hunt P((char * name, time_t lot, time_t hit));
-static size_t longest;
-static char * progname;
+static void setabsolutes P((void));
static void show P((char * zone, time_t t, int v));
-static void dumptime P((const struct tm * tmp));
+static const char * tformat P((void));
+static time_t yeartot P((long y));
+
+#ifndef TYPECHECK
+#define my_localtime localtime
+#else /* !defined TYPECHECK */
+static struct tm *
+my_localtime(tp)
+time_t * tp;
+{
+ register struct tm * tmp;
+
+ tmp = localtime(tp);
+ if (tp != NULL && tmp != NULL) {
+ struct tm tm;
+ register time_t t;
+
+ tm = *tmp;
+ t = mktime(&tm);
+ if (t - *tp >= 1 || *tp - t >= 1) {
+ (void) fflush(stdout);
+ (void) fprintf(stderr, "\n%s: ", progname);
+ (void) fprintf(stderr, tformat(), *tp);
+ (void) fprintf(stderr, " ->");
+ (void) fprintf(stderr, " sec %d", tmp->tm_sec);
+ (void) fprintf(stderr, " min %d", tmp->tm_min);
+ (void) fprintf(stderr, " hour %d", tmp->tm_hour);
+ (void) fprintf(stderr, " mday %d", tmp->tm_mday);
+ (void) fprintf(stderr, " mon %d", tmp->tm_mon);
+ (void) fprintf(stderr, " year %d", tmp->tm_year);
+ (void) fprintf(stderr, " -> ");
+ (void) fprintf(stderr, tformat(), t);
+ (void) fprintf(stderr, "\n");
+ }
+ }
+ return tmp;
+}
+#endif /* !defined TYPECHECK */
int
main(argc, argv)
@@ -136,18 +198,22 @@ char * argv[];
register int i;
register int c;
register int vflag;
- register char * cutoff;
- register int cutyear;
- register long cuttime;
- char ** fakeenv;
+ register char * cutarg;
+ register long cutloyear = ZDUMP_LO_YEAR;
+ register long cuthiyear = ZDUMP_HI_YEAR;
+ register time_t cutlotime;
+ register time_t cuthitime;
+ register char ** fakeenv;
time_t now;
time_t t;
time_t newt;
- time_t hibit;
struct tm tm;
struct tm newtm;
+ register struct tm * tmp;
+ register struct tm * newtmp;
- INITIALIZE(cuttime);
+ INITIALIZE(cutlotime);
+ INITIALIZE(cuthitime);
#if HAVE_GETTEXT
(void) setlocale(LC_MESSAGES, "");
#ifdef TZ_DOMAINDIR
@@ -162,39 +228,50 @@ char * argv[];
(void) exit(EXIT_SUCCESS);
}
vflag = 0;
- cutoff = NULL;
+ cutarg = NULL;
while ((c = getopt(argc, argv, "c:v")) == 'c' || c == 'v')
if (c == 'v')
vflag = 1;
- else cutoff = optarg;
+ else cutarg = optarg;
if ((c != EOF && c != -1) ||
(optind == argc - 1 && strcmp(argv[optind], "=") == 0)) {
(void) fprintf(stderr,
-_("%s: usage is %s [ --version ] [ -v ] [ -c cutoff ] zonename ...\n"),
- argv[0], argv[0]);
+_("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"),
+ progname, progname);
(void) exit(EXIT_FAILURE);
}
- if (cutoff != NULL) {
- int y;
-
- cutyear = atoi(cutoff);
- cuttime = 0;
- for (y = EPOCH_YEAR; y < cutyear; ++y)
- cuttime += DAYSPERNYEAR + isleap(y);
- cuttime *= SECSPERHOUR * HOURSPERDAY;
+ if (vflag) {
+ if (cutarg != NULL) {
+ long lo;
+ long hi;
+ char dummy;
+
+ if (sscanf(cutarg, "%ld%c", &hi, &dummy) == 1) {
+ cuthiyear = hi;
+ } else if (sscanf(cutarg, "%ld,%ld%c",
+ &lo, &hi, &dummy) == 2) {
+ cutloyear = lo;
+ cuthiyear = hi;
+ } else {
+(void) fprintf(stderr, _("%s: wild -c argument %s\n"),
+ progname, cutarg);
+ (void) exit(EXIT_FAILURE);
+ }
+ }
+ setabsolutes();
+ cutlotime = yeartot(cutloyear);
+ cuthitime = yeartot(cuthiyear);
}
(void) time(&now);
longest = 0;
for (i = optind; i < argc; ++i)
if (strlen(argv[i]) > longest)
longest = strlen(argv[i]);
- for (hibit = 1; (hibit << 1) != 0; hibit <<= 1)
- continue;
{
register int from;
register int to;
- for (i = 0; environ[i] != NULL; ++i)
+ for (i = 0; environ[i] != NULL; ++i)
continue;
fakeenv = (char **) malloc((size_t) ((i + 2) *
sizeof *fakeenv));
@@ -219,58 +296,129 @@ _("%s: usage is %s [ --version ] [ -v ] [ -c cutoff ] zonename ...\n"),
show(argv[i], now, FALSE);
continue;
}
- /*
- ** Get lowest value of t.
- */
- t = hibit;
- if (t > 0) /* time_t is unsigned */
- t = 0;
+ t = absolute_min_time;
show(argv[i], t, TRUE);
t += SECSPERHOUR * HOURSPERDAY;
show(argv[i], t, TRUE);
- tm = *localtime(&t);
- (void) strncpy(buf, abbr(&tm), (sizeof buf) - 1);
+ if (t < cutlotime)
+ t = cutlotime;
+ tmp = my_localtime(&t);
+ if (tmp != NULL) {
+ tm = *tmp;
+ (void) strncpy(buf, abbr(&tm), (sizeof buf) - 1);
+ }
for ( ; ; ) {
- if (cutoff != NULL && t >= cuttime)
+ if (t >= cuthitime)
break;
newt = t + SECSPERHOUR * 12;
- if (cutoff != NULL && newt >= cuttime)
+ if (newt >= cuthitime)
break;
if (newt <= t)
break;
- newtm = *localtime(&newt);
- if (delta(&newtm, &tm) != (newt - t) ||
+ newtmp = localtime(&newt);
+ if (newtmp != NULL)
+ newtm = *newtmp;
+ if ((tmp == NULL || newtmp == NULL) ? (tmp != newtmp) :
+ (delta(&newtm, &tm) != (newt - t) ||
newtm.tm_isdst != tm.tm_isdst ||
- strcmp(abbr(&newtm), buf) != 0) {
+ strcmp(abbr(&newtm), buf) != 0)) {
newt = hunt(argv[i], t, newt);
- newtm = *localtime(&newt);
- (void) strncpy(buf, abbr(&newtm),
- (sizeof buf) - 1);
+ newtmp = localtime(&newt);
+ if (newtmp != NULL) {
+ newtm = *newtmp;
+ (void) strncpy(buf,
+ abbr(&newtm),
+ (sizeof buf) - 1);
+ }
}
t = newt;
tm = newtm;
+ tmp = newtmp;
}
- /*
- ** Get highest value of t.
- */
- t = ~((time_t) 0);
- if (t < 0) /* time_t is signed */
- t &= ~hibit;
+ t = absolute_max_time;
t -= SECSPERHOUR * HOURSPERDAY;
show(argv[i], t, TRUE);
t += SECSPERHOUR * HOURSPERDAY;
show(argv[i], t, TRUE);
}
if (fflush(stdout) || ferror(stdout)) {
- (void) fprintf(stderr, "%s: ", argv[0]);
+ (void) fprintf(stderr, "%s: ", progname);
(void) perror(_("Error writing standard output"));
(void) exit(EXIT_FAILURE);
}
exit(EXIT_SUCCESS);
+ /* If exit fails to exit... */
+ return EXIT_FAILURE;
+}
+
+static void
+setabsolutes()
+{
+ if (0.5 == (time_t) 0.5) {
+ /*
+ ** time_t is floating.
+ */
+ if (sizeof (time_t) == sizeof (float)) {
+ absolute_min_time = (time_t) -FLT_MAX;
+ absolute_max_time = (time_t) FLT_MAX;
+ } else if (sizeof (time_t) == sizeof (double)) {
+ absolute_min_time = (time_t) -DBL_MAX;
+ absolute_max_time = (time_t) DBL_MAX;
+ } else {
+ (void) fprintf(stderr,
+_("%s: use of -v on system with floating time_t other than float or double\n"),
+ progname);
+ (void) exit(EXIT_FAILURE);
+ }
+ } else if (0 > (time_t) -1) {
+ /*
+ ** time_t is signed.
+ */
+ register time_t hibit;
- /* gcc -Wall pacifier */
- for ( ; ; )
- continue;
+ for (hibit = 1; (hibit * 2) != 0; hibit *= 2)
+ continue;
+ absolute_min_time = hibit;
+ absolute_max_time = -(hibit + 1);
+ } else {
+ /*
+ ** time_t is unsigned.
+ */
+ absolute_min_time = 0;
+ absolute_max_time = absolute_min_time - 1;
+ }
+}
+
+static time_t
+yeartot(y)
+const long y;
+{
+ register long myy;
+ register long seconds;
+ register time_t t;
+
+ myy = EPOCH_YEAR;
+ t = 0;
+ while (myy != y) {
+ if (myy < y) {
+ seconds = isleap(myy) ? SECSPERLYEAR : SECSPERNYEAR;
+ ++myy;
+ if (t > absolute_max_time - seconds) {
+ t = absolute_max_time;
+ break;
+ }
+ t += seconds;
+ } else {
+ --myy;
+ seconds = isleap(myy) ? SECSPERLYEAR : SECSPERNYEAR;
+ if (t < absolute_min_time + seconds) {
+ t = absolute_min_time;
+ break;
+ }
+ t -= seconds;
+ }
+ }
+ return t;
}
static time_t
@@ -279,25 +427,39 @@ char * name;
time_t lot;
time_t hit;
{
- time_t t;
- struct tm lotm;
- struct tm tm;
- static char loab[MAX_STRING_LENGTH];
-
- lotm = *localtime(&lot);
- (void) strncpy(loab, abbr(&lotm), (sizeof loab) - 1);
- while ((hit - lot) >= 2) {
- t = lot / 2 + hit / 2;
+ time_t t;
+ long diff;
+ struct tm lotm;
+ register struct tm * lotmp;
+ struct tm tm;
+ register struct tm * tmp;
+ char loab[MAX_STRING_LENGTH];
+
+ lotmp = my_localtime(&lot);
+ if (lotmp != NULL) {
+ lotm = *lotmp;
+ (void) strncpy(loab, abbr(&lotm), (sizeof loab) - 1);
+ }
+ for ( ; ; ) {
+ diff = (long) (hit - lot);
+ if (diff < 2)
+ break;
+ t = lot;
+ t += diff / 2;
if (t <= lot)
++t;
else if (t >= hit)
--t;
- tm = *localtime(&t);
- if (delta(&tm, &lotm) == (t - lot) &&
+ tmp = my_localtime(&t);
+ if (tmp != NULL)
+ tm = *tmp;
+ if ((lotmp == NULL || tmp == NULL) ? (lotmp == tmp) :
+ (delta(&tm, &lotm) == (t - lot) &&
tm.tm_isdst == lotm.tm_isdst &&
- strcmp(abbr(&tm), loab) == 0) {
+ strcmp(abbr(&tm), loab) == 0)) {
lot = t;
lotm = tm;
+ lotmp = tmp;
} else hit = t;
}
show(name, lot, TRUE);
@@ -314,14 +476,14 @@ delta(newp, oldp)
struct tm * newp;
struct tm * oldp;
{
- long result;
- int tmy;
+ register long result;
+ register int tmy;
if (newp->tm_year < oldp->tm_year)
return -delta(oldp, newp);
result = 0;
for (tmy = oldp->tm_year; tmy < newp->tm_year; ++tmy)
- result += DAYSPERNYEAR + isleap(tmy + (long) TM_YEAR_BASE);
+ result += DAYSPERNYEAR + isleap_sum(tmy, TM_YEAR_BASE);
result += newp->tm_yday - oldp->tm_yday;
result *= HOURSPERDAY;
result += newp->tm_hour - oldp->tm_hour;
@@ -338,22 +500,30 @@ char * zone;
time_t t;
int v;
{
- struct tm * tmp;
+ register struct tm * tmp;
(void) printf("%-*s ", (int) longest, zone);
if (v) {
- dumptime(gmtime(&t));
- (void) printf(" UTC = ");
+ tmp = gmtime(&t);
+ if (tmp == NULL) {
+ (void) printf(tformat(), t);
+ } else {
+ dumptime(tmp);
+ (void) printf(" UTC");
+ }
+ (void) printf(" = ");
}
- tmp = localtime(&t);
+ tmp = my_localtime(&t);
dumptime(tmp);
- if (*abbr(tmp) != '\0')
- (void) printf(" %s", abbr(tmp));
- if (v) {
- (void) printf(" isdst=%d", tmp->tm_isdst);
+ if (tmp != NULL) {
+ if (*abbr(tmp) != '\0')
+ (void) printf(" %s", abbr(tmp));
+ if (v) {
+ (void) printf(" isdst=%d", tmp->tm_isdst);
#ifdef TM_GMTOFF
- (void) printf(" gmtoff=%ld", tmp->TM_GMTOFF);
+ (void) printf(" gmtoff=%ld", tmp->TM_GMTOFF);
#endif /* defined TM_GMTOFF */
+ }
}
(void) printf("\n");
}
@@ -371,6 +541,33 @@ struct tm * tmp;
return (result == NULL) ? &nada : result;
}
+/*
+** The code below can fail on certain theoretical systems;
+** it works on all known real-world systems as of 2004-12-30.
+*/
+
+static const char *
+tformat()
+{
+ if (0.5 == (time_t) 0.5) { /* floating */
+ if (sizeof (time_t) > sizeof (double))
+ return "%Lg";
+ return "%g";
+ }
+ if (0 > (time_t) -1) { /* signed */
+ if (sizeof (time_t) > sizeof (long))
+ return "%lld";
+ if (sizeof (time_t) > sizeof (int))
+ return "%ld";
+ return "%d";
+ }
+ if (sizeof (time_t) > sizeof (unsigned long))
+ return "%llu";
+ if (sizeof (time_t) > sizeof (unsigned int))
+ return "%lu";
+ return "%u";
+}
+
static void
dumptime(timeptr)
register const struct tm * timeptr;
@@ -384,7 +581,13 @@ register const struct tm * timeptr;
};
register const char * wn;
register const char * mn;
+ register int lead;
+ register int trail;
+ if (timeptr == NULL) {
+ (void) printf("NULL");
+ return;
+ }
/*
** The packaged versions of localtime and gmtime never put out-of-range
** values in tm_wday or tm_mon, but since this code might be compiled
@@ -398,9 +601,23 @@ register const struct tm * timeptr;
(int) (sizeof mon_name / sizeof mon_name[0]))
mn = "???";
else mn = mon_name[timeptr->tm_mon];
- (void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d %ld",
+ (void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d ",
wn, mn,
timeptr->tm_mday, timeptr->tm_hour,
- timeptr->tm_min, timeptr->tm_sec,
- timeptr->tm_year + (long) TM_YEAR_BASE);
+ timeptr->tm_min, timeptr->tm_sec);
+#define DIVISOR 10
+ trail = timeptr->tm_year % DIVISOR + TM_YEAR_BASE % DIVISOR;
+ lead = timeptr->tm_year / DIVISOR + TM_YEAR_BASE / DIVISOR +
+ trail / DIVISOR;
+ trail %= DIVISOR;
+ if (trail < 0 && lead > 0) {
+ trail += DIVISOR;
+ --lead;
+ } else if (lead < 0 && trail > 0) {
+ trail -= DIVISOR;
+ ++lead;
+ }
+ if (lead == 0)
+ (void) printf("%d", trail);
+ else (void) printf("%d%d", lead, ((trail < 0) ? -trail : trail));
}
diff --git a/timezone/zic.c b/timezone/zic.c
index 9bb8662e55..d855475f9f 100644
--- a/timezone/zic.c
+++ b/timezone/zic.c
@@ -1,4 +1,10 @@
-static char elsieid[] = "@(#)zic.c 7.116";
+static char elsieid[] = "@(#)zic.c 7.118";
+
+/*
+** Regardless of the type of time_t, we do our work using this type.
+*/
+
+typedef int zic_t;
#include "private.h"
#include "locale.h"
@@ -50,7 +56,7 @@ struct rule {
const char * r_abbrvar; /* variable part of abbreviation */
int r_todo; /* a rule to do (used in outzone) */
- time_t r_temp; /* used in outzone */
+ zic_t r_temp; /* used in outzone */
};
/*
@@ -76,7 +82,7 @@ struct zone {
int z_nrules;
struct rule z_untilrule;
- time_t z_untiltime;
+ zic_t z_untiltime;
};
extern int getopt P((int argc, char * const argv[],
@@ -85,10 +91,10 @@ extern int link P((const char * fromname, const char * toname));
extern char * optarg;
extern int optind;
-static void addtt P((time_t starttime, int type));
+static void addtt P((zic_t starttime, int type));
static int addtype P((long gmtoff, const char * abbr, int isdst,
int ttisstd, int ttisgmt));
-static void leapadd P((time_t t, int positive, int rolling, int count));
+static void leapadd P((zic_t t, int positive, int rolling, int count));
static void adjleap P((void));
static void associate P((void));
static int ciequal P((const char * ap, const char * bp));
@@ -121,13 +127,13 @@ static long oadd P((long t1, long t2));
static void outzone P((const struct zone * zp, int ntzones));
static void puttzcode P((long code, FILE * fp));
static int rcomp P((const void * leftp, const void * rightp));
-static time_t rpytime P((const struct rule * rp, int wantedy));
+static zic_t rpytime P((const struct rule * rp, int wantedy));
static void rulesub P((struct rule * rp,
const char * loyearp, const char * hiyearp,
const char * typep, const char * monthp,
const char * dayp, const char * timep));
static void setboundaries P((void));
-static time_t tadd P((time_t t1, long t2));
+static zic_t tadd P((zic_t t1, long t2));
static void usage P((void));
static void writezone P((const char * name));
static int yearistype P((int year, const char * type));
@@ -141,10 +147,10 @@ static int errors;
static const char * filename;
static int leapcnt;
static int linenum;
-static time_t max_time;
+static zic_t max_time;
static int max_year;
static int max_year_representable;
-static time_t min_time;
+static zic_t min_time;
static int min_year;
static int min_year_representable;
static int noise;
@@ -334,7 +340,7 @@ static const int len_years[2] = {
};
static struct attype {
- time_t at;
+ zic_t at;
unsigned char type;
} attypes[TZ_MAX_TIMES];
static long gmtoffs[TZ_MAX_TYPES];
@@ -343,7 +349,7 @@ static unsigned char abbrinds[TZ_MAX_TYPES];
static char ttisstds[TZ_MAX_TYPES];
static char ttisgmts[TZ_MAX_TYPES];
static char chars[TZ_MAX_CHARS];
-static time_t trans[TZ_MAX_LEAPS];
+static zic_t trans[TZ_MAX_LEAPS];
static long corr[TZ_MAX_LEAPS];
static char roll[TZ_MAX_LEAPS];
@@ -629,7 +635,7 @@ const char * const tofile;
register char * symlinkcontents = NULL;
while ((s = strchr(s+1, '/')) != NULL)
symlinkcontents = ecatalloc(symlinkcontents, "../");
- symlinkcontents = ecatalloc(symlinkcontents, fromname);
+ symlinkcontents = ecatalloc(symlinkcontents, fromfile);
result = unlink(toname);
if (result != 0 && errno != ENOENT) {
@@ -676,25 +682,36 @@ warning(_("hard link failed, symbolic link used"));
*/
#define MAX_BITS_IN_FILE 32
-#define TIME_T_BITS_IN_FILE ((TYPE_BIT(time_t) < MAX_BITS_IN_FILE) ? TYPE_BIT(time_t) : MAX_BITS_IN_FILE)
+#define TIME_T_BITS_IN_FILE ((TYPE_BIT(zic_t) < MAX_BITS_IN_FILE) ? \
+ TYPE_BIT(zic_t) : MAX_BITS_IN_FILE)
static void
setboundaries P((void))
{
- if (TYPE_SIGNED(time_t)) {
- min_time = ~ (time_t) 0;
- min_time <<= TIME_T_BITS_IN_FILE - 1;
- max_time = ~ (time_t) 0 - min_time;
+ register int i;
+
+ if (TYPE_SIGNED(zic_t)) {
+ min_time = -1;
+ for (i = 0; i < TIME_T_BITS_IN_FILE - 1; ++i)
+ min_time *= 2;
+ max_time = -(min_time + 1);
if (sflag)
min_time = 0;
} else {
min_time = 0;
max_time = 2 - sflag;
- max_time <<= TIME_T_BITS_IN_FILE - 1;
+ for (i = 0; i < TIME_T_BITS_IN_FILE - 1; ++i)
+ max_time *= 2;
--max_time;
}
- min_year = TM_YEAR_BASE + gmtime(&min_time)->tm_year;
- max_year = TM_YEAR_BASE + gmtime(&max_time)->tm_year;
+ {
+ time_t t;
+
+ t = (time_t) min_time;
+ min_year = TM_YEAR_BASE + gmtime(&t)->tm_year;
+ t = (time_t) max_time;
+ max_year = TM_YEAR_BASE + gmtime(&t)->tm_year;
+ }
min_year_representable = min_year;
max_year_representable = max_year;
}
@@ -1120,7 +1137,7 @@ const int nfields;
register int i, j;
int year, month, day;
long dayoff, tod;
- time_t t;
+ zic_t t;
if (nfields != LEAP_FIELDS) {
error(_("wrong number of fields on Leap line"));
@@ -1164,7 +1181,7 @@ const int nfields;
return;
}
dayoff = oadd(dayoff, eitol(day - 1));
- if (dayoff < 0 && !TYPE_SIGNED(time_t)) {
+ if (dayoff < 0 && !TYPE_SIGNED(zic_t)) {
error(_("time before zero"));
return;
}
@@ -1176,7 +1193,7 @@ const int nfields;
error(_("time too large"));
return;
}
- t = (time_t) dayoff * SECSPERDAY;
+ t = (zic_t) dayoff * SECSPERDAY;
tod = gethms(fields[LP_TIME], _("invalid time of day"), FALSE);
cp = fields[LP_CORR];
{
@@ -1438,7 +1455,7 @@ const char * const name;
register int i, j;
static char * fullname;
static struct tzhead tzh;
- time_t ats[TZ_MAX_TIMES];
+ zic_t ats[TZ_MAX_TIMES];
unsigned char types[TZ_MAX_TIMES];
/*
@@ -1603,7 +1620,7 @@ const int zonecount;
register struct rule * rp;
register int i, j;
register int usestart, useuntil;
- register time_t starttime, untiltime;
+ register zic_t starttime, untiltime;
register long gmtoff;
register long stdoff;
register int year;
@@ -1672,7 +1689,7 @@ const int zonecount;
}
for ( ; ; ) {
register int k;
- register time_t jtime, ktime;
+ register zic_t jtime, ktime;
register long offset;
char buf[BUFSIZ];
@@ -1784,7 +1801,7 @@ error(_("can't determine time zone abbreviation to use just after until time"));
static void
addtt(starttime, type)
-const time_t starttime;
+const zic_t starttime;
int type;
{
if (starttime <= min_time ||
@@ -1868,7 +1885,7 @@ const int ttisgmt;
static void
leapadd(t, positive, rolling, count)
-const time_t t;
+const zic_t t;
const int positive;
const int rolling;
int count;
@@ -2056,12 +2073,12 @@ const long t2;
return t;
}
-static time_t
+static zic_t
tadd(t1, t2)
-const time_t t1;
+const zic_t t1;
const long t2;
{
- register time_t t;
+ register zic_t t;
if (t1 == max_time && t2 > 0)
return max_time;
@@ -2080,14 +2097,14 @@ const long t2;
** 1970, 00:00 LOCAL time - in that year that the rule refers to.
*/
-static time_t
+static zic_t
rpytime(rp, wantedy)
register const struct rule * const rp;
register const int wantedy;
{
register int y, m, i;
register long dayoff; /* with a nod to Margaret O. */
- register time_t t;
+ register zic_t t;
if (wantedy == INT_MIN)
return min_time;
@@ -2154,15 +2171,13 @@ register const int wantedy;
warning(_("rule goes past start/end of month--will not work with pre-2004 versions of zic"));
}
}
- if (dayoff < 0 && !TYPE_SIGNED(time_t))
+ if (dayoff < 0 && !TYPE_SIGNED(zic_t))
return min_time;
if (dayoff < min_time / SECSPERDAY)
return min_time;
if (dayoff > max_time / SECSPERDAY)
return max_time;
- t = (time_t) dayoff * SECSPERDAY;
- if (t > 0 && max_time - t < rp->r_tod)
- return max_time;
+ t = (zic_t) dayoff * SECSPERDAY;
return tadd(t, rp->r_tod);
}